xref: /openbmc/linux/net/ipv6/route.c (revision 9c7370a166b4e157137bfbfe2ad296d57147547c)
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
131da177e4SLinus Torvalds 
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		Reworked default router selection:
 *		- respect the outgoing interface;
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states);
 *		- always select the same router if it is (probably)
 *		  reachable; otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
261da177e4SLinus Torvalds 
27f3213831SJoe Perches #define pr_fmt(fmt) "IPv6: " fmt
28f3213831SJoe Perches 
294fc268d2SRandy Dunlap #include <linux/capability.h>
301da177e4SLinus Torvalds #include <linux/errno.h>
31bc3b2d7fSPaul Gortmaker #include <linux/export.h>
321da177e4SLinus Torvalds #include <linux/types.h>
331da177e4SLinus Torvalds #include <linux/times.h>
341da177e4SLinus Torvalds #include <linux/socket.h>
351da177e4SLinus Torvalds #include <linux/sockios.h>
361da177e4SLinus Torvalds #include <linux/net.h>
371da177e4SLinus Torvalds #include <linux/route.h>
381da177e4SLinus Torvalds #include <linux/netdevice.h>
391da177e4SLinus Torvalds #include <linux/in6.h>
407bc570c8SYOSHIFUJI Hideaki #include <linux/mroute6.h>
411da177e4SLinus Torvalds #include <linux/init.h>
421da177e4SLinus Torvalds #include <linux/if_arp.h>
431da177e4SLinus Torvalds #include <linux/proc_fs.h>
441da177e4SLinus Torvalds #include <linux/seq_file.h>
455b7c931dSDaniel Lezcano #include <linux/nsproxy.h>
465a0e3ad6STejun Heo #include <linux/slab.h>
47457c4cbcSEric W. Biederman #include <net/net_namespace.h>
481da177e4SLinus Torvalds #include <net/snmp.h>
491da177e4SLinus Torvalds #include <net/ipv6.h>
501da177e4SLinus Torvalds #include <net/ip6_fib.h>
511da177e4SLinus Torvalds #include <net/ip6_route.h>
521da177e4SLinus Torvalds #include <net/ndisc.h>
531da177e4SLinus Torvalds #include <net/addrconf.h>
541da177e4SLinus Torvalds #include <net/tcp.h>
551da177e4SLinus Torvalds #include <linux/rtnetlink.h>
561da177e4SLinus Torvalds #include <net/dst.h>
571da177e4SLinus Torvalds #include <net/xfrm.h>
588d71740cSTom Tucker #include <net/netevent.h>
5921713ebcSThomas Graf #include <net/netlink.h>
6051ebd318SNicolas Dichtel #include <net/nexthop.h>
611da177e4SLinus Torvalds 
621da177e4SLinus Torvalds #include <asm/uaccess.h>
631da177e4SLinus Torvalds 
641da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
651da177e4SLinus Torvalds #include <linux/sysctl.h>
661da177e4SLinus Torvalds #endif
671da177e4SLinus Torvalds 
/*
 * Result of the next-hop reachability check done by rt6_check_neigh().
 * Every failure code is negative; the single success code is positive.
 */
enum rt6_nud_state {
	/* Initial value of the result in rt6_check_neigh(). */
	RT6_NUD_FAIL_HARD	= -3,
	/* Neighbour entry exists but is in NUD_FAILED (router-pref builds). */
	RT6_NUD_FAIL_PROBE	= -2,
	/* No neighbour entry and CONFIG_IPV6_ROUTER_PREF is disabled. */
	RT6_NUD_FAIL_DO_RR	= -1,
	/* Next hop is usable (or the route needs no next-hop check). */
	RT6_NUD_SUCCEED		= 1
};
74afc154e9SHannes Frederic Sowa 
7583a09abdSMartin KaFai Lau static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
761da177e4SLinus Torvalds static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
770dbaee3bSDavid S. Miller static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
78ebb762f2SSteffen Klassert static unsigned int	 ip6_mtu(const struct dst_entry *dst);
791da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *);
801da177e4SLinus Torvalds static void		ip6_dst_destroy(struct dst_entry *);
811da177e4SLinus Torvalds static void		ip6_dst_ifdown(struct dst_entry *,
821da177e4SLinus Torvalds 				       struct net_device *dev, int how);
83569d3645SDaniel Lezcano static int		 ip6_dst_gc(struct dst_ops *ops);
841da177e4SLinus Torvalds 
851da177e4SLinus Torvalds static int		ip6_pkt_discard(struct sk_buff *skb);
86aad88724SEric Dumazet static int		ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
877150aedeSKamala R static int		ip6_pkt_prohibit(struct sk_buff *skb);
88aad88724SEric Dumazet static int		ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
891da177e4SLinus Torvalds static void		ip6_link_failure(struct sk_buff *skb);
906700c270SDavid S. Miller static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
916700c270SDavid S. Miller 					   struct sk_buff *skb, u32 mtu);
926700c270SDavid S. Miller static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
936700c270SDavid S. Miller 					struct sk_buff *skb);
944b32b5adSMartin KaFai Lau static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
9552bd4c0cSNicolas Dichtel static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
961da177e4SLinus Torvalds 
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Route-information helpers, only built with CONFIG_IPV6_ROUTE_INFO.
 * Both are keyed by (prefix/prefixlen, gateway address, interface index);
 * the "add" variant additionally takes a preference value.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
#endif
10670ceb4f5SYOSHIFUJI Hideaki 
1078d0b94afSMartin KaFai Lau struct uncached_list {
1088d0b94afSMartin KaFai Lau 	spinlock_t		lock;
1098d0b94afSMartin KaFai Lau 	struct list_head	head;
1108d0b94afSMartin KaFai Lau };
1118d0b94afSMartin KaFai Lau 
1128d0b94afSMartin KaFai Lau static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
1138d0b94afSMartin KaFai Lau 
1148d0b94afSMartin KaFai Lau static void rt6_uncached_list_add(struct rt6_info *rt)
1158d0b94afSMartin KaFai Lau {
1168d0b94afSMartin KaFai Lau 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
1178d0b94afSMartin KaFai Lau 
1188d0b94afSMartin KaFai Lau 	rt->dst.flags |= DST_NOCACHE;
1198d0b94afSMartin KaFai Lau 	rt->rt6i_uncached_list = ul;
1208d0b94afSMartin KaFai Lau 
1218d0b94afSMartin KaFai Lau 	spin_lock_bh(&ul->lock);
1228d0b94afSMartin KaFai Lau 	list_add_tail(&rt->rt6i_uncached, &ul->head);
1238d0b94afSMartin KaFai Lau 	spin_unlock_bh(&ul->lock);
1248d0b94afSMartin KaFai Lau }
1258d0b94afSMartin KaFai Lau 
1268d0b94afSMartin KaFai Lau static void rt6_uncached_list_del(struct rt6_info *rt)
1278d0b94afSMartin KaFai Lau {
1288d0b94afSMartin KaFai Lau 	if (!list_empty(&rt->rt6i_uncached)) {
1298d0b94afSMartin KaFai Lau 		struct uncached_list *ul = rt->rt6i_uncached_list;
1308d0b94afSMartin KaFai Lau 
1318d0b94afSMartin KaFai Lau 		spin_lock_bh(&ul->lock);
1328d0b94afSMartin KaFai Lau 		list_del(&rt->rt6i_uncached);
1338d0b94afSMartin KaFai Lau 		spin_unlock_bh(&ul->lock);
1348d0b94afSMartin KaFai Lau 	}
1358d0b94afSMartin KaFai Lau }
1368d0b94afSMartin KaFai Lau 
1378d0b94afSMartin KaFai Lau static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
1388d0b94afSMartin KaFai Lau {
1398d0b94afSMartin KaFai Lau 	struct net_device *loopback_dev = net->loopback_dev;
1408d0b94afSMartin KaFai Lau 	int cpu;
1418d0b94afSMartin KaFai Lau 
1428d0b94afSMartin KaFai Lau 	for_each_possible_cpu(cpu) {
1438d0b94afSMartin KaFai Lau 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
1448d0b94afSMartin KaFai Lau 		struct rt6_info *rt;
1458d0b94afSMartin KaFai Lau 
1468d0b94afSMartin KaFai Lau 		spin_lock_bh(&ul->lock);
1478d0b94afSMartin KaFai Lau 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
1488d0b94afSMartin KaFai Lau 			struct inet6_dev *rt_idev = rt->rt6i_idev;
1498d0b94afSMartin KaFai Lau 			struct net_device *rt_dev = rt->dst.dev;
1508d0b94afSMartin KaFai Lau 
1518d0b94afSMartin KaFai Lau 			if (rt_idev && (rt_idev->dev == dev || !dev) &&
1528d0b94afSMartin KaFai Lau 			    rt_idev->dev != loopback_dev) {
1538d0b94afSMartin KaFai Lau 				rt->rt6i_idev = in6_dev_get(loopback_dev);
1548d0b94afSMartin KaFai Lau 				in6_dev_put(rt_idev);
1558d0b94afSMartin KaFai Lau 			}
1568d0b94afSMartin KaFai Lau 
1578d0b94afSMartin KaFai Lau 			if (rt_dev && (rt_dev == dev || !dev) &&
1588d0b94afSMartin KaFai Lau 			    rt_dev != loopback_dev) {
1598d0b94afSMartin KaFai Lau 				rt->dst.dev = loopback_dev;
1608d0b94afSMartin KaFai Lau 				dev_hold(rt->dst.dev);
1618d0b94afSMartin KaFai Lau 				dev_put(rt_dev);
1628d0b94afSMartin KaFai Lau 			}
1638d0b94afSMartin KaFai Lau 		}
1648d0b94afSMartin KaFai Lau 		spin_unlock_bh(&ul->lock);
1658d0b94afSMartin KaFai Lau 	}
1668d0b94afSMartin KaFai Lau }
1678d0b94afSMartin KaFai Lau 
168d52d3997SMartin KaFai Lau static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
169d52d3997SMartin KaFai Lau {
170d52d3997SMartin KaFai Lau 	return dst_metrics_write_ptr(rt->dst.from);
171d52d3997SMartin KaFai Lau }
172d52d3997SMartin KaFai Lau 
17306582540SDavid S. Miller static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
17406582540SDavid S. Miller {
17506582540SDavid S. Miller 	struct rt6_info *rt = (struct rt6_info *)dst;
17606582540SDavid S. Miller 
177d52d3997SMartin KaFai Lau 	if (rt->rt6i_flags & RTF_PCPU)
178d52d3997SMartin KaFai Lau 		return rt6_pcpu_cow_metrics(rt);
179d52d3997SMartin KaFai Lau 	else if (rt->rt6i_flags & RTF_CACHE)
1804b32b5adSMartin KaFai Lau 		return NULL;
1814b32b5adSMartin KaFai Lau 	else
1823b471175SMartin KaFai Lau 		return dst_cow_metrics_generic(dst, old);
18306582540SDavid S. Miller }
18406582540SDavid S. Miller 
185f894cbf8SDavid S. Miller static inline const void *choose_neigh_daddr(struct rt6_info *rt,
186f894cbf8SDavid S. Miller 					     struct sk_buff *skb,
187f894cbf8SDavid S. Miller 					     const void *daddr)
18839232973SDavid S. Miller {
18939232973SDavid S. Miller 	struct in6_addr *p = &rt->rt6i_gateway;
19039232973SDavid S. Miller 
191a7563f34SDavid S. Miller 	if (!ipv6_addr_any(p))
19239232973SDavid S. Miller 		return (const void *) p;
193f894cbf8SDavid S. Miller 	else if (skb)
194f894cbf8SDavid S. Miller 		return &ipv6_hdr(skb)->daddr;
19539232973SDavid S. Miller 	return daddr;
19639232973SDavid S. Miller }
19739232973SDavid S. Miller 
198f894cbf8SDavid S. Miller static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
199f894cbf8SDavid S. Miller 					  struct sk_buff *skb,
200f894cbf8SDavid S. Miller 					  const void *daddr)
201d3aaeb38SDavid S. Miller {
20239232973SDavid S. Miller 	struct rt6_info *rt = (struct rt6_info *) dst;
20339232973SDavid S. Miller 	struct neighbour *n;
20439232973SDavid S. Miller 
205f894cbf8SDavid S. Miller 	daddr = choose_neigh_daddr(rt, skb, daddr);
2068e022ee6SYOSHIFUJI Hideaki / 吉藤英明 	n = __ipv6_neigh_lookup(dst->dev, daddr);
207f83c7790SDavid S. Miller 	if (n)
208f83c7790SDavid S. Miller 		return n;
209f83c7790SDavid S. Miller 	return neigh_create(&nd_tbl, daddr, dst->dev);
210f83c7790SDavid S. Miller }
211f83c7790SDavid S. Miller 
2129a7ec3a9SDaniel Lezcano static struct dst_ops ip6_dst_ops_template = {
2131da177e4SLinus Torvalds 	.family			=	AF_INET6,
2141da177e4SLinus Torvalds 	.gc			=	ip6_dst_gc,
2151da177e4SLinus Torvalds 	.gc_thresh		=	1024,
2161da177e4SLinus Torvalds 	.check			=	ip6_dst_check,
2170dbaee3bSDavid S. Miller 	.default_advmss		=	ip6_default_advmss,
218ebb762f2SSteffen Klassert 	.mtu			=	ip6_mtu,
21906582540SDavid S. Miller 	.cow_metrics		=	ipv6_cow_metrics,
2201da177e4SLinus Torvalds 	.destroy		=	ip6_dst_destroy,
2211da177e4SLinus Torvalds 	.ifdown			=	ip6_dst_ifdown,
2221da177e4SLinus Torvalds 	.negative_advice	=	ip6_negative_advice,
2231da177e4SLinus Torvalds 	.link_failure		=	ip6_link_failure,
2241da177e4SLinus Torvalds 	.update_pmtu		=	ip6_rt_update_pmtu,
2256e157b6aSDavid S. Miller 	.redirect		=	rt6_do_redirect,
2261ac06e03SHerbert Xu 	.local_out		=	__ip6_local_out,
227d3aaeb38SDavid S. Miller 	.neigh_lookup		=	ip6_neigh_lookup,
2281da177e4SLinus Torvalds };
2291da177e4SLinus Torvalds 
230ebb762f2SSteffen Klassert static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
231ec831ea7SRoland Dreier {
232618f9bc7SSteffen Klassert 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
233618f9bc7SSteffen Klassert 
234618f9bc7SSteffen Klassert 	return mtu ? : dst->dev->mtu;
235ec831ea7SRoland Dreier }
236ec831ea7SRoland Dreier 
2376700c270SDavid S. Miller static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
2386700c270SDavid S. Miller 					 struct sk_buff *skb, u32 mtu)
23914e50e57SDavid S. Miller {
24014e50e57SDavid S. Miller }
24114e50e57SDavid S. Miller 
/* dst_ops->redirect handler for blackhole routes: deliberately a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* Redirects are ignored for blackhole routes. */
}
246b587ee3bSDavid S. Miller 
2470972ddb2SHeld Bernhard static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
2480972ddb2SHeld Bernhard 					 unsigned long old)
2490972ddb2SHeld Bernhard {
2500972ddb2SHeld Bernhard 	return NULL;
2510972ddb2SHeld Bernhard }
2520972ddb2SHeld Bernhard 
25314e50e57SDavid S. Miller static struct dst_ops ip6_dst_blackhole_ops = {
25414e50e57SDavid S. Miller 	.family			=	AF_INET6,
25514e50e57SDavid S. Miller 	.destroy		=	ip6_dst_destroy,
25614e50e57SDavid S. Miller 	.check			=	ip6_dst_check,
257ebb762f2SSteffen Klassert 	.mtu			=	ip6_blackhole_mtu,
258214f45c9SEric Dumazet 	.default_advmss		=	ip6_default_advmss,
25914e50e57SDavid S. Miller 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
260b587ee3bSDavid S. Miller 	.redirect		=	ip6_rt_blackhole_redirect,
2610972ddb2SHeld Bernhard 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
262d3aaeb38SDavid S. Miller 	.neigh_lookup		=	ip6_neigh_lookup,
26314e50e57SDavid S. Miller };
26414e50e57SDavid S. Miller 
26562fa8a84SDavid S. Miller static const u32 ip6_template_metrics[RTAX_MAX] = {
26614edd87dSLi RongQing 	[RTAX_HOPLIMIT - 1] = 0,
26762fa8a84SDavid S. Miller };
26862fa8a84SDavid S. Miller 
269fb0af4c7SEric Dumazet static const struct rt6_info ip6_null_entry_template = {
2701da177e4SLinus Torvalds 	.dst = {
2711da177e4SLinus Torvalds 		.__refcnt	= ATOMIC_INIT(1),
2721da177e4SLinus Torvalds 		.__use		= 1,
2732c20cbd7SNicolas Dichtel 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
2741da177e4SLinus Torvalds 		.error		= -ENETUNREACH,
2751da177e4SLinus Torvalds 		.input		= ip6_pkt_discard,
2761da177e4SLinus Torvalds 		.output		= ip6_pkt_discard_out,
2771da177e4SLinus Torvalds 	},
2781da177e4SLinus Torvalds 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
2794f724279SJean-Mickael Guerin 	.rt6i_protocol  = RTPROT_KERNEL,
2801da177e4SLinus Torvalds 	.rt6i_metric	= ~(u32) 0,
2811da177e4SLinus Torvalds 	.rt6i_ref	= ATOMIC_INIT(1),
2821da177e4SLinus Torvalds };
2831da177e4SLinus Torvalds 
284101367c2SThomas Graf #ifdef CONFIG_IPV6_MULTIPLE_TABLES
285101367c2SThomas Graf 
286fb0af4c7SEric Dumazet static const struct rt6_info ip6_prohibit_entry_template = {
287101367c2SThomas Graf 	.dst = {
288101367c2SThomas Graf 		.__refcnt	= ATOMIC_INIT(1),
289101367c2SThomas Graf 		.__use		= 1,
2902c20cbd7SNicolas Dichtel 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
291101367c2SThomas Graf 		.error		= -EACCES,
2929ce8ade0SThomas Graf 		.input		= ip6_pkt_prohibit,
2939ce8ade0SThomas Graf 		.output		= ip6_pkt_prohibit_out,
294101367c2SThomas Graf 	},
295101367c2SThomas Graf 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
2964f724279SJean-Mickael Guerin 	.rt6i_protocol  = RTPROT_KERNEL,
297101367c2SThomas Graf 	.rt6i_metric	= ~(u32) 0,
298101367c2SThomas Graf 	.rt6i_ref	= ATOMIC_INIT(1),
299101367c2SThomas Graf };
300101367c2SThomas Graf 
301fb0af4c7SEric Dumazet static const struct rt6_info ip6_blk_hole_entry_template = {
302101367c2SThomas Graf 	.dst = {
303101367c2SThomas Graf 		.__refcnt	= ATOMIC_INIT(1),
304101367c2SThomas Graf 		.__use		= 1,
3052c20cbd7SNicolas Dichtel 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
306101367c2SThomas Graf 		.error		= -EINVAL,
307352e512cSHerbert Xu 		.input		= dst_discard,
308aad88724SEric Dumazet 		.output		= dst_discard_sk,
309101367c2SThomas Graf 	},
310101367c2SThomas Graf 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
3114f724279SJean-Mickael Guerin 	.rt6i_protocol  = RTPROT_KERNEL,
312101367c2SThomas Graf 	.rt6i_metric	= ~(u32) 0,
313101367c2SThomas Graf 	.rt6i_ref	= ATOMIC_INIT(1),
314101367c2SThomas Graf };
315101367c2SThomas Graf 
316101367c2SThomas Graf #endif
317101367c2SThomas Graf 
3181da177e4SLinus Torvalds /* allocate dst with ip6_dst_ops */
319d52d3997SMartin KaFai Lau static struct rt6_info *__ip6_dst_alloc(struct net *net,
320957c665fSDavid S. Miller 					struct net_device *dev,
321ad706862SMartin KaFai Lau 					int flags)
3221da177e4SLinus Torvalds {
32397bab73fSDavid S. Miller 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
3246f3118b5SNicolas Dichtel 					0, DST_OBSOLETE_FORCE_CHK, flags);
325cf911662SDavid S. Miller 
32697bab73fSDavid S. Miller 	if (rt) {
3278104891bSSteffen Klassert 		struct dst_entry *dst = &rt->dst;
3288104891bSSteffen Klassert 
3298104891bSSteffen Klassert 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
33051ebd318SNicolas Dichtel 		INIT_LIST_HEAD(&rt->rt6i_siblings);
3318d0b94afSMartin KaFai Lau 		INIT_LIST_HEAD(&rt->rt6i_uncached);
33297bab73fSDavid S. Miller 	}
333cf911662SDavid S. Miller 	return rt;
3341da177e4SLinus Torvalds }
3351da177e4SLinus Torvalds 
336d52d3997SMartin KaFai Lau static struct rt6_info *ip6_dst_alloc(struct net *net,
337d52d3997SMartin KaFai Lau 				      struct net_device *dev,
338ad706862SMartin KaFai Lau 				      int flags)
339d52d3997SMartin KaFai Lau {
340ad706862SMartin KaFai Lau 	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
341d52d3997SMartin KaFai Lau 
342d52d3997SMartin KaFai Lau 	if (rt) {
343d52d3997SMartin KaFai Lau 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
344d52d3997SMartin KaFai Lau 		if (rt->rt6i_pcpu) {
345d52d3997SMartin KaFai Lau 			int cpu;
346d52d3997SMartin KaFai Lau 
347d52d3997SMartin KaFai Lau 			for_each_possible_cpu(cpu) {
348d52d3997SMartin KaFai Lau 				struct rt6_info **p;
349d52d3997SMartin KaFai Lau 
350d52d3997SMartin KaFai Lau 				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
351d52d3997SMartin KaFai Lau 				/* no one shares rt */
352d52d3997SMartin KaFai Lau 				*p =  NULL;
353d52d3997SMartin KaFai Lau 			}
354d52d3997SMartin KaFai Lau 		} else {
355d52d3997SMartin KaFai Lau 			dst_destroy((struct dst_entry *)rt);
356d52d3997SMartin KaFai Lau 			return NULL;
357d52d3997SMartin KaFai Lau 		}
358d52d3997SMartin KaFai Lau 	}
359d52d3997SMartin KaFai Lau 
360d52d3997SMartin KaFai Lau 	return rt;
361d52d3997SMartin KaFai Lau }
362d52d3997SMartin KaFai Lau 
3631da177e4SLinus Torvalds static void ip6_dst_destroy(struct dst_entry *dst)
3641da177e4SLinus Torvalds {
3651da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
366ecd98837SYOSHIFUJI Hideaki / 吉藤英明 	struct dst_entry *from = dst->from;
3678d0b94afSMartin KaFai Lau 	struct inet6_dev *idev;
3681da177e4SLinus Torvalds 
3698e2ec639SYan, Zheng 	dst_destroy_metrics_generic(dst);
370d52d3997SMartin KaFai Lau 	free_percpu(rt->rt6i_pcpu);
3718d0b94afSMartin KaFai Lau 	rt6_uncached_list_del(rt);
3728d0b94afSMartin KaFai Lau 
3738d0b94afSMartin KaFai Lau 	idev = rt->rt6i_idev;
37438308473SDavid S. Miller 	if (idev) {
3751da177e4SLinus Torvalds 		rt->rt6i_idev = NULL;
3761da177e4SLinus Torvalds 		in6_dev_put(idev);
3771da177e4SLinus Torvalds 	}
3781716a961SGao feng 
379ecd98837SYOSHIFUJI Hideaki / 吉藤英明 	dst->from = NULL;
380ecd98837SYOSHIFUJI Hideaki / 吉藤英明 	dst_release(from);
381b3419363SDavid S. Miller }
382b3419363SDavid S. Miller 
3831da177e4SLinus Torvalds static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
3841da177e4SLinus Torvalds 			   int how)
3851da177e4SLinus Torvalds {
3861da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
3871da177e4SLinus Torvalds 	struct inet6_dev *idev = rt->rt6i_idev;
3885a3e55d6SDenis V. Lunev 	struct net_device *loopback_dev =
389c346dca1SYOSHIFUJI Hideaki 		dev_net(dev)->loopback_dev;
3901da177e4SLinus Torvalds 
39197cac082SDavid S. Miller 	if (dev != loopback_dev) {
39297cac082SDavid S. Miller 		if (idev && idev->dev == dev) {
3935a3e55d6SDenis V. Lunev 			struct inet6_dev *loopback_idev =
3945a3e55d6SDenis V. Lunev 				in6_dev_get(loopback_dev);
39538308473SDavid S. Miller 			if (loopback_idev) {
3961da177e4SLinus Torvalds 				rt->rt6i_idev = loopback_idev;
3971da177e4SLinus Torvalds 				in6_dev_put(idev);
3981da177e4SLinus Torvalds 			}
3991da177e4SLinus Torvalds 		}
40097cac082SDavid S. Miller 	}
4011da177e4SLinus Torvalds }
4021da177e4SLinus Torvalds 
403a50feda5SEric Dumazet static bool rt6_check_expired(const struct rt6_info *rt)
4041da177e4SLinus Torvalds {
4051716a961SGao feng 	if (rt->rt6i_flags & RTF_EXPIRES) {
4061716a961SGao feng 		if (time_after(jiffies, rt->dst.expires))
407a50feda5SEric Dumazet 			return true;
4081716a961SGao feng 	} else if (rt->dst.from) {
4093fd91fb3SLi RongQing 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
4101716a961SGao feng 	}
411a50feda5SEric Dumazet 	return false;
4121da177e4SLinus Torvalds }
4131da177e4SLinus Torvalds 
41451ebd318SNicolas Dichtel /* Multipath route selection:
41551ebd318SNicolas Dichtel  *   Hash based function using packet header and flowlabel.
41651ebd318SNicolas Dichtel  * Adapted from fib_info_hashfn()
41751ebd318SNicolas Dichtel  */
41851ebd318SNicolas Dichtel static int rt6_info_hash_nhsfn(unsigned int candidate_count,
41951ebd318SNicolas Dichtel 			       const struct flowi6 *fl6)
42051ebd318SNicolas Dichtel {
42151ebd318SNicolas Dichtel 	unsigned int val = fl6->flowi6_proto;
42251ebd318SNicolas Dichtel 
423c08977bbSYOSHIFUJI Hideaki / 吉藤英明 	val ^= ipv6_addr_hash(&fl6->daddr);
424c08977bbSYOSHIFUJI Hideaki / 吉藤英明 	val ^= ipv6_addr_hash(&fl6->saddr);
42551ebd318SNicolas Dichtel 
42651ebd318SNicolas Dichtel 	/* Work only if this not encapsulated */
42751ebd318SNicolas Dichtel 	switch (fl6->flowi6_proto) {
42851ebd318SNicolas Dichtel 	case IPPROTO_UDP:
42951ebd318SNicolas Dichtel 	case IPPROTO_TCP:
43051ebd318SNicolas Dichtel 	case IPPROTO_SCTP:
431b3ce5ae1SNicolas Dichtel 		val ^= (__force u16)fl6->fl6_sport;
432b3ce5ae1SNicolas Dichtel 		val ^= (__force u16)fl6->fl6_dport;
43351ebd318SNicolas Dichtel 		break;
43451ebd318SNicolas Dichtel 
43551ebd318SNicolas Dichtel 	case IPPROTO_ICMPV6:
436b3ce5ae1SNicolas Dichtel 		val ^= (__force u16)fl6->fl6_icmp_type;
437b3ce5ae1SNicolas Dichtel 		val ^= (__force u16)fl6->fl6_icmp_code;
43851ebd318SNicolas Dichtel 		break;
43951ebd318SNicolas Dichtel 	}
44051ebd318SNicolas Dichtel 	/* RFC6438 recommands to use flowlabel */
441b3ce5ae1SNicolas Dichtel 	val ^= (__force u32)fl6->flowlabel;
44251ebd318SNicolas Dichtel 
44351ebd318SNicolas Dichtel 	/* Perhaps, we need to tune, this function? */
44451ebd318SNicolas Dichtel 	val = val ^ (val >> 7) ^ (val >> 12);
44551ebd318SNicolas Dichtel 	return val % candidate_count;
44651ebd318SNicolas Dichtel }
44751ebd318SNicolas Dichtel 
44851ebd318SNicolas Dichtel static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
44952bd4c0cSNicolas Dichtel 					     struct flowi6 *fl6, int oif,
45052bd4c0cSNicolas Dichtel 					     int strict)
45151ebd318SNicolas Dichtel {
45251ebd318SNicolas Dichtel 	struct rt6_info *sibling, *next_sibling;
45351ebd318SNicolas Dichtel 	int route_choosen;
45451ebd318SNicolas Dichtel 
45551ebd318SNicolas Dichtel 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
45651ebd318SNicolas Dichtel 	/* Don't change the route, if route_choosen == 0
45751ebd318SNicolas Dichtel 	 * (siblings does not include ourself)
45851ebd318SNicolas Dichtel 	 */
45951ebd318SNicolas Dichtel 	if (route_choosen)
46051ebd318SNicolas Dichtel 		list_for_each_entry_safe(sibling, next_sibling,
46151ebd318SNicolas Dichtel 				&match->rt6i_siblings, rt6i_siblings) {
46251ebd318SNicolas Dichtel 			route_choosen--;
46351ebd318SNicolas Dichtel 			if (route_choosen == 0) {
46452bd4c0cSNicolas Dichtel 				if (rt6_score_route(sibling, oif, strict) < 0)
46552bd4c0cSNicolas Dichtel 					break;
46651ebd318SNicolas Dichtel 				match = sibling;
46751ebd318SNicolas Dichtel 				break;
46851ebd318SNicolas Dichtel 			}
46951ebd318SNicolas Dichtel 		}
47051ebd318SNicolas Dichtel 	return match;
47151ebd318SNicolas Dichtel }
47251ebd318SNicolas Dichtel 
/*
 *	Route lookup. Any table->tb6_lock is implied.
 */
4761da177e4SLinus Torvalds 
4778ed67789SDaniel Lezcano static inline struct rt6_info *rt6_device_match(struct net *net,
4788ed67789SDaniel Lezcano 						    struct rt6_info *rt,
479b71d1d42SEric Dumazet 						    const struct in6_addr *saddr,
4801da177e4SLinus Torvalds 						    int oif,
481d420895eSYOSHIFUJI Hideaki 						    int flags)
4821da177e4SLinus Torvalds {
4831da177e4SLinus Torvalds 	struct rt6_info *local = NULL;
4841da177e4SLinus Torvalds 	struct rt6_info *sprt;
4851da177e4SLinus Torvalds 
486dd3abc4eSYOSHIFUJI Hideaki 	if (!oif && ipv6_addr_any(saddr))
487dd3abc4eSYOSHIFUJI Hideaki 		goto out;
488dd3abc4eSYOSHIFUJI Hideaki 
489d8d1f30bSChangli Gao 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
490d1918542SDavid S. Miller 		struct net_device *dev = sprt->dst.dev;
491dd3abc4eSYOSHIFUJI Hideaki 
492dd3abc4eSYOSHIFUJI Hideaki 		if (oif) {
4931da177e4SLinus Torvalds 			if (dev->ifindex == oif)
4941da177e4SLinus Torvalds 				return sprt;
4951da177e4SLinus Torvalds 			if (dev->flags & IFF_LOOPBACK) {
49638308473SDavid S. Miller 				if (!sprt->rt6i_idev ||
4971da177e4SLinus Torvalds 				    sprt->rt6i_idev->dev->ifindex != oif) {
498d420895eSYOSHIFUJI Hideaki 					if (flags & RT6_LOOKUP_F_IFACE && oif)
4991da177e4SLinus Torvalds 						continue;
5001da177e4SLinus Torvalds 					if (local && (!oif ||
5011da177e4SLinus Torvalds 						      local->rt6i_idev->dev->ifindex == oif))
5021da177e4SLinus Torvalds 						continue;
5031da177e4SLinus Torvalds 				}
5041da177e4SLinus Torvalds 				local = sprt;
5051da177e4SLinus Torvalds 			}
506dd3abc4eSYOSHIFUJI Hideaki 		} else {
507dd3abc4eSYOSHIFUJI Hideaki 			if (ipv6_chk_addr(net, saddr, dev,
508dd3abc4eSYOSHIFUJI Hideaki 					  flags & RT6_LOOKUP_F_IFACE))
509dd3abc4eSYOSHIFUJI Hideaki 				return sprt;
510dd3abc4eSYOSHIFUJI Hideaki 		}
5111da177e4SLinus Torvalds 	}
5121da177e4SLinus Torvalds 
513dd3abc4eSYOSHIFUJI Hideaki 	if (oif) {
5141da177e4SLinus Torvalds 		if (local)
5151da177e4SLinus Torvalds 			return local;
5161da177e4SLinus Torvalds 
517d420895eSYOSHIFUJI Hideaki 		if (flags & RT6_LOOKUP_F_IFACE)
5188ed67789SDaniel Lezcano 			return net->ipv6.ip6_null_entry;
5191da177e4SLinus Torvalds 	}
520dd3abc4eSYOSHIFUJI Hideaki out:
5211da177e4SLinus Torvalds 	return rt;
5221da177e4SLinus Torvalds }
5231da177e4SLinus Torvalds 
52427097255SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
525c2f17e82SHannes Frederic Sowa struct __rt6_probe_work {
526c2f17e82SHannes Frederic Sowa 	struct work_struct work;
527c2f17e82SHannes Frederic Sowa 	struct in6_addr target;
528c2f17e82SHannes Frederic Sowa 	struct net_device *dev;
529c2f17e82SHannes Frederic Sowa };
530c2f17e82SHannes Frederic Sowa 
531c2f17e82SHannes Frederic Sowa static void rt6_probe_deferred(struct work_struct *w)
532c2f17e82SHannes Frederic Sowa {
533c2f17e82SHannes Frederic Sowa 	struct in6_addr mcaddr;
534c2f17e82SHannes Frederic Sowa 	struct __rt6_probe_work *work =
535c2f17e82SHannes Frederic Sowa 		container_of(w, struct __rt6_probe_work, work);
536c2f17e82SHannes Frederic Sowa 
537c2f17e82SHannes Frederic Sowa 	addrconf_addr_solict_mult(&work->target, &mcaddr);
538c2f17e82SHannes Frederic Sowa 	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
539c2f17e82SHannes Frederic Sowa 	dev_put(work->dev);
540662f5533SMichael Büsch 	kfree(work);
541c2f17e82SHannes Frederic Sowa }
542c2f17e82SHannes Frederic Sowa 
54327097255SYOSHIFUJI Hideaki static void rt6_probe(struct rt6_info *rt)
54427097255SYOSHIFUJI Hideaki {
545f2c31e32SEric Dumazet 	struct neighbour *neigh;
54627097255SYOSHIFUJI Hideaki 	/*
54727097255SYOSHIFUJI Hideaki 	 * Okay, this does not seem to be appropriate
54827097255SYOSHIFUJI Hideaki 	 * for now, however, we need to check if it
54927097255SYOSHIFUJI Hideaki 	 * is really so; aka Router Reachability Probing.
55027097255SYOSHIFUJI Hideaki 	 *
55127097255SYOSHIFUJI Hideaki 	 * Router Reachability Probe MUST be rate-limited
55227097255SYOSHIFUJI Hideaki 	 * to no more than one per minute.
55327097255SYOSHIFUJI Hideaki 	 */
5542152caeaSYOSHIFUJI Hideaki / 吉藤英明 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
555fdd6681dSAmerigo Wang 		return;
5562152caeaSYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_lock_bh();
5572152caeaSYOSHIFUJI Hideaki / 吉藤英明 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
5582152caeaSYOSHIFUJI Hideaki / 吉藤英明 	if (neigh) {
5592152caeaSYOSHIFUJI Hideaki / 吉藤英明 		write_lock(&neigh->lock);
5602152caeaSYOSHIFUJI Hideaki / 吉藤英明 		if (neigh->nud_state & NUD_VALID)
5612152caeaSYOSHIFUJI Hideaki / 吉藤英明 			goto out;
5627ff74a59SYOSHIFUJI Hideaki / 吉藤英明 	}
5632152caeaSYOSHIFUJI Hideaki / 吉藤英明 
5642152caeaSYOSHIFUJI Hideaki / 吉藤英明 	if (!neigh ||
56552e16356SYOSHIFUJI Hideaki 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
566c2f17e82SHannes Frederic Sowa 		struct __rt6_probe_work *work;
56727097255SYOSHIFUJI Hideaki 
568c2f17e82SHannes Frederic Sowa 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
569c2f17e82SHannes Frederic Sowa 
570c2f17e82SHannes Frederic Sowa 		if (neigh && work)
5717e980569SJiri Benc 			__neigh_set_probe_once(neigh);
5722152caeaSYOSHIFUJI Hideaki / 吉藤英明 
573c2f17e82SHannes Frederic Sowa 		if (neigh)
574c2f17e82SHannes Frederic Sowa 			write_unlock(&neigh->lock);
575c2f17e82SHannes Frederic Sowa 
576c2f17e82SHannes Frederic Sowa 		if (work) {
577c2f17e82SHannes Frederic Sowa 			INIT_WORK(&work->work, rt6_probe_deferred);
578c2f17e82SHannes Frederic Sowa 			work->target = rt->rt6i_gateway;
579c2f17e82SHannes Frederic Sowa 			dev_hold(rt->dst.dev);
580c2f17e82SHannes Frederic Sowa 			work->dev = rt->dst.dev;
581c2f17e82SHannes Frederic Sowa 			schedule_work(&work->work);
582c2f17e82SHannes Frederic Sowa 		}
583f2c31e32SEric Dumazet 	} else {
5842152caeaSYOSHIFUJI Hideaki / 吉藤英明 out:
5852152caeaSYOSHIFUJI Hideaki / 吉藤英明 		write_unlock(&neigh->lock);
58627097255SYOSHIFUJI Hideaki 	}
5872152caeaSYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_unlock_bh();
588f2c31e32SEric Dumazet }
58927097255SYOSHIFUJI Hideaki #else
/* Without CONFIG_IPV6_ROUTER_PREF, router probing compiles to nothing. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
59327097255SYOSHIFUJI Hideaki #endif
59427097255SYOSHIFUJI Hideaki 
/*
 * Default Router Selection (RFC 2461 6.3.6; RFC 2461 has since been
 * obsoleted by RFC 4861, which keeps this section number)
 */
598b6f99a21SDave Jones static inline int rt6_check_dev(struct rt6_info *rt, int oif)
5991da177e4SLinus Torvalds {
600d1918542SDavid S. Miller 	struct net_device *dev = rt->dst.dev;
601161980f4SDavid S. Miller 	if (!oif || dev->ifindex == oif)
602554cfb7eSYOSHIFUJI Hideaki 		return 2;
603161980f4SDavid S. Miller 	if ((dev->flags & IFF_LOOPBACK) &&
604161980f4SDavid S. Miller 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
605161980f4SDavid S. Miller 		return 1;
606554cfb7eSYOSHIFUJI Hideaki 	return 0;
6071da177e4SLinus Torvalds }
6081da177e4SLinus Torvalds 
609afc154e9SHannes Frederic Sowa static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
6101da177e4SLinus Torvalds {
611f2c31e32SEric Dumazet 	struct neighbour *neigh;
612afc154e9SHannes Frederic Sowa 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
613f2c31e32SEric Dumazet 
6144d0c5911SYOSHIFUJI Hideaki 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
6154d0c5911SYOSHIFUJI Hideaki 	    !(rt->rt6i_flags & RTF_GATEWAY))
616afc154e9SHannes Frederic Sowa 		return RT6_NUD_SUCCEED;
617145a3621SYOSHIFUJI Hideaki / 吉藤英明 
618145a3621SYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_lock_bh();
619145a3621SYOSHIFUJI Hideaki / 吉藤英明 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
620145a3621SYOSHIFUJI Hideaki / 吉藤英明 	if (neigh) {
621145a3621SYOSHIFUJI Hideaki / 吉藤英明 		read_lock(&neigh->lock);
622554cfb7eSYOSHIFUJI Hideaki 		if (neigh->nud_state & NUD_VALID)
623afc154e9SHannes Frederic Sowa 			ret = RT6_NUD_SUCCEED;
624398bcbebSYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
625a5a81f0bSPaul Marks 		else if (!(neigh->nud_state & NUD_FAILED))
626afc154e9SHannes Frederic Sowa 			ret = RT6_NUD_SUCCEED;
6277e980569SJiri Benc 		else
6287e980569SJiri Benc 			ret = RT6_NUD_FAIL_PROBE;
629398bcbebSYOSHIFUJI Hideaki #endif
630145a3621SYOSHIFUJI Hideaki / 吉藤英明 		read_unlock(&neigh->lock);
631afc154e9SHannes Frederic Sowa 	} else {
632afc154e9SHannes Frederic Sowa 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
6337e980569SJiri Benc 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
634a5a81f0bSPaul Marks 	}
635145a3621SYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_unlock_bh();
636145a3621SYOSHIFUJI Hideaki / 吉藤英明 
637a5a81f0bSPaul Marks 	return ret;
6381da177e4SLinus Torvalds }
6391da177e4SLinus Torvalds 
640554cfb7eSYOSHIFUJI Hideaki static int rt6_score_route(struct rt6_info *rt, int oif,
641554cfb7eSYOSHIFUJI Hideaki 			   int strict)
642554cfb7eSYOSHIFUJI Hideaki {
643a5a81f0bSPaul Marks 	int m;
6444d0c5911SYOSHIFUJI Hideaki 
6454d0c5911SYOSHIFUJI Hideaki 	m = rt6_check_dev(rt, oif);
64677d16f45SYOSHIFUJI Hideaki 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
647afc154e9SHannes Frederic Sowa 		return RT6_NUD_FAIL_HARD;
648ebacaaa0SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
649ebacaaa0SYOSHIFUJI Hideaki 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
650ebacaaa0SYOSHIFUJI Hideaki #endif
651afc154e9SHannes Frederic Sowa 	if (strict & RT6_LOOKUP_F_REACHABLE) {
652afc154e9SHannes Frederic Sowa 		int n = rt6_check_neigh(rt);
653afc154e9SHannes Frederic Sowa 		if (n < 0)
654afc154e9SHannes Frederic Sowa 			return n;
655afc154e9SHannes Frederic Sowa 	}
656554cfb7eSYOSHIFUJI Hideaki 	return m;
657554cfb7eSYOSHIFUJI Hideaki }
658554cfb7eSYOSHIFUJI Hideaki 
659f11e6659SDavid S. Miller static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
660afc154e9SHannes Frederic Sowa 				   int *mpri, struct rt6_info *match,
661afc154e9SHannes Frederic Sowa 				   bool *do_rr)
662554cfb7eSYOSHIFUJI Hideaki {
663554cfb7eSYOSHIFUJI Hideaki 	int m;
664afc154e9SHannes Frederic Sowa 	bool match_do_rr = false;
665554cfb7eSYOSHIFUJI Hideaki 
666554cfb7eSYOSHIFUJI Hideaki 	if (rt6_check_expired(rt))
667f11e6659SDavid S. Miller 		goto out;
668554cfb7eSYOSHIFUJI Hideaki 
669554cfb7eSYOSHIFUJI Hideaki 	m = rt6_score_route(rt, oif, strict);
6707e980569SJiri Benc 	if (m == RT6_NUD_FAIL_DO_RR) {
671afc154e9SHannes Frederic Sowa 		match_do_rr = true;
672afc154e9SHannes Frederic Sowa 		m = 0; /* lowest valid score */
6737e980569SJiri Benc 	} else if (m == RT6_NUD_FAIL_HARD) {
674f11e6659SDavid S. Miller 		goto out;
6751da177e4SLinus Torvalds 	}
676f11e6659SDavid S. Miller 
677afc154e9SHannes Frederic Sowa 	if (strict & RT6_LOOKUP_F_REACHABLE)
678afc154e9SHannes Frederic Sowa 		rt6_probe(rt);
679afc154e9SHannes Frederic Sowa 
6807e980569SJiri Benc 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
681afc154e9SHannes Frederic Sowa 	if (m > *mpri) {
682afc154e9SHannes Frederic Sowa 		*do_rr = match_do_rr;
683afc154e9SHannes Frederic Sowa 		*mpri = m;
684afc154e9SHannes Frederic Sowa 		match = rt;
685afc154e9SHannes Frederic Sowa 	}
686f11e6659SDavid S. Miller out:
687f11e6659SDavid S. Miller 	return match;
6881da177e4SLinus Torvalds }
6891da177e4SLinus Torvalds 
690f11e6659SDavid S. Miller static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
691f11e6659SDavid S. Miller 				     struct rt6_info *rr_head,
692afc154e9SHannes Frederic Sowa 				     u32 metric, int oif, int strict,
693afc154e9SHannes Frederic Sowa 				     bool *do_rr)
694f11e6659SDavid S. Miller {
6959fbdcfafSSteffen Klassert 	struct rt6_info *rt, *match, *cont;
696f11e6659SDavid S. Miller 	int mpri = -1;
697f11e6659SDavid S. Miller 
698f11e6659SDavid S. Miller 	match = NULL;
6999fbdcfafSSteffen Klassert 	cont = NULL;
7009fbdcfafSSteffen Klassert 	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
7019fbdcfafSSteffen Klassert 		if (rt->rt6i_metric != metric) {
7029fbdcfafSSteffen Klassert 			cont = rt;
7039fbdcfafSSteffen Klassert 			break;
7049fbdcfafSSteffen Klassert 		}
7059fbdcfafSSteffen Klassert 
706afc154e9SHannes Frederic Sowa 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
7079fbdcfafSSteffen Klassert 	}
7089fbdcfafSSteffen Klassert 
7099fbdcfafSSteffen Klassert 	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
7109fbdcfafSSteffen Klassert 		if (rt->rt6i_metric != metric) {
7119fbdcfafSSteffen Klassert 			cont = rt;
7129fbdcfafSSteffen Klassert 			break;
7139fbdcfafSSteffen Klassert 		}
7149fbdcfafSSteffen Klassert 
7159fbdcfafSSteffen Klassert 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
7169fbdcfafSSteffen Klassert 	}
7179fbdcfafSSteffen Klassert 
7189fbdcfafSSteffen Klassert 	if (match || !cont)
7199fbdcfafSSteffen Klassert 		return match;
7209fbdcfafSSteffen Klassert 
7219fbdcfafSSteffen Klassert 	for (rt = cont; rt; rt = rt->dst.rt6_next)
722afc154e9SHannes Frederic Sowa 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
723f11e6659SDavid S. Miller 
724f11e6659SDavid S. Miller 	return match;
725f11e6659SDavid S. Miller }
726f11e6659SDavid S. Miller 
727f11e6659SDavid S. Miller static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
728f11e6659SDavid S. Miller {
729f11e6659SDavid S. Miller 	struct rt6_info *match, *rt0;
7308ed67789SDaniel Lezcano 	struct net *net;
731afc154e9SHannes Frederic Sowa 	bool do_rr = false;
732f11e6659SDavid S. Miller 
733f11e6659SDavid S. Miller 	rt0 = fn->rr_ptr;
734f11e6659SDavid S. Miller 	if (!rt0)
735f11e6659SDavid S. Miller 		fn->rr_ptr = rt0 = fn->leaf;
736f11e6659SDavid S. Miller 
737afc154e9SHannes Frederic Sowa 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
738afc154e9SHannes Frederic Sowa 			     &do_rr);
739f11e6659SDavid S. Miller 
740afc154e9SHannes Frederic Sowa 	if (do_rr) {
741d8d1f30bSChangli Gao 		struct rt6_info *next = rt0->dst.rt6_next;
742f11e6659SDavid S. Miller 
743554cfb7eSYOSHIFUJI Hideaki 		/* no entries matched; do round-robin */
744f11e6659SDavid S. Miller 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
745f11e6659SDavid S. Miller 			next = fn->leaf;
746f11e6659SDavid S. Miller 
747f11e6659SDavid S. Miller 		if (next != rt0)
748f11e6659SDavid S. Miller 			fn->rr_ptr = next;
749554cfb7eSYOSHIFUJI Hideaki 	}
750554cfb7eSYOSHIFUJI Hideaki 
751d1918542SDavid S. Miller 	net = dev_net(rt0->dst.dev);
752a02cec21SEric Dumazet 	return match ? match : net->ipv6.ip6_null_entry;
7531da177e4SLinus Torvalds }
7541da177e4SLinus Torvalds 
7558b9df265SMartin KaFai Lau static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
7568b9df265SMartin KaFai Lau {
7578b9df265SMartin KaFai Lau 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
7588b9df265SMartin KaFai Lau }
7598b9df265SMartin KaFai Lau 
76070ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
76170ceb4f5SYOSHIFUJI Hideaki int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
762b71d1d42SEric Dumazet 		  const struct in6_addr *gwaddr)
76370ceb4f5SYOSHIFUJI Hideaki {
764c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
76570ceb4f5SYOSHIFUJI Hideaki 	struct route_info *rinfo = (struct route_info *) opt;
76670ceb4f5SYOSHIFUJI Hideaki 	struct in6_addr prefix_buf, *prefix;
76770ceb4f5SYOSHIFUJI Hideaki 	unsigned int pref;
7684bed72e4SYOSHIFUJI Hideaki 	unsigned long lifetime;
76970ceb4f5SYOSHIFUJI Hideaki 	struct rt6_info *rt;
77070ceb4f5SYOSHIFUJI Hideaki 
77170ceb4f5SYOSHIFUJI Hideaki 	if (len < sizeof(struct route_info)) {
77270ceb4f5SYOSHIFUJI Hideaki 		return -EINVAL;
77370ceb4f5SYOSHIFUJI Hideaki 	}
77470ceb4f5SYOSHIFUJI Hideaki 
77570ceb4f5SYOSHIFUJI Hideaki 	/* Sanity check for prefix_len and length */
77670ceb4f5SYOSHIFUJI Hideaki 	if (rinfo->length > 3) {
77770ceb4f5SYOSHIFUJI Hideaki 		return -EINVAL;
77870ceb4f5SYOSHIFUJI Hideaki 	} else if (rinfo->prefix_len > 128) {
77970ceb4f5SYOSHIFUJI Hideaki 		return -EINVAL;
78070ceb4f5SYOSHIFUJI Hideaki 	} else if (rinfo->prefix_len > 64) {
78170ceb4f5SYOSHIFUJI Hideaki 		if (rinfo->length < 2) {
78270ceb4f5SYOSHIFUJI Hideaki 			return -EINVAL;
78370ceb4f5SYOSHIFUJI Hideaki 		}
78470ceb4f5SYOSHIFUJI Hideaki 	} else if (rinfo->prefix_len > 0) {
78570ceb4f5SYOSHIFUJI Hideaki 		if (rinfo->length < 1) {
78670ceb4f5SYOSHIFUJI Hideaki 			return -EINVAL;
78770ceb4f5SYOSHIFUJI Hideaki 		}
78870ceb4f5SYOSHIFUJI Hideaki 	}
78970ceb4f5SYOSHIFUJI Hideaki 
79070ceb4f5SYOSHIFUJI Hideaki 	pref = rinfo->route_pref;
79170ceb4f5SYOSHIFUJI Hideaki 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
7923933fc95SJens Rosenboom 		return -EINVAL;
79370ceb4f5SYOSHIFUJI Hideaki 
7944bed72e4SYOSHIFUJI Hideaki 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
79570ceb4f5SYOSHIFUJI Hideaki 
79670ceb4f5SYOSHIFUJI Hideaki 	if (rinfo->length == 3)
79770ceb4f5SYOSHIFUJI Hideaki 		prefix = (struct in6_addr *)rinfo->prefix;
79870ceb4f5SYOSHIFUJI Hideaki 	else {
79970ceb4f5SYOSHIFUJI Hideaki 		/* this function is safe */
80070ceb4f5SYOSHIFUJI Hideaki 		ipv6_addr_prefix(&prefix_buf,
80170ceb4f5SYOSHIFUJI Hideaki 				 (struct in6_addr *)rinfo->prefix,
80270ceb4f5SYOSHIFUJI Hideaki 				 rinfo->prefix_len);
80370ceb4f5SYOSHIFUJI Hideaki 		prefix = &prefix_buf;
80470ceb4f5SYOSHIFUJI Hideaki 	}
80570ceb4f5SYOSHIFUJI Hideaki 
806f104a567SDuan Jiong 	if (rinfo->prefix_len == 0)
807f104a567SDuan Jiong 		rt = rt6_get_dflt_router(gwaddr, dev);
808f104a567SDuan Jiong 	else
809f104a567SDuan Jiong 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
810f104a567SDuan Jiong 					gwaddr, dev->ifindex);
81170ceb4f5SYOSHIFUJI Hideaki 
81270ceb4f5SYOSHIFUJI Hideaki 	if (rt && !lifetime) {
813e0a1ad73SThomas Graf 		ip6_del_rt(rt);
81470ceb4f5SYOSHIFUJI Hideaki 		rt = NULL;
81570ceb4f5SYOSHIFUJI Hideaki 	}
81670ceb4f5SYOSHIFUJI Hideaki 
81770ceb4f5SYOSHIFUJI Hideaki 	if (!rt && lifetime)
818efa2cea0SDaniel Lezcano 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
81970ceb4f5SYOSHIFUJI Hideaki 					pref);
82070ceb4f5SYOSHIFUJI Hideaki 	else if (rt)
82170ceb4f5SYOSHIFUJI Hideaki 		rt->rt6i_flags = RTF_ROUTEINFO |
82270ceb4f5SYOSHIFUJI Hideaki 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
82370ceb4f5SYOSHIFUJI Hideaki 
82470ceb4f5SYOSHIFUJI Hideaki 	if (rt) {
8251716a961SGao feng 		if (!addrconf_finite_timeout(lifetime))
8261716a961SGao feng 			rt6_clean_expires(rt);
8271716a961SGao feng 		else
8281716a961SGao feng 			rt6_set_expires(rt, jiffies + HZ * lifetime);
8291716a961SGao feng 
83094e187c0SAmerigo Wang 		ip6_rt_put(rt);
83170ceb4f5SYOSHIFUJI Hideaki 	}
83270ceb4f5SYOSHIFUJI Hideaki 	return 0;
83370ceb4f5SYOSHIFUJI Hideaki }
83470ceb4f5SYOSHIFUJI Hideaki #endif
83570ceb4f5SYOSHIFUJI Hideaki 
836a3c00e46SMartin KaFai Lau static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
837a3c00e46SMartin KaFai Lau 					struct in6_addr *saddr)
838a3c00e46SMartin KaFai Lau {
839a3c00e46SMartin KaFai Lau 	struct fib6_node *pn;
840a3c00e46SMartin KaFai Lau 	while (1) {
841a3c00e46SMartin KaFai Lau 		if (fn->fn_flags & RTN_TL_ROOT)
842a3c00e46SMartin KaFai Lau 			return NULL;
843a3c00e46SMartin KaFai Lau 		pn = fn->parent;
844a3c00e46SMartin KaFai Lau 		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
845a3c00e46SMartin KaFai Lau 			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
846a3c00e46SMartin KaFai Lau 		else
847a3c00e46SMartin KaFai Lau 			fn = pn;
848a3c00e46SMartin KaFai Lau 		if (fn->fn_flags & RTN_RTINFO)
849a3c00e46SMartin KaFai Lau 			return fn;
850a3c00e46SMartin KaFai Lau 	}
851a3c00e46SMartin KaFai Lau }
852c71099acSThomas Graf 
8538ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_lookup(struct net *net,
8548ed67789SDaniel Lezcano 					     struct fib6_table *table,
8554c9483b2SDavid S. Miller 					     struct flowi6 *fl6, int flags)
8561da177e4SLinus Torvalds {
8571da177e4SLinus Torvalds 	struct fib6_node *fn;
8581da177e4SLinus Torvalds 	struct rt6_info *rt;
8591da177e4SLinus Torvalds 
860c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
8614c9483b2SDavid S. Miller 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
862c71099acSThomas Graf restart:
863c71099acSThomas Graf 	rt = fn->leaf;
8644c9483b2SDavid S. Miller 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
86551ebd318SNicolas Dichtel 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
86652bd4c0cSNicolas Dichtel 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
867a3c00e46SMartin KaFai Lau 	if (rt == net->ipv6.ip6_null_entry) {
868a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
869a3c00e46SMartin KaFai Lau 		if (fn)
870a3c00e46SMartin KaFai Lau 			goto restart;
871a3c00e46SMartin KaFai Lau 	}
872d8d1f30bSChangli Gao 	dst_use(&rt->dst, jiffies);
873c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
8741da177e4SLinus Torvalds 	return rt;
875c71099acSThomas Graf 
876c71099acSThomas Graf }
877c71099acSThomas Graf 
878ea6e574eSFlorian Westphal struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
879ea6e574eSFlorian Westphal 				    int flags)
880ea6e574eSFlorian Westphal {
881ea6e574eSFlorian Westphal 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
882ea6e574eSFlorian Westphal }
883ea6e574eSFlorian Westphal EXPORT_SYMBOL_GPL(ip6_route_lookup);
884ea6e574eSFlorian Westphal 
8859acd9f3aSYOSHIFUJI Hideaki struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
8869acd9f3aSYOSHIFUJI Hideaki 			    const struct in6_addr *saddr, int oif, int strict)
887c71099acSThomas Graf {
8884c9483b2SDavid S. Miller 	struct flowi6 fl6 = {
8894c9483b2SDavid S. Miller 		.flowi6_oif = oif,
8904c9483b2SDavid S. Miller 		.daddr = *daddr,
891c71099acSThomas Graf 	};
892c71099acSThomas Graf 	struct dst_entry *dst;
89377d16f45SYOSHIFUJI Hideaki 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
894c71099acSThomas Graf 
895adaa70bbSThomas Graf 	if (saddr) {
8964c9483b2SDavid S. Miller 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
897adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
898adaa70bbSThomas Graf 	}
899adaa70bbSThomas Graf 
9004c9483b2SDavid S. Miller 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
901c71099acSThomas Graf 	if (dst->error == 0)
902c71099acSThomas Graf 		return (struct rt6_info *) dst;
903c71099acSThomas Graf 
904c71099acSThomas Graf 	dst_release(dst);
905c71099acSThomas Graf 
9061da177e4SLinus Torvalds 	return NULL;
9071da177e4SLinus Torvalds }
9087159039aSYOSHIFUJI Hideaki EXPORT_SYMBOL(rt6_lookup);
9097159039aSYOSHIFUJI Hideaki 
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold a
   reference to the route, it may be destroyed.
 */
9151da177e4SLinus Torvalds 
916e5fd387aSMichal Kubeček static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
917e715b6d3SFlorian Westphal 			struct mx6_config *mxc)
9181da177e4SLinus Torvalds {
9191da177e4SLinus Torvalds 	int err;
920c71099acSThomas Graf 	struct fib6_table *table;
9211da177e4SLinus Torvalds 
922c71099acSThomas Graf 	table = rt->rt6i_table;
923c71099acSThomas Graf 	write_lock_bh(&table->tb6_lock);
924e715b6d3SFlorian Westphal 	err = fib6_add(&table->tb6_root, rt, info, mxc);
925c71099acSThomas Graf 	write_unlock_bh(&table->tb6_lock);
9261da177e4SLinus Torvalds 
9271da177e4SLinus Torvalds 	return err;
9281da177e4SLinus Torvalds }
9291da177e4SLinus Torvalds 
93040e22e8fSThomas Graf int ip6_ins_rt(struct rt6_info *rt)
93140e22e8fSThomas Graf {
932e715b6d3SFlorian Westphal 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
933e715b6d3SFlorian Westphal 	struct mx6_config mxc = { .mx = NULL, };
934e715b6d3SFlorian Westphal 
935e715b6d3SFlorian Westphal 	return __ip6_ins_rt(rt, &info, &mxc);
93640e22e8fSThomas Graf }
93740e22e8fSThomas Graf 
9388b9df265SMartin KaFai Lau static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
93921efcfa0SEric Dumazet 					   const struct in6_addr *daddr,
940b71d1d42SEric Dumazet 					   const struct in6_addr *saddr)
9411da177e4SLinus Torvalds {
9421da177e4SLinus Torvalds 	struct rt6_info *rt;
9431da177e4SLinus Torvalds 
9441da177e4SLinus Torvalds 	/*
9451da177e4SLinus Torvalds 	 *	Clone the route.
9461da177e4SLinus Torvalds 	 */
9471da177e4SLinus Torvalds 
948d52d3997SMartin KaFai Lau 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
94983a09abdSMartin KaFai Lau 		ort = (struct rt6_info *)ort->dst.from;
9501da177e4SLinus Torvalds 
951ad706862SMartin KaFai Lau 	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
95283a09abdSMartin KaFai Lau 
95383a09abdSMartin KaFai Lau 	if (!rt)
95483a09abdSMartin KaFai Lau 		return NULL;
95583a09abdSMartin KaFai Lau 
95683a09abdSMartin KaFai Lau 	ip6_rt_copy_init(rt, ort);
9578b9df265SMartin KaFai Lau 	rt->rt6i_flags |= RTF_CACHE;
95883a09abdSMartin KaFai Lau 	rt->rt6i_metric = 0;
95983a09abdSMartin KaFai Lau 	rt->dst.flags |= DST_HOST;
96083a09abdSMartin KaFai Lau 	rt->rt6i_dst.addr = *daddr;
96183a09abdSMartin KaFai Lau 	rt->rt6i_dst.plen = 128;
9628b9df265SMartin KaFai Lau 
9638b9df265SMartin KaFai Lau 	if (!rt6_is_gw_or_nonexthop(ort)) {
964bb3c3686SDavid S. Miller 		if (ort->rt6i_dst.plen != 128 &&
96521efcfa0SEric Dumazet 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
96658c4fb86SYOSHIFUJI Hideaki 			rt->rt6i_flags |= RTF_ANYCAST;
9671da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
9681da177e4SLinus Torvalds 		if (rt->rt6i_src.plen && saddr) {
9694e3fd7a0SAlexey Dobriyan 			rt->rt6i_src.addr = *saddr;
9701da177e4SLinus Torvalds 			rt->rt6i_src.plen = 128;
9711da177e4SLinus Torvalds 		}
9721da177e4SLinus Torvalds #endif
97395a9a5baSYOSHIFUJI Hideaki 	}
97495a9a5baSYOSHIFUJI Hideaki 
975299d9939SYOSHIFUJI Hideaki 	return rt;
976299d9939SYOSHIFUJI Hideaki }
977299d9939SYOSHIFUJI Hideaki 
978d52d3997SMartin KaFai Lau static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
979d52d3997SMartin KaFai Lau {
980d52d3997SMartin KaFai Lau 	struct rt6_info *pcpu_rt;
981d52d3997SMartin KaFai Lau 
982d52d3997SMartin KaFai Lau 	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
983ad706862SMartin KaFai Lau 				  rt->dst.dev, rt->dst.flags);
984d52d3997SMartin KaFai Lau 
985d52d3997SMartin KaFai Lau 	if (!pcpu_rt)
986d52d3997SMartin KaFai Lau 		return NULL;
987d52d3997SMartin KaFai Lau 	ip6_rt_copy_init(pcpu_rt, rt);
988d52d3997SMartin KaFai Lau 	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
989d52d3997SMartin KaFai Lau 	pcpu_rt->rt6i_flags |= RTF_PCPU;
990d52d3997SMartin KaFai Lau 	return pcpu_rt;
991d52d3997SMartin KaFai Lau }
992d52d3997SMartin KaFai Lau 
993d52d3997SMartin KaFai Lau /* It should be called with read_lock_bh(&tb6_lock) acquired */
994d52d3997SMartin KaFai Lau static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
995d52d3997SMartin KaFai Lau {
996a73e4195SMartin KaFai Lau 	struct rt6_info *pcpu_rt, **p;
997d52d3997SMartin KaFai Lau 
998d52d3997SMartin KaFai Lau 	p = this_cpu_ptr(rt->rt6i_pcpu);
999d52d3997SMartin KaFai Lau 	pcpu_rt = *p;
1000d52d3997SMartin KaFai Lau 
1001a73e4195SMartin KaFai Lau 	if (pcpu_rt) {
1002a73e4195SMartin KaFai Lau 		dst_hold(&pcpu_rt->dst);
1003a73e4195SMartin KaFai Lau 		rt6_dst_from_metrics_check(pcpu_rt);
1004a73e4195SMartin KaFai Lau 	}
1005a73e4195SMartin KaFai Lau 	return pcpu_rt;
1006a73e4195SMartin KaFai Lau }
1007a73e4195SMartin KaFai Lau 
1008a73e4195SMartin KaFai Lau static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1009a73e4195SMartin KaFai Lau {
1010*9c7370a1SMartin KaFai Lau 	struct fib6_table *table = rt->rt6i_table;
1011a73e4195SMartin KaFai Lau 	struct rt6_info *pcpu_rt, *prev, **p;
1012d52d3997SMartin KaFai Lau 
1013d52d3997SMartin KaFai Lau 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1014d52d3997SMartin KaFai Lau 	if (!pcpu_rt) {
1015d52d3997SMartin KaFai Lau 		struct net *net = dev_net(rt->dst.dev);
1016d52d3997SMartin KaFai Lau 
1017*9c7370a1SMartin KaFai Lau 		dst_hold(&net->ipv6.ip6_null_entry->dst);
1018*9c7370a1SMartin KaFai Lau 		return net->ipv6.ip6_null_entry;
1019d52d3997SMartin KaFai Lau 	}
1020d52d3997SMartin KaFai Lau 
1021*9c7370a1SMartin KaFai Lau 	read_lock_bh(&table->tb6_lock);
1022*9c7370a1SMartin KaFai Lau 	if (rt->rt6i_pcpu) {
1023a73e4195SMartin KaFai Lau 		p = this_cpu_ptr(rt->rt6i_pcpu);
1024d52d3997SMartin KaFai Lau 		prev = cmpxchg(p, NULL, pcpu_rt);
1025d52d3997SMartin KaFai Lau 		if (prev) {
1026d52d3997SMartin KaFai Lau 			/* If someone did it before us, return prev instead */
1027d52d3997SMartin KaFai Lau 			dst_destroy(&pcpu_rt->dst);
1028d52d3997SMartin KaFai Lau 			pcpu_rt = prev;
1029d52d3997SMartin KaFai Lau 		}
1030*9c7370a1SMartin KaFai Lau 	} else {
1031*9c7370a1SMartin KaFai Lau 		/* rt has been removed from the fib6 tree
1032*9c7370a1SMartin KaFai Lau 		 * before we have a chance to acquire the read_lock.
1033*9c7370a1SMartin KaFai Lau 		 * In this case, don't brother to create a pcpu rt
1034*9c7370a1SMartin KaFai Lau 		 * since rt is going away anyway.  The next
1035*9c7370a1SMartin KaFai Lau 		 * dst_check() will trigger a re-lookup.
1036*9c7370a1SMartin KaFai Lau 		 */
1037*9c7370a1SMartin KaFai Lau 		dst_destroy(&pcpu_rt->dst);
1038*9c7370a1SMartin KaFai Lau 		pcpu_rt = rt;
1039*9c7370a1SMartin KaFai Lau 	}
1040d52d3997SMartin KaFai Lau 	dst_hold(&pcpu_rt->dst);
1041d52d3997SMartin KaFai Lau 	rt6_dst_from_metrics_check(pcpu_rt);
1042*9c7370a1SMartin KaFai Lau 	read_unlock_bh(&table->tb6_lock);
1043d52d3997SMartin KaFai Lau 	return pcpu_rt;
1044d52d3997SMartin KaFai Lau }
1045d52d3997SMartin KaFai Lau 
10468ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
10474c9483b2SDavid S. Miller 				      struct flowi6 *fl6, int flags)
10481da177e4SLinus Torvalds {
1049367efcb9SMartin KaFai Lau 	struct fib6_node *fn, *saved_fn;
105045e4fd26SMartin KaFai Lau 	struct rt6_info *rt;
1051c71099acSThomas Graf 	int strict = 0;
10521da177e4SLinus Torvalds 
105377d16f45SYOSHIFUJI Hideaki 	strict |= flags & RT6_LOOKUP_F_IFACE;
1054367efcb9SMartin KaFai Lau 	if (net->ipv6.devconf_all->forwarding == 0)
1055367efcb9SMartin KaFai Lau 		strict |= RT6_LOOKUP_F_REACHABLE;
10561da177e4SLinus Torvalds 
1057c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
10581da177e4SLinus Torvalds 
10594c9483b2SDavid S. Miller 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1060367efcb9SMartin KaFai Lau 	saved_fn = fn;
10611da177e4SLinus Torvalds 
1062a3c00e46SMartin KaFai Lau redo_rt6_select:
1063367efcb9SMartin KaFai Lau 	rt = rt6_select(fn, oif, strict);
106452bd4c0cSNicolas Dichtel 	if (rt->rt6i_nsiblings)
1065367efcb9SMartin KaFai Lau 		rt = rt6_multipath_select(rt, fl6, oif, strict);
1066a3c00e46SMartin KaFai Lau 	if (rt == net->ipv6.ip6_null_entry) {
1067a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
1068a3c00e46SMartin KaFai Lau 		if (fn)
1069a3c00e46SMartin KaFai Lau 			goto redo_rt6_select;
1070367efcb9SMartin KaFai Lau 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1071367efcb9SMartin KaFai Lau 			/* also consider unreachable route */
1072367efcb9SMartin KaFai Lau 			strict &= ~RT6_LOOKUP_F_REACHABLE;
1073367efcb9SMartin KaFai Lau 			fn = saved_fn;
1074367efcb9SMartin KaFai Lau 			goto redo_rt6_select;
1075367efcb9SMartin KaFai Lau 		}
1076a3c00e46SMartin KaFai Lau 	}
1077a3c00e46SMartin KaFai Lau 
1078d52d3997SMartin KaFai Lau 
1079d52d3997SMartin KaFai Lau 	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
10803da59bd9SMartin KaFai Lau 		dst_use(&rt->dst, jiffies);
1081c71099acSThomas Graf 		read_unlock_bh(&table->tb6_lock);
10821da177e4SLinus Torvalds 
1083d52d3997SMartin KaFai Lau 		rt6_dst_from_metrics_check(rt);
1084d52d3997SMartin KaFai Lau 		return rt;
10853da59bd9SMartin KaFai Lau 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
10863da59bd9SMartin KaFai Lau 			    !(rt->rt6i_flags & RTF_GATEWAY))) {
10873da59bd9SMartin KaFai Lau 		/* Create a RTF_CACHE clone which will not be
10883da59bd9SMartin KaFai Lau 		 * owned by the fib6 tree.  It is for the special case where
10893da59bd9SMartin KaFai Lau 		 * the daddr in the skb during the neighbor look-up is different
10903da59bd9SMartin KaFai Lau 		 * from the fl6->daddr used to look-up route here.
10913da59bd9SMartin KaFai Lau 		 */
1092c71099acSThomas Graf 
10933da59bd9SMartin KaFai Lau 		struct rt6_info *uncached_rt;
10943da59bd9SMartin KaFai Lau 
1095d52d3997SMartin KaFai Lau 		dst_use(&rt->dst, jiffies);
1096d52d3997SMartin KaFai Lau 		read_unlock_bh(&table->tb6_lock);
1097d52d3997SMartin KaFai Lau 
10983da59bd9SMartin KaFai Lau 		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
10993da59bd9SMartin KaFai Lau 		dst_release(&rt->dst);
11003da59bd9SMartin KaFai Lau 
11013da59bd9SMartin KaFai Lau 		if (uncached_rt)
11028d0b94afSMartin KaFai Lau 			rt6_uncached_list_add(uncached_rt);
11033da59bd9SMartin KaFai Lau 		else
11043da59bd9SMartin KaFai Lau 			uncached_rt = net->ipv6.ip6_null_entry;
1105d52d3997SMartin KaFai Lau 
11063da59bd9SMartin KaFai Lau 		dst_hold(&uncached_rt->dst);
11073da59bd9SMartin KaFai Lau 		return uncached_rt;
11083da59bd9SMartin KaFai Lau 
1109d52d3997SMartin KaFai Lau 	} else {
1110d52d3997SMartin KaFai Lau 		/* Get a percpu copy */
1111d52d3997SMartin KaFai Lau 
1112d52d3997SMartin KaFai Lau 		struct rt6_info *pcpu_rt;
1113d52d3997SMartin KaFai Lau 
1114d52d3997SMartin KaFai Lau 		rt->dst.lastuse = jiffies;
1115d52d3997SMartin KaFai Lau 		rt->dst.__use++;
1116d52d3997SMartin KaFai Lau 		pcpu_rt = rt6_get_pcpu_route(rt);
1117d52d3997SMartin KaFai Lau 
1118*9c7370a1SMartin KaFai Lau 		if (pcpu_rt) {
1119a73e4195SMartin KaFai Lau 			read_unlock_bh(&table->tb6_lock);
1120*9c7370a1SMartin KaFai Lau 		} else {
1121*9c7370a1SMartin KaFai Lau 			/* We have to do the read_unlock first
1122*9c7370a1SMartin KaFai Lau 			 * because rt6_make_pcpu_route() may trigger
1123*9c7370a1SMartin KaFai Lau 			 * ip6_dst_gc() which will take the write_lock.
1124*9c7370a1SMartin KaFai Lau 			 */
1125*9c7370a1SMartin KaFai Lau 			dst_hold(&rt->dst);
1126*9c7370a1SMartin KaFai Lau 			read_unlock_bh(&table->tb6_lock);
1127*9c7370a1SMartin KaFai Lau 			pcpu_rt = rt6_make_pcpu_route(rt);
1128*9c7370a1SMartin KaFai Lau 			dst_release(&rt->dst);
1129*9c7370a1SMartin KaFai Lau 		}
1130*9c7370a1SMartin KaFai Lau 
1131d52d3997SMartin KaFai Lau 		return pcpu_rt;
1132*9c7370a1SMartin KaFai Lau 
1133d52d3997SMartin KaFai Lau 	}
1134c71099acSThomas Graf }
1135c71099acSThomas Graf 
11368ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
11374c9483b2SDavid S. Miller 					    struct flowi6 *fl6, int flags)
11384acad72dSPavel Emelyanov {
11394c9483b2SDavid S. Miller 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
11404acad72dSPavel Emelyanov }
11414acad72dSPavel Emelyanov 
114272331bc0SShmulik Ladkani static struct dst_entry *ip6_route_input_lookup(struct net *net,
114372331bc0SShmulik Ladkani 						struct net_device *dev,
114472331bc0SShmulik Ladkani 						struct flowi6 *fl6, int flags)
114572331bc0SShmulik Ladkani {
114672331bc0SShmulik Ladkani 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
114772331bc0SShmulik Ladkani 		flags |= RT6_LOOKUP_F_IFACE;
114872331bc0SShmulik Ladkani 
114972331bc0SShmulik Ladkani 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
115072331bc0SShmulik Ladkani }
115172331bc0SShmulik Ladkani 
1152c71099acSThomas Graf void ip6_route_input(struct sk_buff *skb)
1153c71099acSThomas Graf {
1154b71d1d42SEric Dumazet 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1155c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(skb->dev);
1156adaa70bbSThomas Graf 	int flags = RT6_LOOKUP_F_HAS_SADDR;
11574c9483b2SDavid S. Miller 	struct flowi6 fl6 = {
11584c9483b2SDavid S. Miller 		.flowi6_iif = skb->dev->ifindex,
11594c9483b2SDavid S. Miller 		.daddr = iph->daddr,
11604c9483b2SDavid S. Miller 		.saddr = iph->saddr,
11616502ca52SYOSHIFUJI Hideaki / 吉藤英明 		.flowlabel = ip6_flowinfo(iph),
11624c9483b2SDavid S. Miller 		.flowi6_mark = skb->mark,
11634c9483b2SDavid S. Miller 		.flowi6_proto = iph->nexthdr,
1164c71099acSThomas Graf 	};
1165adaa70bbSThomas Graf 
116672331bc0SShmulik Ladkani 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1167c71099acSThomas Graf }
1168c71099acSThomas Graf 
11698ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
11704c9483b2SDavid S. Miller 					     struct flowi6 *fl6, int flags)
1171c71099acSThomas Graf {
11724c9483b2SDavid S. Miller 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1173c71099acSThomas Graf }
1174c71099acSThomas Graf 
11759c7a4f9cSFlorian Westphal struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
11764c9483b2SDavid S. Miller 				    struct flowi6 *fl6)
1177c71099acSThomas Graf {
1178c71099acSThomas Graf 	int flags = 0;
1179c71099acSThomas Graf 
11801fb9489bSPavel Emelyanov 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
11814dc27d1cSDavid McCullough 
11824c9483b2SDavid S. Miller 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
118377d16f45SYOSHIFUJI Hideaki 		flags |= RT6_LOOKUP_F_IFACE;
1184c71099acSThomas Graf 
11854c9483b2SDavid S. Miller 	if (!ipv6_addr_any(&fl6->saddr))
1186adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
11870c9a2ac1SYOSHIFUJI Hideaki / 吉藤英明 	else if (sk)
11880c9a2ac1SYOSHIFUJI Hideaki / 吉藤英明 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1189adaa70bbSThomas Graf 
11904c9483b2SDavid S. Miller 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
11911da177e4SLinus Torvalds }
11927159039aSYOSHIFUJI Hideaki EXPORT_SYMBOL(ip6_route_output);
11931da177e4SLinus Torvalds 
11942774c131SDavid S. Miller struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
119514e50e57SDavid S. Miller {
11965c1e6aa3SDavid S. Miller 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
119714e50e57SDavid S. Miller 	struct dst_entry *new = NULL;
119814e50e57SDavid S. Miller 
1199f5b0a874SDavid S. Miller 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
120014e50e57SDavid S. Miller 	if (rt) {
1201d8d1f30bSChangli Gao 		new = &rt->dst;
120214e50e57SDavid S. Miller 
12038104891bSSteffen Klassert 		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
12048104891bSSteffen Klassert 
120514e50e57SDavid S. Miller 		new->__use = 1;
1206352e512cSHerbert Xu 		new->input = dst_discard;
1207aad88724SEric Dumazet 		new->output = dst_discard_sk;
120814e50e57SDavid S. Miller 
120921efcfa0SEric Dumazet 		if (dst_metrics_read_only(&ort->dst))
121021efcfa0SEric Dumazet 			new->_metrics = ort->dst._metrics;
121121efcfa0SEric Dumazet 		else
1212defb3519SDavid S. Miller 			dst_copy_metrics(new, &ort->dst);
121314e50e57SDavid S. Miller 		rt->rt6i_idev = ort->rt6i_idev;
121414e50e57SDavid S. Miller 		if (rt->rt6i_idev)
121514e50e57SDavid S. Miller 			in6_dev_hold(rt->rt6i_idev);
121614e50e57SDavid S. Miller 
12174e3fd7a0SAlexey Dobriyan 		rt->rt6i_gateway = ort->rt6i_gateway;
12181716a961SGao feng 		rt->rt6i_flags = ort->rt6i_flags;
121914e50e57SDavid S. Miller 		rt->rt6i_metric = 0;
122014e50e57SDavid S. Miller 
122114e50e57SDavid S. Miller 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
122214e50e57SDavid S. Miller #ifdef CONFIG_IPV6_SUBTREES
122314e50e57SDavid S. Miller 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
122414e50e57SDavid S. Miller #endif
122514e50e57SDavid S. Miller 
122614e50e57SDavid S. Miller 		dst_free(new);
122714e50e57SDavid S. Miller 	}
122814e50e57SDavid S. Miller 
122969ead7afSDavid S. Miller 	dst_release(dst_orig);
123069ead7afSDavid S. Miller 	return new ? new : ERR_PTR(-ENOMEM);
123114e50e57SDavid S. Miller }
123214e50e57SDavid S. Miller 
12331da177e4SLinus Torvalds /*
12341da177e4SLinus Torvalds  *	Destination cache support functions
12351da177e4SLinus Torvalds  */
12361da177e4SLinus Torvalds 
12374b32b5adSMartin KaFai Lau static void rt6_dst_from_metrics_check(struct rt6_info *rt)
12384b32b5adSMartin KaFai Lau {
12394b32b5adSMartin KaFai Lau 	if (rt->dst.from &&
12404b32b5adSMartin KaFai Lau 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
12414b32b5adSMartin KaFai Lau 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
12424b32b5adSMartin KaFai Lau }
12434b32b5adSMartin KaFai Lau 
12443da59bd9SMartin KaFai Lau static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
12453da59bd9SMartin KaFai Lau {
12463da59bd9SMartin KaFai Lau 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
12473da59bd9SMartin KaFai Lau 		return NULL;
12483da59bd9SMartin KaFai Lau 
12493da59bd9SMartin KaFai Lau 	if (rt6_check_expired(rt))
12503da59bd9SMartin KaFai Lau 		return NULL;
12513da59bd9SMartin KaFai Lau 
12523da59bd9SMartin KaFai Lau 	return &rt->dst;
12533da59bd9SMartin KaFai Lau }
12543da59bd9SMartin KaFai Lau 
12553da59bd9SMartin KaFai Lau static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
12563da59bd9SMartin KaFai Lau {
12573da59bd9SMartin KaFai Lau 	if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
12583da59bd9SMartin KaFai Lau 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
12593da59bd9SMartin KaFai Lau 		return &rt->dst;
12603da59bd9SMartin KaFai Lau 	else
12613da59bd9SMartin KaFai Lau 		return NULL;
12623da59bd9SMartin KaFai Lau }
12633da59bd9SMartin KaFai Lau 
12641da177e4SLinus Torvalds static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
12651da177e4SLinus Torvalds {
12661da177e4SLinus Torvalds 	struct rt6_info *rt;
12671da177e4SLinus Torvalds 
12681da177e4SLinus Torvalds 	rt = (struct rt6_info *) dst;
12691da177e4SLinus Torvalds 
12706f3118b5SNicolas Dichtel 	/* All IPV6 dsts are created with ->obsolete set to the value
12716f3118b5SNicolas Dichtel 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
12726f3118b5SNicolas Dichtel 	 * into this function always.
12736f3118b5SNicolas Dichtel 	 */
1274e3bc10bdSHannes Frederic Sowa 
12754b32b5adSMartin KaFai Lau 	rt6_dst_from_metrics_check(rt);
12764b32b5adSMartin KaFai Lau 
1277d52d3997SMartin KaFai Lau 	if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
12783da59bd9SMartin KaFai Lau 		return rt6_dst_from_check(rt, cookie);
12793da59bd9SMartin KaFai Lau 	else
12803da59bd9SMartin KaFai Lau 		return rt6_check(rt, cookie);
12811da177e4SLinus Torvalds }
12821da177e4SLinus Torvalds 
12831da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
12841da177e4SLinus Torvalds {
12851da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *) dst;
12861da177e4SLinus Torvalds 
12871da177e4SLinus Torvalds 	if (rt) {
128854c1a859SYOSHIFUJI Hideaki / 吉藤英明 		if (rt->rt6i_flags & RTF_CACHE) {
128954c1a859SYOSHIFUJI Hideaki / 吉藤英明 			if (rt6_check_expired(rt)) {
1290e0a1ad73SThomas Graf 				ip6_del_rt(rt);
129154c1a859SYOSHIFUJI Hideaki / 吉藤英明 				dst = NULL;
12921da177e4SLinus Torvalds 			}
129354c1a859SYOSHIFUJI Hideaki / 吉藤英明 		} else {
129454c1a859SYOSHIFUJI Hideaki / 吉藤英明 			dst_release(dst);
129554c1a859SYOSHIFUJI Hideaki / 吉藤英明 			dst = NULL;
129654c1a859SYOSHIFUJI Hideaki / 吉藤英明 		}
129754c1a859SYOSHIFUJI Hideaki / 吉藤英明 	}
129854c1a859SYOSHIFUJI Hideaki / 吉藤英明 	return dst;
12991da177e4SLinus Torvalds }
13001da177e4SLinus Torvalds 
13011da177e4SLinus Torvalds static void ip6_link_failure(struct sk_buff *skb)
13021da177e4SLinus Torvalds {
13031da177e4SLinus Torvalds 	struct rt6_info *rt;
13041da177e4SLinus Torvalds 
13053ffe533cSAlexey Dobriyan 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
13061da177e4SLinus Torvalds 
1307adf30907SEric Dumazet 	rt = (struct rt6_info *) skb_dst(skb);
13081da177e4SLinus Torvalds 	if (rt) {
13091eb4f758SHannes Frederic Sowa 		if (rt->rt6i_flags & RTF_CACHE) {
13101eb4f758SHannes Frederic Sowa 			dst_hold(&rt->dst);
13111eb4f758SHannes Frederic Sowa 			if (ip6_del_rt(rt))
13121eb4f758SHannes Frederic Sowa 				dst_free(&rt->dst);
13131eb4f758SHannes Frederic Sowa 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
13141da177e4SLinus Torvalds 			rt->rt6i_node->fn_sernum = -1;
13151da177e4SLinus Torvalds 		}
13161da177e4SLinus Torvalds 	}
13171eb4f758SHannes Frederic Sowa }
13181da177e4SLinus Torvalds 
131945e4fd26SMartin KaFai Lau static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
132045e4fd26SMartin KaFai Lau {
132145e4fd26SMartin KaFai Lau 	struct net *net = dev_net(rt->dst.dev);
132245e4fd26SMartin KaFai Lau 
132345e4fd26SMartin KaFai Lau 	rt->rt6i_flags |= RTF_MODIFIED;
132445e4fd26SMartin KaFai Lau 	rt->rt6i_pmtu = mtu;
132545e4fd26SMartin KaFai Lau 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
132645e4fd26SMartin KaFai Lau }
132745e4fd26SMartin KaFai Lau 
132845e4fd26SMartin KaFai Lau static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
132945e4fd26SMartin KaFai Lau 				 const struct ipv6hdr *iph, u32 mtu)
13301da177e4SLinus Torvalds {
13311da177e4SLinus Torvalds 	struct rt6_info *rt6 = (struct rt6_info *)dst;
13321da177e4SLinus Torvalds 
133345e4fd26SMartin KaFai Lau 	if (rt6->rt6i_flags & RTF_LOCAL)
133445e4fd26SMartin KaFai Lau 		return;
133545e4fd26SMartin KaFai Lau 
133681aded24SDavid S. Miller 	dst_confirm(dst);
133745e4fd26SMartin KaFai Lau 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
133845e4fd26SMartin KaFai Lau 	if (mtu >= dst_mtu(dst))
133945e4fd26SMartin KaFai Lau 		return;
134081aded24SDavid S. Miller 
134145e4fd26SMartin KaFai Lau 	if (rt6->rt6i_flags & RTF_CACHE) {
134245e4fd26SMartin KaFai Lau 		rt6_do_update_pmtu(rt6, mtu);
134345e4fd26SMartin KaFai Lau 	} else {
134445e4fd26SMartin KaFai Lau 		const struct in6_addr *daddr, *saddr;
134545e4fd26SMartin KaFai Lau 		struct rt6_info *nrt6;
13469d289715SHagen Paul Pfeifer 
134745e4fd26SMartin KaFai Lau 		if (iph) {
134845e4fd26SMartin KaFai Lau 			daddr = &iph->daddr;
134945e4fd26SMartin KaFai Lau 			saddr = &iph->saddr;
135045e4fd26SMartin KaFai Lau 		} else if (sk) {
135145e4fd26SMartin KaFai Lau 			daddr = &sk->sk_v6_daddr;
135245e4fd26SMartin KaFai Lau 			saddr = &inet6_sk(sk)->saddr;
135345e4fd26SMartin KaFai Lau 		} else {
135445e4fd26SMartin KaFai Lau 			return;
13551da177e4SLinus Torvalds 		}
135645e4fd26SMartin KaFai Lau 		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
135745e4fd26SMartin KaFai Lau 		if (nrt6) {
135845e4fd26SMartin KaFai Lau 			rt6_do_update_pmtu(nrt6, mtu);
135945e4fd26SMartin KaFai Lau 
136045e4fd26SMartin KaFai Lau 			/* ip6_ins_rt(nrt6) will bump the
136145e4fd26SMartin KaFai Lau 			 * rt6->rt6i_node->fn_sernum
136245e4fd26SMartin KaFai Lau 			 * which will fail the next rt6_check() and
136345e4fd26SMartin KaFai Lau 			 * invalidate the sk->sk_dst_cache.
136445e4fd26SMartin KaFai Lau 			 */
136545e4fd26SMartin KaFai Lau 			ip6_ins_rt(nrt6);
136645e4fd26SMartin KaFai Lau 		}
136745e4fd26SMartin KaFai Lau 	}
136845e4fd26SMartin KaFai Lau }
136945e4fd26SMartin KaFai Lau 
137045e4fd26SMartin KaFai Lau static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
137145e4fd26SMartin KaFai Lau 			       struct sk_buff *skb, u32 mtu)
137245e4fd26SMartin KaFai Lau {
137345e4fd26SMartin KaFai Lau 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
13741da177e4SLinus Torvalds }
13751da177e4SLinus Torvalds 
137642ae66c8SDavid S. Miller void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
137742ae66c8SDavid S. Miller 		     int oif, u32 mark)
137881aded24SDavid S. Miller {
137981aded24SDavid S. Miller 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
138081aded24SDavid S. Miller 	struct dst_entry *dst;
138181aded24SDavid S. Miller 	struct flowi6 fl6;
138281aded24SDavid S. Miller 
138381aded24SDavid S. Miller 	memset(&fl6, 0, sizeof(fl6));
138481aded24SDavid S. Miller 	fl6.flowi6_oif = oif;
13851b3c61dcSLorenzo Colitti 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
138681aded24SDavid S. Miller 	fl6.daddr = iph->daddr;
138781aded24SDavid S. Miller 	fl6.saddr = iph->saddr;
13886502ca52SYOSHIFUJI Hideaki / 吉藤英明 	fl6.flowlabel = ip6_flowinfo(iph);
138981aded24SDavid S. Miller 
139081aded24SDavid S. Miller 	dst = ip6_route_output(net, NULL, &fl6);
139181aded24SDavid S. Miller 	if (!dst->error)
139245e4fd26SMartin KaFai Lau 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
139381aded24SDavid S. Miller 	dst_release(dst);
139481aded24SDavid S. Miller }
139581aded24SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_update_pmtu);
139681aded24SDavid S. Miller 
139781aded24SDavid S. Miller void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
139881aded24SDavid S. Miller {
139981aded24SDavid S. Miller 	ip6_update_pmtu(skb, sock_net(sk), mtu,
140081aded24SDavid S. Miller 			sk->sk_bound_dev_if, sk->sk_mark);
140181aded24SDavid S. Miller }
140281aded24SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
140381aded24SDavid S. Miller 
1404b55b76b2SDuan Jiong /* Handle redirects */
1405b55b76b2SDuan Jiong struct ip6rd_flowi {
1406b55b76b2SDuan Jiong 	struct flowi6 fl6;
1407b55b76b2SDuan Jiong 	struct in6_addr gateway;
1408b55b76b2SDuan Jiong };
1409b55b76b2SDuan Jiong 
1410b55b76b2SDuan Jiong static struct rt6_info *__ip6_route_redirect(struct net *net,
1411b55b76b2SDuan Jiong 					     struct fib6_table *table,
1412b55b76b2SDuan Jiong 					     struct flowi6 *fl6,
1413b55b76b2SDuan Jiong 					     int flags)
1414b55b76b2SDuan Jiong {
1415b55b76b2SDuan Jiong 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1416b55b76b2SDuan Jiong 	struct rt6_info *rt;
1417b55b76b2SDuan Jiong 	struct fib6_node *fn;
1418b55b76b2SDuan Jiong 
1419b55b76b2SDuan Jiong 	/* Get the "current" route for this destination and
1420b55b76b2SDuan Jiong 	 * check if the redirect has come from approriate router.
1421b55b76b2SDuan Jiong 	 *
1422b55b76b2SDuan Jiong 	 * RFC 4861 specifies that redirects should only be
1423b55b76b2SDuan Jiong 	 * accepted if they come from the nexthop to the target.
1424b55b76b2SDuan Jiong 	 * Due to the way the routes are chosen, this notion
1425b55b76b2SDuan Jiong 	 * is a bit fuzzy and one might need to check all possible
1426b55b76b2SDuan Jiong 	 * routes.
1427b55b76b2SDuan Jiong 	 */
1428b55b76b2SDuan Jiong 
1429b55b76b2SDuan Jiong 	read_lock_bh(&table->tb6_lock);
1430b55b76b2SDuan Jiong 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1431b55b76b2SDuan Jiong restart:
1432b55b76b2SDuan Jiong 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1433b55b76b2SDuan Jiong 		if (rt6_check_expired(rt))
1434b55b76b2SDuan Jiong 			continue;
1435b55b76b2SDuan Jiong 		if (rt->dst.error)
1436b55b76b2SDuan Jiong 			break;
1437b55b76b2SDuan Jiong 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1438b55b76b2SDuan Jiong 			continue;
1439b55b76b2SDuan Jiong 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1440b55b76b2SDuan Jiong 			continue;
1441b55b76b2SDuan Jiong 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1442b55b76b2SDuan Jiong 			continue;
1443b55b76b2SDuan Jiong 		break;
1444b55b76b2SDuan Jiong 	}
1445b55b76b2SDuan Jiong 
1446b55b76b2SDuan Jiong 	if (!rt)
1447b55b76b2SDuan Jiong 		rt = net->ipv6.ip6_null_entry;
1448b55b76b2SDuan Jiong 	else if (rt->dst.error) {
1449b55b76b2SDuan Jiong 		rt = net->ipv6.ip6_null_entry;
1450b0a1ba59SMartin KaFai Lau 		goto out;
1451b0a1ba59SMartin KaFai Lau 	}
1452b0a1ba59SMartin KaFai Lau 
1453b0a1ba59SMartin KaFai Lau 	if (rt == net->ipv6.ip6_null_entry) {
1454a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
1455a3c00e46SMartin KaFai Lau 		if (fn)
1456a3c00e46SMartin KaFai Lau 			goto restart;
1457b55b76b2SDuan Jiong 	}
1458a3c00e46SMartin KaFai Lau 
1459b0a1ba59SMartin KaFai Lau out:
1460b55b76b2SDuan Jiong 	dst_hold(&rt->dst);
1461b55b76b2SDuan Jiong 
1462b55b76b2SDuan Jiong 	read_unlock_bh(&table->tb6_lock);
1463b55b76b2SDuan Jiong 
1464b55b76b2SDuan Jiong 	return rt;
1465b55b76b2SDuan Jiong };
1466b55b76b2SDuan Jiong 
1467b55b76b2SDuan Jiong static struct dst_entry *ip6_route_redirect(struct net *net,
1468b55b76b2SDuan Jiong 					const struct flowi6 *fl6,
1469b55b76b2SDuan Jiong 					const struct in6_addr *gateway)
1470b55b76b2SDuan Jiong {
1471b55b76b2SDuan Jiong 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1472b55b76b2SDuan Jiong 	struct ip6rd_flowi rdfl;
1473b55b76b2SDuan Jiong 
1474b55b76b2SDuan Jiong 	rdfl.fl6 = *fl6;
1475b55b76b2SDuan Jiong 	rdfl.gateway = *gateway;
1476b55b76b2SDuan Jiong 
1477b55b76b2SDuan Jiong 	return fib6_rule_lookup(net, &rdfl.fl6,
1478b55b76b2SDuan Jiong 				flags, __ip6_route_redirect);
1479b55b76b2SDuan Jiong }
1480b55b76b2SDuan Jiong 
14813a5ad2eeSDavid S. Miller void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
14823a5ad2eeSDavid S. Miller {
14833a5ad2eeSDavid S. Miller 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
14843a5ad2eeSDavid S. Miller 	struct dst_entry *dst;
14853a5ad2eeSDavid S. Miller 	struct flowi6 fl6;
14863a5ad2eeSDavid S. Miller 
14873a5ad2eeSDavid S. Miller 	memset(&fl6, 0, sizeof(fl6));
1488e374c618SJulian Anastasov 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
14893a5ad2eeSDavid S. Miller 	fl6.flowi6_oif = oif;
14903a5ad2eeSDavid S. Miller 	fl6.flowi6_mark = mark;
14913a5ad2eeSDavid S. Miller 	fl6.daddr = iph->daddr;
14923a5ad2eeSDavid S. Miller 	fl6.saddr = iph->saddr;
14936502ca52SYOSHIFUJI Hideaki / 吉藤英明 	fl6.flowlabel = ip6_flowinfo(iph);
14943a5ad2eeSDavid S. Miller 
1495b55b76b2SDuan Jiong 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
14966700c270SDavid S. Miller 	rt6_do_redirect(dst, NULL, skb);
14973a5ad2eeSDavid S. Miller 	dst_release(dst);
14983a5ad2eeSDavid S. Miller }
14993a5ad2eeSDavid S. Miller EXPORT_SYMBOL_GPL(ip6_redirect);
15003a5ad2eeSDavid S. Miller 
1501c92a59ecSDuan Jiong void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1502c92a59ecSDuan Jiong 			    u32 mark)
1503c92a59ecSDuan Jiong {
1504c92a59ecSDuan Jiong 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1505c92a59ecSDuan Jiong 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1506c92a59ecSDuan Jiong 	struct dst_entry *dst;
1507c92a59ecSDuan Jiong 	struct flowi6 fl6;
1508c92a59ecSDuan Jiong 
1509c92a59ecSDuan Jiong 	memset(&fl6, 0, sizeof(fl6));
1510e374c618SJulian Anastasov 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1511c92a59ecSDuan Jiong 	fl6.flowi6_oif = oif;
1512c92a59ecSDuan Jiong 	fl6.flowi6_mark = mark;
1513c92a59ecSDuan Jiong 	fl6.daddr = msg->dest;
1514c92a59ecSDuan Jiong 	fl6.saddr = iph->daddr;
1515c92a59ecSDuan Jiong 
1516b55b76b2SDuan Jiong 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1517c92a59ecSDuan Jiong 	rt6_do_redirect(dst, NULL, skb);
1518c92a59ecSDuan Jiong 	dst_release(dst);
1519c92a59ecSDuan Jiong }
1520c92a59ecSDuan Jiong 
15213a5ad2eeSDavid S. Miller void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
15223a5ad2eeSDavid S. Miller {
15233a5ad2eeSDavid S. Miller 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
15243a5ad2eeSDavid S. Miller }
15253a5ad2eeSDavid S. Miller EXPORT_SYMBOL_GPL(ip6_sk_redirect);
15263a5ad2eeSDavid S. Miller 
15270dbaee3bSDavid S. Miller static unsigned int ip6_default_advmss(const struct dst_entry *dst)
15281da177e4SLinus Torvalds {
15290dbaee3bSDavid S. Miller 	struct net_device *dev = dst->dev;
15300dbaee3bSDavid S. Miller 	unsigned int mtu = dst_mtu(dst);
15310dbaee3bSDavid S. Miller 	struct net *net = dev_net(dev);
15320dbaee3bSDavid S. Miller 
15331da177e4SLinus Torvalds 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
15341da177e4SLinus Torvalds 
15355578689aSDaniel Lezcano 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
15365578689aSDaniel Lezcano 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
15371da177e4SLinus Torvalds 
15381da177e4SLinus Torvalds 	/*
15391da177e4SLinus Torvalds 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
15401da177e4SLinus Torvalds 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
15411da177e4SLinus Torvalds 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
15421da177e4SLinus Torvalds 	 * rely only on pmtu discovery"
15431da177e4SLinus Torvalds 	 */
15441da177e4SLinus Torvalds 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
15451da177e4SLinus Torvalds 		mtu = IPV6_MAXPLEN;
15461da177e4SLinus Torvalds 	return mtu;
15471da177e4SLinus Torvalds }
15481da177e4SLinus Torvalds 
1549ebb762f2SSteffen Klassert static unsigned int ip6_mtu(const struct dst_entry *dst)
1550d33e4553SDavid S. Miller {
15514b32b5adSMartin KaFai Lau 	const struct rt6_info *rt = (const struct rt6_info *)dst;
15524b32b5adSMartin KaFai Lau 	unsigned int mtu = rt->rt6i_pmtu;
1553d33e4553SDavid S. Miller 	struct inet6_dev *idev;
1554618f9bc7SSteffen Klassert 
1555618f9bc7SSteffen Klassert 	if (mtu)
155630f78d8eSEric Dumazet 		goto out;
1557618f9bc7SSteffen Klassert 
15584b32b5adSMartin KaFai Lau 	mtu = dst_metric_raw(dst, RTAX_MTU);
15594b32b5adSMartin KaFai Lau 	if (mtu)
15604b32b5adSMartin KaFai Lau 		goto out;
15614b32b5adSMartin KaFai Lau 
1562618f9bc7SSteffen Klassert 	mtu = IPV6_MIN_MTU;
1563d33e4553SDavid S. Miller 
1564d33e4553SDavid S. Miller 	rcu_read_lock();
1565d33e4553SDavid S. Miller 	idev = __in6_dev_get(dst->dev);
1566d33e4553SDavid S. Miller 	if (idev)
1567d33e4553SDavid S. Miller 		mtu = idev->cnf.mtu6;
1568d33e4553SDavid S. Miller 	rcu_read_unlock();
1569d33e4553SDavid S. Miller 
157030f78d8eSEric Dumazet out:
157130f78d8eSEric Dumazet 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1572d33e4553SDavid S. Miller }
1573d33e4553SDavid S. Miller 
15743b00944cSYOSHIFUJI Hideaki static struct dst_entry *icmp6_dst_gc_list;
15753b00944cSYOSHIFUJI Hideaki static DEFINE_SPINLOCK(icmp6_dst_lock);
15765d0bbeebSThomas Graf 
15773b00944cSYOSHIFUJI Hideaki struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
157887a11578SDavid S. Miller 				  struct flowi6 *fl6)
15791da177e4SLinus Torvalds {
158087a11578SDavid S. Miller 	struct dst_entry *dst;
15811da177e4SLinus Torvalds 	struct rt6_info *rt;
15821da177e4SLinus Torvalds 	struct inet6_dev *idev = in6_dev_get(dev);
1583c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
15841da177e4SLinus Torvalds 
158538308473SDavid S. Miller 	if (unlikely(!idev))
1586122bdf67SEric Dumazet 		return ERR_PTR(-ENODEV);
15871da177e4SLinus Torvalds 
1588ad706862SMartin KaFai Lau 	rt = ip6_dst_alloc(net, dev, 0);
158938308473SDavid S. Miller 	if (unlikely(!rt)) {
15901da177e4SLinus Torvalds 		in6_dev_put(idev);
159187a11578SDavid S. Miller 		dst = ERR_PTR(-ENOMEM);
15921da177e4SLinus Torvalds 		goto out;
15931da177e4SLinus Torvalds 	}
15941da177e4SLinus Torvalds 
15958e2ec639SYan, Zheng 	rt->dst.flags |= DST_HOST;
15968e2ec639SYan, Zheng 	rt->dst.output  = ip6_output;
1597d8d1f30bSChangli Gao 	atomic_set(&rt->dst.__refcnt, 1);
1598550bab42SJulian Anastasov 	rt->rt6i_gateway  = fl6->daddr;
159987a11578SDavid S. Miller 	rt->rt6i_dst.addr = fl6->daddr;
16008e2ec639SYan, Zheng 	rt->rt6i_dst.plen = 128;
16018e2ec639SYan, Zheng 	rt->rt6i_idev     = idev;
160214edd87dSLi RongQing 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
16031da177e4SLinus Torvalds 
16043b00944cSYOSHIFUJI Hideaki 	spin_lock_bh(&icmp6_dst_lock);
1605d8d1f30bSChangli Gao 	rt->dst.next = icmp6_dst_gc_list;
1606d8d1f30bSChangli Gao 	icmp6_dst_gc_list = &rt->dst;
16073b00944cSYOSHIFUJI Hideaki 	spin_unlock_bh(&icmp6_dst_lock);
16081da177e4SLinus Torvalds 
16095578689aSDaniel Lezcano 	fib6_force_start_gc(net);
16101da177e4SLinus Torvalds 
161187a11578SDavid S. Miller 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
161287a11578SDavid S. Miller 
16131da177e4SLinus Torvalds out:
161487a11578SDavid S. Miller 	return dst;
16151da177e4SLinus Torvalds }
16161da177e4SLinus Torvalds 
16173d0f24a7SStephen Hemminger int icmp6_dst_gc(void)
16181da177e4SLinus Torvalds {
1619e9476e95SHagen Paul Pfeifer 	struct dst_entry *dst, **pprev;
16203d0f24a7SStephen Hemminger 	int more = 0;
16211da177e4SLinus Torvalds 
16223b00944cSYOSHIFUJI Hideaki 	spin_lock_bh(&icmp6_dst_lock);
16233b00944cSYOSHIFUJI Hideaki 	pprev = &icmp6_dst_gc_list;
16245d0bbeebSThomas Graf 
16251da177e4SLinus Torvalds 	while ((dst = *pprev) != NULL) {
16261da177e4SLinus Torvalds 		if (!atomic_read(&dst->__refcnt)) {
16271da177e4SLinus Torvalds 			*pprev = dst->next;
16281da177e4SLinus Torvalds 			dst_free(dst);
16291da177e4SLinus Torvalds 		} else {
16301da177e4SLinus Torvalds 			pprev = &dst->next;
16313d0f24a7SStephen Hemminger 			++more;
16321da177e4SLinus Torvalds 		}
16331da177e4SLinus Torvalds 	}
16341da177e4SLinus Torvalds 
16353b00944cSYOSHIFUJI Hideaki 	spin_unlock_bh(&icmp6_dst_lock);
16365d0bbeebSThomas Graf 
16373d0f24a7SStephen Hemminger 	return more;
16381da177e4SLinus Torvalds }
16391da177e4SLinus Torvalds 
16401e493d19SDavid S. Miller static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
16411e493d19SDavid S. Miller 			    void *arg)
16421e493d19SDavid S. Miller {
16431e493d19SDavid S. Miller 	struct dst_entry *dst, **pprev;
16441e493d19SDavid S. Miller 
16451e493d19SDavid S. Miller 	spin_lock_bh(&icmp6_dst_lock);
16461e493d19SDavid S. Miller 	pprev = &icmp6_dst_gc_list;
16471e493d19SDavid S. Miller 	while ((dst = *pprev) != NULL) {
16481e493d19SDavid S. Miller 		struct rt6_info *rt = (struct rt6_info *) dst;
16491e493d19SDavid S. Miller 		if (func(rt, arg)) {
16501e493d19SDavid S. Miller 			*pprev = dst->next;
16511e493d19SDavid S. Miller 			dst_free(dst);
16521e493d19SDavid S. Miller 		} else {
16531e493d19SDavid S. Miller 			pprev = &dst->next;
16541e493d19SDavid S. Miller 		}
16551e493d19SDavid S. Miller 	}
16561e493d19SDavid S. Miller 	spin_unlock_bh(&icmp6_dst_lock);
16571e493d19SDavid S. Miller }
16581e493d19SDavid S. Miller 
1659569d3645SDaniel Lezcano static int ip6_dst_gc(struct dst_ops *ops)
16601da177e4SLinus Torvalds {
166186393e52SAlexey Dobriyan 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
16627019b78eSDaniel Lezcano 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
16637019b78eSDaniel Lezcano 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
16647019b78eSDaniel Lezcano 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
16657019b78eSDaniel Lezcano 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
16667019b78eSDaniel Lezcano 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1667fc66f95cSEric Dumazet 	int entries;
16681da177e4SLinus Torvalds 
1669fc66f95cSEric Dumazet 	entries = dst_entries_get_fast(ops);
167049a18d86SMichal Kubeček 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1671fc66f95cSEric Dumazet 	    entries <= rt_max_size)
16721da177e4SLinus Torvalds 		goto out;
16731da177e4SLinus Torvalds 
16746891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire++;
167514956643SLi RongQing 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1676fc66f95cSEric Dumazet 	entries = dst_entries_get_slow(ops);
1677fc66f95cSEric Dumazet 	if (entries < ops->gc_thresh)
16787019b78eSDaniel Lezcano 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
16791da177e4SLinus Torvalds out:
16807019b78eSDaniel Lezcano 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1681fc66f95cSEric Dumazet 	return entries > rt_max_size;
16821da177e4SLinus Torvalds }
16831da177e4SLinus Torvalds 
1684e715b6d3SFlorian Westphal static int ip6_convert_metrics(struct mx6_config *mxc,
1685e715b6d3SFlorian Westphal 			       const struct fib6_config *cfg)
1686e715b6d3SFlorian Westphal {
1687e715b6d3SFlorian Westphal 	struct nlattr *nla;
1688e715b6d3SFlorian Westphal 	int remaining;
1689e715b6d3SFlorian Westphal 	u32 *mp;
1690e715b6d3SFlorian Westphal 
169163159f29SIan Morris 	if (!cfg->fc_mx)
1692e715b6d3SFlorian Westphal 		return 0;
1693e715b6d3SFlorian Westphal 
1694e715b6d3SFlorian Westphal 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1695e715b6d3SFlorian Westphal 	if (unlikely(!mp))
1696e715b6d3SFlorian Westphal 		return -ENOMEM;
1697e715b6d3SFlorian Westphal 
1698e715b6d3SFlorian Westphal 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1699e715b6d3SFlorian Westphal 		int type = nla_type(nla);
1700e715b6d3SFlorian Westphal 
1701e715b6d3SFlorian Westphal 		if (type) {
1702ea697639SDaniel Borkmann 			u32 val;
1703ea697639SDaniel Borkmann 
1704e715b6d3SFlorian Westphal 			if (unlikely(type > RTAX_MAX))
1705e715b6d3SFlorian Westphal 				goto err;
1706ea697639SDaniel Borkmann 			if (type == RTAX_CC_ALGO) {
1707ea697639SDaniel Borkmann 				char tmp[TCP_CA_NAME_MAX];
1708e715b6d3SFlorian Westphal 
1709ea697639SDaniel Borkmann 				nla_strlcpy(tmp, nla, sizeof(tmp));
1710ea697639SDaniel Borkmann 				val = tcp_ca_get_key_by_name(tmp);
1711ea697639SDaniel Borkmann 				if (val == TCP_CA_UNSPEC)
1712ea697639SDaniel Borkmann 					goto err;
1713ea697639SDaniel Borkmann 			} else {
1714ea697639SDaniel Borkmann 				val = nla_get_u32(nla);
1715ea697639SDaniel Borkmann 			}
1716ea697639SDaniel Borkmann 
1717ea697639SDaniel Borkmann 			mp[type - 1] = val;
1718e715b6d3SFlorian Westphal 			__set_bit(type - 1, mxc->mx_valid);
1719e715b6d3SFlorian Westphal 		}
1720e715b6d3SFlorian Westphal 	}
1721e715b6d3SFlorian Westphal 
1722e715b6d3SFlorian Westphal 	mxc->mx = mp;
1723e715b6d3SFlorian Westphal 
1724e715b6d3SFlorian Westphal 	return 0;
1725e715b6d3SFlorian Westphal  err:
1726e715b6d3SFlorian Westphal 	kfree(mp);
1727e715b6d3SFlorian Westphal 	return -EINVAL;
1728e715b6d3SFlorian Westphal }
17291da177e4SLinus Torvalds 
173086872cb5SThomas Graf int ip6_route_add(struct fib6_config *cfg)
17311da177e4SLinus Torvalds {
17321da177e4SLinus Torvalds 	int err;
17335578689aSDaniel Lezcano 	struct net *net = cfg->fc_nlinfo.nl_net;
17341da177e4SLinus Torvalds 	struct rt6_info *rt = NULL;
17351da177e4SLinus Torvalds 	struct net_device *dev = NULL;
17361da177e4SLinus Torvalds 	struct inet6_dev *idev = NULL;
1737c71099acSThomas Graf 	struct fib6_table *table;
1738e715b6d3SFlorian Westphal 	struct mx6_config mxc = { .mx = NULL, };
17391da177e4SLinus Torvalds 	int addr_type;
17401da177e4SLinus Torvalds 
174186872cb5SThomas Graf 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
17421da177e4SLinus Torvalds 		return -EINVAL;
17431da177e4SLinus Torvalds #ifndef CONFIG_IPV6_SUBTREES
174486872cb5SThomas Graf 	if (cfg->fc_src_len)
17451da177e4SLinus Torvalds 		return -EINVAL;
17461da177e4SLinus Torvalds #endif
174786872cb5SThomas Graf 	if (cfg->fc_ifindex) {
17481da177e4SLinus Torvalds 		err = -ENODEV;
17495578689aSDaniel Lezcano 		dev = dev_get_by_index(net, cfg->fc_ifindex);
17501da177e4SLinus Torvalds 		if (!dev)
17511da177e4SLinus Torvalds 			goto out;
17521da177e4SLinus Torvalds 		idev = in6_dev_get(dev);
17531da177e4SLinus Torvalds 		if (!idev)
17541da177e4SLinus Torvalds 			goto out;
17551da177e4SLinus Torvalds 	}
17561da177e4SLinus Torvalds 
175786872cb5SThomas Graf 	if (cfg->fc_metric == 0)
175886872cb5SThomas Graf 		cfg->fc_metric = IP6_RT_PRIO_USER;
17591da177e4SLinus Torvalds 
1760c71099acSThomas Graf 	err = -ENOBUFS;
176138308473SDavid S. Miller 	if (cfg->fc_nlinfo.nlh &&
1762d71314b4SMatti Vaittinen 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1763d71314b4SMatti Vaittinen 		table = fib6_get_table(net, cfg->fc_table);
176438308473SDavid S. Miller 		if (!table) {
1765f3213831SJoe Perches 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1766d71314b4SMatti Vaittinen 			table = fib6_new_table(net, cfg->fc_table);
1767d71314b4SMatti Vaittinen 		}
1768d71314b4SMatti Vaittinen 	} else {
1769d71314b4SMatti Vaittinen 		table = fib6_new_table(net, cfg->fc_table);
1770d71314b4SMatti Vaittinen 	}
177138308473SDavid S. Miller 
177238308473SDavid S. Miller 	if (!table)
1773c71099acSThomas Graf 		goto out;
1774c71099acSThomas Graf 
1775ad706862SMartin KaFai Lau 	rt = ip6_dst_alloc(net, NULL,
1776ad706862SMartin KaFai Lau 			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
17771da177e4SLinus Torvalds 
177838308473SDavid S. Miller 	if (!rt) {
17791da177e4SLinus Torvalds 		err = -ENOMEM;
17801da177e4SLinus Torvalds 		goto out;
17811da177e4SLinus Torvalds 	}
17821da177e4SLinus Torvalds 
17831716a961SGao feng 	if (cfg->fc_flags & RTF_EXPIRES)
17841716a961SGao feng 		rt6_set_expires(rt, jiffies +
17851716a961SGao feng 				clock_t_to_jiffies(cfg->fc_expires));
17861716a961SGao feng 	else
17871716a961SGao feng 		rt6_clean_expires(rt);
17881da177e4SLinus Torvalds 
178986872cb5SThomas Graf 	if (cfg->fc_protocol == RTPROT_UNSPEC)
179086872cb5SThomas Graf 		cfg->fc_protocol = RTPROT_BOOT;
179186872cb5SThomas Graf 	rt->rt6i_protocol = cfg->fc_protocol;
179286872cb5SThomas Graf 
179386872cb5SThomas Graf 	addr_type = ipv6_addr_type(&cfg->fc_dst);
17941da177e4SLinus Torvalds 
17951da177e4SLinus Torvalds 	if (addr_type & IPV6_ADDR_MULTICAST)
1796d8d1f30bSChangli Gao 		rt->dst.input = ip6_mc_input;
1797ab79ad14SMaciej Żenczykowski 	else if (cfg->fc_flags & RTF_LOCAL)
1798ab79ad14SMaciej Żenczykowski 		rt->dst.input = ip6_input;
17991da177e4SLinus Torvalds 	else
1800d8d1f30bSChangli Gao 		rt->dst.input = ip6_forward;
18011da177e4SLinus Torvalds 
1802d8d1f30bSChangli Gao 	rt->dst.output = ip6_output;
18031da177e4SLinus Torvalds 
180486872cb5SThomas Graf 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
180586872cb5SThomas Graf 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1806afc4eef8SMartin KaFai Lau 	if (rt->rt6i_dst.plen == 128)
180711d53b49SDavid S. Miller 		rt->dst.flags |= DST_HOST;
18081da177e4SLinus Torvalds 
18091da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
181086872cb5SThomas Graf 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
181186872cb5SThomas Graf 	rt->rt6i_src.plen = cfg->fc_src_len;
18121da177e4SLinus Torvalds #endif
18131da177e4SLinus Torvalds 
181486872cb5SThomas Graf 	rt->rt6i_metric = cfg->fc_metric;
18151da177e4SLinus Torvalds 
18161da177e4SLinus Torvalds 	/* We cannot add true routes via loopback here,
18171da177e4SLinus Torvalds 	   they would result in kernel looping; promote them to reject routes
18181da177e4SLinus Torvalds 	 */
181986872cb5SThomas Graf 	if ((cfg->fc_flags & RTF_REJECT) ||
182038308473SDavid S. Miller 	    (dev && (dev->flags & IFF_LOOPBACK) &&
182138308473SDavid S. Miller 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
182238308473SDavid S. Miller 	     !(cfg->fc_flags & RTF_LOCAL))) {
18231da177e4SLinus Torvalds 		/* hold loopback dev/idev if we haven't done so. */
18245578689aSDaniel Lezcano 		if (dev != net->loopback_dev) {
18251da177e4SLinus Torvalds 			if (dev) {
18261da177e4SLinus Torvalds 				dev_put(dev);
18271da177e4SLinus Torvalds 				in6_dev_put(idev);
18281da177e4SLinus Torvalds 			}
18295578689aSDaniel Lezcano 			dev = net->loopback_dev;
18301da177e4SLinus Torvalds 			dev_hold(dev);
18311da177e4SLinus Torvalds 			idev = in6_dev_get(dev);
18321da177e4SLinus Torvalds 			if (!idev) {
18331da177e4SLinus Torvalds 				err = -ENODEV;
18341da177e4SLinus Torvalds 				goto out;
18351da177e4SLinus Torvalds 			}
18361da177e4SLinus Torvalds 		}
18371da177e4SLinus Torvalds 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1838ef2c7d7bSNicolas Dichtel 		switch (cfg->fc_type) {
1839ef2c7d7bSNicolas Dichtel 		case RTN_BLACKHOLE:
1840ef2c7d7bSNicolas Dichtel 			rt->dst.error = -EINVAL;
1841aad88724SEric Dumazet 			rt->dst.output = dst_discard_sk;
18427150aedeSKamala R 			rt->dst.input = dst_discard;
1843ef2c7d7bSNicolas Dichtel 			break;
1844ef2c7d7bSNicolas Dichtel 		case RTN_PROHIBIT:
1845ef2c7d7bSNicolas Dichtel 			rt->dst.error = -EACCES;
18467150aedeSKamala R 			rt->dst.output = ip6_pkt_prohibit_out;
18477150aedeSKamala R 			rt->dst.input = ip6_pkt_prohibit;
1848ef2c7d7bSNicolas Dichtel 			break;
1849b4949ab2SNicolas Dichtel 		case RTN_THROW:
1850ef2c7d7bSNicolas Dichtel 		default:
18517150aedeSKamala R 			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
18527150aedeSKamala R 					: -ENETUNREACH;
18537150aedeSKamala R 			rt->dst.output = ip6_pkt_discard_out;
18547150aedeSKamala R 			rt->dst.input = ip6_pkt_discard;
1855ef2c7d7bSNicolas Dichtel 			break;
1856ef2c7d7bSNicolas Dichtel 		}
18571da177e4SLinus Torvalds 		goto install_route;
18581da177e4SLinus Torvalds 	}
18591da177e4SLinus Torvalds 
186086872cb5SThomas Graf 	if (cfg->fc_flags & RTF_GATEWAY) {
1861b71d1d42SEric Dumazet 		const struct in6_addr *gw_addr;
18621da177e4SLinus Torvalds 		int gwa_type;
18631da177e4SLinus Torvalds 
186486872cb5SThomas Graf 		gw_addr = &cfg->fc_gateway;
1865330567b7SFlorian Westphal 		gwa_type = ipv6_addr_type(gw_addr);
186648ed7b26SFlorian Westphal 
186748ed7b26SFlorian Westphal 		/* if gw_addr is local we will fail to detect this in case
186848ed7b26SFlorian Westphal 		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
186948ed7b26SFlorian Westphal 		 * will return already-added prefix route via interface that
187048ed7b26SFlorian Westphal 		 * prefix route was assigned to, which might be non-loopback.
187148ed7b26SFlorian Westphal 		 */
187248ed7b26SFlorian Westphal 		err = -EINVAL;
1873330567b7SFlorian Westphal 		if (ipv6_chk_addr_and_flags(net, gw_addr,
1874330567b7SFlorian Westphal 					    gwa_type & IPV6_ADDR_LINKLOCAL ?
1875330567b7SFlorian Westphal 					    dev : NULL, 0, 0))
187648ed7b26SFlorian Westphal 			goto out;
187748ed7b26SFlorian Westphal 
18784e3fd7a0SAlexey Dobriyan 		rt->rt6i_gateway = *gw_addr;
18791da177e4SLinus Torvalds 
18801da177e4SLinus Torvalds 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
18811da177e4SLinus Torvalds 			struct rt6_info *grt;
18821da177e4SLinus Torvalds 
18831da177e4SLinus Torvalds 			/* IPv6 strictly inhibits using not link-local
18841da177e4SLinus Torvalds 			   addresses as nexthop address.
18851da177e4SLinus Torvalds 			   Otherwise, router will not able to send redirects.
18861da177e4SLinus Torvalds 			   It is very good, but in some (rare!) circumstances
18871da177e4SLinus Torvalds 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
18881da177e4SLinus Torvalds 			   some exceptions. --ANK
18891da177e4SLinus Torvalds 			 */
18901da177e4SLinus Torvalds 			if (!(gwa_type & IPV6_ADDR_UNICAST))
18911da177e4SLinus Torvalds 				goto out;
18921da177e4SLinus Torvalds 
18935578689aSDaniel Lezcano 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
18941da177e4SLinus Torvalds 
18951da177e4SLinus Torvalds 			err = -EHOSTUNREACH;
189638308473SDavid S. Miller 			if (!grt)
18971da177e4SLinus Torvalds 				goto out;
18981da177e4SLinus Torvalds 			if (dev) {
1899d1918542SDavid S. Miller 				if (dev != grt->dst.dev) {
190094e187c0SAmerigo Wang 					ip6_rt_put(grt);
19011da177e4SLinus Torvalds 					goto out;
19021da177e4SLinus Torvalds 				}
19031da177e4SLinus Torvalds 			} else {
1904d1918542SDavid S. Miller 				dev = grt->dst.dev;
19051da177e4SLinus Torvalds 				idev = grt->rt6i_idev;
19061da177e4SLinus Torvalds 				dev_hold(dev);
19071da177e4SLinus Torvalds 				in6_dev_hold(grt->rt6i_idev);
19081da177e4SLinus Torvalds 			}
19091da177e4SLinus Torvalds 			if (!(grt->rt6i_flags & RTF_GATEWAY))
19101da177e4SLinus Torvalds 				err = 0;
191194e187c0SAmerigo Wang 			ip6_rt_put(grt);
19121da177e4SLinus Torvalds 
19131da177e4SLinus Torvalds 			if (err)
19141da177e4SLinus Torvalds 				goto out;
19151da177e4SLinus Torvalds 		}
19161da177e4SLinus Torvalds 		err = -EINVAL;
191738308473SDavid S. Miller 		if (!dev || (dev->flags & IFF_LOOPBACK))
19181da177e4SLinus Torvalds 			goto out;
19191da177e4SLinus Torvalds 	}
19201da177e4SLinus Torvalds 
19211da177e4SLinus Torvalds 	err = -ENODEV;
192238308473SDavid S. Miller 	if (!dev)
19231da177e4SLinus Torvalds 		goto out;
19241da177e4SLinus Torvalds 
1925c3968a85SDaniel Walter 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1926c3968a85SDaniel Walter 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1927c3968a85SDaniel Walter 			err = -EINVAL;
1928c3968a85SDaniel Walter 			goto out;
1929c3968a85SDaniel Walter 		}
19304e3fd7a0SAlexey Dobriyan 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1931c3968a85SDaniel Walter 		rt->rt6i_prefsrc.plen = 128;
1932c3968a85SDaniel Walter 	} else
1933c3968a85SDaniel Walter 		rt->rt6i_prefsrc.plen = 0;
1934c3968a85SDaniel Walter 
193586872cb5SThomas Graf 	rt->rt6i_flags = cfg->fc_flags;
19361da177e4SLinus Torvalds 
19371da177e4SLinus Torvalds install_route:
1938d8d1f30bSChangli Gao 	rt->dst.dev = dev;
19391da177e4SLinus Torvalds 	rt->rt6i_idev = idev;
1940c71099acSThomas Graf 	rt->rt6i_table = table;
194163152fc0SDaniel Lezcano 
1942c346dca1SYOSHIFUJI Hideaki 	cfg->fc_nlinfo.nl_net = dev_net(dev);
194363152fc0SDaniel Lezcano 
1944e715b6d3SFlorian Westphal 	err = ip6_convert_metrics(&mxc, cfg);
1945e715b6d3SFlorian Westphal 	if (err)
1946e715b6d3SFlorian Westphal 		goto out;
19471da177e4SLinus Torvalds 
1948e715b6d3SFlorian Westphal 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1949e715b6d3SFlorian Westphal 
1950e715b6d3SFlorian Westphal 	kfree(mxc.mx);
1951e715b6d3SFlorian Westphal 	return err;
19521da177e4SLinus Torvalds out:
19531da177e4SLinus Torvalds 	if (dev)
19541da177e4SLinus Torvalds 		dev_put(dev);
19551da177e4SLinus Torvalds 	if (idev)
19561da177e4SLinus Torvalds 		in6_dev_put(idev);
19571da177e4SLinus Torvalds 	if (rt)
1958d8d1f30bSChangli Gao 		dst_free(&rt->dst);
19591da177e4SLinus Torvalds 	return err;
19601da177e4SLinus Torvalds }
19611da177e4SLinus Torvalds 
196286872cb5SThomas Graf static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
19631da177e4SLinus Torvalds {
19641da177e4SLinus Torvalds 	int err;
1965c71099acSThomas Graf 	struct fib6_table *table;
1966d1918542SDavid S. Miller 	struct net *net = dev_net(rt->dst.dev);
19671da177e4SLinus Torvalds 
19686825a26cSGao feng 	if (rt == net->ipv6.ip6_null_entry) {
19696825a26cSGao feng 		err = -ENOENT;
19706825a26cSGao feng 		goto out;
19716825a26cSGao feng 	}
19726c813a72SPatrick McHardy 
1973c71099acSThomas Graf 	table = rt->rt6i_table;
1974c71099acSThomas Graf 	write_lock_bh(&table->tb6_lock);
197586872cb5SThomas Graf 	err = fib6_del(rt, info);
1976c71099acSThomas Graf 	write_unlock_bh(&table->tb6_lock);
19771da177e4SLinus Torvalds 
19786825a26cSGao feng out:
197994e187c0SAmerigo Wang 	ip6_rt_put(rt);
19801da177e4SLinus Torvalds 	return err;
19811da177e4SLinus Torvalds }
19821da177e4SLinus Torvalds 
1983e0a1ad73SThomas Graf int ip6_del_rt(struct rt6_info *rt)
1984e0a1ad73SThomas Graf {
19854d1169c1SDenis V. Lunev 	struct nl_info info = {
1986d1918542SDavid S. Miller 		.nl_net = dev_net(rt->dst.dev),
19874d1169c1SDenis V. Lunev 	};
1988528c4cebSDenis V. Lunev 	return __ip6_del_rt(rt, &info);
1989e0a1ad73SThomas Graf }
1990e0a1ad73SThomas Graf 
199186872cb5SThomas Graf static int ip6_route_del(struct fib6_config *cfg)
19921da177e4SLinus Torvalds {
1993c71099acSThomas Graf 	struct fib6_table *table;
19941da177e4SLinus Torvalds 	struct fib6_node *fn;
19951da177e4SLinus Torvalds 	struct rt6_info *rt;
19961da177e4SLinus Torvalds 	int err = -ESRCH;
19971da177e4SLinus Torvalds 
19985578689aSDaniel Lezcano 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
199938308473SDavid S. Miller 	if (!table)
2000c71099acSThomas Graf 		return err;
20011da177e4SLinus Torvalds 
2002c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
2003c71099acSThomas Graf 
2004c71099acSThomas Graf 	fn = fib6_locate(&table->tb6_root,
200586872cb5SThomas Graf 			 &cfg->fc_dst, cfg->fc_dst_len,
200686872cb5SThomas Graf 			 &cfg->fc_src, cfg->fc_src_len);
20071da177e4SLinus Torvalds 
20081da177e4SLinus Torvalds 	if (fn) {
2009d8d1f30bSChangli Gao 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
20101f56a01fSMartin KaFai Lau 			if ((rt->rt6i_flags & RTF_CACHE) &&
20111f56a01fSMartin KaFai Lau 			    !(cfg->fc_flags & RTF_CACHE))
20121f56a01fSMartin KaFai Lau 				continue;
201386872cb5SThomas Graf 			if (cfg->fc_ifindex &&
2014d1918542SDavid S. Miller 			    (!rt->dst.dev ||
2015d1918542SDavid S. Miller 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
20161da177e4SLinus Torvalds 				continue;
201786872cb5SThomas Graf 			if (cfg->fc_flags & RTF_GATEWAY &&
201886872cb5SThomas Graf 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
20191da177e4SLinus Torvalds 				continue;
202086872cb5SThomas Graf 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
20211da177e4SLinus Torvalds 				continue;
2022d8d1f30bSChangli Gao 			dst_hold(&rt->dst);
2023c71099acSThomas Graf 			read_unlock_bh(&table->tb6_lock);
20241da177e4SLinus Torvalds 
202586872cb5SThomas Graf 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
20261da177e4SLinus Torvalds 		}
20271da177e4SLinus Torvalds 	}
2028c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
20291da177e4SLinus Torvalds 
20301da177e4SLinus Torvalds 	return err;
20311da177e4SLinus Torvalds }
20321da177e4SLinus Torvalds 
20336700c270SDavid S. Miller static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2034a6279458SYOSHIFUJI Hideaki {
2035e8599ff4SDavid S. Miller 	struct net *net = dev_net(skb->dev);
2036a6279458SYOSHIFUJI Hideaki 	struct netevent_redirect netevent;
2037e8599ff4SDavid S. Miller 	struct rt6_info *rt, *nrt = NULL;
2038e8599ff4SDavid S. Miller 	struct ndisc_options ndopts;
2039e8599ff4SDavid S. Miller 	struct inet6_dev *in6_dev;
2040e8599ff4SDavid S. Miller 	struct neighbour *neigh;
204171bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	struct rd_msg *msg;
20426e157b6aSDavid S. Miller 	int optlen, on_link;
20436e157b6aSDavid S. Miller 	u8 *lladdr;
2044e8599ff4SDavid S. Miller 
204529a3cad5SSimon Horman 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
204671bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	optlen -= sizeof(*msg);
2047e8599ff4SDavid S. Miller 
2048e8599ff4SDavid S. Miller 	if (optlen < 0) {
20496e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2050e8599ff4SDavid S. Miller 		return;
2051e8599ff4SDavid S. Miller 	}
2052e8599ff4SDavid S. Miller 
205371bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	msg = (struct rd_msg *)icmp6_hdr(skb);
2054e8599ff4SDavid S. Miller 
205571bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	if (ipv6_addr_is_multicast(&msg->dest)) {
20566e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2057e8599ff4SDavid S. Miller 		return;
2058e8599ff4SDavid S. Miller 	}
2059e8599ff4SDavid S. Miller 
20606e157b6aSDavid S. Miller 	on_link = 0;
206171bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2062e8599ff4SDavid S. Miller 		on_link = 1;
206371bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	} else if (ipv6_addr_type(&msg->target) !=
2064e8599ff4SDavid S. Miller 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
20656e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2066e8599ff4SDavid S. Miller 		return;
2067e8599ff4SDavid S. Miller 	}
2068e8599ff4SDavid S. Miller 
2069e8599ff4SDavid S. Miller 	in6_dev = __in6_dev_get(skb->dev);
2070e8599ff4SDavid S. Miller 	if (!in6_dev)
2071e8599ff4SDavid S. Miller 		return;
2072e8599ff4SDavid S. Miller 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2073e8599ff4SDavid S. Miller 		return;
2074e8599ff4SDavid S. Miller 
2075e8599ff4SDavid S. Miller 	/* RFC2461 8.1:
2076e8599ff4SDavid S. Miller 	 *	The IP source address of the Redirect MUST be the same as the current
2077e8599ff4SDavid S. Miller 	 *	first-hop router for the specified ICMP Destination Address.
2078e8599ff4SDavid S. Miller 	 */
2079e8599ff4SDavid S. Miller 
208071bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
2081e8599ff4SDavid S. Miller 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2082e8599ff4SDavid S. Miller 		return;
2083e8599ff4SDavid S. Miller 	}
20846e157b6aSDavid S. Miller 
20856e157b6aSDavid S. Miller 	lladdr = NULL;
2086e8599ff4SDavid S. Miller 	if (ndopts.nd_opts_tgt_lladdr) {
2087e8599ff4SDavid S. Miller 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2088e8599ff4SDavid S. Miller 					     skb->dev);
2089e8599ff4SDavid S. Miller 		if (!lladdr) {
2090e8599ff4SDavid S. Miller 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2091e8599ff4SDavid S. Miller 			return;
2092e8599ff4SDavid S. Miller 		}
2093e8599ff4SDavid S. Miller 	}
2094e8599ff4SDavid S. Miller 
20956e157b6aSDavid S. Miller 	rt = (struct rt6_info *) dst;
20966e157b6aSDavid S. Miller 	if (rt == net->ipv6.ip6_null_entry) {
20976e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
20986e157b6aSDavid S. Miller 		return;
20996e157b6aSDavid S. Miller 	}
21006e157b6aSDavid S. Miller 
21016e157b6aSDavid S. Miller 	/* Redirect received -> path was valid.
21026e157b6aSDavid S. Miller 	 * Look, redirects are sent only in response to data packets,
21036e157b6aSDavid S. Miller 	 * so that this nexthop apparently is reachable. --ANK
21046e157b6aSDavid S. Miller 	 */
21056e157b6aSDavid S. Miller 	dst_confirm(&rt->dst);
21066e157b6aSDavid S. Miller 
210771bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2108e8599ff4SDavid S. Miller 	if (!neigh)
2109e8599ff4SDavid S. Miller 		return;
2110e8599ff4SDavid S. Miller 
21111da177e4SLinus Torvalds 	/*
21121da177e4SLinus Torvalds 	 *	We have finally decided to accept it.
21131da177e4SLinus Torvalds 	 */
21141da177e4SLinus Torvalds 
21151da177e4SLinus Torvalds 	neigh_update(neigh, lladdr, NUD_STALE,
21161da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
21171da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_OVERRIDE|
21181da177e4SLinus Torvalds 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
21191da177e4SLinus Torvalds 				     NEIGH_UPDATE_F_ISROUTER))
21201da177e4SLinus Torvalds 		     );
21211da177e4SLinus Torvalds 
212283a09abdSMartin KaFai Lau 	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
212338308473SDavid S. Miller 	if (!nrt)
21241da177e4SLinus Torvalds 		goto out;
21251da177e4SLinus Torvalds 
21261da177e4SLinus Torvalds 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
21271da177e4SLinus Torvalds 	if (on_link)
21281da177e4SLinus Torvalds 		nrt->rt6i_flags &= ~RTF_GATEWAY;
21291da177e4SLinus Torvalds 
21304e3fd7a0SAlexey Dobriyan 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
21311da177e4SLinus Torvalds 
213240e22e8fSThomas Graf 	if (ip6_ins_rt(nrt))
21331da177e4SLinus Torvalds 		goto out;
21341da177e4SLinus Torvalds 
2135d8d1f30bSChangli Gao 	netevent.old = &rt->dst;
2136d8d1f30bSChangli Gao 	netevent.new = &nrt->dst;
213771bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	netevent.daddr = &msg->dest;
213860592833SYOSHIFUJI Hideaki / 吉藤英明 	netevent.neigh = neigh;
21398d71740cSTom Tucker 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
21408d71740cSTom Tucker 
21411da177e4SLinus Torvalds 	if (rt->rt6i_flags & RTF_CACHE) {
21426e157b6aSDavid S. Miller 		rt = (struct rt6_info *) dst_clone(&rt->dst);
2143e0a1ad73SThomas Graf 		ip6_del_rt(rt);
21441da177e4SLinus Torvalds 	}
21451da177e4SLinus Torvalds 
21461da177e4SLinus Torvalds out:
2147e8599ff4SDavid S. Miller 	neigh_release(neigh);
21486e157b6aSDavid S. Miller }
21496e157b6aSDavid S. Miller 
21501da177e4SLinus Torvalds /*
21511da177e4SLinus Torvalds  *	Misc support functions
21521da177e4SLinus Torvalds  */
21531da177e4SLinus Torvalds 
21544b32b5adSMartin KaFai Lau static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
21554b32b5adSMartin KaFai Lau {
21564b32b5adSMartin KaFai Lau 	BUG_ON(from->dst.from);
21574b32b5adSMartin KaFai Lau 
21584b32b5adSMartin KaFai Lau 	rt->rt6i_flags &= ~RTF_EXPIRES;
21594b32b5adSMartin KaFai Lau 	dst_hold(&from->dst);
21604b32b5adSMartin KaFai Lau 	rt->dst.from = &from->dst;
21614b32b5adSMartin KaFai Lau 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
21624b32b5adSMartin KaFai Lau }
21634b32b5adSMartin KaFai Lau 
216483a09abdSMartin KaFai Lau static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
21651da177e4SLinus Torvalds {
2166d8d1f30bSChangli Gao 	rt->dst.input = ort->dst.input;
2167d8d1f30bSChangli Gao 	rt->dst.output = ort->dst.output;
216883a09abdSMartin KaFai Lau 	rt->rt6i_dst = ort->rt6i_dst;
2169d8d1f30bSChangli Gao 	rt->dst.error = ort->dst.error;
21701da177e4SLinus Torvalds 	rt->rt6i_idev = ort->rt6i_idev;
21711da177e4SLinus Torvalds 	if (rt->rt6i_idev)
21721da177e4SLinus Torvalds 		in6_dev_hold(rt->rt6i_idev);
2173d8d1f30bSChangli Gao 	rt->dst.lastuse = jiffies;
21744e3fd7a0SAlexey Dobriyan 	rt->rt6i_gateway = ort->rt6i_gateway;
21751716a961SGao feng 	rt->rt6i_flags = ort->rt6i_flags;
21761716a961SGao feng 	rt6_set_from(rt, ort);
217783a09abdSMartin KaFai Lau 	rt->rt6i_metric = ort->rt6i_metric;
21781da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
217983a09abdSMartin KaFai Lau 	rt->rt6i_src = ort->rt6i_src;
21801da177e4SLinus Torvalds #endif
218183a09abdSMartin KaFai Lau 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2182c71099acSThomas Graf 	rt->rt6i_table = ort->rt6i_table;
21831da177e4SLinus Torvalds }
21841da177e4SLinus Torvalds 
218570ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
2186efa2cea0SDaniel Lezcano static struct rt6_info *rt6_get_route_info(struct net *net,
2187b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
2188b71d1d42SEric Dumazet 					   const struct in6_addr *gwaddr, int ifindex)
218970ceb4f5SYOSHIFUJI Hideaki {
219070ceb4f5SYOSHIFUJI Hideaki 	struct fib6_node *fn;
219170ceb4f5SYOSHIFUJI Hideaki 	struct rt6_info *rt = NULL;
2192c71099acSThomas Graf 	struct fib6_table *table;
219370ceb4f5SYOSHIFUJI Hideaki 
2194efa2cea0SDaniel Lezcano 	table = fib6_get_table(net, RT6_TABLE_INFO);
219538308473SDavid S. Miller 	if (!table)
2196c71099acSThomas Graf 		return NULL;
2197c71099acSThomas Graf 
21985744dd9bSLi RongQing 	read_lock_bh(&table->tb6_lock);
2199c71099acSThomas Graf 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
220070ceb4f5SYOSHIFUJI Hideaki 	if (!fn)
220170ceb4f5SYOSHIFUJI Hideaki 		goto out;
220270ceb4f5SYOSHIFUJI Hideaki 
2203d8d1f30bSChangli Gao 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2204d1918542SDavid S. Miller 		if (rt->dst.dev->ifindex != ifindex)
220570ceb4f5SYOSHIFUJI Hideaki 			continue;
220670ceb4f5SYOSHIFUJI Hideaki 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
220770ceb4f5SYOSHIFUJI Hideaki 			continue;
220870ceb4f5SYOSHIFUJI Hideaki 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
220970ceb4f5SYOSHIFUJI Hideaki 			continue;
2210d8d1f30bSChangli Gao 		dst_hold(&rt->dst);
221170ceb4f5SYOSHIFUJI Hideaki 		break;
221270ceb4f5SYOSHIFUJI Hideaki 	}
221370ceb4f5SYOSHIFUJI Hideaki out:
22145744dd9bSLi RongQing 	read_unlock_bh(&table->tb6_lock);
221570ceb4f5SYOSHIFUJI Hideaki 	return rt;
221670ceb4f5SYOSHIFUJI Hideaki }
221770ceb4f5SYOSHIFUJI Hideaki 
2218efa2cea0SDaniel Lezcano static struct rt6_info *rt6_add_route_info(struct net *net,
2219b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
2220b71d1d42SEric Dumazet 					   const struct in6_addr *gwaddr, int ifindex,
222195c96174SEric Dumazet 					   unsigned int pref)
222270ceb4f5SYOSHIFUJI Hideaki {
222386872cb5SThomas Graf 	struct fib6_config cfg = {
222486872cb5SThomas Graf 		.fc_table	= RT6_TABLE_INFO,
2225238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
222686872cb5SThomas Graf 		.fc_ifindex	= ifindex,
222786872cb5SThomas Graf 		.fc_dst_len	= prefixlen,
222886872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
222986872cb5SThomas Graf 				  RTF_UP | RTF_PREF(pref),
223015e47304SEric W. Biederman 		.fc_nlinfo.portid = 0,
2231efa2cea0SDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
2232efa2cea0SDaniel Lezcano 		.fc_nlinfo.nl_net = net,
223386872cb5SThomas Graf 	};
223470ceb4f5SYOSHIFUJI Hideaki 
22354e3fd7a0SAlexey Dobriyan 	cfg.fc_dst = *prefix;
22364e3fd7a0SAlexey Dobriyan 	cfg.fc_gateway = *gwaddr;
223786872cb5SThomas Graf 
2238e317da96SYOSHIFUJI Hideaki 	/* We should treat it as a default route if prefix length is 0. */
2239e317da96SYOSHIFUJI Hideaki 	if (!prefixlen)
224086872cb5SThomas Graf 		cfg.fc_flags |= RTF_DEFAULT;
224170ceb4f5SYOSHIFUJI Hideaki 
224286872cb5SThomas Graf 	ip6_route_add(&cfg);
224370ceb4f5SYOSHIFUJI Hideaki 
2244efa2cea0SDaniel Lezcano 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
224570ceb4f5SYOSHIFUJI Hideaki }
224670ceb4f5SYOSHIFUJI Hideaki #endif
224770ceb4f5SYOSHIFUJI Hideaki 
2248b71d1d42SEric Dumazet struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
22491da177e4SLinus Torvalds {
22501da177e4SLinus Torvalds 	struct rt6_info *rt;
2251c71099acSThomas Graf 	struct fib6_table *table;
22521da177e4SLinus Torvalds 
2253c346dca1SYOSHIFUJI Hideaki 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
225438308473SDavid S. Miller 	if (!table)
2255c71099acSThomas Graf 		return NULL;
22561da177e4SLinus Torvalds 
22575744dd9bSLi RongQing 	read_lock_bh(&table->tb6_lock);
2258d8d1f30bSChangli Gao 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2259d1918542SDavid S. Miller 		if (dev == rt->dst.dev &&
2260045927ffSYOSHIFUJI Hideaki 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
22611da177e4SLinus Torvalds 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
22621da177e4SLinus Torvalds 			break;
22631da177e4SLinus Torvalds 	}
22641da177e4SLinus Torvalds 	if (rt)
2265d8d1f30bSChangli Gao 		dst_hold(&rt->dst);
22665744dd9bSLi RongQing 	read_unlock_bh(&table->tb6_lock);
22671da177e4SLinus Torvalds 	return rt;
22681da177e4SLinus Torvalds }
22691da177e4SLinus Torvalds 
2270b71d1d42SEric Dumazet struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2271ebacaaa0SYOSHIFUJI Hideaki 				     struct net_device *dev,
2272ebacaaa0SYOSHIFUJI Hideaki 				     unsigned int pref)
22731da177e4SLinus Torvalds {
227486872cb5SThomas Graf 	struct fib6_config cfg = {
227586872cb5SThomas Graf 		.fc_table	= RT6_TABLE_DFLT,
2276238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
227786872cb5SThomas Graf 		.fc_ifindex	= dev->ifindex,
227886872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
227986872cb5SThomas Graf 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
228015e47304SEric W. Biederman 		.fc_nlinfo.portid = 0,
22815578689aSDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
2282c346dca1SYOSHIFUJI Hideaki 		.fc_nlinfo.nl_net = dev_net(dev),
228386872cb5SThomas Graf 	};
22841da177e4SLinus Torvalds 
22854e3fd7a0SAlexey Dobriyan 	cfg.fc_gateway = *gwaddr;
22861da177e4SLinus Torvalds 
228786872cb5SThomas Graf 	ip6_route_add(&cfg);
22881da177e4SLinus Torvalds 
22891da177e4SLinus Torvalds 	return rt6_get_dflt_router(gwaddr, dev);
22901da177e4SLinus Torvalds }
22911da177e4SLinus Torvalds 
22927b4da532SDaniel Lezcano void rt6_purge_dflt_routers(struct net *net)
22931da177e4SLinus Torvalds {
22941da177e4SLinus Torvalds 	struct rt6_info *rt;
2295c71099acSThomas Graf 	struct fib6_table *table;
2296c71099acSThomas Graf 
2297c71099acSThomas Graf 	/* NOTE: Keep consistent with rt6_get_dflt_router */
22987b4da532SDaniel Lezcano 	table = fib6_get_table(net, RT6_TABLE_DFLT);
229938308473SDavid S. Miller 	if (!table)
2300c71099acSThomas Graf 		return;
23011da177e4SLinus Torvalds 
23021da177e4SLinus Torvalds restart:
2303c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
2304d8d1f30bSChangli Gao 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
23053e8b0ac3SLorenzo Colitti 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
23063e8b0ac3SLorenzo Colitti 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2307d8d1f30bSChangli Gao 			dst_hold(&rt->dst);
2308c71099acSThomas Graf 			read_unlock_bh(&table->tb6_lock);
2309e0a1ad73SThomas Graf 			ip6_del_rt(rt);
23101da177e4SLinus Torvalds 			goto restart;
23111da177e4SLinus Torvalds 		}
23121da177e4SLinus Torvalds 	}
2313c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
23141da177e4SLinus Torvalds }
23151da177e4SLinus Torvalds 
23165578689aSDaniel Lezcano static void rtmsg_to_fib6_config(struct net *net,
23175578689aSDaniel Lezcano 				 struct in6_rtmsg *rtmsg,
231886872cb5SThomas Graf 				 struct fib6_config *cfg)
231986872cb5SThomas Graf {
232086872cb5SThomas Graf 	memset(cfg, 0, sizeof(*cfg));
232186872cb5SThomas Graf 
232286872cb5SThomas Graf 	cfg->fc_table = RT6_TABLE_MAIN;
232386872cb5SThomas Graf 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
232486872cb5SThomas Graf 	cfg->fc_metric = rtmsg->rtmsg_metric;
232586872cb5SThomas Graf 	cfg->fc_expires = rtmsg->rtmsg_info;
232686872cb5SThomas Graf 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
232786872cb5SThomas Graf 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
232886872cb5SThomas Graf 	cfg->fc_flags = rtmsg->rtmsg_flags;
232986872cb5SThomas Graf 
23305578689aSDaniel Lezcano 	cfg->fc_nlinfo.nl_net = net;
2331f1243c2dSBenjamin Thery 
23324e3fd7a0SAlexey Dobriyan 	cfg->fc_dst = rtmsg->rtmsg_dst;
23334e3fd7a0SAlexey Dobriyan 	cfg->fc_src = rtmsg->rtmsg_src;
23344e3fd7a0SAlexey Dobriyan 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
233586872cb5SThomas Graf }
233686872cb5SThomas Graf 
23375578689aSDaniel Lezcano int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
23381da177e4SLinus Torvalds {
233986872cb5SThomas Graf 	struct fib6_config cfg;
23401da177e4SLinus Torvalds 	struct in6_rtmsg rtmsg;
23411da177e4SLinus Torvalds 	int err;
23421da177e4SLinus Torvalds 
23431da177e4SLinus Torvalds 	switch (cmd) {
23441da177e4SLinus Torvalds 	case SIOCADDRT:		/* Add a route */
23451da177e4SLinus Torvalds 	case SIOCDELRT:		/* Delete a route */
2346af31f412SEric W. Biederman 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
23471da177e4SLinus Torvalds 			return -EPERM;
23481da177e4SLinus Torvalds 		err = copy_from_user(&rtmsg, arg,
23491da177e4SLinus Torvalds 				     sizeof(struct in6_rtmsg));
23501da177e4SLinus Torvalds 		if (err)
23511da177e4SLinus Torvalds 			return -EFAULT;
23521da177e4SLinus Torvalds 
23535578689aSDaniel Lezcano 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
235486872cb5SThomas Graf 
23551da177e4SLinus Torvalds 		rtnl_lock();
23561da177e4SLinus Torvalds 		switch (cmd) {
23571da177e4SLinus Torvalds 		case SIOCADDRT:
235886872cb5SThomas Graf 			err = ip6_route_add(&cfg);
23591da177e4SLinus Torvalds 			break;
23601da177e4SLinus Torvalds 		case SIOCDELRT:
236186872cb5SThomas Graf 			err = ip6_route_del(&cfg);
23621da177e4SLinus Torvalds 			break;
23631da177e4SLinus Torvalds 		default:
23641da177e4SLinus Torvalds 			err = -EINVAL;
23651da177e4SLinus Torvalds 		}
23661da177e4SLinus Torvalds 		rtnl_unlock();
23671da177e4SLinus Torvalds 
23681da177e4SLinus Torvalds 		return err;
23693ff50b79SStephen Hemminger 	}
23701da177e4SLinus Torvalds 
23711da177e4SLinus Torvalds 	return -EINVAL;
23721da177e4SLinus Torvalds }
23731da177e4SLinus Torvalds 
23741da177e4SLinus Torvalds /*
23751da177e4SLinus Torvalds  *	Drop the packet on the floor
23761da177e4SLinus Torvalds  */
23771da177e4SLinus Torvalds 
2378d5fdd6baSBrian Haley static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
23791da177e4SLinus Torvalds {
2380612f09e8SYOSHIFUJI Hideaki 	int type;
2381adf30907SEric Dumazet 	struct dst_entry *dst = skb_dst(skb);
2382612f09e8SYOSHIFUJI Hideaki 	switch (ipstats_mib_noroutes) {
2383612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_INNOROUTES:
23840660e03fSArnaldo Carvalho de Melo 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
238545bb0060SUlrich Weber 		if (type == IPV6_ADDR_ANY) {
23863bd653c8SDenis V. Lunev 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
23873bd653c8SDenis V. Lunev 				      IPSTATS_MIB_INADDRERRORS);
2388612f09e8SYOSHIFUJI Hideaki 			break;
2389612f09e8SYOSHIFUJI Hideaki 		}
2390612f09e8SYOSHIFUJI Hideaki 		/* FALLTHROUGH */
2391612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_OUTNOROUTES:
23923bd653c8SDenis V. Lunev 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
23933bd653c8SDenis V. Lunev 			      ipstats_mib_noroutes);
2394612f09e8SYOSHIFUJI Hideaki 		break;
2395612f09e8SYOSHIFUJI Hideaki 	}
23963ffe533cSAlexey Dobriyan 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
23971da177e4SLinus Torvalds 	kfree_skb(skb);
23981da177e4SLinus Torvalds 	return 0;
23991da177e4SLinus Torvalds }
24001da177e4SLinus Torvalds 
24019ce8ade0SThomas Graf static int ip6_pkt_discard(struct sk_buff *skb)
24029ce8ade0SThomas Graf {
2403612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
24049ce8ade0SThomas Graf }
24059ce8ade0SThomas Graf 
2406aad88724SEric Dumazet static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
24071da177e4SLinus Torvalds {
2408adf30907SEric Dumazet 	skb->dev = skb_dst(skb)->dev;
2409612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
24101da177e4SLinus Torvalds }
24111da177e4SLinus Torvalds 
24129ce8ade0SThomas Graf static int ip6_pkt_prohibit(struct sk_buff *skb)
24139ce8ade0SThomas Graf {
2414612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
24159ce8ade0SThomas Graf }
24169ce8ade0SThomas Graf 
2417aad88724SEric Dumazet static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
24189ce8ade0SThomas Graf {
2419adf30907SEric Dumazet 	skb->dev = skb_dst(skb)->dev;
2420612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
24219ce8ade0SThomas Graf }
24229ce8ade0SThomas Graf 
24231da177e4SLinus Torvalds /*
24241da177e4SLinus Torvalds  *	Allocate a dst for local (unicast / anycast) address.
24251da177e4SLinus Torvalds  */
24261da177e4SLinus Torvalds 
24271da177e4SLinus Torvalds struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
24281da177e4SLinus Torvalds 				    const struct in6_addr *addr,
24298f031519SDavid S. Miller 				    bool anycast)
24301da177e4SLinus Torvalds {
2431c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(idev->dev);
2432a3300ef4SHannes Frederic Sowa 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2433ad706862SMartin KaFai Lau 					    DST_NOCOUNT);
2434a3300ef4SHannes Frederic Sowa 	if (!rt)
24351da177e4SLinus Torvalds 		return ERR_PTR(-ENOMEM);
24361da177e4SLinus Torvalds 
24371da177e4SLinus Torvalds 	in6_dev_hold(idev);
24381da177e4SLinus Torvalds 
243911d53b49SDavid S. Miller 	rt->dst.flags |= DST_HOST;
2440d8d1f30bSChangli Gao 	rt->dst.input = ip6_input;
2441d8d1f30bSChangli Gao 	rt->dst.output = ip6_output;
24421da177e4SLinus Torvalds 	rt->rt6i_idev = idev;
24431da177e4SLinus Torvalds 
24441da177e4SLinus Torvalds 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
244558c4fb86SYOSHIFUJI Hideaki 	if (anycast)
244658c4fb86SYOSHIFUJI Hideaki 		rt->rt6i_flags |= RTF_ANYCAST;
244758c4fb86SYOSHIFUJI Hideaki 	else
24481da177e4SLinus Torvalds 		rt->rt6i_flags |= RTF_LOCAL;
24491da177e4SLinus Torvalds 
2450550bab42SJulian Anastasov 	rt->rt6i_gateway  = *addr;
24514e3fd7a0SAlexey Dobriyan 	rt->rt6i_dst.addr = *addr;
24521da177e4SLinus Torvalds 	rt->rt6i_dst.plen = 128;
24535578689aSDaniel Lezcano 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
24541da177e4SLinus Torvalds 
2455d8d1f30bSChangli Gao 	atomic_set(&rt->dst.__refcnt, 1);
24561da177e4SLinus Torvalds 
24571da177e4SLinus Torvalds 	return rt;
24581da177e4SLinus Torvalds }
24591da177e4SLinus Torvalds 
2460c3968a85SDaniel Walter int ip6_route_get_saddr(struct net *net,
2461c3968a85SDaniel Walter 			struct rt6_info *rt,
2462b71d1d42SEric Dumazet 			const struct in6_addr *daddr,
2463c3968a85SDaniel Walter 			unsigned int prefs,
2464c3968a85SDaniel Walter 			struct in6_addr *saddr)
2465c3968a85SDaniel Walter {
2466e16e888bSMarkus Stenberg 	struct inet6_dev *idev =
2467e16e888bSMarkus Stenberg 		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2468c3968a85SDaniel Walter 	int err = 0;
2469e16e888bSMarkus Stenberg 	if (rt && rt->rt6i_prefsrc.plen)
24704e3fd7a0SAlexey Dobriyan 		*saddr = rt->rt6i_prefsrc.addr;
2471c3968a85SDaniel Walter 	else
2472c3968a85SDaniel Walter 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2473c3968a85SDaniel Walter 					 daddr, prefs, saddr);
2474c3968a85SDaniel Walter 	return err;
2475c3968a85SDaniel Walter }
2476c3968a85SDaniel Walter 
2477c3968a85SDaniel Walter /* remove deleted ip from prefsrc entries */
2478c3968a85SDaniel Walter struct arg_dev_net_ip {
2479c3968a85SDaniel Walter 	struct net_device *dev;
2480c3968a85SDaniel Walter 	struct net *net;
2481c3968a85SDaniel Walter 	struct in6_addr *addr;
2482c3968a85SDaniel Walter };
2483c3968a85SDaniel Walter 
2484c3968a85SDaniel Walter static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2485c3968a85SDaniel Walter {
2486c3968a85SDaniel Walter 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2487c3968a85SDaniel Walter 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2488c3968a85SDaniel Walter 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2489c3968a85SDaniel Walter 
2490d1918542SDavid S. Miller 	if (((void *)rt->dst.dev == dev || !dev) &&
2491c3968a85SDaniel Walter 	    rt != net->ipv6.ip6_null_entry &&
2492c3968a85SDaniel Walter 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2493c3968a85SDaniel Walter 		/* remove prefsrc entry */
2494c3968a85SDaniel Walter 		rt->rt6i_prefsrc.plen = 0;
2495c3968a85SDaniel Walter 	}
2496c3968a85SDaniel Walter 	return 0;
2497c3968a85SDaniel Walter }
2498c3968a85SDaniel Walter 
2499c3968a85SDaniel Walter void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2500c3968a85SDaniel Walter {
2501c3968a85SDaniel Walter 	struct net *net = dev_net(ifp->idev->dev);
2502c3968a85SDaniel Walter 	struct arg_dev_net_ip adni = {
2503c3968a85SDaniel Walter 		.dev = ifp->idev->dev,
2504c3968a85SDaniel Walter 		.net = net,
2505c3968a85SDaniel Walter 		.addr = &ifp->addr,
2506c3968a85SDaniel Walter 	};
25070c3584d5SLi RongQing 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2508c3968a85SDaniel Walter }
2509c3968a85SDaniel Walter 
2510be7a010dSDuan Jiong #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2511be7a010dSDuan Jiong #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2512be7a010dSDuan Jiong 
2513be7a010dSDuan Jiong /* Remove routers and update dst entries when gateway turn into host. */
2514be7a010dSDuan Jiong static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2515be7a010dSDuan Jiong {
2516be7a010dSDuan Jiong 	struct in6_addr *gateway = (struct in6_addr *)arg;
2517be7a010dSDuan Jiong 
2518be7a010dSDuan Jiong 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2519be7a010dSDuan Jiong 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2520be7a010dSDuan Jiong 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2521be7a010dSDuan Jiong 		return -1;
2522be7a010dSDuan Jiong 	}
2523be7a010dSDuan Jiong 	return 0;
2524be7a010dSDuan Jiong }
2525be7a010dSDuan Jiong 
2526be7a010dSDuan Jiong void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2527be7a010dSDuan Jiong {
2528be7a010dSDuan Jiong 	fib6_clean_all(net, fib6_clean_tohost, gateway);
2529be7a010dSDuan Jiong }
2530be7a010dSDuan Jiong 
25318ed67789SDaniel Lezcano struct arg_dev_net {
25328ed67789SDaniel Lezcano 	struct net_device *dev;
25338ed67789SDaniel Lezcano 	struct net *net;
25348ed67789SDaniel Lezcano };
25358ed67789SDaniel Lezcano 
25361da177e4SLinus Torvalds static int fib6_ifdown(struct rt6_info *rt, void *arg)
25371da177e4SLinus Torvalds {
2538bc3ef660Sstephen hemminger 	const struct arg_dev_net *adn = arg;
2539bc3ef660Sstephen hemminger 	const struct net_device *dev = adn->dev;
25408ed67789SDaniel Lezcano 
2541d1918542SDavid S. Miller 	if ((rt->dst.dev == dev || !dev) &&
2542c159d30cSDavid S. Miller 	    rt != adn->net->ipv6.ip6_null_entry)
25431da177e4SLinus Torvalds 		return -1;
2544c159d30cSDavid S. Miller 
25451da177e4SLinus Torvalds 	return 0;
25461da177e4SLinus Torvalds }
25471da177e4SLinus Torvalds 
2548f3db4851SDaniel Lezcano void rt6_ifdown(struct net *net, struct net_device *dev)
25491da177e4SLinus Torvalds {
25508ed67789SDaniel Lezcano 	struct arg_dev_net adn = {
25518ed67789SDaniel Lezcano 		.dev = dev,
25528ed67789SDaniel Lezcano 		.net = net,
25538ed67789SDaniel Lezcano 	};
25548ed67789SDaniel Lezcano 
25550c3584d5SLi RongQing 	fib6_clean_all(net, fib6_ifdown, &adn);
25561e493d19SDavid S. Miller 	icmp6_clean_all(fib6_ifdown, &adn);
25578d0b94afSMartin KaFai Lau 	rt6_uncached_list_flush_dev(net, dev);
25581da177e4SLinus Torvalds }
25591da177e4SLinus Torvalds 
256095c96174SEric Dumazet struct rt6_mtu_change_arg {
25611da177e4SLinus Torvalds 	struct net_device *dev;
256295c96174SEric Dumazet 	unsigned int mtu;
25631da177e4SLinus Torvalds };
25641da177e4SLinus Torvalds 
25651da177e4SLinus Torvalds static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
25661da177e4SLinus Torvalds {
25671da177e4SLinus Torvalds 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
25681da177e4SLinus Torvalds 	struct inet6_dev *idev;
25691da177e4SLinus Torvalds 
25701da177e4SLinus Torvalds 	/* In IPv6 pmtu discovery is not optional,
25711da177e4SLinus Torvalds 	   so that RTAX_MTU lock cannot disable it.
25721da177e4SLinus Torvalds 	   We still use this lock to block changes
25731da177e4SLinus Torvalds 	   caused by addrconf/ndisc.
25741da177e4SLinus Torvalds 	*/
25751da177e4SLinus Torvalds 
25761da177e4SLinus Torvalds 	idev = __in6_dev_get(arg->dev);
257738308473SDavid S. Miller 	if (!idev)
25781da177e4SLinus Torvalds 		return 0;
25791da177e4SLinus Torvalds 
25801da177e4SLinus Torvalds 	/* For administrative MTU increase, there is no way to discover
25811da177e4SLinus Torvalds 	   IPv6 PMTU increase, so PMTU increase should be updated here.
25821da177e4SLinus Torvalds 	   Since RFC 1981 doesn't include administrative MTU increase
25831da177e4SLinus Torvalds 	   update PMTU increase is a MUST. (i.e. jumbo frame)
25841da177e4SLinus Torvalds 	 */
25851da177e4SLinus Torvalds 	/*
25861da177e4SLinus Torvalds 	   If new MTU is less than route PMTU, this new MTU will be the
25871da177e4SLinus Torvalds 	   lowest MTU in the path, update the route PMTU to reflect PMTU
25881da177e4SLinus Torvalds 	   decreases; if new MTU is greater than route PMTU, and the
25891da177e4SLinus Torvalds 	   old MTU is the lowest MTU in the path, update the route PMTU
25901da177e4SLinus Torvalds 	   to reflect the increase. In this case if the other nodes' MTU
25911da177e4SLinus Torvalds 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
25921da177e4SLinus Torvalds 	   PMTU discouvery.
25931da177e4SLinus Torvalds 	 */
2594d1918542SDavid S. Miller 	if (rt->dst.dev == arg->dev &&
25954b32b5adSMartin KaFai Lau 	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
25964b32b5adSMartin KaFai Lau 		if (rt->rt6i_flags & RTF_CACHE) {
25974b32b5adSMartin KaFai Lau 			/* For RTF_CACHE with rt6i_pmtu == 0
25984b32b5adSMartin KaFai Lau 			 * (i.e. a redirected route),
25994b32b5adSMartin KaFai Lau 			 * the metrics of its rt->dst.from has already
26004b32b5adSMartin KaFai Lau 			 * been updated.
26014b32b5adSMartin KaFai Lau 			 */
26024b32b5adSMartin KaFai Lau 			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
26034b32b5adSMartin KaFai Lau 				rt->rt6i_pmtu = arg->mtu;
26044b32b5adSMartin KaFai Lau 		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
2605d8d1f30bSChangli Gao 			   (dst_mtu(&rt->dst) < arg->mtu &&
26064b32b5adSMartin KaFai Lau 			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2607defb3519SDavid S. Miller 			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2608566cfd8fSSimon Arlott 		}
26094b32b5adSMartin KaFai Lau 	}
26101da177e4SLinus Torvalds 	return 0;
26111da177e4SLinus Torvalds }
26121da177e4SLinus Torvalds 
261395c96174SEric Dumazet void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
26141da177e4SLinus Torvalds {
2615c71099acSThomas Graf 	struct rt6_mtu_change_arg arg = {
2616c71099acSThomas Graf 		.dev = dev,
2617c71099acSThomas Graf 		.mtu = mtu,
2618c71099acSThomas Graf 	};
26191da177e4SLinus Torvalds 
26200c3584d5SLi RongQing 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
26211da177e4SLinus Torvalds }
26221da177e4SLinus Torvalds 
2623ef7c79edSPatrick McHardy static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
26245176f91eSThomas Graf 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
262586872cb5SThomas Graf 	[RTA_OIF]               = { .type = NLA_U32 },
2626ab364a6fSThomas Graf 	[RTA_IIF]		= { .type = NLA_U32 },
262786872cb5SThomas Graf 	[RTA_PRIORITY]          = { .type = NLA_U32 },
262886872cb5SThomas Graf 	[RTA_METRICS]           = { .type = NLA_NESTED },
262951ebd318SNicolas Dichtel 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2630c78ba6d6SLubomir Rintel 	[RTA_PREF]              = { .type = NLA_U8 },
263186872cb5SThomas Graf };
263286872cb5SThomas Graf 
263386872cb5SThomas Graf static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
263486872cb5SThomas Graf 			      struct fib6_config *cfg)
26351da177e4SLinus Torvalds {
263686872cb5SThomas Graf 	struct rtmsg *rtm;
263786872cb5SThomas Graf 	struct nlattr *tb[RTA_MAX+1];
2638c78ba6d6SLubomir Rintel 	unsigned int pref;
263986872cb5SThomas Graf 	int err;
26401da177e4SLinus Torvalds 
264186872cb5SThomas Graf 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
264286872cb5SThomas Graf 	if (err < 0)
264386872cb5SThomas Graf 		goto errout;
26441da177e4SLinus Torvalds 
264586872cb5SThomas Graf 	err = -EINVAL;
264686872cb5SThomas Graf 	rtm = nlmsg_data(nlh);
264786872cb5SThomas Graf 	memset(cfg, 0, sizeof(*cfg));
264886872cb5SThomas Graf 
264986872cb5SThomas Graf 	cfg->fc_table = rtm->rtm_table;
265086872cb5SThomas Graf 	cfg->fc_dst_len = rtm->rtm_dst_len;
265186872cb5SThomas Graf 	cfg->fc_src_len = rtm->rtm_src_len;
265286872cb5SThomas Graf 	cfg->fc_flags = RTF_UP;
265386872cb5SThomas Graf 	cfg->fc_protocol = rtm->rtm_protocol;
2654ef2c7d7bSNicolas Dichtel 	cfg->fc_type = rtm->rtm_type;
265586872cb5SThomas Graf 
2656ef2c7d7bSNicolas Dichtel 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2657ef2c7d7bSNicolas Dichtel 	    rtm->rtm_type == RTN_BLACKHOLE ||
2658b4949ab2SNicolas Dichtel 	    rtm->rtm_type == RTN_PROHIBIT ||
2659b4949ab2SNicolas Dichtel 	    rtm->rtm_type == RTN_THROW)
266086872cb5SThomas Graf 		cfg->fc_flags |= RTF_REJECT;
266186872cb5SThomas Graf 
2662ab79ad14SMaciej Żenczykowski 	if (rtm->rtm_type == RTN_LOCAL)
2663ab79ad14SMaciej Żenczykowski 		cfg->fc_flags |= RTF_LOCAL;
2664ab79ad14SMaciej Żenczykowski 
26651f56a01fSMartin KaFai Lau 	if (rtm->rtm_flags & RTM_F_CLONED)
26661f56a01fSMartin KaFai Lau 		cfg->fc_flags |= RTF_CACHE;
26671f56a01fSMartin KaFai Lau 
266815e47304SEric W. Biederman 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
266986872cb5SThomas Graf 	cfg->fc_nlinfo.nlh = nlh;
26703b1e0a65SYOSHIFUJI Hideaki 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
267186872cb5SThomas Graf 
267286872cb5SThomas Graf 	if (tb[RTA_GATEWAY]) {
267367b61f6cSJiri Benc 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
267486872cb5SThomas Graf 		cfg->fc_flags |= RTF_GATEWAY;
26751da177e4SLinus Torvalds 	}
267686872cb5SThomas Graf 
267786872cb5SThomas Graf 	if (tb[RTA_DST]) {
267886872cb5SThomas Graf 		int plen = (rtm->rtm_dst_len + 7) >> 3;
267986872cb5SThomas Graf 
268086872cb5SThomas Graf 		if (nla_len(tb[RTA_DST]) < plen)
268186872cb5SThomas Graf 			goto errout;
268286872cb5SThomas Graf 
268386872cb5SThomas Graf 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
26841da177e4SLinus Torvalds 	}
268586872cb5SThomas Graf 
268686872cb5SThomas Graf 	if (tb[RTA_SRC]) {
268786872cb5SThomas Graf 		int plen = (rtm->rtm_src_len + 7) >> 3;
268886872cb5SThomas Graf 
268986872cb5SThomas Graf 		if (nla_len(tb[RTA_SRC]) < plen)
269086872cb5SThomas Graf 			goto errout;
269186872cb5SThomas Graf 
269286872cb5SThomas Graf 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
26931da177e4SLinus Torvalds 	}
269486872cb5SThomas Graf 
2695c3968a85SDaniel Walter 	if (tb[RTA_PREFSRC])
269667b61f6cSJiri Benc 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2697c3968a85SDaniel Walter 
269886872cb5SThomas Graf 	if (tb[RTA_OIF])
269986872cb5SThomas Graf 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
270086872cb5SThomas Graf 
270186872cb5SThomas Graf 	if (tb[RTA_PRIORITY])
270286872cb5SThomas Graf 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
270386872cb5SThomas Graf 
270486872cb5SThomas Graf 	if (tb[RTA_METRICS]) {
270586872cb5SThomas Graf 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
270686872cb5SThomas Graf 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
27071da177e4SLinus Torvalds 	}
270886872cb5SThomas Graf 
270986872cb5SThomas Graf 	if (tb[RTA_TABLE])
271086872cb5SThomas Graf 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
271186872cb5SThomas Graf 
271251ebd318SNicolas Dichtel 	if (tb[RTA_MULTIPATH]) {
271351ebd318SNicolas Dichtel 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
271451ebd318SNicolas Dichtel 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
271551ebd318SNicolas Dichtel 	}
271651ebd318SNicolas Dichtel 
2717c78ba6d6SLubomir Rintel 	if (tb[RTA_PREF]) {
2718c78ba6d6SLubomir Rintel 		pref = nla_get_u8(tb[RTA_PREF]);
2719c78ba6d6SLubomir Rintel 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
2720c78ba6d6SLubomir Rintel 		    pref != ICMPV6_ROUTER_PREF_HIGH)
2721c78ba6d6SLubomir Rintel 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
2722c78ba6d6SLubomir Rintel 		cfg->fc_flags |= RTF_PREF(pref);
2723c78ba6d6SLubomir Rintel 	}
2724c78ba6d6SLubomir Rintel 
272586872cb5SThomas Graf 	err = 0;
272686872cb5SThomas Graf errout:
272786872cb5SThomas Graf 	return err;
27281da177e4SLinus Torvalds }
27291da177e4SLinus Torvalds 
273051ebd318SNicolas Dichtel static int ip6_route_multipath(struct fib6_config *cfg, int add)
273151ebd318SNicolas Dichtel {
273251ebd318SNicolas Dichtel 	struct fib6_config r_cfg;
273351ebd318SNicolas Dichtel 	struct rtnexthop *rtnh;
273451ebd318SNicolas Dichtel 	int remaining;
273551ebd318SNicolas Dichtel 	int attrlen;
273651ebd318SNicolas Dichtel 	int err = 0, last_err = 0;
273751ebd318SNicolas Dichtel 
273835f1b4e9SMichal Kubeček 	remaining = cfg->fc_mp_len;
273951ebd318SNicolas Dichtel beginning:
274051ebd318SNicolas Dichtel 	rtnh = (struct rtnexthop *)cfg->fc_mp;
274151ebd318SNicolas Dichtel 
274251ebd318SNicolas Dichtel 	/* Parse a Multipath Entry */
274351ebd318SNicolas Dichtel 	while (rtnh_ok(rtnh, remaining)) {
274451ebd318SNicolas Dichtel 		memcpy(&r_cfg, cfg, sizeof(*cfg));
274551ebd318SNicolas Dichtel 		if (rtnh->rtnh_ifindex)
274651ebd318SNicolas Dichtel 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
274751ebd318SNicolas Dichtel 
274851ebd318SNicolas Dichtel 		attrlen = rtnh_attrlen(rtnh);
274951ebd318SNicolas Dichtel 		if (attrlen > 0) {
275051ebd318SNicolas Dichtel 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
275151ebd318SNicolas Dichtel 
275251ebd318SNicolas Dichtel 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
275351ebd318SNicolas Dichtel 			if (nla) {
275467b61f6cSJiri Benc 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
275551ebd318SNicolas Dichtel 				r_cfg.fc_flags |= RTF_GATEWAY;
275651ebd318SNicolas Dichtel 			}
275751ebd318SNicolas Dichtel 		}
275851ebd318SNicolas Dichtel 		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
275951ebd318SNicolas Dichtel 		if (err) {
276051ebd318SNicolas Dichtel 			last_err = err;
276151ebd318SNicolas Dichtel 			/* If we are trying to remove a route, do not stop the
276251ebd318SNicolas Dichtel 			 * loop when ip6_route_del() fails (because next hop is
276351ebd318SNicolas Dichtel 			 * already gone), we should try to remove all next hops.
276451ebd318SNicolas Dichtel 			 */
276551ebd318SNicolas Dichtel 			if (add) {
276651ebd318SNicolas Dichtel 				/* If add fails, we should try to delete all
276751ebd318SNicolas Dichtel 				 * next hops that have been already added.
276851ebd318SNicolas Dichtel 				 */
276951ebd318SNicolas Dichtel 				add = 0;
277035f1b4e9SMichal Kubeček 				remaining = cfg->fc_mp_len - remaining;
277151ebd318SNicolas Dichtel 				goto beginning;
277251ebd318SNicolas Dichtel 			}
277351ebd318SNicolas Dichtel 		}
27741a72418bSNicolas Dichtel 		/* Because each route is added like a single route we remove
277527596472SMichal Kubeček 		 * these flags after the first nexthop: if there is a collision,
277627596472SMichal Kubeček 		 * we have already failed to add the first nexthop:
277727596472SMichal Kubeček 		 * fib6_add_rt2node() has rejected it; when replacing, old
277827596472SMichal Kubeček 		 * nexthops have been replaced by first new, the rest should
277927596472SMichal Kubeček 		 * be added to it.
27801a72418bSNicolas Dichtel 		 */
278127596472SMichal Kubeček 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
278227596472SMichal Kubeček 						     NLM_F_REPLACE);
278351ebd318SNicolas Dichtel 		rtnh = rtnh_next(rtnh, &remaining);
278451ebd318SNicolas Dichtel 	}
278551ebd318SNicolas Dichtel 
278651ebd318SNicolas Dichtel 	return last_err;
278751ebd318SNicolas Dichtel }
278851ebd318SNicolas Dichtel 
2789661d2967SThomas Graf static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
27901da177e4SLinus Torvalds {
279186872cb5SThomas Graf 	struct fib6_config cfg;
279286872cb5SThomas Graf 	int err;
27931da177e4SLinus Torvalds 
279486872cb5SThomas Graf 	err = rtm_to_fib6_config(skb, nlh, &cfg);
279586872cb5SThomas Graf 	if (err < 0)
279686872cb5SThomas Graf 		return err;
279786872cb5SThomas Graf 
279851ebd318SNicolas Dichtel 	if (cfg.fc_mp)
279951ebd318SNicolas Dichtel 		return ip6_route_multipath(&cfg, 0);
280051ebd318SNicolas Dichtel 	else
280186872cb5SThomas Graf 		return ip6_route_del(&cfg);
28021da177e4SLinus Torvalds }
28031da177e4SLinus Torvalds 
2804661d2967SThomas Graf static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
28051da177e4SLinus Torvalds {
280686872cb5SThomas Graf 	struct fib6_config cfg;
280786872cb5SThomas Graf 	int err;
28081da177e4SLinus Torvalds 
280986872cb5SThomas Graf 	err = rtm_to_fib6_config(skb, nlh, &cfg);
281086872cb5SThomas Graf 	if (err < 0)
281186872cb5SThomas Graf 		return err;
281286872cb5SThomas Graf 
281351ebd318SNicolas Dichtel 	if (cfg.fc_mp)
281451ebd318SNicolas Dichtel 		return ip6_route_multipath(&cfg, 1);
281551ebd318SNicolas Dichtel 	else
281686872cb5SThomas Graf 		return ip6_route_add(&cfg);
28171da177e4SLinus Torvalds }
28181da177e4SLinus Torvalds 
2819339bf98fSThomas Graf static inline size_t rt6_nlmsg_size(void)
2820339bf98fSThomas Graf {
2821339bf98fSThomas Graf 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2822339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_SRC */
2823339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_DST */
2824339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_GATEWAY */
2825339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_PREFSRC */
2826339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_TABLE */
2827339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_IIF */
2828339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_OIF */
2829339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_PRIORITY */
28306a2b9ce0SNoriaki TAKAMIYA 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2831ea697639SDaniel Borkmann 	       + nla_total_size(sizeof(struct rta_cacheinfo))
2832c78ba6d6SLubomir Rintel 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
2833c78ba6d6SLubomir Rintel 	       + nla_total_size(1); /* RTA_PREF */
2834339bf98fSThomas Graf }
2835339bf98fSThomas Graf 
2836191cd582SBrian Haley static int rt6_fill_node(struct net *net,
2837191cd582SBrian Haley 			 struct sk_buff *skb, struct rt6_info *rt,
28380d51aa80SJamal Hadi Salim 			 struct in6_addr *dst, struct in6_addr *src,
283915e47304SEric W. Biederman 			 int iif, int type, u32 portid, u32 seq,
28407bc570c8SYOSHIFUJI Hideaki 			 int prefix, int nowait, unsigned int flags)
28411da177e4SLinus Torvalds {
28424b32b5adSMartin KaFai Lau 	u32 metrics[RTAX_MAX];
28431da177e4SLinus Torvalds 	struct rtmsg *rtm;
28441da177e4SLinus Torvalds 	struct nlmsghdr *nlh;
2845e3703b3dSThomas Graf 	long expires;
28469e762a4aSPatrick McHardy 	u32 table;
28471da177e4SLinus Torvalds 
28481da177e4SLinus Torvalds 	if (prefix) {	/* user wants prefix routes only */
28491da177e4SLinus Torvalds 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
28501da177e4SLinus Torvalds 			/* success since this is not a prefix route */
28511da177e4SLinus Torvalds 			return 1;
28521da177e4SLinus Torvalds 		}
28531da177e4SLinus Torvalds 	}
28541da177e4SLinus Torvalds 
285515e47304SEric W. Biederman 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
285638308473SDavid S. Miller 	if (!nlh)
285726932566SPatrick McHardy 		return -EMSGSIZE;
28582d7202bfSThomas Graf 
28592d7202bfSThomas Graf 	rtm = nlmsg_data(nlh);
28601da177e4SLinus Torvalds 	rtm->rtm_family = AF_INET6;
28611da177e4SLinus Torvalds 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
28621da177e4SLinus Torvalds 	rtm->rtm_src_len = rt->rt6i_src.plen;
28631da177e4SLinus Torvalds 	rtm->rtm_tos = 0;
2864c71099acSThomas Graf 	if (rt->rt6i_table)
28659e762a4aSPatrick McHardy 		table = rt->rt6i_table->tb6_id;
2866c71099acSThomas Graf 	else
28679e762a4aSPatrick McHardy 		table = RT6_TABLE_UNSPEC;
28689e762a4aSPatrick McHardy 	rtm->rtm_table = table;
2869c78679e8SDavid S. Miller 	if (nla_put_u32(skb, RTA_TABLE, table))
2870c78679e8SDavid S. Miller 		goto nla_put_failure;
2871ef2c7d7bSNicolas Dichtel 	if (rt->rt6i_flags & RTF_REJECT) {
2872ef2c7d7bSNicolas Dichtel 		switch (rt->dst.error) {
2873ef2c7d7bSNicolas Dichtel 		case -EINVAL:
2874ef2c7d7bSNicolas Dichtel 			rtm->rtm_type = RTN_BLACKHOLE;
2875ef2c7d7bSNicolas Dichtel 			break;
2876ef2c7d7bSNicolas Dichtel 		case -EACCES:
2877ef2c7d7bSNicolas Dichtel 			rtm->rtm_type = RTN_PROHIBIT;
2878ef2c7d7bSNicolas Dichtel 			break;
2879b4949ab2SNicolas Dichtel 		case -EAGAIN:
2880b4949ab2SNicolas Dichtel 			rtm->rtm_type = RTN_THROW;
2881b4949ab2SNicolas Dichtel 			break;
2882ef2c7d7bSNicolas Dichtel 		default:
28831da177e4SLinus Torvalds 			rtm->rtm_type = RTN_UNREACHABLE;
2884ef2c7d7bSNicolas Dichtel 			break;
2885ef2c7d7bSNicolas Dichtel 		}
2886ef2c7d7bSNicolas Dichtel 	}
2887ab79ad14SMaciej Żenczykowski 	else if (rt->rt6i_flags & RTF_LOCAL)
2888ab79ad14SMaciej Żenczykowski 		rtm->rtm_type = RTN_LOCAL;
2889d1918542SDavid S. Miller 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
28901da177e4SLinus Torvalds 		rtm->rtm_type = RTN_LOCAL;
28911da177e4SLinus Torvalds 	else
28921da177e4SLinus Torvalds 		rtm->rtm_type = RTN_UNICAST;
28931da177e4SLinus Torvalds 	rtm->rtm_flags = 0;
28941da177e4SLinus Torvalds 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
28951da177e4SLinus Torvalds 	rtm->rtm_protocol = rt->rt6i_protocol;
28961da177e4SLinus Torvalds 	if (rt->rt6i_flags & RTF_DYNAMIC)
28971da177e4SLinus Torvalds 		rtm->rtm_protocol = RTPROT_REDIRECT;
2898f0396f60SDenis Ovsienko 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
2899f0396f60SDenis Ovsienko 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
29001da177e4SLinus Torvalds 			rtm->rtm_protocol = RTPROT_RA;
2901f0396f60SDenis Ovsienko 		else
2902f0396f60SDenis Ovsienko 			rtm->rtm_protocol = RTPROT_KERNEL;
2903f0396f60SDenis Ovsienko 	}
29041da177e4SLinus Torvalds 
29051da177e4SLinus Torvalds 	if (rt->rt6i_flags & RTF_CACHE)
29061da177e4SLinus Torvalds 		rtm->rtm_flags |= RTM_F_CLONED;
29071da177e4SLinus Torvalds 
29081da177e4SLinus Torvalds 	if (dst) {
2909930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_DST, dst))
2910c78679e8SDavid S. Miller 			goto nla_put_failure;
29111da177e4SLinus Torvalds 		rtm->rtm_dst_len = 128;
29121da177e4SLinus Torvalds 	} else if (rtm->rtm_dst_len)
2913930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
2914c78679e8SDavid S. Miller 			goto nla_put_failure;
29151da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
29161da177e4SLinus Torvalds 	if (src) {
2917930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_SRC, src))
2918c78679e8SDavid S. Miller 			goto nla_put_failure;
29191da177e4SLinus Torvalds 		rtm->rtm_src_len = 128;
2920c78679e8SDavid S. Miller 	} else if (rtm->rtm_src_len &&
2921930345eaSJiri Benc 		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
2922c78679e8SDavid S. Miller 		goto nla_put_failure;
29231da177e4SLinus Torvalds #endif
29247bc570c8SYOSHIFUJI Hideaki 	if (iif) {
29257bc570c8SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_MROUTE
29267bc570c8SYOSHIFUJI Hideaki 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
29278229efdaSBenjamin Thery 			int err = ip6mr_get_route(net, skb, rtm, nowait);
29287bc570c8SYOSHIFUJI Hideaki 			if (err <= 0) {
29297bc570c8SYOSHIFUJI Hideaki 				if (!nowait) {
29307bc570c8SYOSHIFUJI Hideaki 					if (err == 0)
29317bc570c8SYOSHIFUJI Hideaki 						return 0;
29327bc570c8SYOSHIFUJI Hideaki 					goto nla_put_failure;
29337bc570c8SYOSHIFUJI Hideaki 				} else {
29347bc570c8SYOSHIFUJI Hideaki 					if (err == -EMSGSIZE)
29357bc570c8SYOSHIFUJI Hideaki 						goto nla_put_failure;
29367bc570c8SYOSHIFUJI Hideaki 				}
29377bc570c8SYOSHIFUJI Hideaki 			}
29387bc570c8SYOSHIFUJI Hideaki 		} else
29397bc570c8SYOSHIFUJI Hideaki #endif
2940c78679e8SDavid S. Miller 			if (nla_put_u32(skb, RTA_IIF, iif))
2941c78679e8SDavid S. Miller 				goto nla_put_failure;
29427bc570c8SYOSHIFUJI Hideaki 	} else if (dst) {
29431da177e4SLinus Torvalds 		struct in6_addr saddr_buf;
2944c78679e8SDavid S. Miller 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2945930345eaSJiri Benc 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2946c78679e8SDavid S. Miller 			goto nla_put_failure;
2947c3968a85SDaniel Walter 	}
2948c3968a85SDaniel Walter 
2949c3968a85SDaniel Walter 	if (rt->rt6i_prefsrc.plen) {
2950c3968a85SDaniel Walter 		struct in6_addr saddr_buf;
29514e3fd7a0SAlexey Dobriyan 		saddr_buf = rt->rt6i_prefsrc.addr;
2952930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2953c78679e8SDavid S. Miller 			goto nla_put_failure;
29541da177e4SLinus Torvalds 	}
29552d7202bfSThomas Graf 
29564b32b5adSMartin KaFai Lau 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
29574b32b5adSMartin KaFai Lau 	if (rt->rt6i_pmtu)
29584b32b5adSMartin KaFai Lau 		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
29594b32b5adSMartin KaFai Lau 	if (rtnetlink_put_metrics(skb, metrics) < 0)
29602d7202bfSThomas Graf 		goto nla_put_failure;
29612d7202bfSThomas Graf 
2962dd0cbf29SYOSHIFUJI Hideaki / 吉藤英明 	if (rt->rt6i_flags & RTF_GATEWAY) {
2963930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
296494f826b8SEric Dumazet 			goto nla_put_failure;
296594f826b8SEric Dumazet 	}
29662d7202bfSThomas Graf 
2967c78679e8SDavid S. Miller 	if (rt->dst.dev &&
2968c78679e8SDavid S. Miller 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2969c78679e8SDavid S. Miller 		goto nla_put_failure;
2970c78679e8SDavid S. Miller 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2971c78679e8SDavid S. Miller 		goto nla_put_failure;
29728253947eSLi Wei 
29738253947eSLi Wei 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
297469cdf8f9SYOSHIFUJI Hideaki 
297587a50699SDavid S. Miller 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2976e3703b3dSThomas Graf 		goto nla_put_failure;
29771da177e4SLinus Torvalds 
2978c78ba6d6SLubomir Rintel 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
2979c78ba6d6SLubomir Rintel 		goto nla_put_failure;
2980c78ba6d6SLubomir Rintel 
2981053c095aSJohannes Berg 	nlmsg_end(skb, nlh);
2982053c095aSJohannes Berg 	return 0;
29832d7202bfSThomas Graf 
29842d7202bfSThomas Graf nla_put_failure:
298526932566SPatrick McHardy 	nlmsg_cancel(skb, nlh);
298626932566SPatrick McHardy 	return -EMSGSIZE;
29871da177e4SLinus Torvalds }
29881da177e4SLinus Torvalds 
29891b43af54SPatrick McHardy int rt6_dump_route(struct rt6_info *rt, void *p_arg)
29901da177e4SLinus Torvalds {
29911da177e4SLinus Torvalds 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
29921da177e4SLinus Torvalds 	int prefix;
29931da177e4SLinus Torvalds 
29942d7202bfSThomas Graf 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
29952d7202bfSThomas Graf 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
29961da177e4SLinus Torvalds 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
29971da177e4SLinus Torvalds 	} else
29981da177e4SLinus Torvalds 		prefix = 0;
29991da177e4SLinus Torvalds 
3000191cd582SBrian Haley 	return rt6_fill_node(arg->net,
3001191cd582SBrian Haley 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
300215e47304SEric W. Biederman 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
30037bc570c8SYOSHIFUJI Hideaki 		     prefix, 0, NLM_F_MULTI);
30041da177e4SLinus Torvalds }
30051da177e4SLinus Torvalds 
3006661d2967SThomas Graf static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
30071da177e4SLinus Torvalds {
30083b1e0a65SYOSHIFUJI Hideaki 	struct net *net = sock_net(in_skb->sk);
3009ab364a6fSThomas Graf 	struct nlattr *tb[RTA_MAX+1];
30101da177e4SLinus Torvalds 	struct rt6_info *rt;
3011ab364a6fSThomas Graf 	struct sk_buff *skb;
3012ab364a6fSThomas Graf 	struct rtmsg *rtm;
30134c9483b2SDavid S. Miller 	struct flowi6 fl6;
301472331bc0SShmulik Ladkani 	int err, iif = 0, oif = 0;
3015ab364a6fSThomas Graf 
3016ab364a6fSThomas Graf 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3017ab364a6fSThomas Graf 	if (err < 0)
3018ab364a6fSThomas Graf 		goto errout;
3019ab364a6fSThomas Graf 
3020ab364a6fSThomas Graf 	err = -EINVAL;
30214c9483b2SDavid S. Miller 	memset(&fl6, 0, sizeof(fl6));
3022ab364a6fSThomas Graf 
3023ab364a6fSThomas Graf 	if (tb[RTA_SRC]) {
3024ab364a6fSThomas Graf 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3025ab364a6fSThomas Graf 			goto errout;
3026ab364a6fSThomas Graf 
30274e3fd7a0SAlexey Dobriyan 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3028ab364a6fSThomas Graf 	}
3029ab364a6fSThomas Graf 
3030ab364a6fSThomas Graf 	if (tb[RTA_DST]) {
3031ab364a6fSThomas Graf 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3032ab364a6fSThomas Graf 			goto errout;
3033ab364a6fSThomas Graf 
30344e3fd7a0SAlexey Dobriyan 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3035ab364a6fSThomas Graf 	}
3036ab364a6fSThomas Graf 
3037ab364a6fSThomas Graf 	if (tb[RTA_IIF])
3038ab364a6fSThomas Graf 		iif = nla_get_u32(tb[RTA_IIF]);
3039ab364a6fSThomas Graf 
3040ab364a6fSThomas Graf 	if (tb[RTA_OIF])
304172331bc0SShmulik Ladkani 		oif = nla_get_u32(tb[RTA_OIF]);
3042ab364a6fSThomas Graf 
30432e47b291SLorenzo Colitti 	if (tb[RTA_MARK])
30442e47b291SLorenzo Colitti 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
30452e47b291SLorenzo Colitti 
3046ab364a6fSThomas Graf 	if (iif) {
3047ab364a6fSThomas Graf 		struct net_device *dev;
304872331bc0SShmulik Ladkani 		int flags = 0;
304972331bc0SShmulik Ladkani 
30505578689aSDaniel Lezcano 		dev = __dev_get_by_index(net, iif);
3051ab364a6fSThomas Graf 		if (!dev) {
3052ab364a6fSThomas Graf 			err = -ENODEV;
3053ab364a6fSThomas Graf 			goto errout;
3054ab364a6fSThomas Graf 		}
305572331bc0SShmulik Ladkani 
305672331bc0SShmulik Ladkani 		fl6.flowi6_iif = iif;
305772331bc0SShmulik Ladkani 
305872331bc0SShmulik Ladkani 		if (!ipv6_addr_any(&fl6.saddr))
305972331bc0SShmulik Ladkani 			flags |= RT6_LOOKUP_F_HAS_SADDR;
306072331bc0SShmulik Ladkani 
306172331bc0SShmulik Ladkani 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
306272331bc0SShmulik Ladkani 							       flags);
306372331bc0SShmulik Ladkani 	} else {
306472331bc0SShmulik Ladkani 		fl6.flowi6_oif = oif;
306572331bc0SShmulik Ladkani 
306672331bc0SShmulik Ladkani 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3067ab364a6fSThomas Graf 	}
30681da177e4SLinus Torvalds 
30691da177e4SLinus Torvalds 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
307038308473SDavid S. Miller 	if (!skb) {
307194e187c0SAmerigo Wang 		ip6_rt_put(rt);
3072ab364a6fSThomas Graf 		err = -ENOBUFS;
3073ab364a6fSThomas Graf 		goto errout;
3074ab364a6fSThomas Graf 	}
30751da177e4SLinus Torvalds 
30761da177e4SLinus Torvalds 	/* Reserve room for dummy headers, this skb can pass
30771da177e4SLinus Torvalds 	   through good chunk of routing engine.
30781da177e4SLinus Torvalds 	 */
3079459a98edSArnaldo Carvalho de Melo 	skb_reset_mac_header(skb);
30801da177e4SLinus Torvalds 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
30811da177e4SLinus Torvalds 
3082d8d1f30bSChangli Gao 	skb_dst_set(skb, &rt->dst);
30831da177e4SLinus Torvalds 
30844c9483b2SDavid S. Miller 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
308515e47304SEric W. Biederman 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
30867bc570c8SYOSHIFUJI Hideaki 			    nlh->nlmsg_seq, 0, 0, 0);
30871da177e4SLinus Torvalds 	if (err < 0) {
3088ab364a6fSThomas Graf 		kfree_skb(skb);
3089ab364a6fSThomas Graf 		goto errout;
30901da177e4SLinus Torvalds 	}
30911da177e4SLinus Torvalds 
309215e47304SEric W. Biederman 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3093ab364a6fSThomas Graf errout:
30941da177e4SLinus Torvalds 	return err;
30951da177e4SLinus Torvalds }
30961da177e4SLinus Torvalds 
309786872cb5SThomas Graf void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
30981da177e4SLinus Torvalds {
30991da177e4SLinus Torvalds 	struct sk_buff *skb;
31005578689aSDaniel Lezcano 	struct net *net = info->nl_net;
3101528c4cebSDenis V. Lunev 	u32 seq;
3102528c4cebSDenis V. Lunev 	int err;
31030d51aa80SJamal Hadi Salim 
3104528c4cebSDenis V. Lunev 	err = -ENOBUFS;
310538308473SDavid S. Miller 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
310686872cb5SThomas Graf 
3107339bf98fSThomas Graf 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
310838308473SDavid S. Miller 	if (!skb)
310921713ebcSThomas Graf 		goto errout;
31101da177e4SLinus Torvalds 
3111191cd582SBrian Haley 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
311215e47304SEric W. Biederman 				event, info->portid, seq, 0, 0, 0);
311326932566SPatrick McHardy 	if (err < 0) {
311426932566SPatrick McHardy 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
311526932566SPatrick McHardy 		WARN_ON(err == -EMSGSIZE);
311626932566SPatrick McHardy 		kfree_skb(skb);
311726932566SPatrick McHardy 		goto errout;
311826932566SPatrick McHardy 	}
311915e47304SEric W. Biederman 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
31205578689aSDaniel Lezcano 		    info->nlh, gfp_any());
31211ce85fe4SPablo Neira Ayuso 	return;
312221713ebcSThomas Graf errout:
312321713ebcSThomas Graf 	if (err < 0)
31245578689aSDaniel Lezcano 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
31251da177e4SLinus Torvalds }
31261da177e4SLinus Torvalds 
31278ed67789SDaniel Lezcano static int ip6_route_dev_notify(struct notifier_block *this,
3128351638e7SJiri Pirko 				unsigned long event, void *ptr)
31298ed67789SDaniel Lezcano {
3130351638e7SJiri Pirko 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3131c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
31328ed67789SDaniel Lezcano 
31338ed67789SDaniel Lezcano 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3134d8d1f30bSChangli Gao 		net->ipv6.ip6_null_entry->dst.dev = dev;
31358ed67789SDaniel Lezcano 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
31368ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3137d8d1f30bSChangli Gao 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
31388ed67789SDaniel Lezcano 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3139d8d1f30bSChangli Gao 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
31408ed67789SDaniel Lezcano 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
31418ed67789SDaniel Lezcano #endif
31428ed67789SDaniel Lezcano 	}
31438ed67789SDaniel Lezcano 
31448ed67789SDaniel Lezcano 	return NOTIFY_OK;
31458ed67789SDaniel Lezcano }
31468ed67789SDaniel Lezcano 
31471da177e4SLinus Torvalds /*
31481da177e4SLinus Torvalds  *	/proc
31491da177e4SLinus Torvalds  */
31501da177e4SLinus Torvalds 
31511da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
31521da177e4SLinus Torvalds 
/*
 * File operations for the "ipv6_route" proc entry: opened through
 * ipv6_route_open(), read via the seq_file helpers and released with
 * seq_release_net().
 */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
316033120b30SAlexey Dobriyan 
31611da177e4SLinus Torvalds static int rt6_stats_seq_show(struct seq_file *seq, void *v)
31621da177e4SLinus Torvalds {
316369ddb805SDaniel Lezcano 	struct net *net = (struct net *)seq->private;
31641da177e4SLinus Torvalds 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
316569ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_nodes,
316669ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_route_nodes,
316769ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_alloc,
316869ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_entries,
316969ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_cache,
3170fc66f95cSEric Dumazet 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
317169ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_discarded_routes);
31721da177e4SLinus Torvalds 
31731da177e4SLinus Torvalds 	return 0;
31741da177e4SLinus Torvalds }
31751da177e4SLinus Torvalds 
/*
 * Open handler for the "rt6_stats" proc entry: sets up a single-record,
 * namespace-aware seq_file whose content is produced by rt6_stats_seq_show().
 */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
318069ddb805SDaniel Lezcano 
/*
 * File operations for the "rt6_stats" proc entry; pairs
 * rt6_stats_seq_open() with single_release_net().
 */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
31881da177e4SLinus Torvalds #endif	/* CONFIG_PROC_FS */
31891da177e4SLinus Torvalds 
31901da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
31911da177e4SLinus Torvalds 
31921da177e4SLinus Torvalds static
3193fe2c6338SJoe Perches int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
31941da177e4SLinus Torvalds 			      void __user *buffer, size_t *lenp, loff_t *ppos)
31951da177e4SLinus Torvalds {
3196c486da34SLucian Adrian Grijincu 	struct net *net;
3197c486da34SLucian Adrian Grijincu 	int delay;
3198c486da34SLucian Adrian Grijincu 	if (!write)
3199c486da34SLucian Adrian Grijincu 		return -EINVAL;
3200c486da34SLucian Adrian Grijincu 
3201c486da34SLucian Adrian Grijincu 	net = (struct net *)ctl->extra1;
3202c486da34SLucian Adrian Grijincu 	delay = net->ipv6.sysctl.flush_delay;
32038d65af78SAlexey Dobriyan 	proc_dointvec(ctl, write, buffer, lenp, ppos);
32042ac3ac8fSMichal Kubeček 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
32051da177e4SLinus Torvalds 	return 0;
32061da177e4SLinus Torvalds }
32071da177e4SLinus Torvalds 
/*
 * Template for the per-namespace IPv6 route sysctl table.
 *
 * ipv6_route_sysctl_init() duplicates this array and rewrites the .data
 * pointer of entries 0..9 by index, so the order of the entries here must
 * stay in sync with that function.  The .data values below refer to
 * init_net (or to ip6_dst_ops_template for "gc_thresh").
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		/* Write-only; handled by ipv6_sysctl_rtcache_flush(). */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/*
		 * Same backing variable as "gc_min_interval", exposed through
		 * proc_dointvec_ms_jiffies instead of proc_dointvec_jiffies.
		 */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }	/* empty terminating entry */
};
32811da177e4SLinus Torvalds 
32822c8c1e72SAlexey Dobriyan struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3283760f2d01SDaniel Lezcano {
3284760f2d01SDaniel Lezcano 	struct ctl_table *table;
3285760f2d01SDaniel Lezcano 
3286760f2d01SDaniel Lezcano 	table = kmemdup(ipv6_route_table_template,
3287760f2d01SDaniel Lezcano 			sizeof(ipv6_route_table_template),
3288760f2d01SDaniel Lezcano 			GFP_KERNEL);
32895ee09105SYOSHIFUJI Hideaki 
32905ee09105SYOSHIFUJI Hideaki 	if (table) {
32915ee09105SYOSHIFUJI Hideaki 		table[0].data = &net->ipv6.sysctl.flush_delay;
3292c486da34SLucian Adrian Grijincu 		table[0].extra1 = net;
329386393e52SAlexey Dobriyan 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
32945ee09105SYOSHIFUJI Hideaki 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
32955ee09105SYOSHIFUJI Hideaki 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
32965ee09105SYOSHIFUJI Hideaki 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
32975ee09105SYOSHIFUJI Hideaki 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
32985ee09105SYOSHIFUJI Hideaki 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
32995ee09105SYOSHIFUJI Hideaki 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
33005ee09105SYOSHIFUJI Hideaki 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
33019c69fabeSAlexey Dobriyan 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3302464dc801SEric W. Biederman 
3303464dc801SEric W. Biederman 		/* Don't export sysctls to unprivileged users */
3304464dc801SEric W. Biederman 		if (net->user_ns != &init_user_ns)
3305464dc801SEric W. Biederman 			table[0].procname = NULL;
33065ee09105SYOSHIFUJI Hideaki 	}
33075ee09105SYOSHIFUJI Hideaki 
3308760f2d01SDaniel Lezcano 	return table;
3309760f2d01SDaniel Lezcano }
33101da177e4SLinus Torvalds #endif
33111da177e4SLinus Torvalds 
33122c8c1e72SAlexey Dobriyan static int __net_init ip6_route_net_init(struct net *net)
3313cdb18761SDaniel Lezcano {
3314633d424bSPavel Emelyanov 	int ret = -ENOMEM;
33158ed67789SDaniel Lezcano 
331686393e52SAlexey Dobriyan 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
331786393e52SAlexey Dobriyan 	       sizeof(net->ipv6.ip6_dst_ops));
3318f2fc6a54SBenjamin Thery 
3319fc66f95cSEric Dumazet 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3320fc66f95cSEric Dumazet 		goto out_ip6_dst_ops;
3321fc66f95cSEric Dumazet 
33228ed67789SDaniel Lezcano 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
33238ed67789SDaniel Lezcano 					   sizeof(*net->ipv6.ip6_null_entry),
33248ed67789SDaniel Lezcano 					   GFP_KERNEL);
33258ed67789SDaniel Lezcano 	if (!net->ipv6.ip6_null_entry)
3326fc66f95cSEric Dumazet 		goto out_ip6_dst_entries;
3327d8d1f30bSChangli Gao 	net->ipv6.ip6_null_entry->dst.path =
33288ed67789SDaniel Lezcano 		(struct dst_entry *)net->ipv6.ip6_null_entry;
3329d8d1f30bSChangli Gao 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
333062fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
333162fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
33328ed67789SDaniel Lezcano 
33338ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
33348ed67789SDaniel Lezcano 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
33358ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_prohibit_entry),
33368ed67789SDaniel Lezcano 					       GFP_KERNEL);
333768fffc67SPeter Zijlstra 	if (!net->ipv6.ip6_prohibit_entry)
333868fffc67SPeter Zijlstra 		goto out_ip6_null_entry;
3339d8d1f30bSChangli Gao 	net->ipv6.ip6_prohibit_entry->dst.path =
33408ed67789SDaniel Lezcano 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3341d8d1f30bSChangli Gao 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
334262fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
334362fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
33448ed67789SDaniel Lezcano 
33458ed67789SDaniel Lezcano 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
33468ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
33478ed67789SDaniel Lezcano 					       GFP_KERNEL);
334868fffc67SPeter Zijlstra 	if (!net->ipv6.ip6_blk_hole_entry)
334968fffc67SPeter Zijlstra 		goto out_ip6_prohibit_entry;
3350d8d1f30bSChangli Gao 	net->ipv6.ip6_blk_hole_entry->dst.path =
33518ed67789SDaniel Lezcano 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3352d8d1f30bSChangli Gao 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
335362fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
335462fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
33558ed67789SDaniel Lezcano #endif
33568ed67789SDaniel Lezcano 
3357b339a47cSPeter Zijlstra 	net->ipv6.sysctl.flush_delay = 0;
3358b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3359b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3360b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3361b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3362b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3363b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3364b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3365b339a47cSPeter Zijlstra 
33666891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
33676891a346SBenjamin Thery 
33688ed67789SDaniel Lezcano 	ret = 0;
33698ed67789SDaniel Lezcano out:
33708ed67789SDaniel Lezcano 	return ret;
3371f2fc6a54SBenjamin Thery 
337268fffc67SPeter Zijlstra #ifdef CONFIG_IPV6_MULTIPLE_TABLES
337368fffc67SPeter Zijlstra out_ip6_prohibit_entry:
337468fffc67SPeter Zijlstra 	kfree(net->ipv6.ip6_prohibit_entry);
337568fffc67SPeter Zijlstra out_ip6_null_entry:
337668fffc67SPeter Zijlstra 	kfree(net->ipv6.ip6_null_entry);
337768fffc67SPeter Zijlstra #endif
3378fc66f95cSEric Dumazet out_ip6_dst_entries:
3379fc66f95cSEric Dumazet 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3380f2fc6a54SBenjamin Thery out_ip6_dst_ops:
3381f2fc6a54SBenjamin Thery 	goto out;
3382cdb18761SDaniel Lezcano }
3383cdb18761SDaniel Lezcano 
/*
 * Per-namespace teardown: free the special route entries owned by @net
 * (prohibit/blackhole only exist with CONFIG_IPV6_MULTIPLE_TABLES) and
 * destroy the entry counter of the namespace's dst ops.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
3393cdb18761SDaniel Lezcano 
/*
 * Late per-namespace init: with CONFIG_PROC_FS, create the "ipv6_route" and
 * "rt6_stats" entries under the namespace's proc_net directory.
 *
 * The proc_create() results are not checked; this function always
 * returns 0.
 */
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
#endif
	return 0;
}
3402d189634eSThomas Graf 
/*
 * Late per-namespace exit: with CONFIG_PROC_FS, remove the "ipv6_route" and
 * "rt6_stats" entries created by ip6_route_net_init_late().
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
3410d189634eSThomas Graf 
/* Per-namespace setup/teardown of the special route entries and sysctl defaults. */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
3415cdb18761SDaniel Lezcano 
3416c3426b47SDavid S. Miller static int __net_init ipv6_inetpeer_init(struct net *net)
3417c3426b47SDavid S. Miller {
3418c3426b47SDavid S. Miller 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3419c3426b47SDavid S. Miller 
3420c3426b47SDavid S. Miller 	if (!bp)
3421c3426b47SDavid S. Miller 		return -ENOMEM;
3422c3426b47SDavid S. Miller 	inet_peer_base_init(bp);
3423c3426b47SDavid S. Miller 	net->ipv6.peers = bp;
3424c3426b47SDavid S. Miller 	return 0;
3425c3426b47SDavid S. Miller }
3426c3426b47SDavid S. Miller 
/*
 * Release the inet_peer base of @net: the namespace pointer is cleared
 * first, then the tree is invalidated and the base itself is freed.
 */
static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv6.peers;

	net->ipv6.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}
3435c3426b47SDavid S. Miller 
/* Per-namespace lifetime of the IPv6 inet_peer base. */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
3440c3426b47SDavid S. Miller 
/* Per-namespace creation/removal of the route proc entries. */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
3445d189634eSThomas Graf 
/* Netdevice notifier that runs ip6_route_dev_notify() at priority 0. */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
34508ed67789SDaniel Lezcano 
3451433d49c3SDaniel Lezcano int __init ip6_route_init(void)
34521da177e4SLinus Torvalds {
3453433d49c3SDaniel Lezcano 	int ret;
34548d0b94afSMartin KaFai Lau 	int cpu;
3455433d49c3SDaniel Lezcano 
34569a7ec3a9SDaniel Lezcano 	ret = -ENOMEM;
34579a7ec3a9SDaniel Lezcano 	ip6_dst_ops_template.kmem_cachep =
34589a7ec3a9SDaniel Lezcano 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
34599a7ec3a9SDaniel Lezcano 				  SLAB_HWCACHE_ALIGN, NULL);
34609a7ec3a9SDaniel Lezcano 	if (!ip6_dst_ops_template.kmem_cachep)
3461c19a28e1SFernando Carrijo 		goto out;
346214e50e57SDavid S. Miller 
3463fc66f95cSEric Dumazet 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
34648ed67789SDaniel Lezcano 	if (ret)
3465bdb3289fSDaniel Lezcano 		goto out_kmem_cache;
3466bdb3289fSDaniel Lezcano 
3467c3426b47SDavid S. Miller 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3468c3426b47SDavid S. Miller 	if (ret)
3469e8803b6cSDavid S. Miller 		goto out_dst_entries;
34702a0c451aSThomas Graf 
34717e52b33bSDavid S. Miller 	ret = register_pernet_subsys(&ip6_route_net_ops);
34727e52b33bSDavid S. Miller 	if (ret)
34737e52b33bSDavid S. Miller 		goto out_register_inetpeer;
3474c3426b47SDavid S. Miller 
34755dc121e9SArnaud Ebalard 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
34765dc121e9SArnaud Ebalard 
34778ed67789SDaniel Lezcano 	/* Registering of the loopback is done before this portion of code,
34788ed67789SDaniel Lezcano 	 * the loopback reference in rt6_info will not be taken, do it
34798ed67789SDaniel Lezcano 	 * manually for init_net */
3480d8d1f30bSChangli Gao 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
34818ed67789SDaniel Lezcano 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3482bdb3289fSDaniel Lezcano   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3483d8d1f30bSChangli Gao 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
34848ed67789SDaniel Lezcano 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3485d8d1f30bSChangli Gao 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
34868ed67789SDaniel Lezcano 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3487bdb3289fSDaniel Lezcano   #endif
3488e8803b6cSDavid S. Miller 	ret = fib6_init();
3489433d49c3SDaniel Lezcano 	if (ret)
34908ed67789SDaniel Lezcano 		goto out_register_subsys;
3491433d49c3SDaniel Lezcano 
3492433d49c3SDaniel Lezcano 	ret = xfrm6_init();
3493433d49c3SDaniel Lezcano 	if (ret)
3494e8803b6cSDavid S. Miller 		goto out_fib6_init;
3495c35b7e72SDaniel Lezcano 
3496433d49c3SDaniel Lezcano 	ret = fib6_rules_init();
3497433d49c3SDaniel Lezcano 	if (ret)
3498433d49c3SDaniel Lezcano 		goto xfrm6_init;
34997e5449c2SDaniel Lezcano 
3500d189634eSThomas Graf 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3501d189634eSThomas Graf 	if (ret)
3502d189634eSThomas Graf 		goto fib6_rules_init;
3503d189634eSThomas Graf 
3504433d49c3SDaniel Lezcano 	ret = -ENOBUFS;
3505c7ac8679SGreg Rose 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3506c7ac8679SGreg Rose 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3507c7ac8679SGreg Rose 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3508d189634eSThomas Graf 		goto out_register_late_subsys;
3509433d49c3SDaniel Lezcano 
35108ed67789SDaniel Lezcano 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3511cdb18761SDaniel Lezcano 	if (ret)
3512d189634eSThomas Graf 		goto out_register_late_subsys;
35138ed67789SDaniel Lezcano 
35148d0b94afSMartin KaFai Lau 	for_each_possible_cpu(cpu) {
35158d0b94afSMartin KaFai Lau 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
35168d0b94afSMartin KaFai Lau 
35178d0b94afSMartin KaFai Lau 		INIT_LIST_HEAD(&ul->head);
35188d0b94afSMartin KaFai Lau 		spin_lock_init(&ul->lock);
35198d0b94afSMartin KaFai Lau 	}
35208d0b94afSMartin KaFai Lau 
3521433d49c3SDaniel Lezcano out:
3522433d49c3SDaniel Lezcano 	return ret;
3523433d49c3SDaniel Lezcano 
3524d189634eSThomas Graf out_register_late_subsys:
3525d189634eSThomas Graf 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3526433d49c3SDaniel Lezcano fib6_rules_init:
3527433d49c3SDaniel Lezcano 	fib6_rules_cleanup();
3528433d49c3SDaniel Lezcano xfrm6_init:
3529433d49c3SDaniel Lezcano 	xfrm6_fini();
35302a0c451aSThomas Graf out_fib6_init:
35312a0c451aSThomas Graf 	fib6_gc_cleanup();
35328ed67789SDaniel Lezcano out_register_subsys:
35338ed67789SDaniel Lezcano 	unregister_pernet_subsys(&ip6_route_net_ops);
35347e52b33bSDavid S. Miller out_register_inetpeer:
35357e52b33bSDavid S. Miller 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3536fc66f95cSEric Dumazet out_dst_entries:
3537fc66f95cSEric Dumazet 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3538433d49c3SDaniel Lezcano out_kmem_cache:
3539f2fc6a54SBenjamin Thery 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3540433d49c3SDaniel Lezcano 	goto out;
35411da177e4SLinus Torvalds }
35421da177e4SLinus Torvalds 
/*
 * Tear down what ip6_route_init() set up: the netdevice notifier, the late
 * per-namespace ops, fib6 rules, xfrm6, the fib6 GC, the inetpeer and route
 * per-namespace ops, the blackhole dst entry counter and finally the
 * rt6_info slab cache.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
3555