xref: /openbmc/linux/net/ipv6/route.c (revision 3d0f24a74e7957593a5622eb5c04ed6860dd8391)
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
131da177e4SLinus Torvalds 
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
261da177e4SLinus Torvalds 
274fc268d2SRandy Dunlap #include <linux/capability.h>
281da177e4SLinus Torvalds #include <linux/errno.h>
291da177e4SLinus Torvalds #include <linux/types.h>
301da177e4SLinus Torvalds #include <linux/times.h>
311da177e4SLinus Torvalds #include <linux/socket.h>
321da177e4SLinus Torvalds #include <linux/sockios.h>
331da177e4SLinus Torvalds #include <linux/net.h>
341da177e4SLinus Torvalds #include <linux/route.h>
351da177e4SLinus Torvalds #include <linux/netdevice.h>
361da177e4SLinus Torvalds #include <linux/in6.h>
377bc570c8SYOSHIFUJI Hideaki #include <linux/mroute6.h>
381da177e4SLinus Torvalds #include <linux/init.h>
391da177e4SLinus Torvalds #include <linux/if_arp.h>
401da177e4SLinus Torvalds #include <linux/proc_fs.h>
411da177e4SLinus Torvalds #include <linux/seq_file.h>
425b7c931dSDaniel Lezcano #include <linux/nsproxy.h>
43457c4cbcSEric W. Biederman #include <net/net_namespace.h>
441da177e4SLinus Torvalds #include <net/snmp.h>
451da177e4SLinus Torvalds #include <net/ipv6.h>
461da177e4SLinus Torvalds #include <net/ip6_fib.h>
471da177e4SLinus Torvalds #include <net/ip6_route.h>
481da177e4SLinus Torvalds #include <net/ndisc.h>
491da177e4SLinus Torvalds #include <net/addrconf.h>
501da177e4SLinus Torvalds #include <net/tcp.h>
511da177e4SLinus Torvalds #include <linux/rtnetlink.h>
521da177e4SLinus Torvalds #include <net/dst.h>
531da177e4SLinus Torvalds #include <net/xfrm.h>
548d71740cSTom Tucker #include <net/netevent.h>
5521713ebcSThomas Graf #include <net/netlink.h>
561da177e4SLinus Torvalds 
571da177e4SLinus Torvalds #include <asm/uaccess.h>
581da177e4SLinus Torvalds 
591da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
601da177e4SLinus Torvalds #include <linux/sysctl.h>
611da177e4SLinus Torvalds #endif
621da177e4SLinus Torvalds 
/*
 * Debug verbosity for this file.  Set to 3 to get tracing; any lower
 * value compiles RDBG() and RT6_TRACE() down to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing disabled: both helpers are no-ops. */
#define RDBG(x)
#define RT6_TRACE(x...) do { } while (0)
#else
/* RDBG takes a fully parenthesised printk argument list. */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

#define CLONE_OFFLINK_ROUTE 0
751da177e4SLinus Torvalds 
761da177e4SLinus Torvalds static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
771da177e4SLinus Torvalds static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
781da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *);
791da177e4SLinus Torvalds static void		ip6_dst_destroy(struct dst_entry *);
801da177e4SLinus Torvalds static void		ip6_dst_ifdown(struct dst_entry *,
811da177e4SLinus Torvalds 				       struct net_device *dev, int how);
82569d3645SDaniel Lezcano static int		 ip6_dst_gc(struct dst_ops *ops);
831da177e4SLinus Torvalds 
841da177e4SLinus Torvalds static int		ip6_pkt_discard(struct sk_buff *skb);
851da177e4SLinus Torvalds static int		ip6_pkt_discard_out(struct sk_buff *skb);
861da177e4SLinus Torvalds static void		ip6_link_failure(struct sk_buff *skb);
871da177e4SLinus Torvalds static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
881da177e4SLinus Torvalds 
8970ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
90efa2cea0SDaniel Lezcano static struct rt6_info *rt6_add_route_info(struct net *net,
91efa2cea0SDaniel Lezcano 					   struct in6_addr *prefix, int prefixlen,
9270ceb4f5SYOSHIFUJI Hideaki 					   struct in6_addr *gwaddr, int ifindex,
9370ceb4f5SYOSHIFUJI Hideaki 					   unsigned pref);
94efa2cea0SDaniel Lezcano static struct rt6_info *rt6_get_route_info(struct net *net,
95efa2cea0SDaniel Lezcano 					   struct in6_addr *prefix, int prefixlen,
9670ceb4f5SYOSHIFUJI Hideaki 					   struct in6_addr *gwaddr, int ifindex);
9770ceb4f5SYOSHIFUJI Hideaki #endif
9870ceb4f5SYOSHIFUJI Hideaki 
999a7ec3a9SDaniel Lezcano static struct dst_ops ip6_dst_ops_template = {
1001da177e4SLinus Torvalds 	.family			=	AF_INET6,
1011da177e4SLinus Torvalds 	.protocol		=	__constant_htons(ETH_P_IPV6),
1021da177e4SLinus Torvalds 	.gc			=	ip6_dst_gc,
1031da177e4SLinus Torvalds 	.gc_thresh		=	1024,
1041da177e4SLinus Torvalds 	.check			=	ip6_dst_check,
1051da177e4SLinus Torvalds 	.destroy		=	ip6_dst_destroy,
1061da177e4SLinus Torvalds 	.ifdown			=	ip6_dst_ifdown,
1071da177e4SLinus Torvalds 	.negative_advice	=	ip6_negative_advice,
1081da177e4SLinus Torvalds 	.link_failure		=	ip6_link_failure,
1091da177e4SLinus Torvalds 	.update_pmtu		=	ip6_rt_update_pmtu,
1101ac06e03SHerbert Xu 	.local_out		=	__ip6_local_out,
1111da177e4SLinus Torvalds 	.entry_size		=	sizeof(struct rt6_info),
112e2422970SEric Dumazet 	.entries		=	ATOMIC_INIT(0),
1131da177e4SLinus Torvalds };
1141da177e4SLinus Torvalds 
11514e50e57SDavid S. Miller static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
11614e50e57SDavid S. Miller {
11714e50e57SDavid S. Miller }
11814e50e57SDavid S. Miller 
11914e50e57SDavid S. Miller static struct dst_ops ip6_dst_blackhole_ops = {
12014e50e57SDavid S. Miller 	.family			=	AF_INET6,
12114e50e57SDavid S. Miller 	.protocol		=	__constant_htons(ETH_P_IPV6),
12214e50e57SDavid S. Miller 	.destroy		=	ip6_dst_destroy,
12314e50e57SDavid S. Miller 	.check			=	ip6_dst_check,
12414e50e57SDavid S. Miller 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
12514e50e57SDavid S. Miller 	.entry_size		=	sizeof(struct rt6_info),
126e2422970SEric Dumazet 	.entries		=	ATOMIC_INIT(0),
12714e50e57SDavid S. Miller };
12814e50e57SDavid S. Miller 
129bdb3289fSDaniel Lezcano static struct rt6_info ip6_null_entry_template = {
1301da177e4SLinus Torvalds 	.u = {
1311da177e4SLinus Torvalds 		.dst = {
1321da177e4SLinus Torvalds 			.__refcnt	= ATOMIC_INIT(1),
1331da177e4SLinus Torvalds 			.__use		= 1,
1341da177e4SLinus Torvalds 			.obsolete	= -1,
1351da177e4SLinus Torvalds 			.error		= -ENETUNREACH,
1361da177e4SLinus Torvalds 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
1371da177e4SLinus Torvalds 			.input		= ip6_pkt_discard,
1381da177e4SLinus Torvalds 			.output		= ip6_pkt_discard_out,
1391da177e4SLinus Torvalds 		}
1401da177e4SLinus Torvalds 	},
1411da177e4SLinus Torvalds 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
1421da177e4SLinus Torvalds 	.rt6i_metric	= ~(u32) 0,
1431da177e4SLinus Torvalds 	.rt6i_ref	= ATOMIC_INIT(1),
1441da177e4SLinus Torvalds };
1451da177e4SLinus Torvalds 
146101367c2SThomas Graf #ifdef CONFIG_IPV6_MULTIPLE_TABLES
147101367c2SThomas Graf 
1486723ab54SDavid S. Miller static int ip6_pkt_prohibit(struct sk_buff *skb);
1496723ab54SDavid S. Miller static int ip6_pkt_prohibit_out(struct sk_buff *skb);
1506723ab54SDavid S. Miller 
151280a34c8SAdrian Bunk static struct rt6_info ip6_prohibit_entry_template = {
152101367c2SThomas Graf 	.u = {
153101367c2SThomas Graf 		.dst = {
154101367c2SThomas Graf 			.__refcnt	= ATOMIC_INIT(1),
155101367c2SThomas Graf 			.__use		= 1,
156101367c2SThomas Graf 			.obsolete	= -1,
157101367c2SThomas Graf 			.error		= -EACCES,
158101367c2SThomas Graf 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
1599ce8ade0SThomas Graf 			.input		= ip6_pkt_prohibit,
1609ce8ade0SThomas Graf 			.output		= ip6_pkt_prohibit_out,
161101367c2SThomas Graf 		}
162101367c2SThomas Graf 	},
163101367c2SThomas Graf 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
164101367c2SThomas Graf 	.rt6i_metric	= ~(u32) 0,
165101367c2SThomas Graf 	.rt6i_ref	= ATOMIC_INIT(1),
166101367c2SThomas Graf };
167101367c2SThomas Graf 
168bdb3289fSDaniel Lezcano static struct rt6_info ip6_blk_hole_entry_template = {
169101367c2SThomas Graf 	.u = {
170101367c2SThomas Graf 		.dst = {
171101367c2SThomas Graf 			.__refcnt	= ATOMIC_INIT(1),
172101367c2SThomas Graf 			.__use		= 1,
173101367c2SThomas Graf 			.obsolete	= -1,
174101367c2SThomas Graf 			.error		= -EINVAL,
175101367c2SThomas Graf 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
176352e512cSHerbert Xu 			.input		= dst_discard,
177352e512cSHerbert Xu 			.output		= dst_discard,
178101367c2SThomas Graf 		}
179101367c2SThomas Graf 	},
180101367c2SThomas Graf 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
181101367c2SThomas Graf 	.rt6i_metric	= ~(u32) 0,
182101367c2SThomas Graf 	.rt6i_ref	= ATOMIC_INIT(1),
183101367c2SThomas Graf };
184101367c2SThomas Graf 
185101367c2SThomas Graf #endif
186101367c2SThomas Graf 
/*
 * Allocate a dst entry from @ops and hand it back typed as an rt6_info.
 * The result of dst_alloc() is passed through unchecked.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *rt = (struct rt6_info *)dst_alloc(ops);

	return rt;
}
1921da177e4SLinus Torvalds 
1931da177e4SLinus Torvalds static void ip6_dst_destroy(struct dst_entry *dst)
1941da177e4SLinus Torvalds {
1951da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
1961da177e4SLinus Torvalds 	struct inet6_dev *idev = rt->rt6i_idev;
1971da177e4SLinus Torvalds 
1981da177e4SLinus Torvalds 	if (idev != NULL) {
1991da177e4SLinus Torvalds 		rt->rt6i_idev = NULL;
2001da177e4SLinus Torvalds 		in6_dev_put(idev);
2011da177e4SLinus Torvalds 	}
2021da177e4SLinus Torvalds }
2031da177e4SLinus Torvalds 
2041da177e4SLinus Torvalds static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
2051da177e4SLinus Torvalds 			   int how)
2061da177e4SLinus Torvalds {
2071da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
2081da177e4SLinus Torvalds 	struct inet6_dev *idev = rt->rt6i_idev;
2095a3e55d6SDenis V. Lunev 	struct net_device *loopback_dev =
210c346dca1SYOSHIFUJI Hideaki 		dev_net(dev)->loopback_dev;
2111da177e4SLinus Torvalds 
2125a3e55d6SDenis V. Lunev 	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
2135a3e55d6SDenis V. Lunev 		struct inet6_dev *loopback_idev =
2145a3e55d6SDenis V. Lunev 			in6_dev_get(loopback_dev);
2151da177e4SLinus Torvalds 		if (loopback_idev != NULL) {
2161da177e4SLinus Torvalds 			rt->rt6i_idev = loopback_idev;
2171da177e4SLinus Torvalds 			in6_dev_put(idev);
2181da177e4SLinus Torvalds 		}
2191da177e4SLinus Torvalds 	}
2201da177e4SLinus Torvalds }
2211da177e4SLinus Torvalds 
2221da177e4SLinus Torvalds static __inline__ int rt6_check_expired(const struct rt6_info *rt)
2231da177e4SLinus Torvalds {
2241da177e4SLinus Torvalds 	return (rt->rt6i_flags & RTF_EXPIRES &&
2251da177e4SLinus Torvalds 		time_after(jiffies, rt->rt6i_expires));
2261da177e4SLinus Torvalds }
2271da177e4SLinus Torvalds 
228c71099acSThomas Graf static inline int rt6_need_strict(struct in6_addr *daddr)
229c71099acSThomas Graf {
230c71099acSThomas Graf 	return (ipv6_addr_type(daddr) &
2315ce83afaSYOSHIFUJI Hideaki 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
232c71099acSThomas Graf }
233c71099acSThomas Graf 
2341da177e4SLinus Torvalds /*
235c71099acSThomas Graf  *	Route lookup. Any table->tb6_lock is implied.
2361da177e4SLinus Torvalds  */
2371da177e4SLinus Torvalds 
2388ed67789SDaniel Lezcano static inline struct rt6_info *rt6_device_match(struct net *net,
2398ed67789SDaniel Lezcano 						    struct rt6_info *rt,
240dd3abc4eSYOSHIFUJI Hideaki 						    struct in6_addr *saddr,
2411da177e4SLinus Torvalds 						    int oif,
242d420895eSYOSHIFUJI Hideaki 						    int flags)
2431da177e4SLinus Torvalds {
2441da177e4SLinus Torvalds 	struct rt6_info *local = NULL;
2451da177e4SLinus Torvalds 	struct rt6_info *sprt;
2461da177e4SLinus Torvalds 
247dd3abc4eSYOSHIFUJI Hideaki 	if (!oif && ipv6_addr_any(saddr))
248dd3abc4eSYOSHIFUJI Hideaki 		goto out;
249dd3abc4eSYOSHIFUJI Hideaki 
2507cc48263SEric Dumazet 	for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
2511da177e4SLinus Torvalds 		struct net_device *dev = sprt->rt6i_dev;
252dd3abc4eSYOSHIFUJI Hideaki 
253dd3abc4eSYOSHIFUJI Hideaki 		if (oif) {
2541da177e4SLinus Torvalds 			if (dev->ifindex == oif)
2551da177e4SLinus Torvalds 				return sprt;
2561da177e4SLinus Torvalds 			if (dev->flags & IFF_LOOPBACK) {
2571da177e4SLinus Torvalds 				if (sprt->rt6i_idev == NULL ||
2581da177e4SLinus Torvalds 				    sprt->rt6i_idev->dev->ifindex != oif) {
259d420895eSYOSHIFUJI Hideaki 					if (flags & RT6_LOOKUP_F_IFACE && oif)
2601da177e4SLinus Torvalds 						continue;
2611da177e4SLinus Torvalds 					if (local && (!oif ||
2621da177e4SLinus Torvalds 						      local->rt6i_idev->dev->ifindex == oif))
2631da177e4SLinus Torvalds 						continue;
2641da177e4SLinus Torvalds 				}
2651da177e4SLinus Torvalds 				local = sprt;
2661da177e4SLinus Torvalds 			}
267dd3abc4eSYOSHIFUJI Hideaki 		} else {
268dd3abc4eSYOSHIFUJI Hideaki 			if (ipv6_chk_addr(net, saddr, dev,
269dd3abc4eSYOSHIFUJI Hideaki 					  flags & RT6_LOOKUP_F_IFACE))
270dd3abc4eSYOSHIFUJI Hideaki 				return sprt;
271dd3abc4eSYOSHIFUJI Hideaki 		}
2721da177e4SLinus Torvalds 	}
2731da177e4SLinus Torvalds 
274dd3abc4eSYOSHIFUJI Hideaki 	if (oif) {
2751da177e4SLinus Torvalds 		if (local)
2761da177e4SLinus Torvalds 			return local;
2771da177e4SLinus Torvalds 
278d420895eSYOSHIFUJI Hideaki 		if (flags & RT6_LOOKUP_F_IFACE)
2798ed67789SDaniel Lezcano 			return net->ipv6.ip6_null_entry;
2801da177e4SLinus Torvalds 	}
281dd3abc4eSYOSHIFUJI Hideaki out:
2821da177e4SLinus Torvalds 	return rt;
2831da177e4SLinus Torvalds }
2841da177e4SLinus Torvalds 
28527097255SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
28627097255SYOSHIFUJI Hideaki static void rt6_probe(struct rt6_info *rt)
28727097255SYOSHIFUJI Hideaki {
28827097255SYOSHIFUJI Hideaki 	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
28927097255SYOSHIFUJI Hideaki 	/*
29027097255SYOSHIFUJI Hideaki 	 * Okay, this does not seem to be appropriate
29127097255SYOSHIFUJI Hideaki 	 * for now, however, we need to check if it
29227097255SYOSHIFUJI Hideaki 	 * is really so; aka Router Reachability Probing.
29327097255SYOSHIFUJI Hideaki 	 *
29427097255SYOSHIFUJI Hideaki 	 * Router Reachability Probe MUST be rate-limited
29527097255SYOSHIFUJI Hideaki 	 * to no more than one per minute.
29627097255SYOSHIFUJI Hideaki 	 */
29727097255SYOSHIFUJI Hideaki 	if (!neigh || (neigh->nud_state & NUD_VALID))
29827097255SYOSHIFUJI Hideaki 		return;
29927097255SYOSHIFUJI Hideaki 	read_lock_bh(&neigh->lock);
30027097255SYOSHIFUJI Hideaki 	if (!(neigh->nud_state & NUD_VALID) &&
30152e16356SYOSHIFUJI Hideaki 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
30227097255SYOSHIFUJI Hideaki 		struct in6_addr mcaddr;
30327097255SYOSHIFUJI Hideaki 		struct in6_addr *target;
30427097255SYOSHIFUJI Hideaki 
30527097255SYOSHIFUJI Hideaki 		neigh->updated = jiffies;
30627097255SYOSHIFUJI Hideaki 		read_unlock_bh(&neigh->lock);
30727097255SYOSHIFUJI Hideaki 
30827097255SYOSHIFUJI Hideaki 		target = (struct in6_addr *)&neigh->primary_key;
30927097255SYOSHIFUJI Hideaki 		addrconf_addr_solict_mult(target, &mcaddr);
31027097255SYOSHIFUJI Hideaki 		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
31127097255SYOSHIFUJI Hideaki 	} else
31227097255SYOSHIFUJI Hideaki 		read_unlock_bh(&neigh->lock);
31327097255SYOSHIFUJI Hideaki }
31427097255SYOSHIFUJI Hideaki #else
31527097255SYOSHIFUJI Hideaki static inline void rt6_probe(struct rt6_info *rt)
31627097255SYOSHIFUJI Hideaki {
31727097255SYOSHIFUJI Hideaki 	return;
31827097255SYOSHIFUJI Hideaki }
31927097255SYOSHIFUJI Hideaki #endif
32027097255SYOSHIFUJI Hideaki 
3211da177e4SLinus Torvalds /*
322554cfb7eSYOSHIFUJI Hideaki  * Default Router Selection (RFC 2461 6.3.6)
3231da177e4SLinus Torvalds  */
324b6f99a21SDave Jones static inline int rt6_check_dev(struct rt6_info *rt, int oif)
3251da177e4SLinus Torvalds {
326554cfb7eSYOSHIFUJI Hideaki 	struct net_device *dev = rt->rt6i_dev;
327161980f4SDavid S. Miller 	if (!oif || dev->ifindex == oif)
328554cfb7eSYOSHIFUJI Hideaki 		return 2;
329161980f4SDavid S. Miller 	if ((dev->flags & IFF_LOOPBACK) &&
330161980f4SDavid S. Miller 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
331161980f4SDavid S. Miller 		return 1;
332554cfb7eSYOSHIFUJI Hideaki 	return 0;
3331da177e4SLinus Torvalds }
3341da177e4SLinus Torvalds 
335b6f99a21SDave Jones static inline int rt6_check_neigh(struct rt6_info *rt)
3361da177e4SLinus Torvalds {
337554cfb7eSYOSHIFUJI Hideaki 	struct neighbour *neigh = rt->rt6i_nexthop;
338398bcbebSYOSHIFUJI Hideaki 	int m;
3394d0c5911SYOSHIFUJI Hideaki 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
3404d0c5911SYOSHIFUJI Hideaki 	    !(rt->rt6i_flags & RTF_GATEWAY))
3414d0c5911SYOSHIFUJI Hideaki 		m = 1;
3424d0c5911SYOSHIFUJI Hideaki 	else if (neigh) {
3431da177e4SLinus Torvalds 		read_lock_bh(&neigh->lock);
344554cfb7eSYOSHIFUJI Hideaki 		if (neigh->nud_state & NUD_VALID)
3454d0c5911SYOSHIFUJI Hideaki 			m = 2;
346398bcbebSYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
347398bcbebSYOSHIFUJI Hideaki 		else if (neigh->nud_state & NUD_FAILED)
348398bcbebSYOSHIFUJI Hideaki 			m = 0;
349398bcbebSYOSHIFUJI Hideaki #endif
350398bcbebSYOSHIFUJI Hideaki 		else
351ea73ee23SYOSHIFUJI Hideaki 			m = 1;
3521da177e4SLinus Torvalds 		read_unlock_bh(&neigh->lock);
353398bcbebSYOSHIFUJI Hideaki 	} else
354398bcbebSYOSHIFUJI Hideaki 		m = 0;
355554cfb7eSYOSHIFUJI Hideaki 	return m;
3561da177e4SLinus Torvalds }
3571da177e4SLinus Torvalds 
358554cfb7eSYOSHIFUJI Hideaki static int rt6_score_route(struct rt6_info *rt, int oif,
359554cfb7eSYOSHIFUJI Hideaki 			   int strict)
360554cfb7eSYOSHIFUJI Hideaki {
3614d0c5911SYOSHIFUJI Hideaki 	int m, n;
3624d0c5911SYOSHIFUJI Hideaki 
3634d0c5911SYOSHIFUJI Hideaki 	m = rt6_check_dev(rt, oif);
36477d16f45SYOSHIFUJI Hideaki 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
365554cfb7eSYOSHIFUJI Hideaki 		return -1;
366ebacaaa0SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
367ebacaaa0SYOSHIFUJI Hideaki 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
368ebacaaa0SYOSHIFUJI Hideaki #endif
3694d0c5911SYOSHIFUJI Hideaki 	n = rt6_check_neigh(rt);
370557e92efSYOSHIFUJI Hideaki 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
371554cfb7eSYOSHIFUJI Hideaki 		return -1;
372554cfb7eSYOSHIFUJI Hideaki 	return m;
373554cfb7eSYOSHIFUJI Hideaki }
374554cfb7eSYOSHIFUJI Hideaki 
375f11e6659SDavid S. Miller static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
376f11e6659SDavid S. Miller 				   int *mpri, struct rt6_info *match)
377554cfb7eSYOSHIFUJI Hideaki {
378554cfb7eSYOSHIFUJI Hideaki 	int m;
379554cfb7eSYOSHIFUJI Hideaki 
380554cfb7eSYOSHIFUJI Hideaki 	if (rt6_check_expired(rt))
381f11e6659SDavid S. Miller 		goto out;
382554cfb7eSYOSHIFUJI Hideaki 
383554cfb7eSYOSHIFUJI Hideaki 	m = rt6_score_route(rt, oif, strict);
384554cfb7eSYOSHIFUJI Hideaki 	if (m < 0)
385f11e6659SDavid S. Miller 		goto out;
386554cfb7eSYOSHIFUJI Hideaki 
387f11e6659SDavid S. Miller 	if (m > *mpri) {
388ea659e07SYOSHIFUJI Hideaki 		if (strict & RT6_LOOKUP_F_REACHABLE)
38927097255SYOSHIFUJI Hideaki 			rt6_probe(match);
390f11e6659SDavid S. Miller 		*mpri = m;
391554cfb7eSYOSHIFUJI Hideaki 		match = rt;
392ea659e07SYOSHIFUJI Hideaki 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
39327097255SYOSHIFUJI Hideaki 		rt6_probe(rt);
3941da177e4SLinus Torvalds 	}
395f11e6659SDavid S. Miller 
396f11e6659SDavid S. Miller out:
397f11e6659SDavid S. Miller 	return match;
3981da177e4SLinus Torvalds }
3991da177e4SLinus Torvalds 
400f11e6659SDavid S. Miller static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
401f11e6659SDavid S. Miller 				     struct rt6_info *rr_head,
402f11e6659SDavid S. Miller 				     u32 metric, int oif, int strict)
403f11e6659SDavid S. Miller {
404f11e6659SDavid S. Miller 	struct rt6_info *rt, *match;
405f11e6659SDavid S. Miller 	int mpri = -1;
406f11e6659SDavid S. Miller 
407f11e6659SDavid S. Miller 	match = NULL;
408f11e6659SDavid S. Miller 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
409f11e6659SDavid S. Miller 	     rt = rt->u.dst.rt6_next)
410f11e6659SDavid S. Miller 		match = find_match(rt, oif, strict, &mpri, match);
411f11e6659SDavid S. Miller 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
412f11e6659SDavid S. Miller 	     rt = rt->u.dst.rt6_next)
413f11e6659SDavid S. Miller 		match = find_match(rt, oif, strict, &mpri, match);
414f11e6659SDavid S. Miller 
415f11e6659SDavid S. Miller 	return match;
416f11e6659SDavid S. Miller }
417f11e6659SDavid S. Miller 
418f11e6659SDavid S. Miller static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
419f11e6659SDavid S. Miller {
420f11e6659SDavid S. Miller 	struct rt6_info *match, *rt0;
4218ed67789SDaniel Lezcano 	struct net *net;
422f11e6659SDavid S. Miller 
423f11e6659SDavid S. Miller 	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
4240dc47877SHarvey Harrison 		  __func__, fn->leaf, oif);
425f11e6659SDavid S. Miller 
426f11e6659SDavid S. Miller 	rt0 = fn->rr_ptr;
427f11e6659SDavid S. Miller 	if (!rt0)
428f11e6659SDavid S. Miller 		fn->rr_ptr = rt0 = fn->leaf;
429f11e6659SDavid S. Miller 
430f11e6659SDavid S. Miller 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
431f11e6659SDavid S. Miller 
432554cfb7eSYOSHIFUJI Hideaki 	if (!match &&
433f11e6659SDavid S. Miller 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
434f11e6659SDavid S. Miller 		struct rt6_info *next = rt0->u.dst.rt6_next;
435f11e6659SDavid S. Miller 
436554cfb7eSYOSHIFUJI Hideaki 		/* no entries matched; do round-robin */
437f11e6659SDavid S. Miller 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
438f11e6659SDavid S. Miller 			next = fn->leaf;
439f11e6659SDavid S. Miller 
440f11e6659SDavid S. Miller 		if (next != rt0)
441f11e6659SDavid S. Miller 			fn->rr_ptr = next;
442554cfb7eSYOSHIFUJI Hideaki 	}
443554cfb7eSYOSHIFUJI Hideaki 
444f11e6659SDavid S. Miller 	RT6_TRACE("%s() => %p\n",
4450dc47877SHarvey Harrison 		  __func__, match);
446554cfb7eSYOSHIFUJI Hideaki 
447c346dca1SYOSHIFUJI Hideaki 	net = dev_net(rt0->rt6i_dev);
4488ed67789SDaniel Lezcano 	return (match ? match : net->ipv6.ip6_null_entry);
4491da177e4SLinus Torvalds }
4501da177e4SLinus Torvalds 
45170ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
45270ceb4f5SYOSHIFUJI Hideaki int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
45370ceb4f5SYOSHIFUJI Hideaki 		  struct in6_addr *gwaddr)
45470ceb4f5SYOSHIFUJI Hideaki {
455c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
45670ceb4f5SYOSHIFUJI Hideaki 	struct route_info *rinfo = (struct route_info *) opt;
45770ceb4f5SYOSHIFUJI Hideaki 	struct in6_addr prefix_buf, *prefix;
45870ceb4f5SYOSHIFUJI Hideaki 	unsigned int pref;
4594bed72e4SYOSHIFUJI Hideaki 	unsigned long lifetime;
46070ceb4f5SYOSHIFUJI Hideaki 	struct rt6_info *rt;
46170ceb4f5SYOSHIFUJI Hideaki 
46270ceb4f5SYOSHIFUJI Hideaki 	if (len < sizeof(struct route_info)) {
46370ceb4f5SYOSHIFUJI Hideaki 		return -EINVAL;
46470ceb4f5SYOSHIFUJI Hideaki 	}
46570ceb4f5SYOSHIFUJI Hideaki 
46670ceb4f5SYOSHIFUJI Hideaki 	/* Sanity check for prefix_len and length */
46770ceb4f5SYOSHIFUJI Hideaki 	if (rinfo->length > 3) {
46870ceb4f5SYOSHIFUJI Hideaki 		return -EINVAL;
46970ceb4f5SYOSHIFUJI Hideaki 	} else if (rinfo->prefix_len > 128) {
47070ceb4f5SYOSHIFUJI Hideaki 		return -EINVAL;
47170ceb4f5SYOSHIFUJI Hideaki 	} else if (rinfo->prefix_len > 64) {
47270ceb4f5SYOSHIFUJI Hideaki 		if (rinfo->length < 2) {
47370ceb4f5SYOSHIFUJI Hideaki 			return -EINVAL;
47470ceb4f5SYOSHIFUJI Hideaki 		}
47570ceb4f5SYOSHIFUJI Hideaki 	} else if (rinfo->prefix_len > 0) {
47670ceb4f5SYOSHIFUJI Hideaki 		if (rinfo->length < 1) {
47770ceb4f5SYOSHIFUJI Hideaki 			return -EINVAL;
47870ceb4f5SYOSHIFUJI Hideaki 		}
47970ceb4f5SYOSHIFUJI Hideaki 	}
48070ceb4f5SYOSHIFUJI Hideaki 
48170ceb4f5SYOSHIFUJI Hideaki 	pref = rinfo->route_pref;
48270ceb4f5SYOSHIFUJI Hideaki 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
48370ceb4f5SYOSHIFUJI Hideaki 		pref = ICMPV6_ROUTER_PREF_MEDIUM;
48470ceb4f5SYOSHIFUJI Hideaki 
4854bed72e4SYOSHIFUJI Hideaki 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
48670ceb4f5SYOSHIFUJI Hideaki 
48770ceb4f5SYOSHIFUJI Hideaki 	if (rinfo->length == 3)
48870ceb4f5SYOSHIFUJI Hideaki 		prefix = (struct in6_addr *)rinfo->prefix;
48970ceb4f5SYOSHIFUJI Hideaki 	else {
49070ceb4f5SYOSHIFUJI Hideaki 		/* this function is safe */
49170ceb4f5SYOSHIFUJI Hideaki 		ipv6_addr_prefix(&prefix_buf,
49270ceb4f5SYOSHIFUJI Hideaki 				 (struct in6_addr *)rinfo->prefix,
49370ceb4f5SYOSHIFUJI Hideaki 				 rinfo->prefix_len);
49470ceb4f5SYOSHIFUJI Hideaki 		prefix = &prefix_buf;
49570ceb4f5SYOSHIFUJI Hideaki 	}
49670ceb4f5SYOSHIFUJI Hideaki 
497efa2cea0SDaniel Lezcano 	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
498efa2cea0SDaniel Lezcano 				dev->ifindex);
49970ceb4f5SYOSHIFUJI Hideaki 
50070ceb4f5SYOSHIFUJI Hideaki 	if (rt && !lifetime) {
501e0a1ad73SThomas Graf 		ip6_del_rt(rt);
50270ceb4f5SYOSHIFUJI Hideaki 		rt = NULL;
50370ceb4f5SYOSHIFUJI Hideaki 	}
50470ceb4f5SYOSHIFUJI Hideaki 
50570ceb4f5SYOSHIFUJI Hideaki 	if (!rt && lifetime)
506efa2cea0SDaniel Lezcano 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
50770ceb4f5SYOSHIFUJI Hideaki 					pref);
50870ceb4f5SYOSHIFUJI Hideaki 	else if (rt)
50970ceb4f5SYOSHIFUJI Hideaki 		rt->rt6i_flags = RTF_ROUTEINFO |
51070ceb4f5SYOSHIFUJI Hideaki 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
51170ceb4f5SYOSHIFUJI Hideaki 
51270ceb4f5SYOSHIFUJI Hideaki 	if (rt) {
5134bed72e4SYOSHIFUJI Hideaki 		if (!addrconf_finite_timeout(lifetime)) {
51470ceb4f5SYOSHIFUJI Hideaki 			rt->rt6i_flags &= ~RTF_EXPIRES;
51570ceb4f5SYOSHIFUJI Hideaki 		} else {
51670ceb4f5SYOSHIFUJI Hideaki 			rt->rt6i_expires = jiffies + HZ * lifetime;
51770ceb4f5SYOSHIFUJI Hideaki 			rt->rt6i_flags |= RTF_EXPIRES;
51870ceb4f5SYOSHIFUJI Hideaki 		}
51970ceb4f5SYOSHIFUJI Hideaki 		dst_release(&rt->u.dst);
52070ceb4f5SYOSHIFUJI Hideaki 	}
52170ceb4f5SYOSHIFUJI Hideaki 	return 0;
52270ceb4f5SYOSHIFUJI Hideaki }
52370ceb4f5SYOSHIFUJI Hideaki #endif
52470ceb4f5SYOSHIFUJI Hideaki 
5258ed67789SDaniel Lezcano #define BACKTRACK(__net, saddr)			\
526982f56f3SYOSHIFUJI Hideaki do { \
5278ed67789SDaniel Lezcano 	if (rt == __net->ipv6.ip6_null_entry) {	\
528982f56f3SYOSHIFUJI Hideaki 		struct fib6_node *pn; \
529e0eda7bbSVille Nuorvala 		while (1) { \
530982f56f3SYOSHIFUJI Hideaki 			if (fn->fn_flags & RTN_TL_ROOT) \
531c71099acSThomas Graf 				goto out; \
532982f56f3SYOSHIFUJI Hideaki 			pn = fn->parent; \
533982f56f3SYOSHIFUJI Hideaki 			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
5348bce65b9SKim Nordlund 				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
535982f56f3SYOSHIFUJI Hideaki 			else \
536982f56f3SYOSHIFUJI Hideaki 				fn = pn; \
537c71099acSThomas Graf 			if (fn->fn_flags & RTN_RTINFO) \
538c71099acSThomas Graf 				goto restart; \
539c71099acSThomas Graf 		} \
540982f56f3SYOSHIFUJI Hideaki 	} \
541982f56f3SYOSHIFUJI Hideaki } while(0)
542c71099acSThomas Graf 
5438ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_lookup(struct net *net,
5448ed67789SDaniel Lezcano 					     struct fib6_table *table,
545c71099acSThomas Graf 					     struct flowi *fl, int flags)
5461da177e4SLinus Torvalds {
5471da177e4SLinus Torvalds 	struct fib6_node *fn;
5481da177e4SLinus Torvalds 	struct rt6_info *rt;
5491da177e4SLinus Torvalds 
550c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
551c71099acSThomas Graf 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
552c71099acSThomas Graf restart:
553c71099acSThomas Graf 	rt = fn->leaf;
554dd3abc4eSYOSHIFUJI Hideaki 	rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
5558ed67789SDaniel Lezcano 	BACKTRACK(net, &fl->fl6_src);
556c71099acSThomas Graf out:
55703f49f34SPavel Emelyanov 	dst_use(&rt->u.dst, jiffies);
558c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
5591da177e4SLinus Torvalds 	return rt;
560c71099acSThomas Graf 
561c71099acSThomas Graf }
562c71099acSThomas Graf 
5639acd9f3aSYOSHIFUJI Hideaki struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
5649acd9f3aSYOSHIFUJI Hideaki 			    const struct in6_addr *saddr, int oif, int strict)
565c71099acSThomas Graf {
566c71099acSThomas Graf 	struct flowi fl = {
567c71099acSThomas Graf 		.oif = oif,
568c71099acSThomas Graf 		.nl_u = {
569c71099acSThomas Graf 			.ip6_u = {
570c71099acSThomas Graf 				.daddr = *daddr,
571c71099acSThomas Graf 			},
572c71099acSThomas Graf 		},
573c71099acSThomas Graf 	};
574c71099acSThomas Graf 	struct dst_entry *dst;
57577d16f45SYOSHIFUJI Hideaki 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
576c71099acSThomas Graf 
577adaa70bbSThomas Graf 	if (saddr) {
578adaa70bbSThomas Graf 		memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
579adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
580adaa70bbSThomas Graf 	}
581adaa70bbSThomas Graf 
582606a2b48SDaniel Lezcano 	dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
583c71099acSThomas Graf 	if (dst->error == 0)
584c71099acSThomas Graf 		return (struct rt6_info *) dst;
585c71099acSThomas Graf 
586c71099acSThomas Graf 	dst_release(dst);
587c71099acSThomas Graf 
5881da177e4SLinus Torvalds 	return NULL;
5891da177e4SLinus Torvalds }
5901da177e4SLinus Torvalds 
5917159039aSYOSHIFUJI Hideaki EXPORT_SYMBOL(rt6_lookup);
5927159039aSYOSHIFUJI Hideaki 
593c71099acSThomas Graf /* ip6_ins_rt is called with FREE table->tb6_lock.
5941da177e4SLinus Torvalds    It takes new route entry, the addition fails by any reason the
5951da177e4SLinus Torvalds    route is freed. In any case, if caller does not hold it, it may
5961da177e4SLinus Torvalds    be destroyed.
5971da177e4SLinus Torvalds  */
5981da177e4SLinus Torvalds 
59986872cb5SThomas Graf static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
6001da177e4SLinus Torvalds {
6011da177e4SLinus Torvalds 	int err;
602c71099acSThomas Graf 	struct fib6_table *table;
6031da177e4SLinus Torvalds 
604c71099acSThomas Graf 	table = rt->rt6i_table;
605c71099acSThomas Graf 	write_lock_bh(&table->tb6_lock);
60686872cb5SThomas Graf 	err = fib6_add(&table->tb6_root, rt, info);
607c71099acSThomas Graf 	write_unlock_bh(&table->tb6_lock);
6081da177e4SLinus Torvalds 
6091da177e4SLinus Torvalds 	return err;
6101da177e4SLinus Torvalds }
6111da177e4SLinus Torvalds 
61240e22e8fSThomas Graf int ip6_ins_rt(struct rt6_info *rt)
61340e22e8fSThomas Graf {
6144d1169c1SDenis V. Lunev 	struct nl_info info = {
615c346dca1SYOSHIFUJI Hideaki 		.nl_net = dev_net(rt->rt6i_dev),
6164d1169c1SDenis V. Lunev 	};
617528c4cebSDenis V. Lunev 	return __ip6_ins_rt(rt, &info);
61840e22e8fSThomas Graf }
61940e22e8fSThomas Graf 
62095a9a5baSYOSHIFUJI Hideaki static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
62195a9a5baSYOSHIFUJI Hideaki 				      struct in6_addr *saddr)
6221da177e4SLinus Torvalds {
6231da177e4SLinus Torvalds 	struct rt6_info *rt;
6241da177e4SLinus Torvalds 
6251da177e4SLinus Torvalds 	/*
6261da177e4SLinus Torvalds 	 *	Clone the route.
6271da177e4SLinus Torvalds 	 */
6281da177e4SLinus Torvalds 
6291da177e4SLinus Torvalds 	rt = ip6_rt_copy(ort);
6301da177e4SLinus Torvalds 
6311da177e4SLinus Torvalds 	if (rt) {
63258c4fb86SYOSHIFUJI Hideaki 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
63358c4fb86SYOSHIFUJI Hideaki 			if (rt->rt6i_dst.plen != 128 &&
63458c4fb86SYOSHIFUJI Hideaki 			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
63558c4fb86SYOSHIFUJI Hideaki 				rt->rt6i_flags |= RTF_ANYCAST;
6361da177e4SLinus Torvalds 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
63758c4fb86SYOSHIFUJI Hideaki 		}
6381da177e4SLinus Torvalds 
63958c4fb86SYOSHIFUJI Hideaki 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
6401da177e4SLinus Torvalds 		rt->rt6i_dst.plen = 128;
6411da177e4SLinus Torvalds 		rt->rt6i_flags |= RTF_CACHE;
6421da177e4SLinus Torvalds 		rt->u.dst.flags |= DST_HOST;
6431da177e4SLinus Torvalds 
6441da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
6451da177e4SLinus Torvalds 		if (rt->rt6i_src.plen && saddr) {
6461da177e4SLinus Torvalds 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
6471da177e4SLinus Torvalds 			rt->rt6i_src.plen = 128;
6481da177e4SLinus Torvalds 		}
6491da177e4SLinus Torvalds #endif
6501da177e4SLinus Torvalds 
6511da177e4SLinus Torvalds 		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
6521da177e4SLinus Torvalds 
65395a9a5baSYOSHIFUJI Hideaki 	}
6541da177e4SLinus Torvalds 
6551da177e4SLinus Torvalds 	return rt;
6561da177e4SLinus Torvalds }
65795a9a5baSYOSHIFUJI Hideaki 
658299d9939SYOSHIFUJI Hideaki static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
659299d9939SYOSHIFUJI Hideaki {
660299d9939SYOSHIFUJI Hideaki 	struct rt6_info *rt = ip6_rt_copy(ort);
661299d9939SYOSHIFUJI Hideaki 	if (rt) {
662299d9939SYOSHIFUJI Hideaki 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
663299d9939SYOSHIFUJI Hideaki 		rt->rt6i_dst.plen = 128;
664299d9939SYOSHIFUJI Hideaki 		rt->rt6i_flags |= RTF_CACHE;
665299d9939SYOSHIFUJI Hideaki 		rt->u.dst.flags |= DST_HOST;
666299d9939SYOSHIFUJI Hideaki 		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
667299d9939SYOSHIFUJI Hideaki 	}
668299d9939SYOSHIFUJI Hideaki 	return rt;
669299d9939SYOSHIFUJI Hideaki }
670299d9939SYOSHIFUJI Hideaki 
/*
 * ip6_pol_route - per-table route lookup shared by ip6_pol_route_input()
 * and ip6_pol_route_output().
 *
 * Looks up fl->fl6_dst / fl->fl6_src in @table, picks a route with
 * rt6_select() for interface @oif and, when the winner is neither the
 * namespace's ip6_null_entry nor already an RTF_CACHE entry, builds a
 * per-destination copy (rt6_alloc_cow() or rt6_alloc_clone()) and
 * inserts it with ip6_ins_rt().
 *
 * Every return path has taken a dst_hold() on the returned route; its
 * lastuse/__use statistics are updated at out2.
 */
6718ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
6728ce11e6aSAdrian Bunk 				      struct flowi *fl, int flags)
6731da177e4SLinus Torvalds {
6741da177e4SLinus Torvalds 	struct fib6_node *fn;
675519fbd87SYOSHIFUJI Hideaki 	struct rt6_info *rt, *nrt;
676c71099acSThomas Graf 	int strict = 0;
	/* Upper bound on passes through the relookup loop below. */
6771da177e4SLinus Torvalds 	int attempts = 3;
678519fbd87SYOSHIFUJI Hideaki 	int err;
	/*
	 * The first pass asks rt6_select() for reachable routers only,
	 * unless forwarding is enabled in devconf_all for this namespace.
	 */
67953b7997fSYOSHIFUJI Hideaki 	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
6801da177e4SLinus Torvalds 
	/* Only the RT6_LOOKUP_F_IFACE bit of @flags is passed on. */
68177d16f45SYOSHIFUJI Hideaki 	strict |= flags & RT6_LOOKUP_F_IFACE;
6821da177e4SLinus Torvalds 
6831da177e4SLinus Torvalds relookup:
684c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
6851da177e4SLinus Torvalds 
6868238dd06SYOSHIFUJI Hideaki restart_2:
687c71099acSThomas Graf 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
6881da177e4SLinus Torvalds 
6891da177e4SLinus Torvalds restart:
6904acad72dSPavel Emelyanov 	rt = rt6_select(fn, oif, strict | reachable);
6918ed67789SDaniel Lezcano 
	/*
	 * BACKTRACK() is a macro defined outside this block.
	 * NOTE(review): presumably it walks back up the tree and may jump
	 * to the labels in this function - confirm against its definition.
	 */
6928ed67789SDaniel Lezcano 	BACKTRACK(net, &fl->fl6_src);
6938ed67789SDaniel Lezcano 	if (rt == net->ipv6.ip6_null_entry ||
6948238dd06SYOSHIFUJI Hideaki 	    rt->rt6i_flags & RTF_CACHE)
6951da177e4SLinus Torvalds 		goto out;
6961da177e4SLinus Torvalds 
	/* Pin the selected route before the table lock is dropped. */
6971da177e4SLinus Torvalds 	dst_hold(&rt->u.dst);
698c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
6991da177e4SLinus Torvalds 
	/*
	 * A route with no nexthop neighbour and without RTF_NONEXTHOP gets
	 * a copy-on-write host entry; anything else is cloned only when
	 * CLONE_OFFLINK_ROUTE is enabled, otherwise it is returned as is
	 * (still holding the reference taken above).
	 */
700519fbd87SYOSHIFUJI Hideaki 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
701e40cf353SYOSHIFUJI Hideaki 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
702519fbd87SYOSHIFUJI Hideaki 	else {
703519fbd87SYOSHIFUJI Hideaki #if CLONE_OFFLINK_ROUTE
704519fbd87SYOSHIFUJI Hideaki 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
705519fbd87SYOSHIFUJI Hideaki #else
706519fbd87SYOSHIFUJI Hideaki 		goto out2;
707519fbd87SYOSHIFUJI Hideaki #endif
708519fbd87SYOSHIFUJI Hideaki 	}
7091da177e4SLinus Torvalds 
	/* Swap to the copy, or to ip6_null_entry if allocation failed. */
7101da177e4SLinus Torvalds 	dst_release(&rt->u.dst);
7118ed67789SDaniel Lezcano 	rt = nrt ? : net->ipv6.ip6_null_entry;
7121da177e4SLinus Torvalds 
713e40cf353SYOSHIFUJI Hideaki 	dst_hold(&rt->u.dst);
714e40cf353SYOSHIFUJI Hideaki 	if (nrt) {
71540e22e8fSThomas Graf 		err = ip6_ins_rt(nrt);
716e40cf353SYOSHIFUJI Hideaki 		if (!err)
717e40cf353SYOSHIFUJI Hideaki 			goto out2;
718e40cf353SYOSHIFUJI Hideaki 	}
719e40cf353SYOSHIFUJI Hideaki 
	/* Out of retries: return whatever rt currently points at. */
720e40cf353SYOSHIFUJI Hideaki 	if (--attempts <= 0)
7211da177e4SLinus Torvalds 		goto out2;
7221da177e4SLinus Torvalds 
723519fbd87SYOSHIFUJI Hideaki 	/*
724c71099acSThomas Graf 	 * Race condition! In the gap, when table->tb6_lock was
725519fbd87SYOSHIFUJI Hideaki 	 * released someone could insert this route.  Relookup.
7261da177e4SLinus Torvalds 	 */
7271da177e4SLinus Torvalds 	dst_release(&rt->u.dst);
7281da177e4SLinus Torvalds 	goto relookup;
729e40cf353SYOSHIFUJI Hideaki 
	/*
	 * Reached with the read lock still held.  If this pass was limited
	 * to reachable routers, lift the restriction and look up once more
	 * before accepting the result.
	 */
730519fbd87SYOSHIFUJI Hideaki out:
7318238dd06SYOSHIFUJI Hideaki 	if (reachable) {
7328238dd06SYOSHIFUJI Hideaki 		reachable = 0;
7338238dd06SYOSHIFUJI Hideaki 		goto restart_2;
7348238dd06SYOSHIFUJI Hideaki 	}
735519fbd87SYOSHIFUJI Hideaki 	dst_hold(&rt->u.dst);
736c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
7371da177e4SLinus Torvalds out2:
7381da177e4SLinus Torvalds 	rt->u.dst.lastuse = jiffies;
7391da177e4SLinus Torvalds 	rt->u.dst.__use++;
740c71099acSThomas Graf 
741c71099acSThomas Graf 	return rt;
742c71099acSThomas Graf }
743c71099acSThomas Graf 
7448ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
7454acad72dSPavel Emelyanov 					    struct flowi *fl, int flags)
7464acad72dSPavel Emelyanov {
7478ed67789SDaniel Lezcano 	return ip6_pol_route(net, table, fl->iif, fl, flags);
7484acad72dSPavel Emelyanov }
7494acad72dSPavel Emelyanov 
750c71099acSThomas Graf void ip6_route_input(struct sk_buff *skb)
751c71099acSThomas Graf {
7520660e03fSArnaldo Carvalho de Melo 	struct ipv6hdr *iph = ipv6_hdr(skb);
753c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(skb->dev);
754adaa70bbSThomas Graf 	int flags = RT6_LOOKUP_F_HAS_SADDR;
755c71099acSThomas Graf 	struct flowi fl = {
756c71099acSThomas Graf 		.iif = skb->dev->ifindex,
757c71099acSThomas Graf 		.nl_u = {
758c71099acSThomas Graf 			.ip6_u = {
759c71099acSThomas Graf 				.daddr = iph->daddr,
760c71099acSThomas Graf 				.saddr = iph->saddr,
76190bcaf7bSAl Viro 				.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
762c71099acSThomas Graf 			},
763c71099acSThomas Graf 		},
76447dcf0cbSThomas Graf 		.mark = skb->mark,
765c71099acSThomas Graf 		.proto = iph->nexthdr,
766c71099acSThomas Graf 	};
767adaa70bbSThomas Graf 
768adaa70bbSThomas Graf 	if (rt6_need_strict(&iph->daddr))
769adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_IFACE;
770c71099acSThomas Graf 
7715578689aSDaniel Lezcano 	skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
772c71099acSThomas Graf }
773c71099acSThomas Graf 
7748ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
775c71099acSThomas Graf 					     struct flowi *fl, int flags)
776c71099acSThomas Graf {
7778ed67789SDaniel Lezcano 	return ip6_pol_route(net, table, fl->oif, fl, flags);
778c71099acSThomas Graf }
779c71099acSThomas Graf 
7804591db4fSDaniel Lezcano struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
7814591db4fSDaniel Lezcano 				    struct flowi *fl)
782c71099acSThomas Graf {
783c71099acSThomas Graf 	int flags = 0;
784c71099acSThomas Graf 
785c71099acSThomas Graf 	if (rt6_need_strict(&fl->fl6_dst))
78677d16f45SYOSHIFUJI Hideaki 		flags |= RT6_LOOKUP_F_IFACE;
787c71099acSThomas Graf 
788adaa70bbSThomas Graf 	if (!ipv6_addr_any(&fl->fl6_src))
789adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
7907cbca67cSYOSHIFUJI Hideaki 	else if (sk) {
7917cbca67cSYOSHIFUJI Hideaki 		unsigned int prefs = inet6_sk(sk)->srcprefs;
7927cbca67cSYOSHIFUJI Hideaki 		if (prefs & IPV6_PREFER_SRC_TMP)
7937cbca67cSYOSHIFUJI Hideaki 			flags |= RT6_LOOKUP_F_SRCPREF_TMP;
7947cbca67cSYOSHIFUJI Hideaki 		if (prefs & IPV6_PREFER_SRC_PUBLIC)
7957cbca67cSYOSHIFUJI Hideaki 			flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
7967cbca67cSYOSHIFUJI Hideaki 		if (prefs & IPV6_PREFER_SRC_COA)
7977cbca67cSYOSHIFUJI Hideaki 			flags |= RT6_LOOKUP_F_SRCPREF_COA;
7987cbca67cSYOSHIFUJI Hideaki 	}
799adaa70bbSThomas Graf 
8004591db4fSDaniel Lezcano 	return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
8011da177e4SLinus Torvalds }
8021da177e4SLinus Torvalds 
8037159039aSYOSHIFUJI Hideaki EXPORT_SYMBOL(ip6_route_output);
8041da177e4SLinus Torvalds 
80514e50e57SDavid S. Miller int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
80614e50e57SDavid S. Miller {
80714e50e57SDavid S. Miller 	struct rt6_info *ort = (struct rt6_info *) *dstp;
80814e50e57SDavid S. Miller 	struct rt6_info *rt = (struct rt6_info *)
80914e50e57SDavid S. Miller 		dst_alloc(&ip6_dst_blackhole_ops);
81014e50e57SDavid S. Miller 	struct dst_entry *new = NULL;
81114e50e57SDavid S. Miller 
81214e50e57SDavid S. Miller 	if (rt) {
81314e50e57SDavid S. Miller 		new = &rt->u.dst;
81414e50e57SDavid S. Miller 
81514e50e57SDavid S. Miller 		atomic_set(&new->__refcnt, 1);
81614e50e57SDavid S. Miller 		new->__use = 1;
817352e512cSHerbert Xu 		new->input = dst_discard;
818352e512cSHerbert Xu 		new->output = dst_discard;
81914e50e57SDavid S. Miller 
82014e50e57SDavid S. Miller 		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
82114e50e57SDavid S. Miller 		new->dev = ort->u.dst.dev;
82214e50e57SDavid S. Miller 		if (new->dev)
82314e50e57SDavid S. Miller 			dev_hold(new->dev);
82414e50e57SDavid S. Miller 		rt->rt6i_idev = ort->rt6i_idev;
82514e50e57SDavid S. Miller 		if (rt->rt6i_idev)
82614e50e57SDavid S. Miller 			in6_dev_hold(rt->rt6i_idev);
82714e50e57SDavid S. Miller 		rt->rt6i_expires = 0;
82814e50e57SDavid S. Miller 
82914e50e57SDavid S. Miller 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
83014e50e57SDavid S. Miller 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
83114e50e57SDavid S. Miller 		rt->rt6i_metric = 0;
83214e50e57SDavid S. Miller 
83314e50e57SDavid S. Miller 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
83414e50e57SDavid S. Miller #ifdef CONFIG_IPV6_SUBTREES
83514e50e57SDavid S. Miller 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
83614e50e57SDavid S. Miller #endif
83714e50e57SDavid S. Miller 
83814e50e57SDavid S. Miller 		dst_free(new);
83914e50e57SDavid S. Miller 	}
84014e50e57SDavid S. Miller 
84114e50e57SDavid S. Miller 	dst_release(*dstp);
84214e50e57SDavid S. Miller 	*dstp = new;
84314e50e57SDavid S. Miller 	return (new ? 0 : -ENOMEM);
84414e50e57SDavid S. Miller }
84514e50e57SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
84614e50e57SDavid S. Miller 
8471da177e4SLinus Torvalds /*
8481da177e4SLinus Torvalds  *	Destination cache support functions
8491da177e4SLinus Torvalds  */
8501da177e4SLinus Torvalds 
8511da177e4SLinus Torvalds static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
8521da177e4SLinus Torvalds {
8531da177e4SLinus Torvalds 	struct rt6_info *rt;
8541da177e4SLinus Torvalds 
8551da177e4SLinus Torvalds 	rt = (struct rt6_info *) dst;
8561da177e4SLinus Torvalds 
8571da177e4SLinus Torvalds 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
8581da177e4SLinus Torvalds 		return dst;
8591da177e4SLinus Torvalds 
8601da177e4SLinus Torvalds 	return NULL;
8611da177e4SLinus Torvalds }
8621da177e4SLinus Torvalds 
8631da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
8641da177e4SLinus Torvalds {
8651da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *) dst;
8661da177e4SLinus Torvalds 
8671da177e4SLinus Torvalds 	if (rt) {
8681da177e4SLinus Torvalds 		if (rt->rt6i_flags & RTF_CACHE)
869e0a1ad73SThomas Graf 			ip6_del_rt(rt);
8701da177e4SLinus Torvalds 		else
8711da177e4SLinus Torvalds 			dst_release(dst);
8721da177e4SLinus Torvalds 	}
8731da177e4SLinus Torvalds 	return NULL;
8741da177e4SLinus Torvalds }
8751da177e4SLinus Torvalds 
8761da177e4SLinus Torvalds static void ip6_link_failure(struct sk_buff *skb)
8771da177e4SLinus Torvalds {
8781da177e4SLinus Torvalds 	struct rt6_info *rt;
8791da177e4SLinus Torvalds 
8801da177e4SLinus Torvalds 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
8811da177e4SLinus Torvalds 
8821da177e4SLinus Torvalds 	rt = (struct rt6_info *) skb->dst;
8831da177e4SLinus Torvalds 	if (rt) {
8841da177e4SLinus Torvalds 		if (rt->rt6i_flags&RTF_CACHE) {
8851da177e4SLinus Torvalds 			dst_set_expires(&rt->u.dst, 0);
8861da177e4SLinus Torvalds 			rt->rt6i_flags |= RTF_EXPIRES;
8871da177e4SLinus Torvalds 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
8881da177e4SLinus Torvalds 			rt->rt6i_node->fn_sernum = -1;
8891da177e4SLinus Torvalds 	}
8901da177e4SLinus Torvalds }
8911da177e4SLinus Torvalds 
8921da177e4SLinus Torvalds static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
8931da177e4SLinus Torvalds {
8941da177e4SLinus Torvalds 	struct rt6_info *rt6 = (struct rt6_info*)dst;
8951da177e4SLinus Torvalds 
8961da177e4SLinus Torvalds 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
8971da177e4SLinus Torvalds 		rt6->rt6i_flags |= RTF_MODIFIED;
8981da177e4SLinus Torvalds 		if (mtu < IPV6_MIN_MTU) {
8991da177e4SLinus Torvalds 			mtu = IPV6_MIN_MTU;
9001da177e4SLinus Torvalds 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
9011da177e4SLinus Torvalds 		}
9021da177e4SLinus Torvalds 		dst->metrics[RTAX_MTU-1] = mtu;
9038d71740cSTom Tucker 		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
9041da177e4SLinus Torvalds 	}
9051da177e4SLinus Torvalds }
9061da177e4SLinus Torvalds 
9071da177e4SLinus Torvalds static int ipv6_get_mtu(struct net_device *dev);
9081da177e4SLinus Torvalds 
9095578689aSDaniel Lezcano static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
9101da177e4SLinus Torvalds {
9111da177e4SLinus Torvalds 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
9121da177e4SLinus Torvalds 
9135578689aSDaniel Lezcano 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
9145578689aSDaniel Lezcano 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
9151da177e4SLinus Torvalds 
9161da177e4SLinus Torvalds 	/*
9171da177e4SLinus Torvalds 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
9181da177e4SLinus Torvalds 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
9191da177e4SLinus Torvalds 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
9201da177e4SLinus Torvalds 	 * rely only on pmtu discovery"
9211da177e4SLinus Torvalds 	 */
9221da177e4SLinus Torvalds 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
9231da177e4SLinus Torvalds 		mtu = IPV6_MAXPLEN;
9241da177e4SLinus Torvalds 	return mtu;
9251da177e4SLinus Torvalds }
9261da177e4SLinus Torvalds 
/*
 * Singly linked list (chained through dst->next) of the dst entries
 * created by icmp6_dst_alloc(); icmp6_dst_gc() unlinks and frees the
 * entries whose reference count has dropped to zero.
 */
9273b00944cSYOSHIFUJI Hideaki static struct dst_entry *icmp6_dst_gc_list;
/* Taken (with BHs disabled) around every access to icmp6_dst_gc_list. */
9283b00944cSYOSHIFUJI Hideaki static DEFINE_SPINLOCK(icmp6_dst_lock);
9295d0bbeebSThomas Graf 
9303b00944cSYOSHIFUJI Hideaki struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
9311da177e4SLinus Torvalds 				  struct neighbour *neigh,
9329acd9f3aSYOSHIFUJI Hideaki 				  const struct in6_addr *addr)
9331da177e4SLinus Torvalds {
9341da177e4SLinus Torvalds 	struct rt6_info *rt;
9351da177e4SLinus Torvalds 	struct inet6_dev *idev = in6_dev_get(dev);
936c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
9371da177e4SLinus Torvalds 
9381da177e4SLinus Torvalds 	if (unlikely(idev == NULL))
9391da177e4SLinus Torvalds 		return NULL;
9401da177e4SLinus Torvalds 
941f2fc6a54SBenjamin Thery 	rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
9421da177e4SLinus Torvalds 	if (unlikely(rt == NULL)) {
9431da177e4SLinus Torvalds 		in6_dev_put(idev);
9441da177e4SLinus Torvalds 		goto out;
9451da177e4SLinus Torvalds 	}
9461da177e4SLinus Torvalds 
9471da177e4SLinus Torvalds 	dev_hold(dev);
9481da177e4SLinus Torvalds 	if (neigh)
9491da177e4SLinus Torvalds 		neigh_hold(neigh);
9501da177e4SLinus Torvalds 	else
9511da177e4SLinus Torvalds 		neigh = ndisc_get_neigh(dev, addr);
9521da177e4SLinus Torvalds 
9531da177e4SLinus Torvalds 	rt->rt6i_dev	  = dev;
9541da177e4SLinus Torvalds 	rt->rt6i_idev     = idev;
9551da177e4SLinus Torvalds 	rt->rt6i_nexthop  = neigh;
9561da177e4SLinus Torvalds 	atomic_set(&rt->u.dst.__refcnt, 1);
9571da177e4SLinus Torvalds 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
9581da177e4SLinus Torvalds 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
9595578689aSDaniel Lezcano 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
9603b00944cSYOSHIFUJI Hideaki 	rt->u.dst.output  = ip6_output;
9611da177e4SLinus Torvalds 
9621da177e4SLinus Torvalds #if 0	/* there's no chance to use these for ndisc */
9631da177e4SLinus Torvalds 	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
9641da177e4SLinus Torvalds 				? DST_HOST
9651da177e4SLinus Torvalds 				: 0;
9661da177e4SLinus Torvalds 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
9671da177e4SLinus Torvalds 	rt->rt6i_dst.plen = 128;
9681da177e4SLinus Torvalds #endif
9691da177e4SLinus Torvalds 
9703b00944cSYOSHIFUJI Hideaki 	spin_lock_bh(&icmp6_dst_lock);
9713b00944cSYOSHIFUJI Hideaki 	rt->u.dst.next = icmp6_dst_gc_list;
9723b00944cSYOSHIFUJI Hideaki 	icmp6_dst_gc_list = &rt->u.dst;
9733b00944cSYOSHIFUJI Hideaki 	spin_unlock_bh(&icmp6_dst_lock);
9741da177e4SLinus Torvalds 
9755578689aSDaniel Lezcano 	fib6_force_start_gc(net);
9761da177e4SLinus Torvalds 
9771da177e4SLinus Torvalds out:
97840aa7b90SYOSHIFUJI Hideaki 	return &rt->u.dst;
9791da177e4SLinus Torvalds }
9801da177e4SLinus Torvalds 
981*3d0f24a7SStephen Hemminger int icmp6_dst_gc(void)
9821da177e4SLinus Torvalds {
9831da177e4SLinus Torvalds 	struct dst_entry *dst, *next, **pprev;
984*3d0f24a7SStephen Hemminger 	int more = 0;
9851da177e4SLinus Torvalds 
9861da177e4SLinus Torvalds 	next = NULL;
9875d0bbeebSThomas Graf 
9883b00944cSYOSHIFUJI Hideaki 	spin_lock_bh(&icmp6_dst_lock);
9893b00944cSYOSHIFUJI Hideaki 	pprev = &icmp6_dst_gc_list;
9905d0bbeebSThomas Graf 
9911da177e4SLinus Torvalds 	while ((dst = *pprev) != NULL) {
9921da177e4SLinus Torvalds 		if (!atomic_read(&dst->__refcnt)) {
9931da177e4SLinus Torvalds 			*pprev = dst->next;
9941da177e4SLinus Torvalds 			dst_free(dst);
9951da177e4SLinus Torvalds 		} else {
9961da177e4SLinus Torvalds 			pprev = &dst->next;
997*3d0f24a7SStephen Hemminger 			++more;
9981da177e4SLinus Torvalds 		}
9991da177e4SLinus Torvalds 	}
10001da177e4SLinus Torvalds 
10013b00944cSYOSHIFUJI Hideaki 	spin_unlock_bh(&icmp6_dst_lock);
10025d0bbeebSThomas Graf 
1003*3d0f24a7SStephen Hemminger 	return more;
10041da177e4SLinus Torvalds }
10051da177e4SLinus Torvalds 
1006569d3645SDaniel Lezcano static int ip6_dst_gc(struct dst_ops *ops)
10071da177e4SLinus Torvalds {
10081da177e4SLinus Torvalds 	unsigned long now = jiffies;
10097019b78eSDaniel Lezcano 	struct net *net = ops->dst_net;
10107019b78eSDaniel Lezcano 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
10117019b78eSDaniel Lezcano 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
10127019b78eSDaniel Lezcano 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
10137019b78eSDaniel Lezcano 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
10147019b78eSDaniel Lezcano 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
10151da177e4SLinus Torvalds 
10167019b78eSDaniel Lezcano 	if (time_after(rt_last_gc + rt_min_interval, now) &&
10177019b78eSDaniel Lezcano 	    atomic_read(&ops->entries) <= rt_max_size)
10181da177e4SLinus Torvalds 		goto out;
10191da177e4SLinus Torvalds 
10206891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire++;
10216891a346SBenjamin Thery 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
10226891a346SBenjamin Thery 	net->ipv6.ip6_rt_last_gc = now;
10237019b78eSDaniel Lezcano 	if (atomic_read(&ops->entries) < ops->gc_thresh)
10247019b78eSDaniel Lezcano 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
10251da177e4SLinus Torvalds out:
10267019b78eSDaniel Lezcano 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
10277019b78eSDaniel Lezcano 	return (atomic_read(&ops->entries) > rt_max_size);
10281da177e4SLinus Torvalds }
10291da177e4SLinus Torvalds 
10301da177e4SLinus Torvalds /* Clean host part of a prefix. Not necessary in radix tree,
10311da177e4SLinus Torvalds    but results in cleaner routing tables.
10321da177e4SLinus Torvalds 
10331da177e4SLinus Torvalds    Remove it only when all the things will work!
10341da177e4SLinus Torvalds  */
10351da177e4SLinus Torvalds 
10361da177e4SLinus Torvalds static int ipv6_get_mtu(struct net_device *dev)
10371da177e4SLinus Torvalds {
10381da177e4SLinus Torvalds 	int mtu = IPV6_MIN_MTU;
10391da177e4SLinus Torvalds 	struct inet6_dev *idev;
10401da177e4SLinus Torvalds 
10411da177e4SLinus Torvalds 	idev = in6_dev_get(dev);
10421da177e4SLinus Torvalds 	if (idev) {
10431da177e4SLinus Torvalds 		mtu = idev->cnf.mtu6;
10441da177e4SLinus Torvalds 		in6_dev_put(idev);
10451da177e4SLinus Torvalds 	}
10461da177e4SLinus Torvalds 	return mtu;
10471da177e4SLinus Torvalds }
10481da177e4SLinus Torvalds 
10496b75d090SYOSHIFUJI Hideaki int ip6_dst_hoplimit(struct dst_entry *dst)
10501da177e4SLinus Torvalds {
10516b75d090SYOSHIFUJI Hideaki 	int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
10526b75d090SYOSHIFUJI Hideaki 	if (hoplimit < 0) {
10536b75d090SYOSHIFUJI Hideaki 		struct net_device *dev = dst->dev;
10546b75d090SYOSHIFUJI Hideaki 		struct inet6_dev *idev = in6_dev_get(dev);
10551da177e4SLinus Torvalds 		if (idev) {
10561da177e4SLinus Torvalds 			hoplimit = idev->cnf.hop_limit;
10571da177e4SLinus Torvalds 			in6_dev_put(idev);
10586b75d090SYOSHIFUJI Hideaki 		} else
105953b7997fSYOSHIFUJI Hideaki 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
10601da177e4SLinus Torvalds 	}
10611da177e4SLinus Torvalds 	return hoplimit;
10621da177e4SLinus Torvalds }
10631da177e4SLinus Torvalds 
10641da177e4SLinus Torvalds /*
10651da177e4SLinus Torvalds  *
10661da177e4SLinus Torvalds  */
10671da177e4SLinus Torvalds 
/*
 * ip6_route_add - build a route from @cfg and insert it into its table.
 *
 * Validates the prefix lengths, resolves the output device (from
 * cfg->fc_ifindex and/or from the route towards the gateway), allocates
 * and fills an rt6_info, applies the metrics supplied in cfg->fc_mx and
 * inserts the result with __ip6_ins_rt().
 *
 * Side effects on @cfg: a zero fc_metric is replaced by
 * IP6_RT_PRIO_USER, an RTPROT_UNSPEC fc_protocol by RTPROT_BOOT, and
 * fc_nlinfo.nl_net is overwritten with the namespace of the final device.
 *
 * On success the device and inet6_dev references taken here are stored
 * in the route.  On failure (label "out") they are dropped and the
 * route, if already allocated, is freed.
 *
 * Returns the result of __ip6_ins_rt(), or a negative errno from one of
 * the checks below (-EINVAL, -ENODEV, -ENOBUFS, -ENOMEM, -EHOSTUNREACH,
 * or the neighbour lookup error).
 */
106886872cb5SThomas Graf int ip6_route_add(struct fib6_config *cfg)
10691da177e4SLinus Torvalds {
10701da177e4SLinus Torvalds 	int err;
10715578689aSDaniel Lezcano 	struct net *net = cfg->fc_nlinfo.nl_net;
10721da177e4SLinus Torvalds 	struct rt6_info *rt = NULL;
10731da177e4SLinus Torvalds 	struct net_device *dev = NULL;
10741da177e4SLinus Torvalds 	struct inet6_dev *idev = NULL;
1075c71099acSThomas Graf 	struct fib6_table *table;
10761da177e4SLinus Torvalds 	int addr_type;
10771da177e4SLinus Torvalds 
107886872cb5SThomas Graf 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
10791da177e4SLinus Torvalds 		return -EINVAL;
	/* Source-prefix routes are only accepted with subtree support. */
10801da177e4SLinus Torvalds #ifndef CONFIG_IPV6_SUBTREES
108186872cb5SThomas Graf 	if (cfg->fc_src_len)
10821da177e4SLinus Torvalds 		return -EINVAL;
10831da177e4SLinus Torvalds #endif
	/* An explicit ifindex must name a device that has an inet6_dev. */
108486872cb5SThomas Graf 	if (cfg->fc_ifindex) {
10851da177e4SLinus Torvalds 		err = -ENODEV;
10865578689aSDaniel Lezcano 		dev = dev_get_by_index(net, cfg->fc_ifindex);
10871da177e4SLinus Torvalds 		if (!dev)
10881da177e4SLinus Torvalds 			goto out;
10891da177e4SLinus Torvalds 		idev = in6_dev_get(dev);
10901da177e4SLinus Torvalds 		if (!idev)
10911da177e4SLinus Torvalds 			goto out;
10921da177e4SLinus Torvalds 	}
10931da177e4SLinus Torvalds 
109486872cb5SThomas Graf 	if (cfg->fc_metric == 0)
109586872cb5SThomas Graf 		cfg->fc_metric = IP6_RT_PRIO_USER;
10961da177e4SLinus Torvalds 
10975578689aSDaniel Lezcano 	table = fib6_new_table(net, cfg->fc_table);
1098c71099acSThomas Graf 	if (table == NULL) {
1099c71099acSThomas Graf 		err = -ENOBUFS;
1100c71099acSThomas Graf 		goto out;
1101c71099acSThomas Graf 	}
1102c71099acSThomas Graf 
1103f2fc6a54SBenjamin Thery 	rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
11041da177e4SLinus Torvalds 
11051da177e4SLinus Torvalds 	if (rt == NULL) {
11061da177e4SLinus Torvalds 		err = -ENOMEM;
11071da177e4SLinus Torvalds 		goto out;
11081da177e4SLinus Torvalds 	}
11091da177e4SLinus Torvalds 
11101da177e4SLinus Torvalds 	rt->u.dst.obsolete = -1;
	/* fc_expires is in clock_t units and relative to now. */
11116f704992SYOSHIFUJI Hideaki 	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
11126f704992SYOSHIFUJI Hideaki 				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
11136f704992SYOSHIFUJI Hideaki 				0;
11141da177e4SLinus Torvalds 
111586872cb5SThomas Graf 	if (cfg->fc_protocol == RTPROT_UNSPEC)
111686872cb5SThomas Graf 		cfg->fc_protocol = RTPROT_BOOT;
111786872cb5SThomas Graf 	rt->rt6i_protocol = cfg->fc_protocol;
111886872cb5SThomas Graf 
111986872cb5SThomas Graf 	addr_type = ipv6_addr_type(&cfg->fc_dst);
11201da177e4SLinus Torvalds 
	/* Default handlers; the reject branch below overrides them. */
11211da177e4SLinus Torvalds 	if (addr_type & IPV6_ADDR_MULTICAST)
11221da177e4SLinus Torvalds 		rt->u.dst.input = ip6_mc_input;
11231da177e4SLinus Torvalds 	else
11241da177e4SLinus Torvalds 		rt->u.dst.input = ip6_forward;
11251da177e4SLinus Torvalds 
11261da177e4SLinus Torvalds 	rt->u.dst.output = ip6_output;
11271da177e4SLinus Torvalds 
112886872cb5SThomas Graf 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
112986872cb5SThomas Graf 	rt->rt6i_dst.plen = cfg->fc_dst_len;
11301da177e4SLinus Torvalds 	if (rt->rt6i_dst.plen == 128)
11311da177e4SLinus Torvalds 	       rt->u.dst.flags = DST_HOST;
11321da177e4SLinus Torvalds 
11331da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
113486872cb5SThomas Graf 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
113586872cb5SThomas Graf 	rt->rt6i_src.plen = cfg->fc_src_len;
11361da177e4SLinus Torvalds #endif
11371da177e4SLinus Torvalds 
113886872cb5SThomas Graf 	rt->rt6i_metric = cfg->fc_metric;
11391da177e4SLinus Torvalds 
11401da177e4SLinus Torvalds 	/* We cannot add true routes via loopback here,
11411da177e4SLinus Torvalds 	   they would result in kernel looping; promote them to reject routes
11421da177e4SLinus Torvalds 	 */
114386872cb5SThomas Graf 	if ((cfg->fc_flags & RTF_REJECT) ||
11441da177e4SLinus Torvalds 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
11451da177e4SLinus Torvalds 		/* hold loopback dev/idev if we haven't done so. */
11465578689aSDaniel Lezcano 		if (dev != net->loopback_dev) {
11471da177e4SLinus Torvalds 			if (dev) {
11481da177e4SLinus Torvalds 				dev_put(dev);
11491da177e4SLinus Torvalds 				in6_dev_put(idev);
11501da177e4SLinus Torvalds 			}
11515578689aSDaniel Lezcano 			dev = net->loopback_dev;
11521da177e4SLinus Torvalds 			dev_hold(dev);
11531da177e4SLinus Torvalds 			idev = in6_dev_get(dev);
11541da177e4SLinus Torvalds 			if (!idev) {
11551da177e4SLinus Torvalds 				err = -ENODEV;
11561da177e4SLinus Torvalds 				goto out;
11571da177e4SLinus Torvalds 			}
11581da177e4SLinus Torvalds 		}
		/* Reject routes discard traffic and report ENETUNREACH. */
11591da177e4SLinus Torvalds 		rt->u.dst.output = ip6_pkt_discard_out;
11601da177e4SLinus Torvalds 		rt->u.dst.input = ip6_pkt_discard;
11611da177e4SLinus Torvalds 		rt->u.dst.error = -ENETUNREACH;
11621da177e4SLinus Torvalds 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
11631da177e4SLinus Torvalds 		goto install_route;
11641da177e4SLinus Torvalds 	}
11651da177e4SLinus Torvalds 
116686872cb5SThomas Graf 	if (cfg->fc_flags & RTF_GATEWAY) {
11671da177e4SLinus Torvalds 		struct in6_addr *gw_addr;
11681da177e4SLinus Torvalds 		int gwa_type;
11691da177e4SLinus Torvalds 
117086872cb5SThomas Graf 		gw_addr = &cfg->fc_gateway;
117186872cb5SThomas Graf 		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
11721da177e4SLinus Torvalds 		gwa_type = ipv6_addr_type(gw_addr);
11731da177e4SLinus Torvalds 
11741da177e4SLinus Torvalds 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
11751da177e4SLinus Torvalds 			struct rt6_info *grt;
11761da177e4SLinus Torvalds 
11771da177e4SLinus Torvalds 			/* IPv6 strictly inhibits using not link-local
11781da177e4SLinus Torvalds 			   addresses as nexthop address.
11791da177e4SLinus Torvalds 			   Otherwise, router will not able to send redirects.
11801da177e4SLinus Torvalds 			   It is very good, but in some (rare!) circumstances
11811da177e4SLinus Torvalds 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
11821da177e4SLinus Torvalds 			   some exceptions. --ANK
11831da177e4SLinus Torvalds 			 */
11841da177e4SLinus Torvalds 			err = -EINVAL;
11851da177e4SLinus Torvalds 			if (!(gwa_type&IPV6_ADDR_UNICAST))
11861da177e4SLinus Torvalds 				goto out;
11871da177e4SLinus Torvalds 
			/* Find the existing route that leads to the gateway. */
11885578689aSDaniel Lezcano 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
11891da177e4SLinus Torvalds 
11901da177e4SLinus Torvalds 			err = -EHOSTUNREACH;
11911da177e4SLinus Torvalds 			if (grt == NULL)
11921da177e4SLinus Torvalds 				goto out;
11931da177e4SLinus Torvalds 			if (dev) {
11941da177e4SLinus Torvalds 				if (dev != grt->rt6i_dev) {
11951da177e4SLinus Torvalds 					dst_release(&grt->u.dst);
11961da177e4SLinus Torvalds 					goto out;
11971da177e4SLinus Torvalds 				}
11981da177e4SLinus Torvalds 			} else {
				/* No device given: inherit the gateway route's. */
11991da177e4SLinus Torvalds 				dev = grt->rt6i_dev;
12001da177e4SLinus Torvalds 				idev = grt->rt6i_idev;
12011da177e4SLinus Torvalds 				dev_hold(dev);
12021da177e4SLinus Torvalds 				in6_dev_hold(grt->rt6i_idev);
12031da177e4SLinus Torvalds 			}
			/*
			 * err stays -EHOSTUNREACH unless the route to the
			 * gateway is itself not a gateway route.
			 */
12041da177e4SLinus Torvalds 			if (!(grt->rt6i_flags&RTF_GATEWAY))
12051da177e4SLinus Torvalds 				err = 0;
12061da177e4SLinus Torvalds 			dst_release(&grt->u.dst);
12071da177e4SLinus Torvalds 
12081da177e4SLinus Torvalds 			if (err)
12091da177e4SLinus Torvalds 				goto out;
12101da177e4SLinus Torvalds 		}
12111da177e4SLinus Torvalds 		err = -EINVAL;
12121da177e4SLinus Torvalds 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
12131da177e4SLinus Torvalds 			goto out;
12141da177e4SLinus Torvalds 	}
12151da177e4SLinus Torvalds 
12161da177e4SLinus Torvalds 	err = -ENODEV;
12171da177e4SLinus Torvalds 	if (dev == NULL)
12181da177e4SLinus Torvalds 		goto out;
12191da177e4SLinus Torvalds 
122086872cb5SThomas Graf 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
12211da177e4SLinus Torvalds 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
12221da177e4SLinus Torvalds 		if (IS_ERR(rt->rt6i_nexthop)) {
12231da177e4SLinus Torvalds 			err = PTR_ERR(rt->rt6i_nexthop);
			/* Do not leave an ERR_PTR in the route freed at out. */
12241da177e4SLinus Torvalds 			rt->rt6i_nexthop = NULL;
12251da177e4SLinus Torvalds 			goto out;
12261da177e4SLinus Torvalds 		}
12271da177e4SLinus Torvalds 	}
12281da177e4SLinus Torvalds 
122986872cb5SThomas Graf 	rt->rt6i_flags = cfg->fc_flags;
12301da177e4SLinus Torvalds 
12311da177e4SLinus Torvalds install_route:
	/*
	 * Copy caller-supplied metrics.  Attribute type 0 is skipped, a
	 * type above RTAX_MAX is rejected; metrics[] is indexed by
	 * type - 1.
	 */
123286872cb5SThomas Graf 	if (cfg->fc_mx) {
123386872cb5SThomas Graf 		struct nlattr *nla;
123486872cb5SThomas Graf 		int remaining;
12351da177e4SLinus Torvalds 
123686872cb5SThomas Graf 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
12378f4c1f9bSThomas Graf 			int type = nla_type(nla);
123886872cb5SThomas Graf 
123986872cb5SThomas Graf 			if (type) {
124086872cb5SThomas Graf 				if (type > RTAX_MAX) {
12411da177e4SLinus Torvalds 					err = -EINVAL;
12421da177e4SLinus Torvalds 					goto out;
12431da177e4SLinus Torvalds 				}
124486872cb5SThomas Graf 
124586872cb5SThomas Graf 				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
12461da177e4SLinus Torvalds 			}
12471da177e4SLinus Torvalds 		}
12481da177e4SLinus Torvalds 	}
12491da177e4SLinus Torvalds 
	/*
	 * Fill in defaults for metrics left at zero.  A hop limit of -1 is
	 * what ip6_dst_hoplimit() treats as "take it from the device".
	 */
12505ffc02a1SSatoru SATOH 	if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
12511da177e4SLinus Torvalds 		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
12525ffc02a1SSatoru SATOH 	if (!dst_metric(&rt->u.dst, RTAX_MTU))
12531da177e4SLinus Torvalds 		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
12545ffc02a1SSatoru SATOH 	if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
12555578689aSDaniel Lezcano 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
	/* The route now owns the dev/idev references taken above. */
12561da177e4SLinus Torvalds 	rt->u.dst.dev = dev;
12571da177e4SLinus Torvalds 	rt->rt6i_idev = idev;
1258c71099acSThomas Graf 	rt->rt6i_table = table;
125963152fc0SDaniel Lezcano 
1260c346dca1SYOSHIFUJI Hideaki 	cfg->fc_nlinfo.nl_net = dev_net(dev);
126163152fc0SDaniel Lezcano 
126286872cb5SThomas Graf 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
12631da177e4SLinus Torvalds 
	/* Common failure exit: undo whatever was acquired so far. */
12641da177e4SLinus Torvalds out:
12651da177e4SLinus Torvalds 	if (dev)
12661da177e4SLinus Torvalds 		dev_put(dev);
12671da177e4SLinus Torvalds 	if (idev)
12681da177e4SLinus Torvalds 		in6_dev_put(idev);
12691da177e4SLinus Torvalds 	if (rt)
127040aa7b90SYOSHIFUJI Hideaki 		dst_free(&rt->u.dst);
12711da177e4SLinus Torvalds 	return err;
12721da177e4SLinus Torvalds }
12731da177e4SLinus Torvalds 
127486872cb5SThomas Graf static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
12751da177e4SLinus Torvalds {
12761da177e4SLinus Torvalds 	int err;
1277c71099acSThomas Graf 	struct fib6_table *table;
1278c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(rt->rt6i_dev);
12791da177e4SLinus Torvalds 
12808ed67789SDaniel Lezcano 	if (rt == net->ipv6.ip6_null_entry)
12816c813a72SPatrick McHardy 		return -ENOENT;
12826c813a72SPatrick McHardy 
1283c71099acSThomas Graf 	table = rt->rt6i_table;
1284c71099acSThomas Graf 	write_lock_bh(&table->tb6_lock);
12851da177e4SLinus Torvalds 
128686872cb5SThomas Graf 	err = fib6_del(rt, info);
12871da177e4SLinus Torvalds 	dst_release(&rt->u.dst);
12881da177e4SLinus Torvalds 
1289c71099acSThomas Graf 	write_unlock_bh(&table->tb6_lock);
12901da177e4SLinus Torvalds 
12911da177e4SLinus Torvalds 	return err;
12921da177e4SLinus Torvalds }
12931da177e4SLinus Torvalds 
1294e0a1ad73SThomas Graf int ip6_del_rt(struct rt6_info *rt)
1295e0a1ad73SThomas Graf {
12964d1169c1SDenis V. Lunev 	struct nl_info info = {
1297c346dca1SYOSHIFUJI Hideaki 		.nl_net = dev_net(rt->rt6i_dev),
12984d1169c1SDenis V. Lunev 	};
1299528c4cebSDenis V. Lunev 	return __ip6_del_rt(rt, &info);
1300e0a1ad73SThomas Graf }
1301e0a1ad73SThomas Graf 
130286872cb5SThomas Graf static int ip6_route_del(struct fib6_config *cfg)
13031da177e4SLinus Torvalds {
1304c71099acSThomas Graf 	struct fib6_table *table;
13051da177e4SLinus Torvalds 	struct fib6_node *fn;
13061da177e4SLinus Torvalds 	struct rt6_info *rt;
13071da177e4SLinus Torvalds 	int err = -ESRCH;
13081da177e4SLinus Torvalds 
13095578689aSDaniel Lezcano 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1310c71099acSThomas Graf 	if (table == NULL)
1311c71099acSThomas Graf 		return err;
13121da177e4SLinus Torvalds 
1313c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
1314c71099acSThomas Graf 
1315c71099acSThomas Graf 	fn = fib6_locate(&table->tb6_root,
131686872cb5SThomas Graf 			 &cfg->fc_dst, cfg->fc_dst_len,
131786872cb5SThomas Graf 			 &cfg->fc_src, cfg->fc_src_len);
13181da177e4SLinus Torvalds 
13191da177e4SLinus Torvalds 	if (fn) {
13207cc48263SEric Dumazet 		for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
132186872cb5SThomas Graf 			if (cfg->fc_ifindex &&
13221da177e4SLinus Torvalds 			    (rt->rt6i_dev == NULL ||
132386872cb5SThomas Graf 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
13241da177e4SLinus Torvalds 				continue;
132586872cb5SThomas Graf 			if (cfg->fc_flags & RTF_GATEWAY &&
132686872cb5SThomas Graf 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
13271da177e4SLinus Torvalds 				continue;
132886872cb5SThomas Graf 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
13291da177e4SLinus Torvalds 				continue;
13301da177e4SLinus Torvalds 			dst_hold(&rt->u.dst);
1331c71099acSThomas Graf 			read_unlock_bh(&table->tb6_lock);
13321da177e4SLinus Torvalds 
133386872cb5SThomas Graf 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
13341da177e4SLinus Torvalds 		}
13351da177e4SLinus Torvalds 	}
1336c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
13371da177e4SLinus Torvalds 
13381da177e4SLinus Torvalds 	return err;
13391da177e4SLinus Torvalds }
13401da177e4SLinus Torvalds 
13411da177e4SLinus Torvalds /*
13421da177e4SLinus Torvalds  *	Handle redirects
13431da177e4SLinus Torvalds  */
1344a6279458SYOSHIFUJI Hideaki struct ip6rd_flowi {
1345a6279458SYOSHIFUJI Hideaki 	struct flowi fl;
1346a6279458SYOSHIFUJI Hideaki 	struct in6_addr gateway;
1347a6279458SYOSHIFUJI Hideaki };
13481da177e4SLinus Torvalds 
13498ed67789SDaniel Lezcano static struct rt6_info *__ip6_route_redirect(struct net *net,
13508ed67789SDaniel Lezcano 					     struct fib6_table *table,
1351a6279458SYOSHIFUJI Hideaki 					     struct flowi *fl,
1352a6279458SYOSHIFUJI Hideaki 					     int flags)
1353a6279458SYOSHIFUJI Hideaki {
1354a6279458SYOSHIFUJI Hideaki 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1355a6279458SYOSHIFUJI Hideaki 	struct rt6_info *rt;
1356a6279458SYOSHIFUJI Hideaki 	struct fib6_node *fn;
1357c71099acSThomas Graf 
1358e843b9e1SYOSHIFUJI Hideaki 	/*
1359e843b9e1SYOSHIFUJI Hideaki 	 * Get the "current" route for this destination and
1360e843b9e1SYOSHIFUJI Hideaki 	 * check if the redirect has come from approriate router.
1361e843b9e1SYOSHIFUJI Hideaki 	 *
1362e843b9e1SYOSHIFUJI Hideaki 	 * RFC 2461 specifies that redirects should only be
1363e843b9e1SYOSHIFUJI Hideaki 	 * accepted if they come from the nexthop to the target.
1364e843b9e1SYOSHIFUJI Hideaki 	 * Due to the way the routes are chosen, this notion
1365e843b9e1SYOSHIFUJI Hideaki 	 * is a bit fuzzy and one might need to check all possible
1366e843b9e1SYOSHIFUJI Hideaki 	 * routes.
1367e843b9e1SYOSHIFUJI Hideaki 	 */
13681da177e4SLinus Torvalds 
1369c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
1370a6279458SYOSHIFUJI Hideaki 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1371e843b9e1SYOSHIFUJI Hideaki restart:
13727cc48263SEric Dumazet 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
13731da177e4SLinus Torvalds 		/*
13741da177e4SLinus Torvalds 		 * Current route is on-link; redirect is always invalid.
13751da177e4SLinus Torvalds 		 *
13761da177e4SLinus Torvalds 		 * Seems, previous statement is not true. It could
13771da177e4SLinus Torvalds 		 * be node, which looks for us as on-link (f.e. proxy ndisc)
13781da177e4SLinus Torvalds 		 * But then router serving it might decide, that we should
13791da177e4SLinus Torvalds 		 * know truth 8)8) --ANK (980726).
13801da177e4SLinus Torvalds 		 */
1381e843b9e1SYOSHIFUJI Hideaki 		if (rt6_check_expired(rt))
1382e843b9e1SYOSHIFUJI Hideaki 			continue;
13831da177e4SLinus Torvalds 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1384e843b9e1SYOSHIFUJI Hideaki 			continue;
1385a6279458SYOSHIFUJI Hideaki 		if (fl->oif != rt->rt6i_dev->ifindex)
1386e843b9e1SYOSHIFUJI Hideaki 			continue;
1387a6279458SYOSHIFUJI Hideaki 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1388e843b9e1SYOSHIFUJI Hideaki 			continue;
1389e843b9e1SYOSHIFUJI Hideaki 		break;
1390e843b9e1SYOSHIFUJI Hideaki 	}
1391a6279458SYOSHIFUJI Hideaki 
1392cb15d9c2SYOSHIFUJI Hideaki 	if (!rt)
13938ed67789SDaniel Lezcano 		rt = net->ipv6.ip6_null_entry;
13948ed67789SDaniel Lezcano 	BACKTRACK(net, &fl->fl6_src);
1395cb15d9c2SYOSHIFUJI Hideaki out:
1396a6279458SYOSHIFUJI Hideaki 	dst_hold(&rt->u.dst);
1397a6279458SYOSHIFUJI Hideaki 
1398c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
13991da177e4SLinus Torvalds 
1400a6279458SYOSHIFUJI Hideaki 	return rt;
1401a6279458SYOSHIFUJI Hideaki };
1402a6279458SYOSHIFUJI Hideaki 
1403a6279458SYOSHIFUJI Hideaki static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1404a6279458SYOSHIFUJI Hideaki 					   struct in6_addr *src,
1405a6279458SYOSHIFUJI Hideaki 					   struct in6_addr *gateway,
1406a6279458SYOSHIFUJI Hideaki 					   struct net_device *dev)
1407a6279458SYOSHIFUJI Hideaki {
1408adaa70bbSThomas Graf 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1409c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
1410a6279458SYOSHIFUJI Hideaki 	struct ip6rd_flowi rdfl = {
1411a6279458SYOSHIFUJI Hideaki 		.fl = {
1412a6279458SYOSHIFUJI Hideaki 			.oif = dev->ifindex,
1413a6279458SYOSHIFUJI Hideaki 			.nl_u = {
1414a6279458SYOSHIFUJI Hideaki 				.ip6_u = {
1415a6279458SYOSHIFUJI Hideaki 					.daddr = *dest,
1416a6279458SYOSHIFUJI Hideaki 					.saddr = *src,
1417a6279458SYOSHIFUJI Hideaki 				},
1418a6279458SYOSHIFUJI Hideaki 			},
1419a6279458SYOSHIFUJI Hideaki 		},
1420a6279458SYOSHIFUJI Hideaki 		.gateway = *gateway,
1421a6279458SYOSHIFUJI Hideaki 	};
1422adaa70bbSThomas Graf 
1423adaa70bbSThomas Graf 	if (rt6_need_strict(dest))
1424adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_IFACE;
1425a6279458SYOSHIFUJI Hideaki 
14265578689aSDaniel Lezcano 	return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
142758f09b78SDaniel Lezcano 						   flags, __ip6_route_redirect);
1428a6279458SYOSHIFUJI Hideaki }
1429a6279458SYOSHIFUJI Hideaki 
1430a6279458SYOSHIFUJI Hideaki void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1431a6279458SYOSHIFUJI Hideaki 		  struct in6_addr *saddr,
1432a6279458SYOSHIFUJI Hideaki 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1433a6279458SYOSHIFUJI Hideaki {
1434a6279458SYOSHIFUJI Hideaki 	struct rt6_info *rt, *nrt = NULL;
1435a6279458SYOSHIFUJI Hideaki 	struct netevent_redirect netevent;
1436c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(neigh->dev);
1437a6279458SYOSHIFUJI Hideaki 
1438a6279458SYOSHIFUJI Hideaki 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1439a6279458SYOSHIFUJI Hideaki 
14408ed67789SDaniel Lezcano 	if (rt == net->ipv6.ip6_null_entry) {
14411da177e4SLinus Torvalds 		if (net_ratelimit())
14421da177e4SLinus Torvalds 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
14431da177e4SLinus Torvalds 			       "for redirect target\n");
1444a6279458SYOSHIFUJI Hideaki 		goto out;
14451da177e4SLinus Torvalds 	}
14461da177e4SLinus Torvalds 
14471da177e4SLinus Torvalds 	/*
14481da177e4SLinus Torvalds 	 *	We have finally decided to accept it.
14491da177e4SLinus Torvalds 	 */
14501da177e4SLinus Torvalds 
14511da177e4SLinus Torvalds 	neigh_update(neigh, lladdr, NUD_STALE,
14521da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
14531da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_OVERRIDE|
14541da177e4SLinus Torvalds 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
14551da177e4SLinus Torvalds 				     NEIGH_UPDATE_F_ISROUTER))
14561da177e4SLinus Torvalds 		     );
14571da177e4SLinus Torvalds 
14581da177e4SLinus Torvalds 	/*
14591da177e4SLinus Torvalds 	 * Redirect received -> path was valid.
14601da177e4SLinus Torvalds 	 * Look, redirects are sent only in response to data packets,
14611da177e4SLinus Torvalds 	 * so that this nexthop apparently is reachable. --ANK
14621da177e4SLinus Torvalds 	 */
14631da177e4SLinus Torvalds 	dst_confirm(&rt->u.dst);
14641da177e4SLinus Torvalds 
14651da177e4SLinus Torvalds 	/* Duplicate redirect: silently ignore. */
14661da177e4SLinus Torvalds 	if (neigh == rt->u.dst.neighbour)
14671da177e4SLinus Torvalds 		goto out;
14681da177e4SLinus Torvalds 
14691da177e4SLinus Torvalds 	nrt = ip6_rt_copy(rt);
14701da177e4SLinus Torvalds 	if (nrt == NULL)
14711da177e4SLinus Torvalds 		goto out;
14721da177e4SLinus Torvalds 
14731da177e4SLinus Torvalds 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
14741da177e4SLinus Torvalds 	if (on_link)
14751da177e4SLinus Torvalds 		nrt->rt6i_flags &= ~RTF_GATEWAY;
14761da177e4SLinus Torvalds 
14771da177e4SLinus Torvalds 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
14781da177e4SLinus Torvalds 	nrt->rt6i_dst.plen = 128;
14791da177e4SLinus Torvalds 	nrt->u.dst.flags |= DST_HOST;
14801da177e4SLinus Torvalds 
14811da177e4SLinus Torvalds 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
14821da177e4SLinus Torvalds 	nrt->rt6i_nexthop = neigh_clone(neigh);
14831da177e4SLinus Torvalds 	/* Reset pmtu, it may be better */
14841da177e4SLinus Torvalds 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1485c346dca1SYOSHIFUJI Hideaki 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
14865578689aSDaniel Lezcano 							dst_mtu(&nrt->u.dst));
14871da177e4SLinus Torvalds 
148840e22e8fSThomas Graf 	if (ip6_ins_rt(nrt))
14891da177e4SLinus Torvalds 		goto out;
14901da177e4SLinus Torvalds 
14918d71740cSTom Tucker 	netevent.old = &rt->u.dst;
14928d71740cSTom Tucker 	netevent.new = &nrt->u.dst;
14938d71740cSTom Tucker 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
14948d71740cSTom Tucker 
14951da177e4SLinus Torvalds 	if (rt->rt6i_flags&RTF_CACHE) {
1496e0a1ad73SThomas Graf 		ip6_del_rt(rt);
14971da177e4SLinus Torvalds 		return;
14981da177e4SLinus Torvalds 	}
14991da177e4SLinus Torvalds 
15001da177e4SLinus Torvalds out:
15011da177e4SLinus Torvalds 	dst_release(&rt->u.dst);
15021da177e4SLinus Torvalds 	return;
15031da177e4SLinus Torvalds }
15041da177e4SLinus Torvalds 
15051da177e4SLinus Torvalds /*
15061da177e4SLinus Torvalds  *	Handle ICMP "packet too big" messages
15071da177e4SLinus Torvalds  *	i.e. Path MTU discovery
15081da177e4SLinus Torvalds  */
15091da177e4SLinus Torvalds 
15101da177e4SLinus Torvalds void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
15111da177e4SLinus Torvalds 			struct net_device *dev, u32 pmtu)
15121da177e4SLinus Torvalds {
15131da177e4SLinus Torvalds 	struct rt6_info *rt, *nrt;
1514c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
15151da177e4SLinus Torvalds 	int allfrag = 0;
15161da177e4SLinus Torvalds 
15175578689aSDaniel Lezcano 	rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
15181da177e4SLinus Torvalds 	if (rt == NULL)
15191da177e4SLinus Torvalds 		return;
15201da177e4SLinus Torvalds 
15211da177e4SLinus Torvalds 	if (pmtu >= dst_mtu(&rt->u.dst))
15221da177e4SLinus Torvalds 		goto out;
15231da177e4SLinus Torvalds 
15241da177e4SLinus Torvalds 	if (pmtu < IPV6_MIN_MTU) {
15251da177e4SLinus Torvalds 		/*
15261da177e4SLinus Torvalds 		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
15271da177e4SLinus Torvalds 		 * MTU (1280) and a fragment header should always be included
15281da177e4SLinus Torvalds 		 * after a node receiving Too Big message reporting PMTU is
15291da177e4SLinus Torvalds 		 * less than the IPv6 Minimum Link MTU.
15301da177e4SLinus Torvalds 		 */
15311da177e4SLinus Torvalds 		pmtu = IPV6_MIN_MTU;
15321da177e4SLinus Torvalds 		allfrag = 1;
15331da177e4SLinus Torvalds 	}
15341da177e4SLinus Torvalds 
15351da177e4SLinus Torvalds 	/* New mtu received -> path was valid.
15361da177e4SLinus Torvalds 	   They are sent only in response to data packets,
15371da177e4SLinus Torvalds 	   so that this nexthop apparently is reachable. --ANK
15381da177e4SLinus Torvalds 	 */
15391da177e4SLinus Torvalds 	dst_confirm(&rt->u.dst);
15401da177e4SLinus Torvalds 
15411da177e4SLinus Torvalds 	/* Host route. If it is static, it would be better
15421da177e4SLinus Torvalds 	   not to override it, but add new one, so that
15431da177e4SLinus Torvalds 	   when cache entry will expire old pmtu
15441da177e4SLinus Torvalds 	   would return automatically.
15451da177e4SLinus Torvalds 	 */
15461da177e4SLinus Torvalds 	if (rt->rt6i_flags & RTF_CACHE) {
15471da177e4SLinus Torvalds 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
15481da177e4SLinus Torvalds 		if (allfrag)
15491da177e4SLinus Torvalds 			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
15505578689aSDaniel Lezcano 		dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
15511da177e4SLinus Torvalds 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
15521da177e4SLinus Torvalds 		goto out;
15531da177e4SLinus Torvalds 	}
15541da177e4SLinus Torvalds 
15551da177e4SLinus Torvalds 	/* Network route.
15561da177e4SLinus Torvalds 	   Two cases are possible:
15571da177e4SLinus Torvalds 	   1. It is connected route. Action: COW
15581da177e4SLinus Torvalds 	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
15591da177e4SLinus Torvalds 	 */
1560d5315b50SYOSHIFUJI Hideaki 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1561a1e78363SYOSHIFUJI Hideaki 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1562d5315b50SYOSHIFUJI Hideaki 	else
1563d5315b50SYOSHIFUJI Hideaki 		nrt = rt6_alloc_clone(rt, daddr);
1564a1e78363SYOSHIFUJI Hideaki 
1565d5315b50SYOSHIFUJI Hideaki 	if (nrt) {
15661da177e4SLinus Torvalds 		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
15671da177e4SLinus Torvalds 		if (allfrag)
15681da177e4SLinus Torvalds 			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1569a1e78363SYOSHIFUJI Hideaki 
15701da177e4SLinus Torvalds 		/* According to RFC 1981, detecting PMTU increase shouldn't be
1571a1e78363SYOSHIFUJI Hideaki 		 * happened within 5 mins, the recommended timer is 10 mins.
1572a1e78363SYOSHIFUJI Hideaki 		 * Here this route expiration time is set to ip6_rt_mtu_expires
1573a1e78363SYOSHIFUJI Hideaki 		 * which is 10 mins. After 10 mins the decreased pmtu is expired
1574a1e78363SYOSHIFUJI Hideaki 		 * and detecting PMTU increase will be automatically happened.
15751da177e4SLinus Torvalds 		 */
15765578689aSDaniel Lezcano 		dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
15771da177e4SLinus Torvalds 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1578a1e78363SYOSHIFUJI Hideaki 
157940e22e8fSThomas Graf 		ip6_ins_rt(nrt);
15801da177e4SLinus Torvalds 	}
15811da177e4SLinus Torvalds out:
15821da177e4SLinus Torvalds 	dst_release(&rt->u.dst);
15831da177e4SLinus Torvalds }
15841da177e4SLinus Torvalds 
15851da177e4SLinus Torvalds /*
15861da177e4SLinus Torvalds  *	Misc support functions
15871da177e4SLinus Torvalds  */
15881da177e4SLinus Torvalds 
15891da177e4SLinus Torvalds static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
15901da177e4SLinus Torvalds {
1591c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(ort->rt6i_dev);
1592f2fc6a54SBenjamin Thery 	struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
15931da177e4SLinus Torvalds 
15941da177e4SLinus Torvalds 	if (rt) {
15951da177e4SLinus Torvalds 		rt->u.dst.input = ort->u.dst.input;
15961da177e4SLinus Torvalds 		rt->u.dst.output = ort->u.dst.output;
15971da177e4SLinus Torvalds 
15981da177e4SLinus Torvalds 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
159922e1e4d8SVille Nuorvala 		rt->u.dst.error = ort->u.dst.error;
16001da177e4SLinus Torvalds 		rt->u.dst.dev = ort->u.dst.dev;
16011da177e4SLinus Torvalds 		if (rt->u.dst.dev)
16021da177e4SLinus Torvalds 			dev_hold(rt->u.dst.dev);
16031da177e4SLinus Torvalds 		rt->rt6i_idev = ort->rt6i_idev;
16041da177e4SLinus Torvalds 		if (rt->rt6i_idev)
16051da177e4SLinus Torvalds 			in6_dev_hold(rt->rt6i_idev);
16061da177e4SLinus Torvalds 		rt->u.dst.lastuse = jiffies;
16071da177e4SLinus Torvalds 		rt->rt6i_expires = 0;
16081da177e4SLinus Torvalds 
16091da177e4SLinus Torvalds 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
16101da177e4SLinus Torvalds 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
16111da177e4SLinus Torvalds 		rt->rt6i_metric = 0;
16121da177e4SLinus Torvalds 
16131da177e4SLinus Torvalds 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
16141da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
16151da177e4SLinus Torvalds 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
16161da177e4SLinus Torvalds #endif
1617c71099acSThomas Graf 		rt->rt6i_table = ort->rt6i_table;
16181da177e4SLinus Torvalds 	}
16191da177e4SLinus Torvalds 	return rt;
16201da177e4SLinus Torvalds }
16211da177e4SLinus Torvalds 
162270ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
1623efa2cea0SDaniel Lezcano static struct rt6_info *rt6_get_route_info(struct net *net,
1624efa2cea0SDaniel Lezcano 					   struct in6_addr *prefix, int prefixlen,
162570ceb4f5SYOSHIFUJI Hideaki 					   struct in6_addr *gwaddr, int ifindex)
162670ceb4f5SYOSHIFUJI Hideaki {
162770ceb4f5SYOSHIFUJI Hideaki 	struct fib6_node *fn;
162870ceb4f5SYOSHIFUJI Hideaki 	struct rt6_info *rt = NULL;
1629c71099acSThomas Graf 	struct fib6_table *table;
163070ceb4f5SYOSHIFUJI Hideaki 
1631efa2cea0SDaniel Lezcano 	table = fib6_get_table(net, RT6_TABLE_INFO);
1632c71099acSThomas Graf 	if (table == NULL)
1633c71099acSThomas Graf 		return NULL;
1634c71099acSThomas Graf 
1635c71099acSThomas Graf 	write_lock_bh(&table->tb6_lock);
1636c71099acSThomas Graf 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
163770ceb4f5SYOSHIFUJI Hideaki 	if (!fn)
163870ceb4f5SYOSHIFUJI Hideaki 		goto out;
163970ceb4f5SYOSHIFUJI Hideaki 
16407cc48263SEric Dumazet 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
164170ceb4f5SYOSHIFUJI Hideaki 		if (rt->rt6i_dev->ifindex != ifindex)
164270ceb4f5SYOSHIFUJI Hideaki 			continue;
164370ceb4f5SYOSHIFUJI Hideaki 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
164470ceb4f5SYOSHIFUJI Hideaki 			continue;
164570ceb4f5SYOSHIFUJI Hideaki 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
164670ceb4f5SYOSHIFUJI Hideaki 			continue;
164770ceb4f5SYOSHIFUJI Hideaki 		dst_hold(&rt->u.dst);
164870ceb4f5SYOSHIFUJI Hideaki 		break;
164970ceb4f5SYOSHIFUJI Hideaki 	}
165070ceb4f5SYOSHIFUJI Hideaki out:
1651c71099acSThomas Graf 	write_unlock_bh(&table->tb6_lock);
165270ceb4f5SYOSHIFUJI Hideaki 	return rt;
165370ceb4f5SYOSHIFUJI Hideaki }
165470ceb4f5SYOSHIFUJI Hideaki 
1655efa2cea0SDaniel Lezcano static struct rt6_info *rt6_add_route_info(struct net *net,
1656efa2cea0SDaniel Lezcano 					   struct in6_addr *prefix, int prefixlen,
165770ceb4f5SYOSHIFUJI Hideaki 					   struct in6_addr *gwaddr, int ifindex,
165870ceb4f5SYOSHIFUJI Hideaki 					   unsigned pref)
165970ceb4f5SYOSHIFUJI Hideaki {
166086872cb5SThomas Graf 	struct fib6_config cfg = {
166186872cb5SThomas Graf 		.fc_table	= RT6_TABLE_INFO,
1662238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
166386872cb5SThomas Graf 		.fc_ifindex	= ifindex,
166486872cb5SThomas Graf 		.fc_dst_len	= prefixlen,
166586872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
166686872cb5SThomas Graf 				  RTF_UP | RTF_PREF(pref),
1667efa2cea0SDaniel Lezcano 		.fc_nlinfo.pid = 0,
1668efa2cea0SDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
1669efa2cea0SDaniel Lezcano 		.fc_nlinfo.nl_net = net,
167086872cb5SThomas Graf 	};
167170ceb4f5SYOSHIFUJI Hideaki 
167286872cb5SThomas Graf 	ipv6_addr_copy(&cfg.fc_dst, prefix);
167386872cb5SThomas Graf 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
167486872cb5SThomas Graf 
1675e317da96SYOSHIFUJI Hideaki 	/* We should treat it as a default route if prefix length is 0. */
1676e317da96SYOSHIFUJI Hideaki 	if (!prefixlen)
167786872cb5SThomas Graf 		cfg.fc_flags |= RTF_DEFAULT;
167870ceb4f5SYOSHIFUJI Hideaki 
167986872cb5SThomas Graf 	ip6_route_add(&cfg);
168070ceb4f5SYOSHIFUJI Hideaki 
1681efa2cea0SDaniel Lezcano 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
168270ceb4f5SYOSHIFUJI Hideaki }
168370ceb4f5SYOSHIFUJI Hideaki #endif
168470ceb4f5SYOSHIFUJI Hideaki 
16851da177e4SLinus Torvalds struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
16861da177e4SLinus Torvalds {
16871da177e4SLinus Torvalds 	struct rt6_info *rt;
1688c71099acSThomas Graf 	struct fib6_table *table;
16891da177e4SLinus Torvalds 
1690c346dca1SYOSHIFUJI Hideaki 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1691c71099acSThomas Graf 	if (table == NULL)
1692c71099acSThomas Graf 		return NULL;
16931da177e4SLinus Torvalds 
1694c71099acSThomas Graf 	write_lock_bh(&table->tb6_lock);
16957cc48263SEric Dumazet 	for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
16961da177e4SLinus Torvalds 		if (dev == rt->rt6i_dev &&
1697045927ffSYOSHIFUJI Hideaki 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
16981da177e4SLinus Torvalds 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
16991da177e4SLinus Torvalds 			break;
17001da177e4SLinus Torvalds 	}
17011da177e4SLinus Torvalds 	if (rt)
17021da177e4SLinus Torvalds 		dst_hold(&rt->u.dst);
1703c71099acSThomas Graf 	write_unlock_bh(&table->tb6_lock);
17041da177e4SLinus Torvalds 	return rt;
17051da177e4SLinus Torvalds }
17061da177e4SLinus Torvalds 
17071da177e4SLinus Torvalds struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1708ebacaaa0SYOSHIFUJI Hideaki 				     struct net_device *dev,
1709ebacaaa0SYOSHIFUJI Hideaki 				     unsigned int pref)
17101da177e4SLinus Torvalds {
171186872cb5SThomas Graf 	struct fib6_config cfg = {
171286872cb5SThomas Graf 		.fc_table	= RT6_TABLE_DFLT,
1713238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
171486872cb5SThomas Graf 		.fc_ifindex	= dev->ifindex,
171586872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
171686872cb5SThomas Graf 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
17175578689aSDaniel Lezcano 		.fc_nlinfo.pid = 0,
17185578689aSDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
1719c346dca1SYOSHIFUJI Hideaki 		.fc_nlinfo.nl_net = dev_net(dev),
172086872cb5SThomas Graf 	};
17211da177e4SLinus Torvalds 
172286872cb5SThomas Graf 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
17231da177e4SLinus Torvalds 
172486872cb5SThomas Graf 	ip6_route_add(&cfg);
17251da177e4SLinus Torvalds 
17261da177e4SLinus Torvalds 	return rt6_get_dflt_router(gwaddr, dev);
17271da177e4SLinus Torvalds }
17281da177e4SLinus Torvalds 
17297b4da532SDaniel Lezcano void rt6_purge_dflt_routers(struct net *net)
17301da177e4SLinus Torvalds {
17311da177e4SLinus Torvalds 	struct rt6_info *rt;
1732c71099acSThomas Graf 	struct fib6_table *table;
1733c71099acSThomas Graf 
1734c71099acSThomas Graf 	/* NOTE: Keep consistent with rt6_get_dflt_router */
17357b4da532SDaniel Lezcano 	table = fib6_get_table(net, RT6_TABLE_DFLT);
1736c71099acSThomas Graf 	if (table == NULL)
1737c71099acSThomas Graf 		return;
17381da177e4SLinus Torvalds 
17391da177e4SLinus Torvalds restart:
1740c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
17417cc48263SEric Dumazet 	for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
17421da177e4SLinus Torvalds 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
17431da177e4SLinus Torvalds 			dst_hold(&rt->u.dst);
1744c71099acSThomas Graf 			read_unlock_bh(&table->tb6_lock);
1745e0a1ad73SThomas Graf 			ip6_del_rt(rt);
17461da177e4SLinus Torvalds 			goto restart;
17471da177e4SLinus Torvalds 		}
17481da177e4SLinus Torvalds 	}
1749c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
17501da177e4SLinus Torvalds }
17511da177e4SLinus Torvalds 
17525578689aSDaniel Lezcano static void rtmsg_to_fib6_config(struct net *net,
17535578689aSDaniel Lezcano 				 struct in6_rtmsg *rtmsg,
175486872cb5SThomas Graf 				 struct fib6_config *cfg)
175586872cb5SThomas Graf {
175686872cb5SThomas Graf 	memset(cfg, 0, sizeof(*cfg));
175786872cb5SThomas Graf 
175886872cb5SThomas Graf 	cfg->fc_table = RT6_TABLE_MAIN;
175986872cb5SThomas Graf 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
176086872cb5SThomas Graf 	cfg->fc_metric = rtmsg->rtmsg_metric;
176186872cb5SThomas Graf 	cfg->fc_expires = rtmsg->rtmsg_info;
176286872cb5SThomas Graf 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
176386872cb5SThomas Graf 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
176486872cb5SThomas Graf 	cfg->fc_flags = rtmsg->rtmsg_flags;
176586872cb5SThomas Graf 
17665578689aSDaniel Lezcano 	cfg->fc_nlinfo.nl_net = net;
1767f1243c2dSBenjamin Thery 
176886872cb5SThomas Graf 	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
176986872cb5SThomas Graf 	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
177086872cb5SThomas Graf 	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
177186872cb5SThomas Graf }
177286872cb5SThomas Graf 
17735578689aSDaniel Lezcano int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
17741da177e4SLinus Torvalds {
177586872cb5SThomas Graf 	struct fib6_config cfg;
17761da177e4SLinus Torvalds 	struct in6_rtmsg rtmsg;
17771da177e4SLinus Torvalds 	int err;
17781da177e4SLinus Torvalds 
17791da177e4SLinus Torvalds 	switch(cmd) {
17801da177e4SLinus Torvalds 	case SIOCADDRT:		/* Add a route */
17811da177e4SLinus Torvalds 	case SIOCDELRT:		/* Delete a route */
17821da177e4SLinus Torvalds 		if (!capable(CAP_NET_ADMIN))
17831da177e4SLinus Torvalds 			return -EPERM;
17841da177e4SLinus Torvalds 		err = copy_from_user(&rtmsg, arg,
17851da177e4SLinus Torvalds 				     sizeof(struct in6_rtmsg));
17861da177e4SLinus Torvalds 		if (err)
17871da177e4SLinus Torvalds 			return -EFAULT;
17881da177e4SLinus Torvalds 
17895578689aSDaniel Lezcano 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
179086872cb5SThomas Graf 
17911da177e4SLinus Torvalds 		rtnl_lock();
17921da177e4SLinus Torvalds 		switch (cmd) {
17931da177e4SLinus Torvalds 		case SIOCADDRT:
179486872cb5SThomas Graf 			err = ip6_route_add(&cfg);
17951da177e4SLinus Torvalds 			break;
17961da177e4SLinus Torvalds 		case SIOCDELRT:
179786872cb5SThomas Graf 			err = ip6_route_del(&cfg);
17981da177e4SLinus Torvalds 			break;
17991da177e4SLinus Torvalds 		default:
18001da177e4SLinus Torvalds 			err = -EINVAL;
18011da177e4SLinus Torvalds 		}
18021da177e4SLinus Torvalds 		rtnl_unlock();
18031da177e4SLinus Torvalds 
18041da177e4SLinus Torvalds 		return err;
18053ff50b79SStephen Hemminger 	}
18061da177e4SLinus Torvalds 
18071da177e4SLinus Torvalds 	return -EINVAL;
18081da177e4SLinus Torvalds }
18091da177e4SLinus Torvalds 
18101da177e4SLinus Torvalds /*
18111da177e4SLinus Torvalds  *	Drop the packet on the floor
18121da177e4SLinus Torvalds  */
18131da177e4SLinus Torvalds 
181450eb431dSIlpo Järvinen static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
18151da177e4SLinus Torvalds {
1816612f09e8SYOSHIFUJI Hideaki 	int type;
1817612f09e8SYOSHIFUJI Hideaki 	switch (ipstats_mib_noroutes) {
1818612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_INNOROUTES:
18190660e03fSArnaldo Carvalho de Melo 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1820612f09e8SYOSHIFUJI Hideaki 		if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1821a11d206dSYOSHIFUJI Hideaki 			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1822612f09e8SYOSHIFUJI Hideaki 			break;
1823612f09e8SYOSHIFUJI Hideaki 		}
1824612f09e8SYOSHIFUJI Hideaki 		/* FALLTHROUGH */
1825612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_OUTNOROUTES:
1826612f09e8SYOSHIFUJI Hideaki 		IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1827612f09e8SYOSHIFUJI Hideaki 		break;
1828612f09e8SYOSHIFUJI Hideaki 	}
18299ce8ade0SThomas Graf 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
18301da177e4SLinus Torvalds 	kfree_skb(skb);
18311da177e4SLinus Torvalds 	return 0;
18321da177e4SLinus Torvalds }
18331da177e4SLinus Torvalds 
18349ce8ade0SThomas Graf static int ip6_pkt_discard(struct sk_buff *skb)
18359ce8ade0SThomas Graf {
1836612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
18379ce8ade0SThomas Graf }
18389ce8ade0SThomas Graf 
183920380731SArnaldo Carvalho de Melo static int ip6_pkt_discard_out(struct sk_buff *skb)
18401da177e4SLinus Torvalds {
18411da177e4SLinus Torvalds 	skb->dev = skb->dst->dev;
1842612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
18431da177e4SLinus Torvalds }
18441da177e4SLinus Torvalds 
18456723ab54SDavid S. Miller #ifdef CONFIG_IPV6_MULTIPLE_TABLES
18466723ab54SDavid S. Miller 
18479ce8ade0SThomas Graf static int ip6_pkt_prohibit(struct sk_buff *skb)
18489ce8ade0SThomas Graf {
1849612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
18509ce8ade0SThomas Graf }
18519ce8ade0SThomas Graf 
18529ce8ade0SThomas Graf static int ip6_pkt_prohibit_out(struct sk_buff *skb)
18539ce8ade0SThomas Graf {
18549ce8ade0SThomas Graf 	skb->dev = skb->dst->dev;
1855612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
18569ce8ade0SThomas Graf }
18579ce8ade0SThomas Graf 
18586723ab54SDavid S. Miller #endif
18596723ab54SDavid S. Miller 
18601da177e4SLinus Torvalds /*
18611da177e4SLinus Torvalds  *	Allocate a dst for local (unicast / anycast) address.
18621da177e4SLinus Torvalds  */
18631da177e4SLinus Torvalds 
18641da177e4SLinus Torvalds struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
18651da177e4SLinus Torvalds 				    const struct in6_addr *addr,
18661da177e4SLinus Torvalds 				    int anycast)
18671da177e4SLinus Torvalds {
1868c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(idev->dev);
1869f2fc6a54SBenjamin Thery 	struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
18701da177e4SLinus Torvalds 
18711da177e4SLinus Torvalds 	if (rt == NULL)
18721da177e4SLinus Torvalds 		return ERR_PTR(-ENOMEM);
18731da177e4SLinus Torvalds 
18745578689aSDaniel Lezcano 	dev_hold(net->loopback_dev);
18751da177e4SLinus Torvalds 	in6_dev_hold(idev);
18761da177e4SLinus Torvalds 
18771da177e4SLinus Torvalds 	rt->u.dst.flags = DST_HOST;
18781da177e4SLinus Torvalds 	rt->u.dst.input = ip6_input;
18791da177e4SLinus Torvalds 	rt->u.dst.output = ip6_output;
18805578689aSDaniel Lezcano 	rt->rt6i_dev = net->loopback_dev;
18811da177e4SLinus Torvalds 	rt->rt6i_idev = idev;
18821da177e4SLinus Torvalds 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
18835578689aSDaniel Lezcano 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
18841da177e4SLinus Torvalds 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
18851da177e4SLinus Torvalds 	rt->u.dst.obsolete = -1;
18861da177e4SLinus Torvalds 
18871da177e4SLinus Torvalds 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
188858c4fb86SYOSHIFUJI Hideaki 	if (anycast)
188958c4fb86SYOSHIFUJI Hideaki 		rt->rt6i_flags |= RTF_ANYCAST;
189058c4fb86SYOSHIFUJI Hideaki 	else
18911da177e4SLinus Torvalds 		rt->rt6i_flags |= RTF_LOCAL;
18921da177e4SLinus Torvalds 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
18931da177e4SLinus Torvalds 	if (rt->rt6i_nexthop == NULL) {
189440aa7b90SYOSHIFUJI Hideaki 		dst_free(&rt->u.dst);
18951da177e4SLinus Torvalds 		return ERR_PTR(-ENOMEM);
18961da177e4SLinus Torvalds 	}
18971da177e4SLinus Torvalds 
18981da177e4SLinus Torvalds 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
18991da177e4SLinus Torvalds 	rt->rt6i_dst.plen = 128;
19005578689aSDaniel Lezcano 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
19011da177e4SLinus Torvalds 
19021da177e4SLinus Torvalds 	atomic_set(&rt->u.dst.__refcnt, 1);
19031da177e4SLinus Torvalds 
19041da177e4SLinus Torvalds 	return rt;
19051da177e4SLinus Torvalds }
19061da177e4SLinus Torvalds 
/*
 * Argument bundle that rt6_ifdown() hands to fib6_ifdown() through the
 * opaque pointer of fib6_clean_all().
 */
19078ed67789SDaniel Lezcano struct arg_dev_net {
	/* device being taken down; NULL matches routes on any device */
19088ed67789SDaniel Lezcano 	struct net_device *dev;
	/* namespace whose ip6_null_entry must be left alone */
19098ed67789SDaniel Lezcano 	struct net *net;
19108ed67789SDaniel Lezcano };
19118ed67789SDaniel Lezcano 
19121da177e4SLinus Torvalds static int fib6_ifdown(struct rt6_info *rt, void *arg)
19131da177e4SLinus Torvalds {
19148ed67789SDaniel Lezcano 	struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
19158ed67789SDaniel Lezcano 	struct net *net = ((struct arg_dev_net *)arg)->net;
19168ed67789SDaniel Lezcano 
19178ed67789SDaniel Lezcano 	if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
19188ed67789SDaniel Lezcano 	    rt != net->ipv6.ip6_null_entry) {
19191da177e4SLinus Torvalds 		RT6_TRACE("deleted by ifdown %p\n", rt);
19201da177e4SLinus Torvalds 		return -1;
19211da177e4SLinus Torvalds 	}
19221da177e4SLinus Torvalds 	return 0;
19231da177e4SLinus Torvalds }
19241da177e4SLinus Torvalds 
1925f3db4851SDaniel Lezcano void rt6_ifdown(struct net *net, struct net_device *dev)
19261da177e4SLinus Torvalds {
19278ed67789SDaniel Lezcano 	struct arg_dev_net adn = {
19288ed67789SDaniel Lezcano 		.dev = dev,
19298ed67789SDaniel Lezcano 		.net = net,
19308ed67789SDaniel Lezcano 	};
19318ed67789SDaniel Lezcano 
19328ed67789SDaniel Lezcano 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
19331da177e4SLinus Torvalds }
19341da177e4SLinus Torvalds 
/*
 * Argument bundle that rt6_mtu_change() hands to rt6_mtu_change_route()
 * through the opaque pointer of fib6_clean_all().
 */
19351da177e4SLinus Torvalds struct rt6_mtu_change_arg
19361da177e4SLinus Torvalds {
	/* device whose MTU changed; only routes on this device are updated */
19371da177e4SLinus Torvalds 	struct net_device *dev;
	/* the new device MTU */
19381da177e4SLinus Torvalds 	unsigned mtu;
19391da177e4SLinus Torvalds };
19401da177e4SLinus Torvalds 
19411da177e4SLinus Torvalds static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
19421da177e4SLinus Torvalds {
19431da177e4SLinus Torvalds 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
19441da177e4SLinus Torvalds 	struct inet6_dev *idev;
1945c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(arg->dev);
19461da177e4SLinus Torvalds 
19471da177e4SLinus Torvalds 	/* In IPv6 pmtu discovery is not optional,
19481da177e4SLinus Torvalds 	   so that RTAX_MTU lock cannot disable it.
19491da177e4SLinus Torvalds 	   We still use this lock to block changes
19501da177e4SLinus Torvalds 	   caused by addrconf/ndisc.
19511da177e4SLinus Torvalds 	*/
19521da177e4SLinus Torvalds 
19531da177e4SLinus Torvalds 	idev = __in6_dev_get(arg->dev);
19541da177e4SLinus Torvalds 	if (idev == NULL)
19551da177e4SLinus Torvalds 		return 0;
19561da177e4SLinus Torvalds 
19571da177e4SLinus Torvalds 	/* For administrative MTU increase, there is no way to discover
19581da177e4SLinus Torvalds 	   IPv6 PMTU increase, so PMTU increase should be updated here.
19591da177e4SLinus Torvalds 	   Since RFC 1981 doesn't include administrative MTU increase
19601da177e4SLinus Torvalds 	   update PMTU increase is a MUST. (i.e. jumbo frame)
19611da177e4SLinus Torvalds 	 */
19621da177e4SLinus Torvalds 	/*
19631da177e4SLinus Torvalds 	   If new MTU is less than route PMTU, this new MTU will be the
19641da177e4SLinus Torvalds 	   lowest MTU in the path, update the route PMTU to reflect PMTU
19651da177e4SLinus Torvalds 	   decreases; if new MTU is greater than route PMTU, and the
19661da177e4SLinus Torvalds 	   old MTU is the lowest MTU in the path, update the route PMTU
19671da177e4SLinus Torvalds 	   to reflect the increase. In this case if the other nodes' MTU
19681da177e4SLinus Torvalds 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
19691da177e4SLinus Torvalds 	   PMTU discouvery.
19701da177e4SLinus Torvalds 	 */
19711da177e4SLinus Torvalds 	if (rt->rt6i_dev == arg->dev &&
19721da177e4SLinus Torvalds 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
197323717795SJim Paris 	    (dst_mtu(&rt->u.dst) >= arg->mtu ||
19741da177e4SLinus Torvalds 	     (dst_mtu(&rt->u.dst) < arg->mtu &&
1975566cfd8fSSimon Arlott 	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
19761da177e4SLinus Torvalds 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
19775578689aSDaniel Lezcano 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
1978566cfd8fSSimon Arlott 	}
19791da177e4SLinus Torvalds 	return 0;
19801da177e4SLinus Torvalds }
19811da177e4SLinus Torvalds 
19821da177e4SLinus Torvalds void rt6_mtu_change(struct net_device *dev, unsigned mtu)
19831da177e4SLinus Torvalds {
1984c71099acSThomas Graf 	struct rt6_mtu_change_arg arg = {
1985c71099acSThomas Graf 		.dev = dev,
1986c71099acSThomas Graf 		.mtu = mtu,
1987c71099acSThomas Graf 	};
19881da177e4SLinus Torvalds 
1989c346dca1SYOSHIFUJI Hideaki 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
19901da177e4SLinus Torvalds }
19911da177e4SLinus Torvalds 
1992ef7c79edSPatrick McHardy static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
19935176f91eSThomas Graf 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
199486872cb5SThomas Graf 	[RTA_OIF]               = { .type = NLA_U32 },
1995ab364a6fSThomas Graf 	[RTA_IIF]		= { .type = NLA_U32 },
199686872cb5SThomas Graf 	[RTA_PRIORITY]          = { .type = NLA_U32 },
199786872cb5SThomas Graf 	[RTA_METRICS]           = { .type = NLA_NESTED },
199886872cb5SThomas Graf };
199986872cb5SThomas Graf 
200086872cb5SThomas Graf static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
200186872cb5SThomas Graf 			      struct fib6_config *cfg)
20021da177e4SLinus Torvalds {
200386872cb5SThomas Graf 	struct rtmsg *rtm;
200486872cb5SThomas Graf 	struct nlattr *tb[RTA_MAX+1];
200586872cb5SThomas Graf 	int err;
20061da177e4SLinus Torvalds 
200786872cb5SThomas Graf 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
200886872cb5SThomas Graf 	if (err < 0)
200986872cb5SThomas Graf 		goto errout;
20101da177e4SLinus Torvalds 
201186872cb5SThomas Graf 	err = -EINVAL;
201286872cb5SThomas Graf 	rtm = nlmsg_data(nlh);
201386872cb5SThomas Graf 	memset(cfg, 0, sizeof(*cfg));
201486872cb5SThomas Graf 
201586872cb5SThomas Graf 	cfg->fc_table = rtm->rtm_table;
201686872cb5SThomas Graf 	cfg->fc_dst_len = rtm->rtm_dst_len;
201786872cb5SThomas Graf 	cfg->fc_src_len = rtm->rtm_src_len;
201886872cb5SThomas Graf 	cfg->fc_flags = RTF_UP;
201986872cb5SThomas Graf 	cfg->fc_protocol = rtm->rtm_protocol;
202086872cb5SThomas Graf 
202186872cb5SThomas Graf 	if (rtm->rtm_type == RTN_UNREACHABLE)
202286872cb5SThomas Graf 		cfg->fc_flags |= RTF_REJECT;
202386872cb5SThomas Graf 
202486872cb5SThomas Graf 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
202586872cb5SThomas Graf 	cfg->fc_nlinfo.nlh = nlh;
20263b1e0a65SYOSHIFUJI Hideaki 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
202786872cb5SThomas Graf 
202886872cb5SThomas Graf 	if (tb[RTA_GATEWAY]) {
202986872cb5SThomas Graf 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
203086872cb5SThomas Graf 		cfg->fc_flags |= RTF_GATEWAY;
20311da177e4SLinus Torvalds 	}
203286872cb5SThomas Graf 
203386872cb5SThomas Graf 	if (tb[RTA_DST]) {
203486872cb5SThomas Graf 		int plen = (rtm->rtm_dst_len + 7) >> 3;
203586872cb5SThomas Graf 
203686872cb5SThomas Graf 		if (nla_len(tb[RTA_DST]) < plen)
203786872cb5SThomas Graf 			goto errout;
203886872cb5SThomas Graf 
203986872cb5SThomas Graf 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
20401da177e4SLinus Torvalds 	}
204186872cb5SThomas Graf 
204286872cb5SThomas Graf 	if (tb[RTA_SRC]) {
204386872cb5SThomas Graf 		int plen = (rtm->rtm_src_len + 7) >> 3;
204486872cb5SThomas Graf 
204586872cb5SThomas Graf 		if (nla_len(tb[RTA_SRC]) < plen)
204686872cb5SThomas Graf 			goto errout;
204786872cb5SThomas Graf 
204886872cb5SThomas Graf 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
20491da177e4SLinus Torvalds 	}
205086872cb5SThomas Graf 
205186872cb5SThomas Graf 	if (tb[RTA_OIF])
205286872cb5SThomas Graf 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
205386872cb5SThomas Graf 
205486872cb5SThomas Graf 	if (tb[RTA_PRIORITY])
205586872cb5SThomas Graf 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
205686872cb5SThomas Graf 
205786872cb5SThomas Graf 	if (tb[RTA_METRICS]) {
205886872cb5SThomas Graf 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
205986872cb5SThomas Graf 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
20601da177e4SLinus Torvalds 	}
206186872cb5SThomas Graf 
206286872cb5SThomas Graf 	if (tb[RTA_TABLE])
206386872cb5SThomas Graf 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
206486872cb5SThomas Graf 
206586872cb5SThomas Graf 	err = 0;
206686872cb5SThomas Graf errout:
206786872cb5SThomas Graf 	return err;
20681da177e4SLinus Torvalds }
20691da177e4SLinus Torvalds 
2070c127ea2cSThomas Graf static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
20711da177e4SLinus Torvalds {
207286872cb5SThomas Graf 	struct fib6_config cfg;
207386872cb5SThomas Graf 	int err;
20741da177e4SLinus Torvalds 
207586872cb5SThomas Graf 	err = rtm_to_fib6_config(skb, nlh, &cfg);
207686872cb5SThomas Graf 	if (err < 0)
207786872cb5SThomas Graf 		return err;
207886872cb5SThomas Graf 
207986872cb5SThomas Graf 	return ip6_route_del(&cfg);
20801da177e4SLinus Torvalds }
20811da177e4SLinus Torvalds 
2082c127ea2cSThomas Graf static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
20831da177e4SLinus Torvalds {
208486872cb5SThomas Graf 	struct fib6_config cfg;
208586872cb5SThomas Graf 	int err;
20861da177e4SLinus Torvalds 
208786872cb5SThomas Graf 	err = rtm_to_fib6_config(skb, nlh, &cfg);
208886872cb5SThomas Graf 	if (err < 0)
208986872cb5SThomas Graf 		return err;
209086872cb5SThomas Graf 
209186872cb5SThomas Graf 	return ip6_route_add(&cfg);
20921da177e4SLinus Torvalds }
20931da177e4SLinus Torvalds 
2094339bf98fSThomas Graf static inline size_t rt6_nlmsg_size(void)
2095339bf98fSThomas Graf {
2096339bf98fSThomas Graf 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2097339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_SRC */
2098339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_DST */
2099339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_GATEWAY */
2100339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_PREFSRC */
2101339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_TABLE */
2102339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_IIF */
2103339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_OIF */
2104339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_PRIORITY */
21056a2b9ce0SNoriaki TAKAMIYA 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2106339bf98fSThomas Graf 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2107339bf98fSThomas Graf }
2108339bf98fSThomas Graf 
21091da177e4SLinus Torvalds static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
21100d51aa80SJamal Hadi Salim 			 struct in6_addr *dst, struct in6_addr *src,
21110d51aa80SJamal Hadi Salim 			 int iif, int type, u32 pid, u32 seq,
21127bc570c8SYOSHIFUJI Hideaki 			 int prefix, int nowait, unsigned int flags)
21131da177e4SLinus Torvalds {
21141da177e4SLinus Torvalds 	struct rtmsg *rtm;
21151da177e4SLinus Torvalds 	struct nlmsghdr *nlh;
2116e3703b3dSThomas Graf 	long expires;
21179e762a4aSPatrick McHardy 	u32 table;
21181da177e4SLinus Torvalds 
21191da177e4SLinus Torvalds 	if (prefix) {	/* user wants prefix routes only */
21201da177e4SLinus Torvalds 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
21211da177e4SLinus Torvalds 			/* success since this is not a prefix route */
21221da177e4SLinus Torvalds 			return 1;
21231da177e4SLinus Torvalds 		}
21241da177e4SLinus Torvalds 	}
21251da177e4SLinus Torvalds 
21262d7202bfSThomas Graf 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
21272d7202bfSThomas Graf 	if (nlh == NULL)
212826932566SPatrick McHardy 		return -EMSGSIZE;
21292d7202bfSThomas Graf 
21302d7202bfSThomas Graf 	rtm = nlmsg_data(nlh);
21311da177e4SLinus Torvalds 	rtm->rtm_family = AF_INET6;
21321da177e4SLinus Torvalds 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
21331da177e4SLinus Torvalds 	rtm->rtm_src_len = rt->rt6i_src.plen;
21341da177e4SLinus Torvalds 	rtm->rtm_tos = 0;
2135c71099acSThomas Graf 	if (rt->rt6i_table)
21369e762a4aSPatrick McHardy 		table = rt->rt6i_table->tb6_id;
2137c71099acSThomas Graf 	else
21389e762a4aSPatrick McHardy 		table = RT6_TABLE_UNSPEC;
21399e762a4aSPatrick McHardy 	rtm->rtm_table = table;
21402d7202bfSThomas Graf 	NLA_PUT_U32(skb, RTA_TABLE, table);
21411da177e4SLinus Torvalds 	if (rt->rt6i_flags&RTF_REJECT)
21421da177e4SLinus Torvalds 		rtm->rtm_type = RTN_UNREACHABLE;
21431da177e4SLinus Torvalds 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
21441da177e4SLinus Torvalds 		rtm->rtm_type = RTN_LOCAL;
21451da177e4SLinus Torvalds 	else
21461da177e4SLinus Torvalds 		rtm->rtm_type = RTN_UNICAST;
21471da177e4SLinus Torvalds 	rtm->rtm_flags = 0;
21481da177e4SLinus Torvalds 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
21491da177e4SLinus Torvalds 	rtm->rtm_protocol = rt->rt6i_protocol;
21501da177e4SLinus Torvalds 	if (rt->rt6i_flags&RTF_DYNAMIC)
21511da177e4SLinus Torvalds 		rtm->rtm_protocol = RTPROT_REDIRECT;
21521da177e4SLinus Torvalds 	else if (rt->rt6i_flags & RTF_ADDRCONF)
21531da177e4SLinus Torvalds 		rtm->rtm_protocol = RTPROT_KERNEL;
21541da177e4SLinus Torvalds 	else if (rt->rt6i_flags&RTF_DEFAULT)
21551da177e4SLinus Torvalds 		rtm->rtm_protocol = RTPROT_RA;
21561da177e4SLinus Torvalds 
21571da177e4SLinus Torvalds 	if (rt->rt6i_flags&RTF_CACHE)
21581da177e4SLinus Torvalds 		rtm->rtm_flags |= RTM_F_CLONED;
21591da177e4SLinus Torvalds 
21601da177e4SLinus Torvalds 	if (dst) {
21612d7202bfSThomas Graf 		NLA_PUT(skb, RTA_DST, 16, dst);
21621da177e4SLinus Torvalds 		rtm->rtm_dst_len = 128;
21631da177e4SLinus Torvalds 	} else if (rtm->rtm_dst_len)
21642d7202bfSThomas Graf 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
21651da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
21661da177e4SLinus Torvalds 	if (src) {
21672d7202bfSThomas Graf 		NLA_PUT(skb, RTA_SRC, 16, src);
21681da177e4SLinus Torvalds 		rtm->rtm_src_len = 128;
21691da177e4SLinus Torvalds 	} else if (rtm->rtm_src_len)
21702d7202bfSThomas Graf 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
21711da177e4SLinus Torvalds #endif
21727bc570c8SYOSHIFUJI Hideaki 	if (iif) {
21737bc570c8SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_MROUTE
21747bc570c8SYOSHIFUJI Hideaki 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
21757bc570c8SYOSHIFUJI Hideaki 			int err = ip6mr_get_route(skb, rtm, nowait);
21767bc570c8SYOSHIFUJI Hideaki 			if (err <= 0) {
21777bc570c8SYOSHIFUJI Hideaki 				if (!nowait) {
21787bc570c8SYOSHIFUJI Hideaki 					if (err == 0)
21797bc570c8SYOSHIFUJI Hideaki 						return 0;
21807bc570c8SYOSHIFUJI Hideaki 					goto nla_put_failure;
21817bc570c8SYOSHIFUJI Hideaki 				} else {
21827bc570c8SYOSHIFUJI Hideaki 					if (err == -EMSGSIZE)
21837bc570c8SYOSHIFUJI Hideaki 						goto nla_put_failure;
21847bc570c8SYOSHIFUJI Hideaki 				}
21857bc570c8SYOSHIFUJI Hideaki 			}
21867bc570c8SYOSHIFUJI Hideaki 		} else
21877bc570c8SYOSHIFUJI Hideaki #endif
21882d7202bfSThomas Graf 			NLA_PUT_U32(skb, RTA_IIF, iif);
21897bc570c8SYOSHIFUJI Hideaki 	} else if (dst) {
21901da177e4SLinus Torvalds 		struct in6_addr saddr_buf;
21915e5f3f0fSYOSHIFUJI Hideaki 		if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
21927cbca67cSYOSHIFUJI Hideaki 				       dst, 0, &saddr_buf) == 0)
21932d7202bfSThomas Graf 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
21941da177e4SLinus Torvalds 	}
21952d7202bfSThomas Graf 
21961da177e4SLinus Torvalds 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
21972d7202bfSThomas Graf 		goto nla_put_failure;
21982d7202bfSThomas Graf 
21991da177e4SLinus Torvalds 	if (rt->u.dst.neighbour)
22002d7202bfSThomas Graf 		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
22012d7202bfSThomas Graf 
22021da177e4SLinus Torvalds 	if (rt->u.dst.dev)
22032d7202bfSThomas Graf 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
22042d7202bfSThomas Graf 
22052d7202bfSThomas Graf 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2206e3703b3dSThomas Graf 
220736e3deaeSYOSHIFUJI Hideaki 	if (!(rt->rt6i_flags & RTF_EXPIRES))
220836e3deaeSYOSHIFUJI Hideaki 		expires = 0;
220936e3deaeSYOSHIFUJI Hideaki 	else if (rt->rt6i_expires - jiffies < INT_MAX)
221036e3deaeSYOSHIFUJI Hideaki 		expires = rt->rt6i_expires - jiffies;
221136e3deaeSYOSHIFUJI Hideaki 	else
221236e3deaeSYOSHIFUJI Hideaki 		expires = INT_MAX;
221369cdf8f9SYOSHIFUJI Hideaki 
2214e3703b3dSThomas Graf 	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2215e3703b3dSThomas Graf 			       expires, rt->u.dst.error) < 0)
2216e3703b3dSThomas Graf 		goto nla_put_failure;
22171da177e4SLinus Torvalds 
22182d7202bfSThomas Graf 	return nlmsg_end(skb, nlh);
22192d7202bfSThomas Graf 
22202d7202bfSThomas Graf nla_put_failure:
222126932566SPatrick McHardy 	nlmsg_cancel(skb, nlh);
222226932566SPatrick McHardy 	return -EMSGSIZE;
22231da177e4SLinus Torvalds }
22241da177e4SLinus Torvalds 
22251b43af54SPatrick McHardy int rt6_dump_route(struct rt6_info *rt, void *p_arg)
22261da177e4SLinus Torvalds {
22271da177e4SLinus Torvalds 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
22281da177e4SLinus Torvalds 	int prefix;
22291da177e4SLinus Torvalds 
22302d7202bfSThomas Graf 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
22312d7202bfSThomas Graf 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
22321da177e4SLinus Torvalds 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
22331da177e4SLinus Torvalds 	} else
22341da177e4SLinus Torvalds 		prefix = 0;
22351da177e4SLinus Torvalds 
22361da177e4SLinus Torvalds 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
22371da177e4SLinus Torvalds 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
22387bc570c8SYOSHIFUJI Hideaki 		     prefix, 0, NLM_F_MULTI);
22391da177e4SLinus Torvalds }
22401da177e4SLinus Torvalds 
2241c127ea2cSThomas Graf static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
22421da177e4SLinus Torvalds {
22433b1e0a65SYOSHIFUJI Hideaki 	struct net *net = sock_net(in_skb->sk);
2244ab364a6fSThomas Graf 	struct nlattr *tb[RTA_MAX+1];
22451da177e4SLinus Torvalds 	struct rt6_info *rt;
2246ab364a6fSThomas Graf 	struct sk_buff *skb;
2247ab364a6fSThomas Graf 	struct rtmsg *rtm;
2248ab364a6fSThomas Graf 	struct flowi fl;
2249ab364a6fSThomas Graf 	int err, iif = 0;
2250ab364a6fSThomas Graf 
2251ab364a6fSThomas Graf 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2252ab364a6fSThomas Graf 	if (err < 0)
2253ab364a6fSThomas Graf 		goto errout;
2254ab364a6fSThomas Graf 
2255ab364a6fSThomas Graf 	err = -EINVAL;
2256ab364a6fSThomas Graf 	memset(&fl, 0, sizeof(fl));
2257ab364a6fSThomas Graf 
2258ab364a6fSThomas Graf 	if (tb[RTA_SRC]) {
2259ab364a6fSThomas Graf 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2260ab364a6fSThomas Graf 			goto errout;
2261ab364a6fSThomas Graf 
2262ab364a6fSThomas Graf 		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2263ab364a6fSThomas Graf 	}
2264ab364a6fSThomas Graf 
2265ab364a6fSThomas Graf 	if (tb[RTA_DST]) {
2266ab364a6fSThomas Graf 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2267ab364a6fSThomas Graf 			goto errout;
2268ab364a6fSThomas Graf 
2269ab364a6fSThomas Graf 		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2270ab364a6fSThomas Graf 	}
2271ab364a6fSThomas Graf 
2272ab364a6fSThomas Graf 	if (tb[RTA_IIF])
2273ab364a6fSThomas Graf 		iif = nla_get_u32(tb[RTA_IIF]);
2274ab364a6fSThomas Graf 
2275ab364a6fSThomas Graf 	if (tb[RTA_OIF])
2276ab364a6fSThomas Graf 		fl.oif = nla_get_u32(tb[RTA_OIF]);
2277ab364a6fSThomas Graf 
2278ab364a6fSThomas Graf 	if (iif) {
2279ab364a6fSThomas Graf 		struct net_device *dev;
22805578689aSDaniel Lezcano 		dev = __dev_get_by_index(net, iif);
2281ab364a6fSThomas Graf 		if (!dev) {
2282ab364a6fSThomas Graf 			err = -ENODEV;
2283ab364a6fSThomas Graf 			goto errout;
2284ab364a6fSThomas Graf 		}
2285ab364a6fSThomas Graf 	}
22861da177e4SLinus Torvalds 
22871da177e4SLinus Torvalds 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2288ab364a6fSThomas Graf 	if (skb == NULL) {
2289ab364a6fSThomas Graf 		err = -ENOBUFS;
2290ab364a6fSThomas Graf 		goto errout;
2291ab364a6fSThomas Graf 	}
22921da177e4SLinus Torvalds 
22931da177e4SLinus Torvalds 	/* Reserve room for dummy headers, this skb can pass
22941da177e4SLinus Torvalds 	   through good chunk of routing engine.
22951da177e4SLinus Torvalds 	 */
2296459a98edSArnaldo Carvalho de Melo 	skb_reset_mac_header(skb);
22971da177e4SLinus Torvalds 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
22981da177e4SLinus Torvalds 
22998a3edd80SDaniel Lezcano 	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
23001da177e4SLinus Torvalds 	skb->dst = &rt->u.dst;
23011da177e4SLinus Torvalds 
2302ab364a6fSThomas Graf 	err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
23031da177e4SLinus Torvalds 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
23047bc570c8SYOSHIFUJI Hideaki 			    nlh->nlmsg_seq, 0, 0, 0);
23051da177e4SLinus Torvalds 	if (err < 0) {
2306ab364a6fSThomas Graf 		kfree_skb(skb);
2307ab364a6fSThomas Graf 		goto errout;
23081da177e4SLinus Torvalds 	}
23091da177e4SLinus Torvalds 
23105578689aSDaniel Lezcano 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2311ab364a6fSThomas Graf errout:
23121da177e4SLinus Torvalds 	return err;
23131da177e4SLinus Torvalds }
23141da177e4SLinus Torvalds 
231586872cb5SThomas Graf void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
23161da177e4SLinus Torvalds {
23171da177e4SLinus Torvalds 	struct sk_buff *skb;
23185578689aSDaniel Lezcano 	struct net *net = info->nl_net;
2319528c4cebSDenis V. Lunev 	u32 seq;
2320528c4cebSDenis V. Lunev 	int err;
23210d51aa80SJamal Hadi Salim 
2322528c4cebSDenis V. Lunev 	err = -ENOBUFS;
2323528c4cebSDenis V. Lunev 	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
232486872cb5SThomas Graf 
2325339bf98fSThomas Graf 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
232621713ebcSThomas Graf 	if (skb == NULL)
232721713ebcSThomas Graf 		goto errout;
23281da177e4SLinus Torvalds 
2329528c4cebSDenis V. Lunev 	err = rt6_fill_node(skb, rt, NULL, NULL, 0,
23307bc570c8SYOSHIFUJI Hideaki 				event, info->pid, seq, 0, 0, 0);
233126932566SPatrick McHardy 	if (err < 0) {
233226932566SPatrick McHardy 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
233326932566SPatrick McHardy 		WARN_ON(err == -EMSGSIZE);
233426932566SPatrick McHardy 		kfree_skb(skb);
233526932566SPatrick McHardy 		goto errout;
233626932566SPatrick McHardy 	}
23375578689aSDaniel Lezcano 	err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
23385578689aSDaniel Lezcano 			  info->nlh, gfp_any());
233921713ebcSThomas Graf errout:
234021713ebcSThomas Graf 	if (err < 0)
23415578689aSDaniel Lezcano 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
23421da177e4SLinus Torvalds }
23431da177e4SLinus Torvalds 
23448ed67789SDaniel Lezcano static int ip6_route_dev_notify(struct notifier_block *this,
23458ed67789SDaniel Lezcano 				unsigned long event, void *data)
23468ed67789SDaniel Lezcano {
23478ed67789SDaniel Lezcano 	struct net_device *dev = (struct net_device *)data;
2348c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
23498ed67789SDaniel Lezcano 
23508ed67789SDaniel Lezcano 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
23518ed67789SDaniel Lezcano 		net->ipv6.ip6_null_entry->u.dst.dev = dev;
23528ed67789SDaniel Lezcano 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
23538ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
23548ed67789SDaniel Lezcano 		net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
23558ed67789SDaniel Lezcano 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
23568ed67789SDaniel Lezcano 		net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
23578ed67789SDaniel Lezcano 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
23588ed67789SDaniel Lezcano #endif
23598ed67789SDaniel Lezcano 	}
23608ed67789SDaniel Lezcano 
23618ed67789SDaniel Lezcano 	return NOTIFY_OK;
23628ed67789SDaniel Lezcano }
23638ed67789SDaniel Lezcano 
23641da177e4SLinus Torvalds /*
23651da177e4SLinus Torvalds  *	/proc
23661da177e4SLinus Torvalds  */
23671da177e4SLinus Torvalds 
23681da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
23691da177e4SLinus Torvalds 
/*
 * Sum of fixed field widths, 150 in total.
 * NOTE(review): the terms look like the column widths of one route line
 * (three 32-char addresses, separators, counters, newline) - confirm.
 * Neither this macro nor struct rt6_proc_arg is referenced by the
 * seq_file based code visible below; presumably left over from an older
 * buffer-based /proc reader - TODO confirm before removing.
 */
23701da177e4SLinus Torvalds #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
23711da177e4SLinus Torvalds 
/* Cursor state for a buffer-based reader (see the note above). */
23721da177e4SLinus Torvalds struct rt6_proc_arg
23731da177e4SLinus Torvalds {
23741da177e4SLinus Torvalds 	char *buffer;
23751da177e4SLinus Torvalds 	int offset;
23761da177e4SLinus Torvalds 	int length;
23771da177e4SLinus Torvalds 	int skip;
23781da177e4SLinus Torvalds 	int len;
23791da177e4SLinus Torvalds };
23801da177e4SLinus Torvalds 
23811da177e4SLinus Torvalds static int rt6_info_route(struct rt6_info *rt, void *p_arg)
23821da177e4SLinus Torvalds {
238333120b30SAlexey Dobriyan 	struct seq_file *m = p_arg;
23841da177e4SLinus Torvalds 
238533120b30SAlexey Dobriyan 	seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
23861da177e4SLinus Torvalds 		   rt->rt6i_dst.plen);
23871da177e4SLinus Torvalds 
23881da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
238933120b30SAlexey Dobriyan 	seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
23901da177e4SLinus Torvalds 		   rt->rt6i_src.plen);
23911da177e4SLinus Torvalds #else
239233120b30SAlexey Dobriyan 	seq_puts(m, "00000000000000000000000000000000 00 ");
23931da177e4SLinus Torvalds #endif
23941da177e4SLinus Torvalds 
23951da177e4SLinus Torvalds 	if (rt->rt6i_nexthop) {
239633120b30SAlexey Dobriyan 		seq_printf(m, NIP6_SEQFMT,
239733e93c96SYOSHIFUJI Hideaki 			   NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
23981da177e4SLinus Torvalds 	} else {
239933120b30SAlexey Dobriyan 		seq_puts(m, "00000000000000000000000000000000");
24001da177e4SLinus Torvalds 	}
240133120b30SAlexey Dobriyan 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
24021da177e4SLinus Torvalds 		   rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
24031da177e4SLinus Torvalds 		   rt->u.dst.__use, rt->rt6i_flags,
24041da177e4SLinus Torvalds 		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
24051da177e4SLinus Torvalds 	return 0;
24061da177e4SLinus Torvalds }
24071da177e4SLinus Torvalds 
240833120b30SAlexey Dobriyan static int ipv6_route_show(struct seq_file *m, void *v)
24091da177e4SLinus Torvalds {
2410f3db4851SDaniel Lezcano 	struct net *net = (struct net *)m->private;
2411f3db4851SDaniel Lezcano 	fib6_clean_all(net, rt6_info_route, 0, m);
241233120b30SAlexey Dobriyan 	return 0;
24131da177e4SLinus Torvalds }
24141da177e4SLinus Torvalds 
241533120b30SAlexey Dobriyan static int ipv6_route_open(struct inode *inode, struct file *file)
241633120b30SAlexey Dobriyan {
2417de05c557SPavel Emelyanov 	return single_open_net(inode, file, ipv6_route_show);
2418f3db4851SDaniel Lezcano }
2419f3db4851SDaniel Lezcano 
242033120b30SAlexey Dobriyan static const struct file_operations ipv6_route_proc_fops = {
242133120b30SAlexey Dobriyan 	.owner		= THIS_MODULE,
242233120b30SAlexey Dobriyan 	.open		= ipv6_route_open,
242333120b30SAlexey Dobriyan 	.read		= seq_read,
242433120b30SAlexey Dobriyan 	.llseek		= seq_lseek,
2425b6fcbdb4SPavel Emelyanov 	.release	= single_release_net,
242633120b30SAlexey Dobriyan };
242733120b30SAlexey Dobriyan 
24281da177e4SLinus Torvalds static int rt6_stats_seq_show(struct seq_file *seq, void *v)
24291da177e4SLinus Torvalds {
243069ddb805SDaniel Lezcano 	struct net *net = (struct net *)seq->private;
24311da177e4SLinus Torvalds 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
243269ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_nodes,
243369ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_route_nodes,
243469ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_alloc,
243569ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_entries,
243669ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_cache,
2437f2fc6a54SBenjamin Thery 		   atomic_read(&net->ipv6.ip6_dst_ops->entries),
243869ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_discarded_routes);
24391da177e4SLinus Torvalds 
24401da177e4SLinus Torvalds 	return 0;
24411da177e4SLinus Torvalds }
24421da177e4SLinus Torvalds 
24431da177e4SLinus Torvalds static int rt6_stats_seq_open(struct inode *inode, struct file *file)
24441da177e4SLinus Torvalds {
2445de05c557SPavel Emelyanov 	return single_open_net(inode, file, rt6_stats_seq_show);
244669ddb805SDaniel Lezcano }
244769ddb805SDaniel Lezcano 
24489a32144eSArjan van de Ven static const struct file_operations rt6_stats_seq_fops = {
24491da177e4SLinus Torvalds 	.owner	 = THIS_MODULE,
24501da177e4SLinus Torvalds 	.open	 = rt6_stats_seq_open,
24511da177e4SLinus Torvalds 	.read	 = seq_read,
24521da177e4SLinus Torvalds 	.llseek	 = seq_lseek,
2453b6fcbdb4SPavel Emelyanov 	.release = single_release_net,
24541da177e4SLinus Torvalds };
24551da177e4SLinus Torvalds #endif	/* CONFIG_PROC_FS */
24561da177e4SLinus Torvalds 
24571da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
24581da177e4SLinus Torvalds 
24591da177e4SLinus Torvalds static
24601da177e4SLinus Torvalds int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
24611da177e4SLinus Torvalds 			      void __user *buffer, size_t *lenp, loff_t *ppos)
24621da177e4SLinus Torvalds {
24635b7c931dSDaniel Lezcano 	struct net *net = current->nsproxy->net_ns;
24645b7c931dSDaniel Lezcano 	int delay = net->ipv6.sysctl.flush_delay;
24651da177e4SLinus Torvalds 	if (write) {
24661da177e4SLinus Torvalds 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
24675b7c931dSDaniel Lezcano 		fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
24681da177e4SLinus Torvalds 		return 0;
24691da177e4SLinus Torvalds 	} else
24701da177e4SLinus Torvalds 		return -EINVAL;
24711da177e4SLinus Torvalds }
24721da177e4SLinus Torvalds 
2473760f2d01SDaniel Lezcano ctl_table ipv6_route_table_template[] = {
24741da177e4SLinus Torvalds 	{
24751da177e4SLinus Torvalds 		.procname	=	"flush",
24764990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.flush_delay,
24771da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
247889c8b3a1SDave Jones 		.mode		=	0200,
24791da177e4SLinus Torvalds 		.proc_handler	=	&ipv6_sysctl_rtcache_flush
24801da177e4SLinus Torvalds 	},
24811da177e4SLinus Torvalds 	{
24821da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
24831da177e4SLinus Torvalds 		.procname	=	"gc_thresh",
24849a7ec3a9SDaniel Lezcano 		.data		=	&ip6_dst_ops_template.gc_thresh,
24851da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
24861da177e4SLinus Torvalds 		.mode		=	0644,
24871da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec,
24881da177e4SLinus Torvalds 	},
24891da177e4SLinus Torvalds 	{
24901da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
24911da177e4SLinus Torvalds 		.procname	=	"max_size",
24924990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
24931da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
24941da177e4SLinus Torvalds 		.mode		=	0644,
24951da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec,
24961da177e4SLinus Torvalds 	},
24971da177e4SLinus Torvalds 	{
24981da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
24991da177e4SLinus Torvalds 		.procname	=	"gc_min_interval",
25004990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
25011da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25021da177e4SLinus Torvalds 		.mode		=	0644,
25031da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec_jiffies,
25041da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
25051da177e4SLinus Torvalds 	},
25061da177e4SLinus Torvalds 	{
25071da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
25081da177e4SLinus Torvalds 		.procname	=	"gc_timeout",
25094990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
25101da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25111da177e4SLinus Torvalds 		.mode		=	0644,
25121da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec_jiffies,
25131da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
25141da177e4SLinus Torvalds 	},
25151da177e4SLinus Torvalds 	{
25161da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
25171da177e4SLinus Torvalds 		.procname	=	"gc_interval",
25184990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
25191da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25201da177e4SLinus Torvalds 		.mode		=	0644,
25211da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec_jiffies,
25221da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
25231da177e4SLinus Torvalds 	},
25241da177e4SLinus Torvalds 	{
25251da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
25261da177e4SLinus Torvalds 		.procname	=	"gc_elasticity",
25274990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
25281da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25291da177e4SLinus Torvalds 		.mode		=	0644,
25301da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec_jiffies,
25311da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
25321da177e4SLinus Torvalds 	},
25331da177e4SLinus Torvalds 	{
25341da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
25351da177e4SLinus Torvalds 		.procname	=	"mtu_expires",
25364990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
25371da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25381da177e4SLinus Torvalds 		.mode		=	0644,
25391da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec_jiffies,
25401da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
25411da177e4SLinus Torvalds 	},
25421da177e4SLinus Torvalds 	{
25431da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
25441da177e4SLinus Torvalds 		.procname	=	"min_adv_mss",
25454990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
25461da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25471da177e4SLinus Torvalds 		.mode		=	0644,
25481da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec_jiffies,
25491da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
25501da177e4SLinus Torvalds 	},
25511da177e4SLinus Torvalds 	{
25521da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
25531da177e4SLinus Torvalds 		.procname	=	"gc_min_interval_ms",
25544990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
25551da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25561da177e4SLinus Torvalds 		.mode		=	0644,
25571da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec_ms_jiffies,
25581da177e4SLinus Torvalds 		.strategy	=	&sysctl_ms_jiffies,
25591da177e4SLinus Torvalds 	},
25601da177e4SLinus Torvalds 	{ .ctl_name = 0 }
25611da177e4SLinus Torvalds };
25621da177e4SLinus Torvalds 
2563760f2d01SDaniel Lezcano struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2564760f2d01SDaniel Lezcano {
2565760f2d01SDaniel Lezcano 	struct ctl_table *table;
2566760f2d01SDaniel Lezcano 
2567760f2d01SDaniel Lezcano 	table = kmemdup(ipv6_route_table_template,
2568760f2d01SDaniel Lezcano 			sizeof(ipv6_route_table_template),
2569760f2d01SDaniel Lezcano 			GFP_KERNEL);
25705ee09105SYOSHIFUJI Hideaki 
25715ee09105SYOSHIFUJI Hideaki 	if (table) {
25725ee09105SYOSHIFUJI Hideaki 		table[0].data = &net->ipv6.sysctl.flush_delay;
2573f2fc6a54SBenjamin Thery 		table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh;
25745ee09105SYOSHIFUJI Hideaki 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
25755ee09105SYOSHIFUJI Hideaki 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
25765ee09105SYOSHIFUJI Hideaki 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
25775ee09105SYOSHIFUJI Hideaki 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
25785ee09105SYOSHIFUJI Hideaki 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
25795ee09105SYOSHIFUJI Hideaki 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
25805ee09105SYOSHIFUJI Hideaki 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
25815ee09105SYOSHIFUJI Hideaki 	}
25825ee09105SYOSHIFUJI Hideaki 
2583760f2d01SDaniel Lezcano 	return table;
2584760f2d01SDaniel Lezcano }
25851da177e4SLinus Torvalds #endif
25861da177e4SLinus Torvalds 
2587cdb18761SDaniel Lezcano static int ip6_route_net_init(struct net *net)
2588cdb18761SDaniel Lezcano {
2589633d424bSPavel Emelyanov 	int ret = -ENOMEM;
25908ed67789SDaniel Lezcano 
2591f2fc6a54SBenjamin Thery 	net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template,
2592f2fc6a54SBenjamin Thery 					sizeof(*net->ipv6.ip6_dst_ops),
2593f2fc6a54SBenjamin Thery 					GFP_KERNEL);
2594f2fc6a54SBenjamin Thery 	if (!net->ipv6.ip6_dst_ops)
2595f2fc6a54SBenjamin Thery 		goto out;
259648115becSDenis V. Lunev 	net->ipv6.ip6_dst_ops->dst_net = hold_net(net);
2597f2fc6a54SBenjamin Thery 
25988ed67789SDaniel Lezcano 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
25998ed67789SDaniel Lezcano 					   sizeof(*net->ipv6.ip6_null_entry),
26008ed67789SDaniel Lezcano 					   GFP_KERNEL);
26018ed67789SDaniel Lezcano 	if (!net->ipv6.ip6_null_entry)
2602f2fc6a54SBenjamin Thery 		goto out_ip6_dst_ops;
26038ed67789SDaniel Lezcano 	net->ipv6.ip6_null_entry->u.dst.path =
26048ed67789SDaniel Lezcano 		(struct dst_entry *)net->ipv6.ip6_null_entry;
2605f2fc6a54SBenjamin Thery 	net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
26068ed67789SDaniel Lezcano 
26078ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
26088ed67789SDaniel Lezcano 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
26098ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_prohibit_entry),
26108ed67789SDaniel Lezcano 					       GFP_KERNEL);
26118ed67789SDaniel Lezcano 	if (!net->ipv6.ip6_prohibit_entry) {
26128ed67789SDaniel Lezcano 		kfree(net->ipv6.ip6_null_entry);
26138ed67789SDaniel Lezcano 		goto out;
26148ed67789SDaniel Lezcano 	}
26158ed67789SDaniel Lezcano 	net->ipv6.ip6_prohibit_entry->u.dst.path =
26168ed67789SDaniel Lezcano 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2617f2fc6a54SBenjamin Thery 	net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
26188ed67789SDaniel Lezcano 
26198ed67789SDaniel Lezcano 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
26208ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
26218ed67789SDaniel Lezcano 					       GFP_KERNEL);
26228ed67789SDaniel Lezcano 	if (!net->ipv6.ip6_blk_hole_entry) {
26238ed67789SDaniel Lezcano 		kfree(net->ipv6.ip6_null_entry);
26248ed67789SDaniel Lezcano 		kfree(net->ipv6.ip6_prohibit_entry);
26258ed67789SDaniel Lezcano 		goto out;
26268ed67789SDaniel Lezcano 	}
26278ed67789SDaniel Lezcano 	net->ipv6.ip6_blk_hole_entry->u.dst.path =
26288ed67789SDaniel Lezcano 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2629f2fc6a54SBenjamin Thery 	net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
26308ed67789SDaniel Lezcano #endif
26318ed67789SDaniel Lezcano 
2632cdb18761SDaniel Lezcano #ifdef CONFIG_PROC_FS
2633cdb18761SDaniel Lezcano 	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2634cdb18761SDaniel Lezcano 	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2635cdb18761SDaniel Lezcano #endif
26366891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
26376891a346SBenjamin Thery 
26388ed67789SDaniel Lezcano 	ret = 0;
26398ed67789SDaniel Lezcano out:
26408ed67789SDaniel Lezcano 	return ret;
2641f2fc6a54SBenjamin Thery 
2642f2fc6a54SBenjamin Thery out_ip6_dst_ops:
264348115becSDenis V. Lunev 	release_net(net->ipv6.ip6_dst_ops->dst_net);
2644f2fc6a54SBenjamin Thery 	kfree(net->ipv6.ip6_dst_ops);
2645f2fc6a54SBenjamin Thery 	goto out;
2646cdb18761SDaniel Lezcano }
2647cdb18761SDaniel Lezcano 
2648cdb18761SDaniel Lezcano static void ip6_route_net_exit(struct net *net)
2649cdb18761SDaniel Lezcano {
2650cdb18761SDaniel Lezcano #ifdef CONFIG_PROC_FS
2651cdb18761SDaniel Lezcano 	proc_net_remove(net, "ipv6_route");
2652cdb18761SDaniel Lezcano 	proc_net_remove(net, "rt6_stats");
2653cdb18761SDaniel Lezcano #endif
26548ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_null_entry);
26558ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
26568ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_prohibit_entry);
26578ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_blk_hole_entry);
26588ed67789SDaniel Lezcano #endif
265948115becSDenis V. Lunev 	release_net(net->ipv6.ip6_dst_ops->dst_net);
2660f2fc6a54SBenjamin Thery 	kfree(net->ipv6.ip6_dst_ops);
2661cdb18761SDaniel Lezcano }
2662cdb18761SDaniel Lezcano 
2663cdb18761SDaniel Lezcano static struct pernet_operations ip6_route_net_ops = {
2664cdb18761SDaniel Lezcano 	.init = ip6_route_net_init,
2665cdb18761SDaniel Lezcano 	.exit = ip6_route_net_exit,
2666cdb18761SDaniel Lezcano };
2667cdb18761SDaniel Lezcano 
26688ed67789SDaniel Lezcano static struct notifier_block ip6_route_dev_notifier = {
26698ed67789SDaniel Lezcano 	.notifier_call = ip6_route_dev_notify,
26708ed67789SDaniel Lezcano 	.priority = 0,
26718ed67789SDaniel Lezcano };
26728ed67789SDaniel Lezcano 
2673433d49c3SDaniel Lezcano int __init ip6_route_init(void)
26741da177e4SLinus Torvalds {
2675433d49c3SDaniel Lezcano 	int ret;
2676433d49c3SDaniel Lezcano 
26779a7ec3a9SDaniel Lezcano 	ret = -ENOMEM;
26789a7ec3a9SDaniel Lezcano 	ip6_dst_ops_template.kmem_cachep =
26799a7ec3a9SDaniel Lezcano 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
26809a7ec3a9SDaniel Lezcano 				  SLAB_HWCACHE_ALIGN, NULL);
26819a7ec3a9SDaniel Lezcano 	if (!ip6_dst_ops_template.kmem_cachep)
2682f2fc6a54SBenjamin Thery 		goto out;;
268314e50e57SDavid S. Miller 
26848ed67789SDaniel Lezcano 	ret = register_pernet_subsys(&ip6_route_net_ops);
26858ed67789SDaniel Lezcano 	if (ret)
2686bdb3289fSDaniel Lezcano 		goto out_kmem_cache;
2687bdb3289fSDaniel Lezcano 
26888ed67789SDaniel Lezcano 	/* Registering of the loopback is done before this portion of code,
26898ed67789SDaniel Lezcano 	 * the loopback reference in rt6_info will not be taken, do it
26908ed67789SDaniel Lezcano 	 * manually for init_net */
26918ed67789SDaniel Lezcano 	init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
26928ed67789SDaniel Lezcano 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2693bdb3289fSDaniel Lezcano   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
26948ed67789SDaniel Lezcano 	init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
26958ed67789SDaniel Lezcano 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
26968ed67789SDaniel Lezcano 	init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
26978ed67789SDaniel Lezcano 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2698bdb3289fSDaniel Lezcano   #endif
2699433d49c3SDaniel Lezcano 	ret = fib6_init();
2700433d49c3SDaniel Lezcano 	if (ret)
27018ed67789SDaniel Lezcano 		goto out_register_subsys;
2702433d49c3SDaniel Lezcano 
2703433d49c3SDaniel Lezcano 	ret = xfrm6_init();
2704433d49c3SDaniel Lezcano 	if (ret)
2705cdb18761SDaniel Lezcano 		goto out_fib6_init;
2706c35b7e72SDaniel Lezcano 
2707433d49c3SDaniel Lezcano 	ret = fib6_rules_init();
2708433d49c3SDaniel Lezcano 	if (ret)
2709433d49c3SDaniel Lezcano 		goto xfrm6_init;
27107e5449c2SDaniel Lezcano 
2711433d49c3SDaniel Lezcano 	ret = -ENOBUFS;
2712433d49c3SDaniel Lezcano 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2713433d49c3SDaniel Lezcano 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2714433d49c3SDaniel Lezcano 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2715433d49c3SDaniel Lezcano 		goto fib6_rules_init;
2716433d49c3SDaniel Lezcano 
27178ed67789SDaniel Lezcano 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2718cdb18761SDaniel Lezcano 	if (ret)
2719cdb18761SDaniel Lezcano 		goto fib6_rules_init;
27208ed67789SDaniel Lezcano 
2721433d49c3SDaniel Lezcano out:
2722433d49c3SDaniel Lezcano 	return ret;
2723433d49c3SDaniel Lezcano 
2724433d49c3SDaniel Lezcano fib6_rules_init:
2725433d49c3SDaniel Lezcano 	fib6_rules_cleanup();
2726433d49c3SDaniel Lezcano xfrm6_init:
2727433d49c3SDaniel Lezcano 	xfrm6_fini();
2728433d49c3SDaniel Lezcano out_fib6_init:
2729433d49c3SDaniel Lezcano 	fib6_gc_cleanup();
27308ed67789SDaniel Lezcano out_register_subsys:
27318ed67789SDaniel Lezcano 	unregister_pernet_subsys(&ip6_route_net_ops);
2732433d49c3SDaniel Lezcano out_kmem_cache:
2733f2fc6a54SBenjamin Thery 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2734433d49c3SDaniel Lezcano 	goto out;
27351da177e4SLinus Torvalds }
27361da177e4SLinus Torvalds 
27371da177e4SLinus Torvalds void ip6_route_cleanup(void)
27381da177e4SLinus Torvalds {
27398ed67789SDaniel Lezcano 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
2740101367c2SThomas Graf 	fib6_rules_cleanup();
27411da177e4SLinus Torvalds 	xfrm6_fini();
27421da177e4SLinus Torvalds 	fib6_gc_cleanup();
27438ed67789SDaniel Lezcano 	unregister_pernet_subsys(&ip6_route_net_ops);
2744f2fc6a54SBenjamin Thery 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
27451da177e4SLinus Torvalds }
2746