xref: /openbmc/linux/net/ipv6/route.c (revision dd3abc4ef52597ec8268274222574b2700ba3ded)
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
131da177e4SLinus Torvalds 
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
261da177e4SLinus Torvalds 
274fc268d2SRandy Dunlap #include <linux/capability.h>
281da177e4SLinus Torvalds #include <linux/errno.h>
291da177e4SLinus Torvalds #include <linux/types.h>
301da177e4SLinus Torvalds #include <linux/times.h>
311da177e4SLinus Torvalds #include <linux/socket.h>
321da177e4SLinus Torvalds #include <linux/sockios.h>
331da177e4SLinus Torvalds #include <linux/net.h>
341da177e4SLinus Torvalds #include <linux/route.h>
351da177e4SLinus Torvalds #include <linux/netdevice.h>
361da177e4SLinus Torvalds #include <linux/in6.h>
377bc570c8SYOSHIFUJI Hideaki #include <linux/mroute6.h>
381da177e4SLinus Torvalds #include <linux/init.h>
391da177e4SLinus Torvalds #include <linux/if_arp.h>
401da177e4SLinus Torvalds #include <linux/proc_fs.h>
411da177e4SLinus Torvalds #include <linux/seq_file.h>
425b7c931dSDaniel Lezcano #include <linux/nsproxy.h>
43457c4cbcSEric W. Biederman #include <net/net_namespace.h>
441da177e4SLinus Torvalds #include <net/snmp.h>
451da177e4SLinus Torvalds #include <net/ipv6.h>
461da177e4SLinus Torvalds #include <net/ip6_fib.h>
471da177e4SLinus Torvalds #include <net/ip6_route.h>
481da177e4SLinus Torvalds #include <net/ndisc.h>
491da177e4SLinus Torvalds #include <net/addrconf.h>
501da177e4SLinus Torvalds #include <net/tcp.h>
511da177e4SLinus Torvalds #include <linux/rtnetlink.h>
521da177e4SLinus Torvalds #include <net/dst.h>
531da177e4SLinus Torvalds #include <net/xfrm.h>
548d71740cSTom Tucker #include <net/netevent.h>
5521713ebcSThomas Graf #include <net/netlink.h>
561da177e4SLinus Torvalds 
571da177e4SLinus Torvalds #include <asm/uaccess.h>
581da177e4SLinus Torvalds 
591da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
601da177e4SLinus Torvalds #include <linux/sysctl.h>
611da177e4SLinus Torvalds #endif
621da177e4SLinus Torvalds 
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG()
 * and RT6_TRACE() expand to printk() calls; below that RDBG() expands to
 * nothing and RT6_TRACE() to an empty statement.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/* Compile-time knob, set to 0 here; its users are outside this excerpt. */
#define CLONE_OFFLINK_ROUTE 0
751da177e4SLinus Torvalds 
761da177e4SLinus Torvalds static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
771da177e4SLinus Torvalds static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
781da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *);
791da177e4SLinus Torvalds static void		ip6_dst_destroy(struct dst_entry *);
801da177e4SLinus Torvalds static void		ip6_dst_ifdown(struct dst_entry *,
811da177e4SLinus Torvalds 				       struct net_device *dev, int how);
82569d3645SDaniel Lezcano static int		 ip6_dst_gc(struct dst_ops *ops);
831da177e4SLinus Torvalds 
841da177e4SLinus Torvalds static int		ip6_pkt_discard(struct sk_buff *skb);
851da177e4SLinus Torvalds static int		ip6_pkt_discard_out(struct sk_buff *skb);
861da177e4SLinus Torvalds static void		ip6_link_failure(struct sk_buff *skb);
871da177e4SLinus Torvalds static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
881da177e4SLinus Torvalds 
8970ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
90efa2cea0SDaniel Lezcano static struct rt6_info *rt6_add_route_info(struct net *net,
91efa2cea0SDaniel Lezcano 					   struct in6_addr *prefix, int prefixlen,
9270ceb4f5SYOSHIFUJI Hideaki 					   struct in6_addr *gwaddr, int ifindex,
9370ceb4f5SYOSHIFUJI Hideaki 					   unsigned pref);
94efa2cea0SDaniel Lezcano static struct rt6_info *rt6_get_route_info(struct net *net,
95efa2cea0SDaniel Lezcano 					   struct in6_addr *prefix, int prefixlen,
9670ceb4f5SYOSHIFUJI Hideaki 					   struct in6_addr *gwaddr, int ifindex);
9770ceb4f5SYOSHIFUJI Hideaki #endif
9870ceb4f5SYOSHIFUJI Hideaki 
999a7ec3a9SDaniel Lezcano static struct dst_ops ip6_dst_ops_template = {
1001da177e4SLinus Torvalds 	.family			=	AF_INET6,
1011da177e4SLinus Torvalds 	.protocol		=	__constant_htons(ETH_P_IPV6),
1021da177e4SLinus Torvalds 	.gc			=	ip6_dst_gc,
1031da177e4SLinus Torvalds 	.gc_thresh		=	1024,
1041da177e4SLinus Torvalds 	.check			=	ip6_dst_check,
1051da177e4SLinus Torvalds 	.destroy		=	ip6_dst_destroy,
1061da177e4SLinus Torvalds 	.ifdown			=	ip6_dst_ifdown,
1071da177e4SLinus Torvalds 	.negative_advice	=	ip6_negative_advice,
1081da177e4SLinus Torvalds 	.link_failure		=	ip6_link_failure,
1091da177e4SLinus Torvalds 	.update_pmtu		=	ip6_rt_update_pmtu,
1101ac06e03SHerbert Xu 	.local_out		=	__ip6_local_out,
1111da177e4SLinus Torvalds 	.entry_size		=	sizeof(struct rt6_info),
112e2422970SEric Dumazet 	.entries		=	ATOMIC_INIT(0),
1131da177e4SLinus Torvalds };
1141da177e4SLinus Torvalds 
11514e50e57SDavid S. Miller static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
11614e50e57SDavid S. Miller {
11714e50e57SDavid S. Miller }
11814e50e57SDavid S. Miller 
11914e50e57SDavid S. Miller static struct dst_ops ip6_dst_blackhole_ops = {
12014e50e57SDavid S. Miller 	.family			=	AF_INET6,
12114e50e57SDavid S. Miller 	.protocol		=	__constant_htons(ETH_P_IPV6),
12214e50e57SDavid S. Miller 	.destroy		=	ip6_dst_destroy,
12314e50e57SDavid S. Miller 	.check			=	ip6_dst_check,
12414e50e57SDavid S. Miller 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
12514e50e57SDavid S. Miller 	.entry_size		=	sizeof(struct rt6_info),
126e2422970SEric Dumazet 	.entries		=	ATOMIC_INIT(0),
12714e50e57SDavid S. Miller };
12814e50e57SDavid S. Miller 
129bdb3289fSDaniel Lezcano static struct rt6_info ip6_null_entry_template = {
1301da177e4SLinus Torvalds 	.u = {
1311da177e4SLinus Torvalds 		.dst = {
1321da177e4SLinus Torvalds 			.__refcnt	= ATOMIC_INIT(1),
1331da177e4SLinus Torvalds 			.__use		= 1,
1341da177e4SLinus Torvalds 			.obsolete	= -1,
1351da177e4SLinus Torvalds 			.error		= -ENETUNREACH,
1361da177e4SLinus Torvalds 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
1371da177e4SLinus Torvalds 			.input		= ip6_pkt_discard,
1381da177e4SLinus Torvalds 			.output		= ip6_pkt_discard_out,
1391da177e4SLinus Torvalds 		}
1401da177e4SLinus Torvalds 	},
1411da177e4SLinus Torvalds 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
1421da177e4SLinus Torvalds 	.rt6i_metric	= ~(u32) 0,
1431da177e4SLinus Torvalds 	.rt6i_ref	= ATOMIC_INIT(1),
1441da177e4SLinus Torvalds };
1451da177e4SLinus Torvalds 
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Initial contents of the "prohibit" route: a reject entry whose dst
 * reports -EACCES and hands packets to the prohibit handlers.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.u = {
		.dst = {
			.input = ip6_pkt_prohibit,
			.output = ip6_pkt_prohibit_out,
			.error = -EACCES,
			.obsolete = -1,
			.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
			.__refcnt = ATOMIC_INIT(1),
			.__use = 1,
		}
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

/*
 * Initial contents of the "blackhole" route: a reject entry whose dst
 * reports -EINVAL and passes packets in both directions to dst_discard.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.u = {
		.dst = {
			.input = dst_discard,
			.output = dst_discard,
			.error = -EINVAL,
			.obsolete = -1,
			.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
			.__refcnt = ATOMIC_INIT(1),
			.__use = 1,
		}
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

#endif
186101367c2SThomas Graf 
/* Allocate a dst entry from @ops and hand it back typed as an rt6_info. */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	void *entry = dst_alloc(ops);

	return (struct rt6_info *)entry;
}
1921da177e4SLinus Torvalds 
1931da177e4SLinus Torvalds static void ip6_dst_destroy(struct dst_entry *dst)
1941da177e4SLinus Torvalds {
1951da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
1961da177e4SLinus Torvalds 	struct inet6_dev *idev = rt->rt6i_idev;
1971da177e4SLinus Torvalds 
1981da177e4SLinus Torvalds 	if (idev != NULL) {
1991da177e4SLinus Torvalds 		rt->rt6i_idev = NULL;
2001da177e4SLinus Torvalds 		in6_dev_put(idev);
2011da177e4SLinus Torvalds 	}
2021da177e4SLinus Torvalds }
2031da177e4SLinus Torvalds 
2041da177e4SLinus Torvalds static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
2051da177e4SLinus Torvalds 			   int how)
2061da177e4SLinus Torvalds {
2071da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
2081da177e4SLinus Torvalds 	struct inet6_dev *idev = rt->rt6i_idev;
2095a3e55d6SDenis V. Lunev 	struct net_device *loopback_dev =
210c346dca1SYOSHIFUJI Hideaki 		dev_net(dev)->loopback_dev;
2111da177e4SLinus Torvalds 
2125a3e55d6SDenis V. Lunev 	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
2135a3e55d6SDenis V. Lunev 		struct inet6_dev *loopback_idev =
2145a3e55d6SDenis V. Lunev 			in6_dev_get(loopback_dev);
2151da177e4SLinus Torvalds 		if (loopback_idev != NULL) {
2161da177e4SLinus Torvalds 			rt->rt6i_idev = loopback_idev;
2171da177e4SLinus Torvalds 			in6_dev_put(idev);
2181da177e4SLinus Torvalds 		}
2191da177e4SLinus Torvalds 	}
2201da177e4SLinus Torvalds }
2211da177e4SLinus Torvalds 
2221da177e4SLinus Torvalds static __inline__ int rt6_check_expired(const struct rt6_info *rt)
2231da177e4SLinus Torvalds {
2241da177e4SLinus Torvalds 	return (rt->rt6i_flags & RTF_EXPIRES &&
2251da177e4SLinus Torvalds 		time_after(jiffies, rt->rt6i_expires));
2261da177e4SLinus Torvalds }
2271da177e4SLinus Torvalds 
228c71099acSThomas Graf static inline int rt6_need_strict(struct in6_addr *daddr)
229c71099acSThomas Graf {
230c71099acSThomas Graf 	return (ipv6_addr_type(daddr) &
2315ce83afaSYOSHIFUJI Hideaki 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
232c71099acSThomas Graf }
233c71099acSThomas Graf 
2341da177e4SLinus Torvalds /*
235c71099acSThomas Graf  *	Route lookup. Any table->tb6_lock is implied.
2361da177e4SLinus Torvalds  */
2371da177e4SLinus Torvalds 
2388ed67789SDaniel Lezcano static inline struct rt6_info *rt6_device_match(struct net *net,
2398ed67789SDaniel Lezcano 						    struct rt6_info *rt,
240*dd3abc4eSYOSHIFUJI Hideaki 						    struct in6_addr *saddr,
2411da177e4SLinus Torvalds 						    int oif,
242d420895eSYOSHIFUJI Hideaki 						    int flags)
2431da177e4SLinus Torvalds {
2441da177e4SLinus Torvalds 	struct rt6_info *local = NULL;
2451da177e4SLinus Torvalds 	struct rt6_info *sprt;
2461da177e4SLinus Torvalds 
247*dd3abc4eSYOSHIFUJI Hideaki 	if (!oif && ipv6_addr_any(saddr))
248*dd3abc4eSYOSHIFUJI Hideaki 		goto out;
249*dd3abc4eSYOSHIFUJI Hideaki 
2507cc48263SEric Dumazet 	for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
2511da177e4SLinus Torvalds 		struct net_device *dev = sprt->rt6i_dev;
252*dd3abc4eSYOSHIFUJI Hideaki 
253*dd3abc4eSYOSHIFUJI Hideaki 		if (oif) {
2541da177e4SLinus Torvalds 			if (dev->ifindex == oif)
2551da177e4SLinus Torvalds 				return sprt;
2561da177e4SLinus Torvalds 			if (dev->flags & IFF_LOOPBACK) {
2571da177e4SLinus Torvalds 				if (sprt->rt6i_idev == NULL ||
2581da177e4SLinus Torvalds 				    sprt->rt6i_idev->dev->ifindex != oif) {
259d420895eSYOSHIFUJI Hideaki 					if (flags & RT6_LOOKUP_F_IFACE && oif)
2601da177e4SLinus Torvalds 						continue;
2611da177e4SLinus Torvalds 					if (local && (!oif ||
2621da177e4SLinus Torvalds 						      local->rt6i_idev->dev->ifindex == oif))
2631da177e4SLinus Torvalds 						continue;
2641da177e4SLinus Torvalds 				}
2651da177e4SLinus Torvalds 				local = sprt;
2661da177e4SLinus Torvalds 			}
267*dd3abc4eSYOSHIFUJI Hideaki 		} else {
268*dd3abc4eSYOSHIFUJI Hideaki 			if (ipv6_chk_addr(net, saddr, dev,
269*dd3abc4eSYOSHIFUJI Hideaki 					  flags & RT6_LOOKUP_F_IFACE))
270*dd3abc4eSYOSHIFUJI Hideaki 				return sprt;
271*dd3abc4eSYOSHIFUJI Hideaki 		}
2721da177e4SLinus Torvalds 	}
2731da177e4SLinus Torvalds 
274*dd3abc4eSYOSHIFUJI Hideaki 	if (oif) {
2751da177e4SLinus Torvalds 		if (local)
2761da177e4SLinus Torvalds 			return local;
2771da177e4SLinus Torvalds 
278d420895eSYOSHIFUJI Hideaki 		if (flags & RT6_LOOKUP_F_IFACE)
2798ed67789SDaniel Lezcano 			return net->ipv6.ip6_null_entry;
2801da177e4SLinus Torvalds 	}
281*dd3abc4eSYOSHIFUJI Hideaki out:
2821da177e4SLinus Torvalds 	return rt;
2831da177e4SLinus Torvalds }
2841da177e4SLinus Torvalds 
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * rt6_probe - Router Reachability Probing for the next hop of @rt.
 *
 * Does nothing when @rt is NULL, has no next-hop neighbour, or the
 * neighbour is already in a NUD_VALID state.  Otherwise, if more than
 * rtr_probe_interval has passed since the neighbour was last updated,
 * stamp it and send a Neighbour Solicitation to its solicited-node
 * multicast address.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	/*
	 * neigh->updated is both tested and written below.  Take the lock
	 * for writing so two CPUs cannot both pass the interval check and
	 * each send a probe, which would defeat the rate limit.
	 */
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		/* Drop the lock before transmitting. */
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		write_unlock_bh(&neigh->lock);
}
#else
/* Router preference support compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
32027097255SYOSHIFUJI Hideaki 
3211da177e4SLinus Torvalds /*
322554cfb7eSYOSHIFUJI Hideaki  * Default Router Selection (RFC 2461 6.3.6)
3231da177e4SLinus Torvalds  */
324b6f99a21SDave Jones static inline int rt6_check_dev(struct rt6_info *rt, int oif)
3251da177e4SLinus Torvalds {
326554cfb7eSYOSHIFUJI Hideaki 	struct net_device *dev = rt->rt6i_dev;
327161980f4SDavid S. Miller 	if (!oif || dev->ifindex == oif)
328554cfb7eSYOSHIFUJI Hideaki 		return 2;
329161980f4SDavid S. Miller 	if ((dev->flags & IFF_LOOPBACK) &&
330161980f4SDavid S. Miller 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
331161980f4SDavid S. Miller 		return 1;
332554cfb7eSYOSHIFUJI Hideaki 	return 0;
3331da177e4SLinus Torvalds }
3341da177e4SLinus Torvalds 
335b6f99a21SDave Jones static inline int rt6_check_neigh(struct rt6_info *rt)
3361da177e4SLinus Torvalds {
337554cfb7eSYOSHIFUJI Hideaki 	struct neighbour *neigh = rt->rt6i_nexthop;
338398bcbebSYOSHIFUJI Hideaki 	int m;
3394d0c5911SYOSHIFUJI Hideaki 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
3404d0c5911SYOSHIFUJI Hideaki 	    !(rt->rt6i_flags & RTF_GATEWAY))
3414d0c5911SYOSHIFUJI Hideaki 		m = 1;
3424d0c5911SYOSHIFUJI Hideaki 	else if (neigh) {
3431da177e4SLinus Torvalds 		read_lock_bh(&neigh->lock);
344554cfb7eSYOSHIFUJI Hideaki 		if (neigh->nud_state & NUD_VALID)
3454d0c5911SYOSHIFUJI Hideaki 			m = 2;
346398bcbebSYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
347398bcbebSYOSHIFUJI Hideaki 		else if (neigh->nud_state & NUD_FAILED)
348398bcbebSYOSHIFUJI Hideaki 			m = 0;
349398bcbebSYOSHIFUJI Hideaki #endif
350398bcbebSYOSHIFUJI Hideaki 		else
351ea73ee23SYOSHIFUJI Hideaki 			m = 1;
3521da177e4SLinus Torvalds 		read_unlock_bh(&neigh->lock);
353398bcbebSYOSHIFUJI Hideaki 	} else
354398bcbebSYOSHIFUJI Hideaki 		m = 0;
355554cfb7eSYOSHIFUJI Hideaki 	return m;
3561da177e4SLinus Torvalds }
3571da177e4SLinus Torvalds 
358554cfb7eSYOSHIFUJI Hideaki static int rt6_score_route(struct rt6_info *rt, int oif,
359554cfb7eSYOSHIFUJI Hideaki 			   int strict)
360554cfb7eSYOSHIFUJI Hideaki {
3614d0c5911SYOSHIFUJI Hideaki 	int m, n;
3624d0c5911SYOSHIFUJI Hideaki 
3634d0c5911SYOSHIFUJI Hideaki 	m = rt6_check_dev(rt, oif);
36477d16f45SYOSHIFUJI Hideaki 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
365554cfb7eSYOSHIFUJI Hideaki 		return -1;
366ebacaaa0SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
367ebacaaa0SYOSHIFUJI Hideaki 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
368ebacaaa0SYOSHIFUJI Hideaki #endif
3694d0c5911SYOSHIFUJI Hideaki 	n = rt6_check_neigh(rt);
370557e92efSYOSHIFUJI Hideaki 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
371554cfb7eSYOSHIFUJI Hideaki 		return -1;
372554cfb7eSYOSHIFUJI Hideaki 	return m;
373554cfb7eSYOSHIFUJI Hideaki }
374554cfb7eSYOSHIFUJI Hideaki 
375f11e6659SDavid S. Miller static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
376f11e6659SDavid S. Miller 				   int *mpri, struct rt6_info *match)
377554cfb7eSYOSHIFUJI Hideaki {
378554cfb7eSYOSHIFUJI Hideaki 	int m;
379554cfb7eSYOSHIFUJI Hideaki 
380554cfb7eSYOSHIFUJI Hideaki 	if (rt6_check_expired(rt))
381f11e6659SDavid S. Miller 		goto out;
382554cfb7eSYOSHIFUJI Hideaki 
383554cfb7eSYOSHIFUJI Hideaki 	m = rt6_score_route(rt, oif, strict);
384554cfb7eSYOSHIFUJI Hideaki 	if (m < 0)
385f11e6659SDavid S. Miller 		goto out;
386554cfb7eSYOSHIFUJI Hideaki 
387f11e6659SDavid S. Miller 	if (m > *mpri) {
388ea659e07SYOSHIFUJI Hideaki 		if (strict & RT6_LOOKUP_F_REACHABLE)
38927097255SYOSHIFUJI Hideaki 			rt6_probe(match);
390f11e6659SDavid S. Miller 		*mpri = m;
391554cfb7eSYOSHIFUJI Hideaki 		match = rt;
392ea659e07SYOSHIFUJI Hideaki 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
39327097255SYOSHIFUJI Hideaki 		rt6_probe(rt);
3941da177e4SLinus Torvalds 	}
395f11e6659SDavid S. Miller 
396f11e6659SDavid S. Miller out:
397f11e6659SDavid S. Miller 	return match;
3981da177e4SLinus Torvalds }
3991da177e4SLinus Torvalds 
400f11e6659SDavid S. Miller static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
401f11e6659SDavid S. Miller 				     struct rt6_info *rr_head,
402f11e6659SDavid S. Miller 				     u32 metric, int oif, int strict)
403f11e6659SDavid S. Miller {
404f11e6659SDavid S. Miller 	struct rt6_info *rt, *match;
405f11e6659SDavid S. Miller 	int mpri = -1;
406f11e6659SDavid S. Miller 
407f11e6659SDavid S. Miller 	match = NULL;
408f11e6659SDavid S. Miller 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
409f11e6659SDavid S. Miller 	     rt = rt->u.dst.rt6_next)
410f11e6659SDavid S. Miller 		match = find_match(rt, oif, strict, &mpri, match);
411f11e6659SDavid S. Miller 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
412f11e6659SDavid S. Miller 	     rt = rt->u.dst.rt6_next)
413f11e6659SDavid S. Miller 		match = find_match(rt, oif, strict, &mpri, match);
414f11e6659SDavid S. Miller 
415f11e6659SDavid S. Miller 	return match;
416f11e6659SDavid S. Miller }
417f11e6659SDavid S. Miller 
418f11e6659SDavid S. Miller static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
419f11e6659SDavid S. Miller {
420f11e6659SDavid S. Miller 	struct rt6_info *match, *rt0;
4218ed67789SDaniel Lezcano 	struct net *net;
422f11e6659SDavid S. Miller 
423f11e6659SDavid S. Miller 	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
4240dc47877SHarvey Harrison 		  __func__, fn->leaf, oif);
425f11e6659SDavid S. Miller 
426f11e6659SDavid S. Miller 	rt0 = fn->rr_ptr;
427f11e6659SDavid S. Miller 	if (!rt0)
428f11e6659SDavid S. Miller 		fn->rr_ptr = rt0 = fn->leaf;
429f11e6659SDavid S. Miller 
430f11e6659SDavid S. Miller 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
431f11e6659SDavid S. Miller 
432554cfb7eSYOSHIFUJI Hideaki 	if (!match &&
433f11e6659SDavid S. Miller 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
434f11e6659SDavid S. Miller 		struct rt6_info *next = rt0->u.dst.rt6_next;
435f11e6659SDavid S. Miller 
436554cfb7eSYOSHIFUJI Hideaki 		/* no entries matched; do round-robin */
437f11e6659SDavid S. Miller 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
438f11e6659SDavid S. Miller 			next = fn->leaf;
439f11e6659SDavid S. Miller 
440f11e6659SDavid S. Miller 		if (next != rt0)
441f11e6659SDavid S. Miller 			fn->rr_ptr = next;
442554cfb7eSYOSHIFUJI Hideaki 	}
443554cfb7eSYOSHIFUJI Hideaki 
444f11e6659SDavid S. Miller 	RT6_TRACE("%s() => %p\n",
4450dc47877SHarvey Harrison 		  __func__, match);
446554cfb7eSYOSHIFUJI Hideaki 
447c346dca1SYOSHIFUJI Hideaki 	net = dev_net(rt0->rt6i_dev);
4488ed67789SDaniel Lezcano 	return (match ? match : net->ipv6.ip6_null_entry);
4491da177e4SLinus Torvalds }
4501da177e4SLinus Torvalds 
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_route_rcv - process a received Route Information option.
 * @dev:    device the option arrived on
 * @opt:    start of the option
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Validates the option, then looks up the matching route-info route.
 * A zero lifetime deletes an existing route; a non-zero lifetime adds
 * the route if missing or refreshes the preference of an existing one.
 * The route's expiry is then set from the lifetime (or cleared for an
 * infinite lifetime).
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	/* Reject negative lengths explicitly; the sizeof comparison is unsigned. */
	if (len < 0 || (size_t)len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/*
	 * Sanity check for prefix_len and length (RFC 4191, 2.3):
	 * Length is 1, 2 or 3 (units of 8 octets), so the Prefix field is
	 * 0, 8 or 16 octets.  A prefix longer than 64 bits needs Length 3,
	 * any non-empty prefix needs Length 2 or 3.  Anything else would
	 * make the prefix read below run past the end of the option.
	 */
	if (rinfo->length < 1 || rinfo->length > 3) {
		return -EINVAL;
	} else if (len < (rinfo->length << 3)) {
		/* option claims more bytes than we were given */
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 3) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/*
		 * Fewer than 16 prefix octets are present: copy only the
		 * prefix_len leading bits (validated above to lie inside
		 * the option) into a local buffer.
		 */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* Zero lifetime: withdraw an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/* presumably drops the reference taken by the get/add helper — confirm */
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
52470ceb4f5SYOSHIFUJI Hideaki 
/*
 * BACKTRACK(__net, saddr) - climb the FIB tree after a failed match.
 *
 * Relies on the expanding function providing the locals `rt` and `fn`
 * and the labels `out` and `restart`.  When `rt` is the null entry of
 * __net, walk from `fn` towards the root: jump to `out` on reaching the
 * top-level root; descend into the parent's source subtree (looked up
 * with saddr) when there is one other than `fn`, otherwise step to the
 * parent; and jump to `restart` as soon as a node carrying route info
 * (RTN_RTINFO) is reached.
 *
 * Both macro parameters are parenthesized in the expansion so that
 * arbitrary expressions can be passed safely.
 */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) {	\
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
542c71099acSThomas Graf 
5438ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_lookup(struct net *net,
5448ed67789SDaniel Lezcano 					     struct fib6_table *table,
545c71099acSThomas Graf 					     struct flowi *fl, int flags)
5461da177e4SLinus Torvalds {
5471da177e4SLinus Torvalds 	struct fib6_node *fn;
5481da177e4SLinus Torvalds 	struct rt6_info *rt;
5491da177e4SLinus Torvalds 
550c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
551c71099acSThomas Graf 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
552c71099acSThomas Graf restart:
553c71099acSThomas Graf 	rt = fn->leaf;
554*dd3abc4eSYOSHIFUJI Hideaki 	rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
5558ed67789SDaniel Lezcano 	BACKTRACK(net, &fl->fl6_src);
556c71099acSThomas Graf out:
55703f49f34SPavel Emelyanov 	dst_use(&rt->u.dst, jiffies);
558c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
5591da177e4SLinus Torvalds 	return rt;
560c71099acSThomas Graf 
561c71099acSThomas Graf }
562c71099acSThomas Graf 
5639acd9f3aSYOSHIFUJI Hideaki struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
5649acd9f3aSYOSHIFUJI Hideaki 			    const struct in6_addr *saddr, int oif, int strict)
565c71099acSThomas Graf {
566c71099acSThomas Graf 	struct flowi fl = {
567c71099acSThomas Graf 		.oif = oif,
568c71099acSThomas Graf 		.nl_u = {
569c71099acSThomas Graf 			.ip6_u = {
570c71099acSThomas Graf 				.daddr = *daddr,
571c71099acSThomas Graf 			},
572c71099acSThomas Graf 		},
573c71099acSThomas Graf 	};
574c71099acSThomas Graf 	struct dst_entry *dst;
57577d16f45SYOSHIFUJI Hideaki 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
576c71099acSThomas Graf 
577adaa70bbSThomas Graf 	if (saddr) {
578adaa70bbSThomas Graf 		memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
579adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
580adaa70bbSThomas Graf 	}
581adaa70bbSThomas Graf 
582606a2b48SDaniel Lezcano 	dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
583c71099acSThomas Graf 	if (dst->error == 0)
584c71099acSThomas Graf 		return (struct rt6_info *) dst;
585c71099acSThomas Graf 
586c71099acSThomas Graf 	dst_release(dst);
587c71099acSThomas Graf 
5881da177e4SLinus Torvalds 	return NULL;
5891da177e4SLinus Torvalds }
5901da177e4SLinus Torvalds 
5917159039aSYOSHIFUJI Hideaki EXPORT_SYMBOL(rt6_lookup);
5927159039aSYOSHIFUJI Hideaki 
593c71099acSThomas Graf /* ip6_ins_rt is called with FREE table->tb6_lock.
5941da177e4SLinus Torvalds    It takes new route entry, the addition fails by any reason the
5951da177e4SLinus Torvalds    route is freed. In any case, if caller does not hold it, it may
5961da177e4SLinus Torvalds    be destroyed.
5971da177e4SLinus Torvalds  */
5981da177e4SLinus Torvalds 
59986872cb5SThomas Graf static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
6001da177e4SLinus Torvalds {
6011da177e4SLinus Torvalds 	int err;
602c71099acSThomas Graf 	struct fib6_table *table;
6031da177e4SLinus Torvalds 
604c71099acSThomas Graf 	table = rt->rt6i_table;
605c71099acSThomas Graf 	write_lock_bh(&table->tb6_lock);
60686872cb5SThomas Graf 	err = fib6_add(&table->tb6_root, rt, info);
607c71099acSThomas Graf 	write_unlock_bh(&table->tb6_lock);
6081da177e4SLinus Torvalds 
6091da177e4SLinus Torvalds 	return err;
6101da177e4SLinus Torvalds }
6111da177e4SLinus Torvalds 
61240e22e8fSThomas Graf int ip6_ins_rt(struct rt6_info *rt)
61340e22e8fSThomas Graf {
6144d1169c1SDenis V. Lunev 	struct nl_info info = {
615c346dca1SYOSHIFUJI Hideaki 		.nl_net = dev_net(rt->rt6i_dev),
6164d1169c1SDenis V. Lunev 	};
617528c4cebSDenis V. Lunev 	return __ip6_ins_rt(rt, &info);
61840e22e8fSThomas Graf }
61940e22e8fSThomas Graf 
62095a9a5baSYOSHIFUJI Hideaki static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
62195a9a5baSYOSHIFUJI Hideaki 				      struct in6_addr *saddr)
6221da177e4SLinus Torvalds {
6231da177e4SLinus Torvalds 	struct rt6_info *rt;
6241da177e4SLinus Torvalds 
6251da177e4SLinus Torvalds 	/*
6261da177e4SLinus Torvalds 	 *	Clone the route.
6271da177e4SLinus Torvalds 	 */
6281da177e4SLinus Torvalds 
6291da177e4SLinus Torvalds 	rt = ip6_rt_copy(ort);
6301da177e4SLinus Torvalds 
6311da177e4SLinus Torvalds 	if (rt) {
63258c4fb86SYOSHIFUJI Hideaki 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
63358c4fb86SYOSHIFUJI Hideaki 			if (rt->rt6i_dst.plen != 128 &&
63458c4fb86SYOSHIFUJI Hideaki 			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
63558c4fb86SYOSHIFUJI Hideaki 				rt->rt6i_flags |= RTF_ANYCAST;
6361da177e4SLinus Torvalds 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
63758c4fb86SYOSHIFUJI Hideaki 		}
6381da177e4SLinus Torvalds 
63958c4fb86SYOSHIFUJI Hideaki 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
6401da177e4SLinus Torvalds 		rt->rt6i_dst.plen = 128;
6411da177e4SLinus Torvalds 		rt->rt6i_flags |= RTF_CACHE;
6421da177e4SLinus Torvalds 		rt->u.dst.flags |= DST_HOST;
6431da177e4SLinus Torvalds 
6441da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
6451da177e4SLinus Torvalds 		if (rt->rt6i_src.plen && saddr) {
6461da177e4SLinus Torvalds 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
6471da177e4SLinus Torvalds 			rt->rt6i_src.plen = 128;
6481da177e4SLinus Torvalds 		}
6491da177e4SLinus Torvalds #endif
6501da177e4SLinus Torvalds 
6511da177e4SLinus Torvalds 		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
6521da177e4SLinus Torvalds 
65395a9a5baSYOSHIFUJI Hideaki 	}
6541da177e4SLinus Torvalds 
6551da177e4SLinus Torvalds 	return rt;
6561da177e4SLinus Torvalds }
65795a9a5baSYOSHIFUJI Hideaki 
658299d9939SYOSHIFUJI Hideaki static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
659299d9939SYOSHIFUJI Hideaki {
660299d9939SYOSHIFUJI Hideaki 	struct rt6_info *rt = ip6_rt_copy(ort);
661299d9939SYOSHIFUJI Hideaki 	if (rt) {
662299d9939SYOSHIFUJI Hideaki 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
663299d9939SYOSHIFUJI Hideaki 		rt->rt6i_dst.plen = 128;
664299d9939SYOSHIFUJI Hideaki 		rt->rt6i_flags |= RTF_CACHE;
665299d9939SYOSHIFUJI Hideaki 		rt->u.dst.flags |= DST_HOST;
666299d9939SYOSHIFUJI Hideaki 		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
667299d9939SYOSHIFUJI Hideaki 	}
668299d9939SYOSHIFUJI Hideaki 	return rt;
669299d9939SYOSHIFUJI Hideaki }
670299d9939SYOSHIFUJI Hideaki 
/*
 * Policy-routing lookup core shared by the input and output paths.
 *
 * Looks up flow @fl in @table and picks a route with rt6_select(), using
 * @oif plus the "strict" (RT6_LOOKUP_F_IFACE taken from @flags) and
 * "reachable" selection flags.  When the selected route is neither the
 * namespace's null entry nor already an RTF_CACHE entry, a per-destination
 * copy is made and inserted into the table with ip6_ins_rt().
 *
 * Every return path takes a reference on the returned route with
 * dst_hold() and updates its lastuse/__use counters.
 *
 * NOTE(review): BACKTRACK() is a macro defined outside this chunk; it
 * presumably relies on the local names and labels used here (rt, fn,
 * restart, out) -- confirm before renaming anything in this function.
 */
6718ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
6728ce11e6aSAdrian Bunk 				      struct flowi *fl, int flags)
6731da177e4SLinus Torvalds {
6741da177e4SLinus Torvalds 	struct fib6_node *fn;
675519fbd87SYOSHIFUJI Hideaki 	struct rt6_info *rt, *nrt;
676c71099acSThomas Graf 	int strict = 0;
	/* upper bound on lookup/insert rounds before giving up */
6771da177e4SLinus Torvalds 	int attempts = 3;
678519fbd87SYOSHIFUJI Hideaki 	int err;
	/* first pass asks for a reachable router unless forwarding is enabled */
679ea659e07SYOSHIFUJI Hideaki 	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
6801da177e4SLinus Torvalds 
68177d16f45SYOSHIFUJI Hideaki 	strict |= flags & RT6_LOOKUP_F_IFACE;
6821da177e4SLinus Torvalds 
6831da177e4SLinus Torvalds relookup:
684c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
6851da177e4SLinus Torvalds 
	/* re-entered from "out" with the read lock still held */
6868238dd06SYOSHIFUJI Hideaki restart_2:
687c71099acSThomas Graf 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
6881da177e4SLinus Torvalds 
6891da177e4SLinus Torvalds restart:
6904acad72dSPavel Emelyanov 	rt = rt6_select(fn, oif, strict | reachable);
6918ed67789SDaniel Lezcano 
6928ed67789SDaniel Lezcano 	BACKTRACK(net, &fl->fl6_src);
	/* nothing to copy: no match at all, or already a cache entry */
6938ed67789SDaniel Lezcano 	if (rt == net->ipv6.ip6_null_entry ||
6948238dd06SYOSHIFUJI Hideaki 	    rt->rt6i_flags & RTF_CACHE)
6951da177e4SLinus Torvalds 		goto out;
6961da177e4SLinus Torvalds 
	/* pin rt so it can still be used after the table lock is dropped */
6971da177e4SLinus Torvalds 	dst_hold(&rt->u.dst);
698c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
6991da177e4SLinus Torvalds 
	/*
	 * No next hop recorded and one is needed: make a copy keyed on the
	 * flow's addresses.  Otherwise either clone the route or, when
	 * CLONE_OFFLINK_ROUTE is off, return the pinned route as it is.
	 */
700519fbd87SYOSHIFUJI Hideaki 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
701e40cf353SYOSHIFUJI Hideaki 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
702519fbd87SYOSHIFUJI Hideaki 	else {
703519fbd87SYOSHIFUJI Hideaki #if CLONE_OFFLINK_ROUTE
704519fbd87SYOSHIFUJI Hideaki 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
705519fbd87SYOSHIFUJI Hideaki #else
706519fbd87SYOSHIFUJI Hideaki 		goto out2;
707519fbd87SYOSHIFUJI Hideaki #endif
708519fbd87SYOSHIFUJI Hideaki 	}
7091da177e4SLinus Torvalds 
	/* switch from the original to the copy; null entry if allocation failed */
7101da177e4SLinus Torvalds 	dst_release(&rt->u.dst);
7118ed67789SDaniel Lezcano 	rt = nrt ? : net->ipv6.ip6_null_entry;
7121da177e4SLinus Torvalds 
713e40cf353SYOSHIFUJI Hideaki 	dst_hold(&rt->u.dst);
714e40cf353SYOSHIFUJI Hideaki 	if (nrt) {
71540e22e8fSThomas Graf 		err = ip6_ins_rt(nrt);
716e40cf353SYOSHIFUJI Hideaki 		if (!err)
717e40cf353SYOSHIFUJI Hideaki 			goto out2;
718e40cf353SYOSHIFUJI Hideaki 	}
719e40cf353SYOSHIFUJI Hideaki 
	/* out of retries: return whatever rt currently is */
720e40cf353SYOSHIFUJI Hideaki 	if (--attempts <= 0)
7211da177e4SLinus Torvalds 		goto out2;
7221da177e4SLinus Torvalds 
723519fbd87SYOSHIFUJI Hideaki 	/*
724c71099acSThomas Graf 	 * Race condition! In the gap, when table->tb6_lock was
725519fbd87SYOSHIFUJI Hideaki 	 * released someone could insert this route.  Relookup.
7261da177e4SLinus Torvalds 	 */
7271da177e4SLinus Torvalds 	dst_release(&rt->u.dst);
7281da177e4SLinus Torvalds 	goto relookup;
729e40cf353SYOSHIFUJI Hideaki 
730519fbd87SYOSHIFUJI Hideaki out:
	/*
	 * The reachable-only pass ended here: drop the reachability
	 * requirement and search once more before accepting rt.
	 */
7318238dd06SYOSHIFUJI Hideaki 	if (reachable) {
7328238dd06SYOSHIFUJI Hideaki 		reachable = 0;
7338238dd06SYOSHIFUJI Hideaki 		goto restart_2;
7348238dd06SYOSHIFUJI Hideaki 	}
735519fbd87SYOSHIFUJI Hideaki 	dst_hold(&rt->u.dst);
736c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
7371da177e4SLinus Torvalds out2:
	/* usage accounting on the route being handed back */
7381da177e4SLinus Torvalds 	rt->u.dst.lastuse = jiffies;
7391da177e4SLinus Torvalds 	rt->u.dst.__use++;
740c71099acSThomas Graf 
741c71099acSThomas Graf 	return rt;
742c71099acSThomas Graf }
743c71099acSThomas Graf 
7448ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
7454acad72dSPavel Emelyanov 					    struct flowi *fl, int flags)
7464acad72dSPavel Emelyanov {
7478ed67789SDaniel Lezcano 	return ip6_pol_route(net, table, fl->iif, fl, flags);
7484acad72dSPavel Emelyanov }
7494acad72dSPavel Emelyanov 
750c71099acSThomas Graf void ip6_route_input(struct sk_buff *skb)
751c71099acSThomas Graf {
7520660e03fSArnaldo Carvalho de Melo 	struct ipv6hdr *iph = ipv6_hdr(skb);
753c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(skb->dev);
754adaa70bbSThomas Graf 	int flags = RT6_LOOKUP_F_HAS_SADDR;
755c71099acSThomas Graf 	struct flowi fl = {
756c71099acSThomas Graf 		.iif = skb->dev->ifindex,
757c71099acSThomas Graf 		.nl_u = {
758c71099acSThomas Graf 			.ip6_u = {
759c71099acSThomas Graf 				.daddr = iph->daddr,
760c71099acSThomas Graf 				.saddr = iph->saddr,
76190bcaf7bSAl Viro 				.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
762c71099acSThomas Graf 			},
763c71099acSThomas Graf 		},
76447dcf0cbSThomas Graf 		.mark = skb->mark,
765c71099acSThomas Graf 		.proto = iph->nexthdr,
766c71099acSThomas Graf 	};
767adaa70bbSThomas Graf 
768adaa70bbSThomas Graf 	if (rt6_need_strict(&iph->daddr))
769adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_IFACE;
770c71099acSThomas Graf 
7715578689aSDaniel Lezcano 	skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
772c71099acSThomas Graf }
773c71099acSThomas Graf 
7748ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
775c71099acSThomas Graf 					     struct flowi *fl, int flags)
776c71099acSThomas Graf {
7778ed67789SDaniel Lezcano 	return ip6_pol_route(net, table, fl->oif, fl, flags);
778c71099acSThomas Graf }
779c71099acSThomas Graf 
7804591db4fSDaniel Lezcano struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
7814591db4fSDaniel Lezcano 				    struct flowi *fl)
782c71099acSThomas Graf {
783c71099acSThomas Graf 	int flags = 0;
784c71099acSThomas Graf 
785c71099acSThomas Graf 	if (rt6_need_strict(&fl->fl6_dst))
78677d16f45SYOSHIFUJI Hideaki 		flags |= RT6_LOOKUP_F_IFACE;
787c71099acSThomas Graf 
788adaa70bbSThomas Graf 	if (!ipv6_addr_any(&fl->fl6_src))
789adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
7907cbca67cSYOSHIFUJI Hideaki 	else if (sk) {
7917cbca67cSYOSHIFUJI Hideaki 		unsigned int prefs = inet6_sk(sk)->srcprefs;
7927cbca67cSYOSHIFUJI Hideaki 		if (prefs & IPV6_PREFER_SRC_TMP)
7937cbca67cSYOSHIFUJI Hideaki 			flags |= RT6_LOOKUP_F_SRCPREF_TMP;
7947cbca67cSYOSHIFUJI Hideaki 		if (prefs & IPV6_PREFER_SRC_PUBLIC)
7957cbca67cSYOSHIFUJI Hideaki 			flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
7967cbca67cSYOSHIFUJI Hideaki 		if (prefs & IPV6_PREFER_SRC_COA)
7977cbca67cSYOSHIFUJI Hideaki 			flags |= RT6_LOOKUP_F_SRCPREF_COA;
7987cbca67cSYOSHIFUJI Hideaki 	}
799adaa70bbSThomas Graf 
8004591db4fSDaniel Lezcano 	return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
8011da177e4SLinus Torvalds }
8021da177e4SLinus Torvalds 
8037159039aSYOSHIFUJI Hideaki EXPORT_SYMBOL(ip6_route_output);
8041da177e4SLinus Torvalds 
80514e50e57SDavid S. Miller int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
80614e50e57SDavid S. Miller {
80714e50e57SDavid S. Miller 	struct rt6_info *ort = (struct rt6_info *) *dstp;
80814e50e57SDavid S. Miller 	struct rt6_info *rt = (struct rt6_info *)
80914e50e57SDavid S. Miller 		dst_alloc(&ip6_dst_blackhole_ops);
81014e50e57SDavid S. Miller 	struct dst_entry *new = NULL;
81114e50e57SDavid S. Miller 
81214e50e57SDavid S. Miller 	if (rt) {
81314e50e57SDavid S. Miller 		new = &rt->u.dst;
81414e50e57SDavid S. Miller 
81514e50e57SDavid S. Miller 		atomic_set(&new->__refcnt, 1);
81614e50e57SDavid S. Miller 		new->__use = 1;
817352e512cSHerbert Xu 		new->input = dst_discard;
818352e512cSHerbert Xu 		new->output = dst_discard;
81914e50e57SDavid S. Miller 
82014e50e57SDavid S. Miller 		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
82114e50e57SDavid S. Miller 		new->dev = ort->u.dst.dev;
82214e50e57SDavid S. Miller 		if (new->dev)
82314e50e57SDavid S. Miller 			dev_hold(new->dev);
82414e50e57SDavid S. Miller 		rt->rt6i_idev = ort->rt6i_idev;
82514e50e57SDavid S. Miller 		if (rt->rt6i_idev)
82614e50e57SDavid S. Miller 			in6_dev_hold(rt->rt6i_idev);
82714e50e57SDavid S. Miller 		rt->rt6i_expires = 0;
82814e50e57SDavid S. Miller 
82914e50e57SDavid S. Miller 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
83014e50e57SDavid S. Miller 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
83114e50e57SDavid S. Miller 		rt->rt6i_metric = 0;
83214e50e57SDavid S. Miller 
83314e50e57SDavid S. Miller 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
83414e50e57SDavid S. Miller #ifdef CONFIG_IPV6_SUBTREES
83514e50e57SDavid S. Miller 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
83614e50e57SDavid S. Miller #endif
83714e50e57SDavid S. Miller 
83814e50e57SDavid S. Miller 		dst_free(new);
83914e50e57SDavid S. Miller 	}
84014e50e57SDavid S. Miller 
84114e50e57SDavid S. Miller 	dst_release(*dstp);
84214e50e57SDavid S. Miller 	*dstp = new;
84314e50e57SDavid S. Miller 	return (new ? 0 : -ENOMEM);
84414e50e57SDavid S. Miller }
84514e50e57SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
84614e50e57SDavid S. Miller 
8471da177e4SLinus Torvalds /*
8481da177e4SLinus Torvalds  *	Destination cache support functions
8491da177e4SLinus Torvalds  */
8501da177e4SLinus Torvalds 
8511da177e4SLinus Torvalds static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
8521da177e4SLinus Torvalds {
8531da177e4SLinus Torvalds 	struct rt6_info *rt;
8541da177e4SLinus Torvalds 
8551da177e4SLinus Torvalds 	rt = (struct rt6_info *) dst;
8561da177e4SLinus Torvalds 
8571da177e4SLinus Torvalds 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
8581da177e4SLinus Torvalds 		return dst;
8591da177e4SLinus Torvalds 
8601da177e4SLinus Torvalds 	return NULL;
8611da177e4SLinus Torvalds }
8621da177e4SLinus Torvalds 
8631da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
8641da177e4SLinus Torvalds {
8651da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *) dst;
8661da177e4SLinus Torvalds 
8671da177e4SLinus Torvalds 	if (rt) {
8681da177e4SLinus Torvalds 		if (rt->rt6i_flags & RTF_CACHE)
869e0a1ad73SThomas Graf 			ip6_del_rt(rt);
8701da177e4SLinus Torvalds 		else
8711da177e4SLinus Torvalds 			dst_release(dst);
8721da177e4SLinus Torvalds 	}
8731da177e4SLinus Torvalds 	return NULL;
8741da177e4SLinus Torvalds }
8751da177e4SLinus Torvalds 
8761da177e4SLinus Torvalds static void ip6_link_failure(struct sk_buff *skb)
8771da177e4SLinus Torvalds {
8781da177e4SLinus Torvalds 	struct rt6_info *rt;
8791da177e4SLinus Torvalds 
8801da177e4SLinus Torvalds 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
8811da177e4SLinus Torvalds 
8821da177e4SLinus Torvalds 	rt = (struct rt6_info *) skb->dst;
8831da177e4SLinus Torvalds 	if (rt) {
8841da177e4SLinus Torvalds 		if (rt->rt6i_flags&RTF_CACHE) {
8851da177e4SLinus Torvalds 			dst_set_expires(&rt->u.dst, 0);
8861da177e4SLinus Torvalds 			rt->rt6i_flags |= RTF_EXPIRES;
8871da177e4SLinus Torvalds 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
8881da177e4SLinus Torvalds 			rt->rt6i_node->fn_sernum = -1;
8891da177e4SLinus Torvalds 	}
8901da177e4SLinus Torvalds }
8911da177e4SLinus Torvalds 
8921da177e4SLinus Torvalds static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
8931da177e4SLinus Torvalds {
8941da177e4SLinus Torvalds 	struct rt6_info *rt6 = (struct rt6_info*)dst;
8951da177e4SLinus Torvalds 
8961da177e4SLinus Torvalds 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
8971da177e4SLinus Torvalds 		rt6->rt6i_flags |= RTF_MODIFIED;
8981da177e4SLinus Torvalds 		if (mtu < IPV6_MIN_MTU) {
8991da177e4SLinus Torvalds 			mtu = IPV6_MIN_MTU;
9001da177e4SLinus Torvalds 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
9011da177e4SLinus Torvalds 		}
9021da177e4SLinus Torvalds 		dst->metrics[RTAX_MTU-1] = mtu;
9038d71740cSTom Tucker 		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
9041da177e4SLinus Torvalds 	}
9051da177e4SLinus Torvalds }
9061da177e4SLinus Torvalds 
9071da177e4SLinus Torvalds static int ipv6_get_mtu(struct net_device *dev);
9081da177e4SLinus Torvalds 
9095578689aSDaniel Lezcano static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
9101da177e4SLinus Torvalds {
9111da177e4SLinus Torvalds 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
9121da177e4SLinus Torvalds 
9135578689aSDaniel Lezcano 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
9145578689aSDaniel Lezcano 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
9151da177e4SLinus Torvalds 
9161da177e4SLinus Torvalds 	/*
9171da177e4SLinus Torvalds 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
9181da177e4SLinus Torvalds 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
9191da177e4SLinus Torvalds 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
9201da177e4SLinus Torvalds 	 * rely only on pmtu discovery"
9211da177e4SLinus Torvalds 	 */
9221da177e4SLinus Torvalds 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
9231da177e4SLinus Torvalds 		mtu = IPV6_MAXPLEN;
9241da177e4SLinus Torvalds 	return mtu;
9251da177e4SLinus Torvalds }
9261da177e4SLinus Torvalds 
9273b00944cSYOSHIFUJI Hideaki static struct dst_entry *icmp6_dst_gc_list;
9283b00944cSYOSHIFUJI Hideaki static DEFINE_SPINLOCK(icmp6_dst_lock);
9295d0bbeebSThomas Graf 
9303b00944cSYOSHIFUJI Hideaki struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
9311da177e4SLinus Torvalds 				  struct neighbour *neigh,
9329acd9f3aSYOSHIFUJI Hideaki 				  const struct in6_addr *addr)
9331da177e4SLinus Torvalds {
9341da177e4SLinus Torvalds 	struct rt6_info *rt;
9351da177e4SLinus Torvalds 	struct inet6_dev *idev = in6_dev_get(dev);
936c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
9371da177e4SLinus Torvalds 
9381da177e4SLinus Torvalds 	if (unlikely(idev == NULL))
9391da177e4SLinus Torvalds 		return NULL;
9401da177e4SLinus Torvalds 
941f2fc6a54SBenjamin Thery 	rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
9421da177e4SLinus Torvalds 	if (unlikely(rt == NULL)) {
9431da177e4SLinus Torvalds 		in6_dev_put(idev);
9441da177e4SLinus Torvalds 		goto out;
9451da177e4SLinus Torvalds 	}
9461da177e4SLinus Torvalds 
9471da177e4SLinus Torvalds 	dev_hold(dev);
9481da177e4SLinus Torvalds 	if (neigh)
9491da177e4SLinus Torvalds 		neigh_hold(neigh);
9501da177e4SLinus Torvalds 	else
9511da177e4SLinus Torvalds 		neigh = ndisc_get_neigh(dev, addr);
9521da177e4SLinus Torvalds 
9531da177e4SLinus Torvalds 	rt->rt6i_dev	  = dev;
9541da177e4SLinus Torvalds 	rt->rt6i_idev     = idev;
9551da177e4SLinus Torvalds 	rt->rt6i_nexthop  = neigh;
9561da177e4SLinus Torvalds 	atomic_set(&rt->u.dst.__refcnt, 1);
9571da177e4SLinus Torvalds 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
9581da177e4SLinus Torvalds 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
9595578689aSDaniel Lezcano 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
9603b00944cSYOSHIFUJI Hideaki 	rt->u.dst.output  = ip6_output;
9611da177e4SLinus Torvalds 
9621da177e4SLinus Torvalds #if 0	/* there's no chance to use these for ndisc */
9631da177e4SLinus Torvalds 	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
9641da177e4SLinus Torvalds 				? DST_HOST
9651da177e4SLinus Torvalds 				: 0;
9661da177e4SLinus Torvalds 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
9671da177e4SLinus Torvalds 	rt->rt6i_dst.plen = 128;
9681da177e4SLinus Torvalds #endif
9691da177e4SLinus Torvalds 
9703b00944cSYOSHIFUJI Hideaki 	spin_lock_bh(&icmp6_dst_lock);
9713b00944cSYOSHIFUJI Hideaki 	rt->u.dst.next = icmp6_dst_gc_list;
9723b00944cSYOSHIFUJI Hideaki 	icmp6_dst_gc_list = &rt->u.dst;
9733b00944cSYOSHIFUJI Hideaki 	spin_unlock_bh(&icmp6_dst_lock);
9741da177e4SLinus Torvalds 
9755578689aSDaniel Lezcano 	fib6_force_start_gc(net);
9761da177e4SLinus Torvalds 
9771da177e4SLinus Torvalds out:
97840aa7b90SYOSHIFUJI Hideaki 	return &rt->u.dst;
9791da177e4SLinus Torvalds }
9801da177e4SLinus Torvalds 
9813b00944cSYOSHIFUJI Hideaki int icmp6_dst_gc(int *more)
9821da177e4SLinus Torvalds {
9831da177e4SLinus Torvalds 	struct dst_entry *dst, *next, **pprev;
9841da177e4SLinus Torvalds 	int freed;
9851da177e4SLinus Torvalds 
9861da177e4SLinus Torvalds 	next = NULL;
9871da177e4SLinus Torvalds 	freed = 0;
9885d0bbeebSThomas Graf 
9893b00944cSYOSHIFUJI Hideaki 	spin_lock_bh(&icmp6_dst_lock);
9903b00944cSYOSHIFUJI Hideaki 	pprev = &icmp6_dst_gc_list;
9915d0bbeebSThomas Graf 
9921da177e4SLinus Torvalds 	while ((dst = *pprev) != NULL) {
9931da177e4SLinus Torvalds 		if (!atomic_read(&dst->__refcnt)) {
9941da177e4SLinus Torvalds 			*pprev = dst->next;
9951da177e4SLinus Torvalds 			dst_free(dst);
9961da177e4SLinus Torvalds 			freed++;
9971da177e4SLinus Torvalds 		} else {
9981da177e4SLinus Torvalds 			pprev = &dst->next;
9991da177e4SLinus Torvalds 			(*more)++;
10001da177e4SLinus Torvalds 		}
10011da177e4SLinus Torvalds 	}
10021da177e4SLinus Torvalds 
10033b00944cSYOSHIFUJI Hideaki 	spin_unlock_bh(&icmp6_dst_lock);
10045d0bbeebSThomas Graf 
10051da177e4SLinus Torvalds 	return freed;
10061da177e4SLinus Torvalds }
10071da177e4SLinus Torvalds 
1008569d3645SDaniel Lezcano static int ip6_dst_gc(struct dst_ops *ops)
10091da177e4SLinus Torvalds {
10101da177e4SLinus Torvalds 	unsigned long now = jiffies;
10117019b78eSDaniel Lezcano 	struct net *net = ops->dst_net;
10127019b78eSDaniel Lezcano 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
10137019b78eSDaniel Lezcano 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
10147019b78eSDaniel Lezcano 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
10157019b78eSDaniel Lezcano 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
10167019b78eSDaniel Lezcano 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
10171da177e4SLinus Torvalds 
10187019b78eSDaniel Lezcano 	if (time_after(rt_last_gc + rt_min_interval, now) &&
10197019b78eSDaniel Lezcano 	    atomic_read(&ops->entries) <= rt_max_size)
10201da177e4SLinus Torvalds 		goto out;
10211da177e4SLinus Torvalds 
10226891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire++;
10236891a346SBenjamin Thery 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
10246891a346SBenjamin Thery 	net->ipv6.ip6_rt_last_gc = now;
10257019b78eSDaniel Lezcano 	if (atomic_read(&ops->entries) < ops->gc_thresh)
10267019b78eSDaniel Lezcano 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
10271da177e4SLinus Torvalds out:
10287019b78eSDaniel Lezcano 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
10297019b78eSDaniel Lezcano 	return (atomic_read(&ops->entries) > rt_max_size);
10301da177e4SLinus Torvalds }
10311da177e4SLinus Torvalds 
10321da177e4SLinus Torvalds /* Clean host part of a prefix. Not necessary in radix tree,
10331da177e4SLinus Torvalds    but results in cleaner routing tables.
10341da177e4SLinus Torvalds 
10351da177e4SLinus Torvalds    Remove it only when all the things will work!
10361da177e4SLinus Torvalds  */
10371da177e4SLinus Torvalds 
10381da177e4SLinus Torvalds static int ipv6_get_mtu(struct net_device *dev)
10391da177e4SLinus Torvalds {
10401da177e4SLinus Torvalds 	int mtu = IPV6_MIN_MTU;
10411da177e4SLinus Torvalds 	struct inet6_dev *idev;
10421da177e4SLinus Torvalds 
10431da177e4SLinus Torvalds 	idev = in6_dev_get(dev);
10441da177e4SLinus Torvalds 	if (idev) {
10451da177e4SLinus Torvalds 		mtu = idev->cnf.mtu6;
10461da177e4SLinus Torvalds 		in6_dev_put(idev);
10471da177e4SLinus Torvalds 	}
10481da177e4SLinus Torvalds 	return mtu;
10491da177e4SLinus Torvalds }
10501da177e4SLinus Torvalds 
10516b75d090SYOSHIFUJI Hideaki int ip6_dst_hoplimit(struct dst_entry *dst)
10521da177e4SLinus Torvalds {
10536b75d090SYOSHIFUJI Hideaki 	int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
10546b75d090SYOSHIFUJI Hideaki 	if (hoplimit < 0) {
10556b75d090SYOSHIFUJI Hideaki 		struct net_device *dev = dst->dev;
10566b75d090SYOSHIFUJI Hideaki 		struct inet6_dev *idev = in6_dev_get(dev);
10571da177e4SLinus Torvalds 		if (idev) {
10581da177e4SLinus Torvalds 			hoplimit = idev->cnf.hop_limit;
10591da177e4SLinus Torvalds 			in6_dev_put(idev);
10606b75d090SYOSHIFUJI Hideaki 		} else
10616b75d090SYOSHIFUJI Hideaki 			hoplimit = ipv6_devconf.hop_limit;
10621da177e4SLinus Torvalds 	}
10631da177e4SLinus Torvalds 	return hoplimit;
10641da177e4SLinus Torvalds }
10651da177e4SLinus Torvalds 
10661da177e4SLinus Torvalds /*
10671da177e4SLinus Torvalds  *
10681da177e4SLinus Torvalds  */
10691da177e4SLinus Torvalds 
/*
 * Create a route from @cfg and insert it into its FIB table.
 *
 * Steps, in order: validate prefix lengths; resolve the device named by
 * cfg->fc_ifindex (if any); apply defaults for metric and protocol (these
 * are written back into @cfg); allocate and fill the rt6_info; handle the
 * reject/loopback and gateway special cases; copy user-supplied metrics;
 * fill in default hop limit / MTU / advertised MSS; finally insert with
 * __ip6_ins_rt().
 *
 * Returns the result of __ip6_ins_rt() on the success path, otherwise a
 * negative errno.  On the error path ("out") the device and inet6_dev
 * references taken here are dropped and the partially built route is
 * passed to dst_free().
 */
107086872cb5SThomas Graf int ip6_route_add(struct fib6_config *cfg)
10711da177e4SLinus Torvalds {
10721da177e4SLinus Torvalds 	int err;
10735578689aSDaniel Lezcano 	struct net *net = cfg->fc_nlinfo.nl_net;
10741da177e4SLinus Torvalds 	struct rt6_info *rt = NULL;
10751da177e4SLinus Torvalds 	struct net_device *dev = NULL;
10761da177e4SLinus Torvalds 	struct inet6_dev *idev = NULL;
1077c71099acSThomas Graf 	struct fib6_table *table;
10781da177e4SLinus Torvalds 	int addr_type;
10791da177e4SLinus Torvalds 
108086872cb5SThomas Graf 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
10811da177e4SLinus Torvalds 		return -EINVAL;
	/* source-specific routes need subtree support */
10821da177e4SLinus Torvalds #ifndef CONFIG_IPV6_SUBTREES
108386872cb5SThomas Graf 	if (cfg->fc_src_len)
10841da177e4SLinus Torvalds 		return -EINVAL;
10851da177e4SLinus Torvalds #endif
	/* an explicit interface must exist and have IPv6 state attached */
108686872cb5SThomas Graf 	if (cfg->fc_ifindex) {
10871da177e4SLinus Torvalds 		err = -ENODEV;
10885578689aSDaniel Lezcano 		dev = dev_get_by_index(net, cfg->fc_ifindex);
10891da177e4SLinus Torvalds 		if (!dev)
10901da177e4SLinus Torvalds 			goto out;
10911da177e4SLinus Torvalds 		idev = in6_dev_get(dev);
10921da177e4SLinus Torvalds 		if (!idev)
10931da177e4SLinus Torvalds 			goto out;
10941da177e4SLinus Torvalds 	}
10951da177e4SLinus Torvalds 
	/* metric 0 means "unspecified": default it (cfg is modified) */
109686872cb5SThomas Graf 	if (cfg->fc_metric == 0)
109786872cb5SThomas Graf 		cfg->fc_metric = IP6_RT_PRIO_USER;
10981da177e4SLinus Torvalds 
10995578689aSDaniel Lezcano 	table = fib6_new_table(net, cfg->fc_table);
1100c71099acSThomas Graf 	if (table == NULL) {
1101c71099acSThomas Graf 		err = -ENOBUFS;
1102c71099acSThomas Graf 		goto out;
1103c71099acSThomas Graf 	}
1104c71099acSThomas Graf 
1105f2fc6a54SBenjamin Thery 	rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
11061da177e4SLinus Torvalds 
11071da177e4SLinus Torvalds 	if (rt == NULL) {
11081da177e4SLinus Torvalds 		err = -ENOMEM;
11091da177e4SLinus Torvalds 		goto out;
11101da177e4SLinus Torvalds 	}
11111da177e4SLinus Torvalds 
11121da177e4SLinus Torvalds 	rt->u.dst.obsolete = -1;
	/* fc_expires is in clock_t units, relative to now; 0 = no expiry */
11136f704992SYOSHIFUJI Hideaki 	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
11146f704992SYOSHIFUJI Hideaki 				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
11156f704992SYOSHIFUJI Hideaki 				0;
11161da177e4SLinus Torvalds 
	/* unspecified protocol defaults to RTPROT_BOOT (cfg is modified) */
111786872cb5SThomas Graf 	if (cfg->fc_protocol == RTPROT_UNSPEC)
111886872cb5SThomas Graf 		cfg->fc_protocol = RTPROT_BOOT;
111986872cb5SThomas Graf 	rt->rt6i_protocol = cfg->fc_protocol;
112086872cb5SThomas Graf 
112186872cb5SThomas Graf 	addr_type = ipv6_addr_type(&cfg->fc_dst);
11221da177e4SLinus Torvalds 
	/* default handlers; the reject path below overrides them */
11231da177e4SLinus Torvalds 	if (addr_type & IPV6_ADDR_MULTICAST)
11241da177e4SLinus Torvalds 		rt->u.dst.input = ip6_mc_input;
11251da177e4SLinus Torvalds 	else
11261da177e4SLinus Torvalds 		rt->u.dst.input = ip6_forward;
11271da177e4SLinus Torvalds 
11281da177e4SLinus Torvalds 	rt->u.dst.output = ip6_output;
11291da177e4SLinus Torvalds 
	/* store only the prefix bits of the destination */
113086872cb5SThomas Graf 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
113186872cb5SThomas Graf 	rt->rt6i_dst.plen = cfg->fc_dst_len;
11321da177e4SLinus Torvalds 	if (rt->rt6i_dst.plen == 128)
11331da177e4SLinus Torvalds 	       rt->u.dst.flags = DST_HOST;
11341da177e4SLinus Torvalds 
11351da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
113686872cb5SThomas Graf 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
113786872cb5SThomas Graf 	rt->rt6i_src.plen = cfg->fc_src_len;
11381da177e4SLinus Torvalds #endif
11391da177e4SLinus Torvalds 
114086872cb5SThomas Graf 	rt->rt6i_metric = cfg->fc_metric;
11411da177e4SLinus Torvalds 
11421da177e4SLinus Torvalds 	/* We cannot add true routes via loopback here,
11431da177e4SLinus Torvalds 	   they would result in kernel looping; promote them to reject routes
11441da177e4SLinus Torvalds 	 */
114586872cb5SThomas Graf 	if ((cfg->fc_flags & RTF_REJECT) ||
11461da177e4SLinus Torvalds 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
11471da177e4SLinus Torvalds 		/* hold loopback dev/idev if we haven't done so. */
11485578689aSDaniel Lezcano 		if (dev != net->loopback_dev) {
11491da177e4SLinus Torvalds 			if (dev) {
11501da177e4SLinus Torvalds 				dev_put(dev);
11511da177e4SLinus Torvalds 				in6_dev_put(idev);
11521da177e4SLinus Torvalds 			}
11535578689aSDaniel Lezcano 			dev = net->loopback_dev;
11541da177e4SLinus Torvalds 			dev_hold(dev);
11551da177e4SLinus Torvalds 			idev = in6_dev_get(dev);
11561da177e4SLinus Torvalds 			if (!idev) {
11571da177e4SLinus Torvalds 				err = -ENODEV;
11581da177e4SLinus Torvalds 				goto out;
11591da177e4SLinus Torvalds 			}
11601da177e4SLinus Torvalds 		}
		/* reject routes discard in both directions; flags are forced */
11611da177e4SLinus Torvalds 		rt->u.dst.output = ip6_pkt_discard_out;
11621da177e4SLinus Torvalds 		rt->u.dst.input = ip6_pkt_discard;
11631da177e4SLinus Torvalds 		rt->u.dst.error = -ENETUNREACH;
11641da177e4SLinus Torvalds 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
11651da177e4SLinus Torvalds 		goto install_route;
11661da177e4SLinus Torvalds 	}
11671da177e4SLinus Torvalds 
116886872cb5SThomas Graf 	if (cfg->fc_flags & RTF_GATEWAY) {
11691da177e4SLinus Torvalds 		struct in6_addr *gw_addr;
11701da177e4SLinus Torvalds 		int gwa_type;
11711da177e4SLinus Torvalds 
117286872cb5SThomas Graf 		gw_addr = &cfg->fc_gateway;
117386872cb5SThomas Graf 		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
11741da177e4SLinus Torvalds 		gwa_type = ipv6_addr_type(gw_addr);
11751da177e4SLinus Torvalds 
		/* anything but a link-local unicast gateway needs extra checks */
11761da177e4SLinus Torvalds 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
11771da177e4SLinus Torvalds 			struct rt6_info *grt;
11781da177e4SLinus Torvalds 
11791da177e4SLinus Torvalds 			/* IPv6 strictly inhibits using not link-local
11801da177e4SLinus Torvalds 			   addresses as nexthop address.
11811da177e4SLinus Torvalds 			   Otherwise, router will not able to send redirects.
11821da177e4SLinus Torvalds 			   It is very good, but in some (rare!) circumstances
11831da177e4SLinus Torvalds 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
11841da177e4SLinus Torvalds 			   some exceptions. --ANK
11851da177e4SLinus Torvalds 			 */
11861da177e4SLinus Torvalds 			err = -EINVAL;
11871da177e4SLinus Torvalds 			if (!(gwa_type&IPV6_ADDR_UNICAST))
11881da177e4SLinus Torvalds 				goto out;
11891da177e4SLinus Torvalds 
			/* the gateway itself must be reachable via an existing route */
11905578689aSDaniel Lezcano 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
11911da177e4SLinus Torvalds 
			/*
			 * err stays -EHOSTUNREACH from here on and is only
			 * cleared below if the route to the gateway is not
			 * itself a gateway route.
			 */
11921da177e4SLinus Torvalds 			err = -EHOSTUNREACH;
11931da177e4SLinus Torvalds 			if (grt == NULL)
11941da177e4SLinus Torvalds 				goto out;
11951da177e4SLinus Torvalds 			if (dev) {
11961da177e4SLinus Torvalds 				if (dev != grt->rt6i_dev) {
11971da177e4SLinus Torvalds 					dst_release(&grt->u.dst);
11981da177e4SLinus Torvalds 					goto out;
11991da177e4SLinus Torvalds 				}
12001da177e4SLinus Torvalds 			} else {
				/* no device given: adopt the one the gateway route uses */
12011da177e4SLinus Torvalds 				dev = grt->rt6i_dev;
12021da177e4SLinus Torvalds 				idev = grt->rt6i_idev;
12031da177e4SLinus Torvalds 				dev_hold(dev);
12041da177e4SLinus Torvalds 				in6_dev_hold(grt->rt6i_idev);
12051da177e4SLinus Torvalds 			}
12061da177e4SLinus Torvalds 			if (!(grt->rt6i_flags&RTF_GATEWAY))
12071da177e4SLinus Torvalds 				err = 0;
12081da177e4SLinus Torvalds 			dst_release(&grt->u.dst);
12091da177e4SLinus Torvalds 
12101da177e4SLinus Torvalds 			if (err)
12111da177e4SLinus Torvalds 				goto out;
12121da177e4SLinus Torvalds 		}
		/* a gateway route needs a real (non-loopback) device */
12131da177e4SLinus Torvalds 		err = -EINVAL;
12141da177e4SLinus Torvalds 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
12151da177e4SLinus Torvalds 			goto out;
12161da177e4SLinus Torvalds 	}
12171da177e4SLinus Torvalds 
12181da177e4SLinus Torvalds 	err = -ENODEV;
12191da177e4SLinus Torvalds 	if (dev == NULL)
12201da177e4SLinus Torvalds 		goto out;
12211da177e4SLinus Torvalds 
	/* bind a neighbour entry for the gateway address up front */
122286872cb5SThomas Graf 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
12231da177e4SLinus Torvalds 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
12241da177e4SLinus Torvalds 		if (IS_ERR(rt->rt6i_nexthop)) {
12251da177e4SLinus Torvalds 			err = PTR_ERR(rt->rt6i_nexthop);
			/* do not leave an error pointer in the route being freed */
12261da177e4SLinus Torvalds 			rt->rt6i_nexthop = NULL;
12271da177e4SLinus Torvalds 			goto out;
12281da177e4SLinus Torvalds 		}
12291da177e4SLinus Torvalds 	}
12301da177e4SLinus Torvalds 
123186872cb5SThomas Graf 	rt->rt6i_flags = cfg->fc_flags;
12321da177e4SLinus Torvalds 
12331da177e4SLinus Torvalds install_route:
	/* copy user-supplied metrics: type 0 is skipped, > RTAX_MAX rejected */
123486872cb5SThomas Graf 	if (cfg->fc_mx) {
123586872cb5SThomas Graf 		struct nlattr *nla;
123686872cb5SThomas Graf 		int remaining;
12371da177e4SLinus Torvalds 
123886872cb5SThomas Graf 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
12398f4c1f9bSThomas Graf 			int type = nla_type(nla);
124086872cb5SThomas Graf 
124186872cb5SThomas Graf 			if (type) {
124286872cb5SThomas Graf 				if (type > RTAX_MAX) {
12431da177e4SLinus Torvalds 					err = -EINVAL;
12441da177e4SLinus Torvalds 					goto out;
12451da177e4SLinus Torvalds 				}
124686872cb5SThomas Graf 
124786872cb5SThomas Graf 				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
12481da177e4SLinus Torvalds 			}
12491da177e4SLinus Torvalds 		}
12501da177e4SLinus Torvalds 	}
12511da177e4SLinus Torvalds 
	/*
	 * Defaults for metrics left unset: hop limit -1 (ip6_dst_hoplimit()
	 * treats a negative value as "use the device setting"), MTU from
	 * the device, advertised MSS derived from the MTU.
	 */
12525ffc02a1SSatoru SATOH 	if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
12531da177e4SLinus Torvalds 		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
12545ffc02a1SSatoru SATOH 	if (!dst_metric(&rt->u.dst, RTAX_MTU))
12551da177e4SLinus Torvalds 		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
12565ffc02a1SSatoru SATOH 	if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
12575578689aSDaniel Lezcano 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
	/* the dev/idev pointers (and the references held on them) move into rt */
12581da177e4SLinus Torvalds 	rt->u.dst.dev = dev;
12591da177e4SLinus Torvalds 	rt->rt6i_idev = idev;
1260c71099acSThomas Graf 	rt->rt6i_table = table;
126163152fc0SDaniel Lezcano 
1262c346dca1SYOSHIFUJI Hideaki 	cfg->fc_nlinfo.nl_net = dev_net(dev);
126363152fc0SDaniel Lezcano 
126486872cb5SThomas Graf 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
12651da177e4SLinus Torvalds 
	/* error exit: undo the references taken above and free the route */
12661da177e4SLinus Torvalds out:
12671da177e4SLinus Torvalds 	if (dev)
12681da177e4SLinus Torvalds 		dev_put(dev);
12691da177e4SLinus Torvalds 	if (idev)
12701da177e4SLinus Torvalds 		in6_dev_put(idev);
12711da177e4SLinus Torvalds 	if (rt)
127240aa7b90SYOSHIFUJI Hideaki 		dst_free(&rt->u.dst);
12731da177e4SLinus Torvalds 	return err;
12741da177e4SLinus Torvalds }
12751da177e4SLinus Torvalds 
127686872cb5SThomas Graf static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
12771da177e4SLinus Torvalds {
12781da177e4SLinus Torvalds 	int err;
1279c71099acSThomas Graf 	struct fib6_table *table;
1280c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(rt->rt6i_dev);
12811da177e4SLinus Torvalds 
12828ed67789SDaniel Lezcano 	if (rt == net->ipv6.ip6_null_entry)
12836c813a72SPatrick McHardy 		return -ENOENT;
12846c813a72SPatrick McHardy 
1285c71099acSThomas Graf 	table = rt->rt6i_table;
1286c71099acSThomas Graf 	write_lock_bh(&table->tb6_lock);
12871da177e4SLinus Torvalds 
128886872cb5SThomas Graf 	err = fib6_del(rt, info);
12891da177e4SLinus Torvalds 	dst_release(&rt->u.dst);
12901da177e4SLinus Torvalds 
1291c71099acSThomas Graf 	write_unlock_bh(&table->tb6_lock);
12921da177e4SLinus Torvalds 
12931da177e4SLinus Torvalds 	return err;
12941da177e4SLinus Torvalds }
12951da177e4SLinus Torvalds 
1296e0a1ad73SThomas Graf int ip6_del_rt(struct rt6_info *rt)
1297e0a1ad73SThomas Graf {
12984d1169c1SDenis V. Lunev 	struct nl_info info = {
1299c346dca1SYOSHIFUJI Hideaki 		.nl_net = dev_net(rt->rt6i_dev),
13004d1169c1SDenis V. Lunev 	};
1301528c4cebSDenis V. Lunev 	return __ip6_del_rt(rt, &info);
1302e0a1ad73SThomas Graf }
1303e0a1ad73SThomas Graf 
130486872cb5SThomas Graf static int ip6_route_del(struct fib6_config *cfg)
13051da177e4SLinus Torvalds {
1306c71099acSThomas Graf 	struct fib6_table *table;
13071da177e4SLinus Torvalds 	struct fib6_node *fn;
13081da177e4SLinus Torvalds 	struct rt6_info *rt;
13091da177e4SLinus Torvalds 	int err = -ESRCH;
13101da177e4SLinus Torvalds 
13115578689aSDaniel Lezcano 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1312c71099acSThomas Graf 	if (table == NULL)
1313c71099acSThomas Graf 		return err;
13141da177e4SLinus Torvalds 
1315c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
1316c71099acSThomas Graf 
1317c71099acSThomas Graf 	fn = fib6_locate(&table->tb6_root,
131886872cb5SThomas Graf 			 &cfg->fc_dst, cfg->fc_dst_len,
131986872cb5SThomas Graf 			 &cfg->fc_src, cfg->fc_src_len);
13201da177e4SLinus Torvalds 
13211da177e4SLinus Torvalds 	if (fn) {
13227cc48263SEric Dumazet 		for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
132386872cb5SThomas Graf 			if (cfg->fc_ifindex &&
13241da177e4SLinus Torvalds 			    (rt->rt6i_dev == NULL ||
132586872cb5SThomas Graf 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
13261da177e4SLinus Torvalds 				continue;
132786872cb5SThomas Graf 			if (cfg->fc_flags & RTF_GATEWAY &&
132886872cb5SThomas Graf 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
13291da177e4SLinus Torvalds 				continue;
133086872cb5SThomas Graf 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
13311da177e4SLinus Torvalds 				continue;
13321da177e4SLinus Torvalds 			dst_hold(&rt->u.dst);
1333c71099acSThomas Graf 			read_unlock_bh(&table->tb6_lock);
13341da177e4SLinus Torvalds 
133586872cb5SThomas Graf 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
13361da177e4SLinus Torvalds 		}
13371da177e4SLinus Torvalds 	}
1338c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
13391da177e4SLinus Torvalds 
13401da177e4SLinus Torvalds 	return err;
13411da177e4SLinus Torvalds }
13421da177e4SLinus Torvalds 
13431da177e4SLinus Torvalds /*
13441da177e4SLinus Torvalds  *	Handle redirects
13451da177e4SLinus Torvalds  */
/*
 * Flow key used for redirect validation.
 *
 * ip6_route_redirect() passes a pointer to one of these as a
 * struct flowi *, and __ip6_route_redirect() casts that pointer back
 * to struct ip6rd_flowi *; "fl" therefore has to stay the first member.
 * "gateway" is the address compared against each candidate route's
 * rt6i_gateway.
 */
1346a6279458SYOSHIFUJI Hideaki struct ip6rd_flowi {
1347a6279458SYOSHIFUJI Hideaki 	struct flowi fl;
1348a6279458SYOSHIFUJI Hideaki 	struct in6_addr gateway;
1349a6279458SYOSHIFUJI Hideaki };
13501da177e4SLinus Torvalds 
13518ed67789SDaniel Lezcano static struct rt6_info *__ip6_route_redirect(struct net *net,
13528ed67789SDaniel Lezcano 					     struct fib6_table *table,
1353a6279458SYOSHIFUJI Hideaki 					     struct flowi *fl,
1354a6279458SYOSHIFUJI Hideaki 					     int flags)
1355a6279458SYOSHIFUJI Hideaki {
1356a6279458SYOSHIFUJI Hideaki 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1357a6279458SYOSHIFUJI Hideaki 	struct rt6_info *rt;
1358a6279458SYOSHIFUJI Hideaki 	struct fib6_node *fn;
1359c71099acSThomas Graf 
1360e843b9e1SYOSHIFUJI Hideaki 	/*
1361e843b9e1SYOSHIFUJI Hideaki 	 * Get the "current" route for this destination and
1362e843b9e1SYOSHIFUJI Hideaki 	 * check if the redirect has come from approriate router.
1363e843b9e1SYOSHIFUJI Hideaki 	 *
1364e843b9e1SYOSHIFUJI Hideaki 	 * RFC 2461 specifies that redirects should only be
1365e843b9e1SYOSHIFUJI Hideaki 	 * accepted if they come from the nexthop to the target.
1366e843b9e1SYOSHIFUJI Hideaki 	 * Due to the way the routes are chosen, this notion
1367e843b9e1SYOSHIFUJI Hideaki 	 * is a bit fuzzy and one might need to check all possible
1368e843b9e1SYOSHIFUJI Hideaki 	 * routes.
1369e843b9e1SYOSHIFUJI Hideaki 	 */
13701da177e4SLinus Torvalds 
1371c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
1372a6279458SYOSHIFUJI Hideaki 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1373e843b9e1SYOSHIFUJI Hideaki restart:
13747cc48263SEric Dumazet 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
13751da177e4SLinus Torvalds 		/*
13761da177e4SLinus Torvalds 		 * Current route is on-link; redirect is always invalid.
13771da177e4SLinus Torvalds 		 *
13781da177e4SLinus Torvalds 		 * Seems, previous statement is not true. It could
13791da177e4SLinus Torvalds 		 * be node, which looks for us as on-link (f.e. proxy ndisc)
13801da177e4SLinus Torvalds 		 * But then router serving it might decide, that we should
13811da177e4SLinus Torvalds 		 * know truth 8)8) --ANK (980726).
13821da177e4SLinus Torvalds 		 */
1383e843b9e1SYOSHIFUJI Hideaki 		if (rt6_check_expired(rt))
1384e843b9e1SYOSHIFUJI Hideaki 			continue;
13851da177e4SLinus Torvalds 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1386e843b9e1SYOSHIFUJI Hideaki 			continue;
1387a6279458SYOSHIFUJI Hideaki 		if (fl->oif != rt->rt6i_dev->ifindex)
1388e843b9e1SYOSHIFUJI Hideaki 			continue;
1389a6279458SYOSHIFUJI Hideaki 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1390e843b9e1SYOSHIFUJI Hideaki 			continue;
1391e843b9e1SYOSHIFUJI Hideaki 		break;
1392e843b9e1SYOSHIFUJI Hideaki 	}
1393a6279458SYOSHIFUJI Hideaki 
1394cb15d9c2SYOSHIFUJI Hideaki 	if (!rt)
13958ed67789SDaniel Lezcano 		rt = net->ipv6.ip6_null_entry;
13968ed67789SDaniel Lezcano 	BACKTRACK(net, &fl->fl6_src);
1397cb15d9c2SYOSHIFUJI Hideaki out:
1398a6279458SYOSHIFUJI Hideaki 	dst_hold(&rt->u.dst);
1399a6279458SYOSHIFUJI Hideaki 
1400c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
14011da177e4SLinus Torvalds 
1402a6279458SYOSHIFUJI Hideaki 	return rt;
1403a6279458SYOSHIFUJI Hideaki };
1404a6279458SYOSHIFUJI Hideaki 
1405a6279458SYOSHIFUJI Hideaki static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1406a6279458SYOSHIFUJI Hideaki 					   struct in6_addr *src,
1407a6279458SYOSHIFUJI Hideaki 					   struct in6_addr *gateway,
1408a6279458SYOSHIFUJI Hideaki 					   struct net_device *dev)
1409a6279458SYOSHIFUJI Hideaki {
1410adaa70bbSThomas Graf 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1411c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
1412a6279458SYOSHIFUJI Hideaki 	struct ip6rd_flowi rdfl = {
1413a6279458SYOSHIFUJI Hideaki 		.fl = {
1414a6279458SYOSHIFUJI Hideaki 			.oif = dev->ifindex,
1415a6279458SYOSHIFUJI Hideaki 			.nl_u = {
1416a6279458SYOSHIFUJI Hideaki 				.ip6_u = {
1417a6279458SYOSHIFUJI Hideaki 					.daddr = *dest,
1418a6279458SYOSHIFUJI Hideaki 					.saddr = *src,
1419a6279458SYOSHIFUJI Hideaki 				},
1420a6279458SYOSHIFUJI Hideaki 			},
1421a6279458SYOSHIFUJI Hideaki 		},
1422a6279458SYOSHIFUJI Hideaki 		.gateway = *gateway,
1423a6279458SYOSHIFUJI Hideaki 	};
1424adaa70bbSThomas Graf 
1425adaa70bbSThomas Graf 	if (rt6_need_strict(dest))
1426adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_IFACE;
1427a6279458SYOSHIFUJI Hideaki 
14285578689aSDaniel Lezcano 	return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
142958f09b78SDaniel Lezcano 						   flags, __ip6_route_redirect);
1430a6279458SYOSHIFUJI Hideaki }
1431a6279458SYOSHIFUJI Hideaki 
1432a6279458SYOSHIFUJI Hideaki void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1433a6279458SYOSHIFUJI Hideaki 		  struct in6_addr *saddr,
1434a6279458SYOSHIFUJI Hideaki 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1435a6279458SYOSHIFUJI Hideaki {
1436a6279458SYOSHIFUJI Hideaki 	struct rt6_info *rt, *nrt = NULL;
1437a6279458SYOSHIFUJI Hideaki 	struct netevent_redirect netevent;
1438c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(neigh->dev);
1439a6279458SYOSHIFUJI Hideaki 
1440a6279458SYOSHIFUJI Hideaki 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1441a6279458SYOSHIFUJI Hideaki 
14428ed67789SDaniel Lezcano 	if (rt == net->ipv6.ip6_null_entry) {
14431da177e4SLinus Torvalds 		if (net_ratelimit())
14441da177e4SLinus Torvalds 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
14451da177e4SLinus Torvalds 			       "for redirect target\n");
1446a6279458SYOSHIFUJI Hideaki 		goto out;
14471da177e4SLinus Torvalds 	}
14481da177e4SLinus Torvalds 
14491da177e4SLinus Torvalds 	/*
14501da177e4SLinus Torvalds 	 *	We have finally decided to accept it.
14511da177e4SLinus Torvalds 	 */
14521da177e4SLinus Torvalds 
14531da177e4SLinus Torvalds 	neigh_update(neigh, lladdr, NUD_STALE,
14541da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
14551da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_OVERRIDE|
14561da177e4SLinus Torvalds 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
14571da177e4SLinus Torvalds 				     NEIGH_UPDATE_F_ISROUTER))
14581da177e4SLinus Torvalds 		     );
14591da177e4SLinus Torvalds 
14601da177e4SLinus Torvalds 	/*
14611da177e4SLinus Torvalds 	 * Redirect received -> path was valid.
14621da177e4SLinus Torvalds 	 * Look, redirects are sent only in response to data packets,
14631da177e4SLinus Torvalds 	 * so that this nexthop apparently is reachable. --ANK
14641da177e4SLinus Torvalds 	 */
14651da177e4SLinus Torvalds 	dst_confirm(&rt->u.dst);
14661da177e4SLinus Torvalds 
14671da177e4SLinus Torvalds 	/* Duplicate redirect: silently ignore. */
14681da177e4SLinus Torvalds 	if (neigh == rt->u.dst.neighbour)
14691da177e4SLinus Torvalds 		goto out;
14701da177e4SLinus Torvalds 
14711da177e4SLinus Torvalds 	nrt = ip6_rt_copy(rt);
14721da177e4SLinus Torvalds 	if (nrt == NULL)
14731da177e4SLinus Torvalds 		goto out;
14741da177e4SLinus Torvalds 
14751da177e4SLinus Torvalds 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
14761da177e4SLinus Torvalds 	if (on_link)
14771da177e4SLinus Torvalds 		nrt->rt6i_flags &= ~RTF_GATEWAY;
14781da177e4SLinus Torvalds 
14791da177e4SLinus Torvalds 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
14801da177e4SLinus Torvalds 	nrt->rt6i_dst.plen = 128;
14811da177e4SLinus Torvalds 	nrt->u.dst.flags |= DST_HOST;
14821da177e4SLinus Torvalds 
14831da177e4SLinus Torvalds 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
14841da177e4SLinus Torvalds 	nrt->rt6i_nexthop = neigh_clone(neigh);
14851da177e4SLinus Torvalds 	/* Reset pmtu, it may be better */
14861da177e4SLinus Torvalds 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1487c346dca1SYOSHIFUJI Hideaki 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
14885578689aSDaniel Lezcano 							dst_mtu(&nrt->u.dst));
14891da177e4SLinus Torvalds 
149040e22e8fSThomas Graf 	if (ip6_ins_rt(nrt))
14911da177e4SLinus Torvalds 		goto out;
14921da177e4SLinus Torvalds 
14938d71740cSTom Tucker 	netevent.old = &rt->u.dst;
14948d71740cSTom Tucker 	netevent.new = &nrt->u.dst;
14958d71740cSTom Tucker 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
14968d71740cSTom Tucker 
14971da177e4SLinus Torvalds 	if (rt->rt6i_flags&RTF_CACHE) {
1498e0a1ad73SThomas Graf 		ip6_del_rt(rt);
14991da177e4SLinus Torvalds 		return;
15001da177e4SLinus Torvalds 	}
15011da177e4SLinus Torvalds 
15021da177e4SLinus Torvalds out:
15031da177e4SLinus Torvalds 	dst_release(&rt->u.dst);
15041da177e4SLinus Torvalds 	return;
15051da177e4SLinus Torvalds }
15061da177e4SLinus Torvalds 
15071da177e4SLinus Torvalds /*
15081da177e4SLinus Torvalds  *	Handle ICMP "packet too big" messages
15091da177e4SLinus Torvalds  *	i.e. Path MTU discovery
15101da177e4SLinus Torvalds  */
15111da177e4SLinus Torvalds 
15121da177e4SLinus Torvalds void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
15131da177e4SLinus Torvalds 			struct net_device *dev, u32 pmtu)
15141da177e4SLinus Torvalds {
15151da177e4SLinus Torvalds 	struct rt6_info *rt, *nrt;
1516c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
15171da177e4SLinus Torvalds 	int allfrag = 0;
15181da177e4SLinus Torvalds 
15195578689aSDaniel Lezcano 	rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
15201da177e4SLinus Torvalds 	if (rt == NULL)
15211da177e4SLinus Torvalds 		return;
15221da177e4SLinus Torvalds 
15231da177e4SLinus Torvalds 	if (pmtu >= dst_mtu(&rt->u.dst))
15241da177e4SLinus Torvalds 		goto out;
15251da177e4SLinus Torvalds 
15261da177e4SLinus Torvalds 	if (pmtu < IPV6_MIN_MTU) {
15271da177e4SLinus Torvalds 		/*
15281da177e4SLinus Torvalds 		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
15291da177e4SLinus Torvalds 		 * MTU (1280) and a fragment header should always be included
15301da177e4SLinus Torvalds 		 * after a node receiving Too Big message reporting PMTU is
15311da177e4SLinus Torvalds 		 * less than the IPv6 Minimum Link MTU.
15321da177e4SLinus Torvalds 		 */
15331da177e4SLinus Torvalds 		pmtu = IPV6_MIN_MTU;
15341da177e4SLinus Torvalds 		allfrag = 1;
15351da177e4SLinus Torvalds 	}
15361da177e4SLinus Torvalds 
15371da177e4SLinus Torvalds 	/* New mtu received -> path was valid.
15381da177e4SLinus Torvalds 	   They are sent only in response to data packets,
15391da177e4SLinus Torvalds 	   so that this nexthop apparently is reachable. --ANK
15401da177e4SLinus Torvalds 	 */
15411da177e4SLinus Torvalds 	dst_confirm(&rt->u.dst);
15421da177e4SLinus Torvalds 
15431da177e4SLinus Torvalds 	/* Host route. If it is static, it would be better
15441da177e4SLinus Torvalds 	   not to override it, but add new one, so that
15451da177e4SLinus Torvalds 	   when cache entry will expire old pmtu
15461da177e4SLinus Torvalds 	   would return automatically.
15471da177e4SLinus Torvalds 	 */
15481da177e4SLinus Torvalds 	if (rt->rt6i_flags & RTF_CACHE) {
15491da177e4SLinus Torvalds 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
15501da177e4SLinus Torvalds 		if (allfrag)
15511da177e4SLinus Torvalds 			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
15525578689aSDaniel Lezcano 		dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
15531da177e4SLinus Torvalds 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
15541da177e4SLinus Torvalds 		goto out;
15551da177e4SLinus Torvalds 	}
15561da177e4SLinus Torvalds 
15571da177e4SLinus Torvalds 	/* Network route.
15581da177e4SLinus Torvalds 	   Two cases are possible:
15591da177e4SLinus Torvalds 	   1. It is connected route. Action: COW
15601da177e4SLinus Torvalds 	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
15611da177e4SLinus Torvalds 	 */
1562d5315b50SYOSHIFUJI Hideaki 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1563a1e78363SYOSHIFUJI Hideaki 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1564d5315b50SYOSHIFUJI Hideaki 	else
1565d5315b50SYOSHIFUJI Hideaki 		nrt = rt6_alloc_clone(rt, daddr);
1566a1e78363SYOSHIFUJI Hideaki 
1567d5315b50SYOSHIFUJI Hideaki 	if (nrt) {
15681da177e4SLinus Torvalds 		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
15691da177e4SLinus Torvalds 		if (allfrag)
15701da177e4SLinus Torvalds 			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1571a1e78363SYOSHIFUJI Hideaki 
15721da177e4SLinus Torvalds 		/* According to RFC 1981, detecting PMTU increase shouldn't be
1573a1e78363SYOSHIFUJI Hideaki 		 * happened within 5 mins, the recommended timer is 10 mins.
1574a1e78363SYOSHIFUJI Hideaki 		 * Here this route expiration time is set to ip6_rt_mtu_expires
1575a1e78363SYOSHIFUJI Hideaki 		 * which is 10 mins. After 10 mins the decreased pmtu is expired
1576a1e78363SYOSHIFUJI Hideaki 		 * and detecting PMTU increase will be automatically happened.
15771da177e4SLinus Torvalds 		 */
15785578689aSDaniel Lezcano 		dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
15791da177e4SLinus Torvalds 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1580a1e78363SYOSHIFUJI Hideaki 
158140e22e8fSThomas Graf 		ip6_ins_rt(nrt);
15821da177e4SLinus Torvalds 	}
15831da177e4SLinus Torvalds out:
15841da177e4SLinus Torvalds 	dst_release(&rt->u.dst);
15851da177e4SLinus Torvalds }
15861da177e4SLinus Torvalds 
15871da177e4SLinus Torvalds /*
15881da177e4SLinus Torvalds  *	Misc support functions
15891da177e4SLinus Torvalds  */
15901da177e4SLinus Torvalds 
15911da177e4SLinus Torvalds static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
15921da177e4SLinus Torvalds {
1593c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(ort->rt6i_dev);
1594f2fc6a54SBenjamin Thery 	struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
15951da177e4SLinus Torvalds 
15961da177e4SLinus Torvalds 	if (rt) {
15971da177e4SLinus Torvalds 		rt->u.dst.input = ort->u.dst.input;
15981da177e4SLinus Torvalds 		rt->u.dst.output = ort->u.dst.output;
15991da177e4SLinus Torvalds 
16001da177e4SLinus Torvalds 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
160122e1e4d8SVille Nuorvala 		rt->u.dst.error = ort->u.dst.error;
16021da177e4SLinus Torvalds 		rt->u.dst.dev = ort->u.dst.dev;
16031da177e4SLinus Torvalds 		if (rt->u.dst.dev)
16041da177e4SLinus Torvalds 			dev_hold(rt->u.dst.dev);
16051da177e4SLinus Torvalds 		rt->rt6i_idev = ort->rt6i_idev;
16061da177e4SLinus Torvalds 		if (rt->rt6i_idev)
16071da177e4SLinus Torvalds 			in6_dev_hold(rt->rt6i_idev);
16081da177e4SLinus Torvalds 		rt->u.dst.lastuse = jiffies;
16091da177e4SLinus Torvalds 		rt->rt6i_expires = 0;
16101da177e4SLinus Torvalds 
16111da177e4SLinus Torvalds 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
16121da177e4SLinus Torvalds 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
16131da177e4SLinus Torvalds 		rt->rt6i_metric = 0;
16141da177e4SLinus Torvalds 
16151da177e4SLinus Torvalds 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
16161da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
16171da177e4SLinus Torvalds 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
16181da177e4SLinus Torvalds #endif
1619c71099acSThomas Graf 		rt->rt6i_table = ort->rt6i_table;
16201da177e4SLinus Torvalds 	}
16211da177e4SLinus Torvalds 	return rt;
16221da177e4SLinus Torvalds }
16231da177e4SLinus Torvalds 
162470ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
1625efa2cea0SDaniel Lezcano static struct rt6_info *rt6_get_route_info(struct net *net,
1626efa2cea0SDaniel Lezcano 					   struct in6_addr *prefix, int prefixlen,
162770ceb4f5SYOSHIFUJI Hideaki 					   struct in6_addr *gwaddr, int ifindex)
162870ceb4f5SYOSHIFUJI Hideaki {
162970ceb4f5SYOSHIFUJI Hideaki 	struct fib6_node *fn;
163070ceb4f5SYOSHIFUJI Hideaki 	struct rt6_info *rt = NULL;
1631c71099acSThomas Graf 	struct fib6_table *table;
163270ceb4f5SYOSHIFUJI Hideaki 
1633efa2cea0SDaniel Lezcano 	table = fib6_get_table(net, RT6_TABLE_INFO);
1634c71099acSThomas Graf 	if (table == NULL)
1635c71099acSThomas Graf 		return NULL;
1636c71099acSThomas Graf 
1637c71099acSThomas Graf 	write_lock_bh(&table->tb6_lock);
1638c71099acSThomas Graf 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
163970ceb4f5SYOSHIFUJI Hideaki 	if (!fn)
164070ceb4f5SYOSHIFUJI Hideaki 		goto out;
164170ceb4f5SYOSHIFUJI Hideaki 
16427cc48263SEric Dumazet 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
164370ceb4f5SYOSHIFUJI Hideaki 		if (rt->rt6i_dev->ifindex != ifindex)
164470ceb4f5SYOSHIFUJI Hideaki 			continue;
164570ceb4f5SYOSHIFUJI Hideaki 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
164670ceb4f5SYOSHIFUJI Hideaki 			continue;
164770ceb4f5SYOSHIFUJI Hideaki 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
164870ceb4f5SYOSHIFUJI Hideaki 			continue;
164970ceb4f5SYOSHIFUJI Hideaki 		dst_hold(&rt->u.dst);
165070ceb4f5SYOSHIFUJI Hideaki 		break;
165170ceb4f5SYOSHIFUJI Hideaki 	}
165270ceb4f5SYOSHIFUJI Hideaki out:
1653c71099acSThomas Graf 	write_unlock_bh(&table->tb6_lock);
165470ceb4f5SYOSHIFUJI Hideaki 	return rt;
165570ceb4f5SYOSHIFUJI Hideaki }
165670ceb4f5SYOSHIFUJI Hideaki 
1657efa2cea0SDaniel Lezcano static struct rt6_info *rt6_add_route_info(struct net *net,
1658efa2cea0SDaniel Lezcano 					   struct in6_addr *prefix, int prefixlen,
165970ceb4f5SYOSHIFUJI Hideaki 					   struct in6_addr *gwaddr, int ifindex,
166070ceb4f5SYOSHIFUJI Hideaki 					   unsigned pref)
166170ceb4f5SYOSHIFUJI Hideaki {
166286872cb5SThomas Graf 	struct fib6_config cfg = {
166386872cb5SThomas Graf 		.fc_table	= RT6_TABLE_INFO,
1664238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
166586872cb5SThomas Graf 		.fc_ifindex	= ifindex,
166686872cb5SThomas Graf 		.fc_dst_len	= prefixlen,
166786872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
166886872cb5SThomas Graf 				  RTF_UP | RTF_PREF(pref),
1669efa2cea0SDaniel Lezcano 		.fc_nlinfo.pid = 0,
1670efa2cea0SDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
1671efa2cea0SDaniel Lezcano 		.fc_nlinfo.nl_net = net,
167286872cb5SThomas Graf 	};
167370ceb4f5SYOSHIFUJI Hideaki 
167486872cb5SThomas Graf 	ipv6_addr_copy(&cfg.fc_dst, prefix);
167586872cb5SThomas Graf 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
167686872cb5SThomas Graf 
1677e317da96SYOSHIFUJI Hideaki 	/* We should treat it as a default route if prefix length is 0. */
1678e317da96SYOSHIFUJI Hideaki 	if (!prefixlen)
167986872cb5SThomas Graf 		cfg.fc_flags |= RTF_DEFAULT;
168070ceb4f5SYOSHIFUJI Hideaki 
168186872cb5SThomas Graf 	ip6_route_add(&cfg);
168270ceb4f5SYOSHIFUJI Hideaki 
1683efa2cea0SDaniel Lezcano 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
168470ceb4f5SYOSHIFUJI Hideaki }
168570ceb4f5SYOSHIFUJI Hideaki #endif
168670ceb4f5SYOSHIFUJI Hideaki 
16871da177e4SLinus Torvalds struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
16881da177e4SLinus Torvalds {
16891da177e4SLinus Torvalds 	struct rt6_info *rt;
1690c71099acSThomas Graf 	struct fib6_table *table;
16911da177e4SLinus Torvalds 
1692c346dca1SYOSHIFUJI Hideaki 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1693c71099acSThomas Graf 	if (table == NULL)
1694c71099acSThomas Graf 		return NULL;
16951da177e4SLinus Torvalds 
1696c71099acSThomas Graf 	write_lock_bh(&table->tb6_lock);
16977cc48263SEric Dumazet 	for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
16981da177e4SLinus Torvalds 		if (dev == rt->rt6i_dev &&
1699045927ffSYOSHIFUJI Hideaki 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
17001da177e4SLinus Torvalds 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
17011da177e4SLinus Torvalds 			break;
17021da177e4SLinus Torvalds 	}
17031da177e4SLinus Torvalds 	if (rt)
17041da177e4SLinus Torvalds 		dst_hold(&rt->u.dst);
1705c71099acSThomas Graf 	write_unlock_bh(&table->tb6_lock);
17061da177e4SLinus Torvalds 	return rt;
17071da177e4SLinus Torvalds }
17081da177e4SLinus Torvalds 
17091da177e4SLinus Torvalds struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1710ebacaaa0SYOSHIFUJI Hideaki 				     struct net_device *dev,
1711ebacaaa0SYOSHIFUJI Hideaki 				     unsigned int pref)
17121da177e4SLinus Torvalds {
171386872cb5SThomas Graf 	struct fib6_config cfg = {
171486872cb5SThomas Graf 		.fc_table	= RT6_TABLE_DFLT,
1715238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
171686872cb5SThomas Graf 		.fc_ifindex	= dev->ifindex,
171786872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
171886872cb5SThomas Graf 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
17195578689aSDaniel Lezcano 		.fc_nlinfo.pid = 0,
17205578689aSDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
1721c346dca1SYOSHIFUJI Hideaki 		.fc_nlinfo.nl_net = dev_net(dev),
172286872cb5SThomas Graf 	};
17231da177e4SLinus Torvalds 
172486872cb5SThomas Graf 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
17251da177e4SLinus Torvalds 
172686872cb5SThomas Graf 	ip6_route_add(&cfg);
17271da177e4SLinus Torvalds 
17281da177e4SLinus Torvalds 	return rt6_get_dflt_router(gwaddr, dev);
17291da177e4SLinus Torvalds }
17301da177e4SLinus Torvalds 
17317b4da532SDaniel Lezcano void rt6_purge_dflt_routers(struct net *net)
17321da177e4SLinus Torvalds {
17331da177e4SLinus Torvalds 	struct rt6_info *rt;
1734c71099acSThomas Graf 	struct fib6_table *table;
1735c71099acSThomas Graf 
1736c71099acSThomas Graf 	/* NOTE: Keep consistent with rt6_get_dflt_router */
17377b4da532SDaniel Lezcano 	table = fib6_get_table(net, RT6_TABLE_DFLT);
1738c71099acSThomas Graf 	if (table == NULL)
1739c71099acSThomas Graf 		return;
17401da177e4SLinus Torvalds 
17411da177e4SLinus Torvalds restart:
1742c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
17437cc48263SEric Dumazet 	for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
17441da177e4SLinus Torvalds 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
17451da177e4SLinus Torvalds 			dst_hold(&rt->u.dst);
1746c71099acSThomas Graf 			read_unlock_bh(&table->tb6_lock);
1747e0a1ad73SThomas Graf 			ip6_del_rt(rt);
17481da177e4SLinus Torvalds 			goto restart;
17491da177e4SLinus Torvalds 		}
17501da177e4SLinus Torvalds 	}
1751c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
17521da177e4SLinus Torvalds }
17531da177e4SLinus Torvalds 
17545578689aSDaniel Lezcano static void rtmsg_to_fib6_config(struct net *net,
17555578689aSDaniel Lezcano 				 struct in6_rtmsg *rtmsg,
175686872cb5SThomas Graf 				 struct fib6_config *cfg)
175786872cb5SThomas Graf {
175886872cb5SThomas Graf 	memset(cfg, 0, sizeof(*cfg));
175986872cb5SThomas Graf 
176086872cb5SThomas Graf 	cfg->fc_table = RT6_TABLE_MAIN;
176186872cb5SThomas Graf 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
176286872cb5SThomas Graf 	cfg->fc_metric = rtmsg->rtmsg_metric;
176386872cb5SThomas Graf 	cfg->fc_expires = rtmsg->rtmsg_info;
176486872cb5SThomas Graf 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
176586872cb5SThomas Graf 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
176686872cb5SThomas Graf 	cfg->fc_flags = rtmsg->rtmsg_flags;
176786872cb5SThomas Graf 
17685578689aSDaniel Lezcano 	cfg->fc_nlinfo.nl_net = net;
1769f1243c2dSBenjamin Thery 
177086872cb5SThomas Graf 	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
177186872cb5SThomas Graf 	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
177286872cb5SThomas Graf 	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
177386872cb5SThomas Graf }
177486872cb5SThomas Graf 
17755578689aSDaniel Lezcano int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
17761da177e4SLinus Torvalds {
177786872cb5SThomas Graf 	struct fib6_config cfg;
17781da177e4SLinus Torvalds 	struct in6_rtmsg rtmsg;
17791da177e4SLinus Torvalds 	int err;
17801da177e4SLinus Torvalds 
17811da177e4SLinus Torvalds 	switch(cmd) {
17821da177e4SLinus Torvalds 	case SIOCADDRT:		/* Add a route */
17831da177e4SLinus Torvalds 	case SIOCDELRT:		/* Delete a route */
17841da177e4SLinus Torvalds 		if (!capable(CAP_NET_ADMIN))
17851da177e4SLinus Torvalds 			return -EPERM;
17861da177e4SLinus Torvalds 		err = copy_from_user(&rtmsg, arg,
17871da177e4SLinus Torvalds 				     sizeof(struct in6_rtmsg));
17881da177e4SLinus Torvalds 		if (err)
17891da177e4SLinus Torvalds 			return -EFAULT;
17901da177e4SLinus Torvalds 
17915578689aSDaniel Lezcano 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
179286872cb5SThomas Graf 
17931da177e4SLinus Torvalds 		rtnl_lock();
17941da177e4SLinus Torvalds 		switch (cmd) {
17951da177e4SLinus Torvalds 		case SIOCADDRT:
179686872cb5SThomas Graf 			err = ip6_route_add(&cfg);
17971da177e4SLinus Torvalds 			break;
17981da177e4SLinus Torvalds 		case SIOCDELRT:
179986872cb5SThomas Graf 			err = ip6_route_del(&cfg);
18001da177e4SLinus Torvalds 			break;
18011da177e4SLinus Torvalds 		default:
18021da177e4SLinus Torvalds 			err = -EINVAL;
18031da177e4SLinus Torvalds 		}
18041da177e4SLinus Torvalds 		rtnl_unlock();
18051da177e4SLinus Torvalds 
18061da177e4SLinus Torvalds 		return err;
18073ff50b79SStephen Hemminger 	}
18081da177e4SLinus Torvalds 
18091da177e4SLinus Torvalds 	return -EINVAL;
18101da177e4SLinus Torvalds }
18111da177e4SLinus Torvalds 
18121da177e4SLinus Torvalds /*
18131da177e4SLinus Torvalds  *	Drop the packet on the floor
18141da177e4SLinus Torvalds  */
18151da177e4SLinus Torvalds 
181650eb431dSIlpo Järvinen static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
18171da177e4SLinus Torvalds {
1818612f09e8SYOSHIFUJI Hideaki 	int type;
1819612f09e8SYOSHIFUJI Hideaki 	switch (ipstats_mib_noroutes) {
1820612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_INNOROUTES:
18210660e03fSArnaldo Carvalho de Melo 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1822612f09e8SYOSHIFUJI Hideaki 		if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1823a11d206dSYOSHIFUJI Hideaki 			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1824612f09e8SYOSHIFUJI Hideaki 			break;
1825612f09e8SYOSHIFUJI Hideaki 		}
1826612f09e8SYOSHIFUJI Hideaki 		/* FALLTHROUGH */
1827612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_OUTNOROUTES:
1828612f09e8SYOSHIFUJI Hideaki 		IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1829612f09e8SYOSHIFUJI Hideaki 		break;
1830612f09e8SYOSHIFUJI Hideaki 	}
18319ce8ade0SThomas Graf 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
18321da177e4SLinus Torvalds 	kfree_skb(skb);
18331da177e4SLinus Torvalds 	return 0;
18341da177e4SLinus Torvalds }
18351da177e4SLinus Torvalds 
18369ce8ade0SThomas Graf static int ip6_pkt_discard(struct sk_buff *skb)
18379ce8ade0SThomas Graf {
1838612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
18399ce8ade0SThomas Graf }
18409ce8ade0SThomas Graf 
184120380731SArnaldo Carvalho de Melo static int ip6_pkt_discard_out(struct sk_buff *skb)
18421da177e4SLinus Torvalds {
18431da177e4SLinus Torvalds 	skb->dev = skb->dst->dev;
1844612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
18451da177e4SLinus Torvalds }
18461da177e4SLinus Torvalds 
18476723ab54SDavid S. Miller #ifdef CONFIG_IPV6_MULTIPLE_TABLES
18486723ab54SDavid S. Miller 
18499ce8ade0SThomas Graf static int ip6_pkt_prohibit(struct sk_buff *skb)
18509ce8ade0SThomas Graf {
1851612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
18529ce8ade0SThomas Graf }
18539ce8ade0SThomas Graf 
18549ce8ade0SThomas Graf static int ip6_pkt_prohibit_out(struct sk_buff *skb)
18559ce8ade0SThomas Graf {
18569ce8ade0SThomas Graf 	skb->dev = skb->dst->dev;
1857612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
18589ce8ade0SThomas Graf }
18599ce8ade0SThomas Graf 
18606723ab54SDavid S. Miller #endif
18616723ab54SDavid S. Miller 
18621da177e4SLinus Torvalds /*
18631da177e4SLinus Torvalds  *	Allocate a dst for local (unicast / anycast) address.
18641da177e4SLinus Torvalds  */
18651da177e4SLinus Torvalds 
18661da177e4SLinus Torvalds struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
18671da177e4SLinus Torvalds 				    const struct in6_addr *addr,
18681da177e4SLinus Torvalds 				    int anycast)
18691da177e4SLinus Torvalds {
1870c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(idev->dev);
1871f2fc6a54SBenjamin Thery 	struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
18721da177e4SLinus Torvalds 
18731da177e4SLinus Torvalds 	if (rt == NULL)
18741da177e4SLinus Torvalds 		return ERR_PTR(-ENOMEM);
18751da177e4SLinus Torvalds 
18765578689aSDaniel Lezcano 	dev_hold(net->loopback_dev);
18771da177e4SLinus Torvalds 	in6_dev_hold(idev);
18781da177e4SLinus Torvalds 
18791da177e4SLinus Torvalds 	rt->u.dst.flags = DST_HOST;
18801da177e4SLinus Torvalds 	rt->u.dst.input = ip6_input;
18811da177e4SLinus Torvalds 	rt->u.dst.output = ip6_output;
18825578689aSDaniel Lezcano 	rt->rt6i_dev = net->loopback_dev;
18831da177e4SLinus Torvalds 	rt->rt6i_idev = idev;
18841da177e4SLinus Torvalds 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
18855578689aSDaniel Lezcano 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
18861da177e4SLinus Torvalds 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
18871da177e4SLinus Torvalds 	rt->u.dst.obsolete = -1;
18881da177e4SLinus Torvalds 
18891da177e4SLinus Torvalds 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
189058c4fb86SYOSHIFUJI Hideaki 	if (anycast)
189158c4fb86SYOSHIFUJI Hideaki 		rt->rt6i_flags |= RTF_ANYCAST;
189258c4fb86SYOSHIFUJI Hideaki 	else
18931da177e4SLinus Torvalds 		rt->rt6i_flags |= RTF_LOCAL;
18941da177e4SLinus Torvalds 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
18951da177e4SLinus Torvalds 	if (rt->rt6i_nexthop == NULL) {
189640aa7b90SYOSHIFUJI Hideaki 		dst_free(&rt->u.dst);
18971da177e4SLinus Torvalds 		return ERR_PTR(-ENOMEM);
18981da177e4SLinus Torvalds 	}
18991da177e4SLinus Torvalds 
19001da177e4SLinus Torvalds 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
19011da177e4SLinus Torvalds 	rt->rt6i_dst.plen = 128;
19025578689aSDaniel Lezcano 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
19031da177e4SLinus Torvalds 
19041da177e4SLinus Torvalds 	atomic_set(&rt->u.dst.__refcnt, 1);
19051da177e4SLinus Torvalds 
19061da177e4SLinus Torvalds 	return rt;
19071da177e4SLinus Torvalds }
19081da177e4SLinus Torvalds 
/*
 * Argument bundle that rt6_ifdown() passes to fib6_ifdown() through the
 * opaque pointer of fib6_clean_all():
 *   dev - device whose routes are to be removed; NULL matches every device.
 *   net - namespace being cleaned; its ip6_null_entry is never removed.
 */
19098ed67789SDaniel Lezcano struct arg_dev_net {
19108ed67789SDaniel Lezcano 	struct net_device *dev;
19118ed67789SDaniel Lezcano 	struct net *net;
19128ed67789SDaniel Lezcano };
19138ed67789SDaniel Lezcano 
19141da177e4SLinus Torvalds static int fib6_ifdown(struct rt6_info *rt, void *arg)
19151da177e4SLinus Torvalds {
19168ed67789SDaniel Lezcano 	struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
19178ed67789SDaniel Lezcano 	struct net *net = ((struct arg_dev_net *)arg)->net;
19188ed67789SDaniel Lezcano 
19198ed67789SDaniel Lezcano 	if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
19208ed67789SDaniel Lezcano 	    rt != net->ipv6.ip6_null_entry) {
19211da177e4SLinus Torvalds 		RT6_TRACE("deleted by ifdown %p\n", rt);
19221da177e4SLinus Torvalds 		return -1;
19231da177e4SLinus Torvalds 	}
19241da177e4SLinus Torvalds 	return 0;
19251da177e4SLinus Torvalds }
19261da177e4SLinus Torvalds 
1927f3db4851SDaniel Lezcano void rt6_ifdown(struct net *net, struct net_device *dev)
19281da177e4SLinus Torvalds {
19298ed67789SDaniel Lezcano 	struct arg_dev_net adn = {
19308ed67789SDaniel Lezcano 		.dev = dev,
19318ed67789SDaniel Lezcano 		.net = net,
19328ed67789SDaniel Lezcano 	};
19338ed67789SDaniel Lezcano 
19348ed67789SDaniel Lezcano 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
19351da177e4SLinus Torvalds }
19361da177e4SLinus Torvalds 
/*
 * Argument bundle that rt6_mtu_change() passes to rt6_mtu_change_route()
 * through the opaque pointer of fib6_clean_all():
 *   dev - device whose MTU changed.
 *   mtu - the new MTU value to propagate into matching routes.
 */
19371da177e4SLinus Torvalds struct rt6_mtu_change_arg
19381da177e4SLinus Torvalds {
19391da177e4SLinus Torvalds 	struct net_device *dev;
19401da177e4SLinus Torvalds 	unsigned mtu;
19411da177e4SLinus Torvalds };
19421da177e4SLinus Torvalds 
19431da177e4SLinus Torvalds static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
19441da177e4SLinus Torvalds {
19451da177e4SLinus Torvalds 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
19461da177e4SLinus Torvalds 	struct inet6_dev *idev;
1947c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(arg->dev);
19481da177e4SLinus Torvalds 
19491da177e4SLinus Torvalds 	/* In IPv6 pmtu discovery is not optional,
19501da177e4SLinus Torvalds 	   so that RTAX_MTU lock cannot disable it.
19511da177e4SLinus Torvalds 	   We still use this lock to block changes
19521da177e4SLinus Torvalds 	   caused by addrconf/ndisc.
19531da177e4SLinus Torvalds 	*/
19541da177e4SLinus Torvalds 
19551da177e4SLinus Torvalds 	idev = __in6_dev_get(arg->dev);
19561da177e4SLinus Torvalds 	if (idev == NULL)
19571da177e4SLinus Torvalds 		return 0;
19581da177e4SLinus Torvalds 
19591da177e4SLinus Torvalds 	/* For administrative MTU increase, there is no way to discover
19601da177e4SLinus Torvalds 	   IPv6 PMTU increase, so PMTU increase should be updated here.
19611da177e4SLinus Torvalds 	   Since RFC 1981 doesn't include administrative MTU increase
19621da177e4SLinus Torvalds 	   update PMTU increase is a MUST. (i.e. jumbo frame)
19631da177e4SLinus Torvalds 	 */
19641da177e4SLinus Torvalds 	/*
19651da177e4SLinus Torvalds 	   If new MTU is less than route PMTU, this new MTU will be the
19661da177e4SLinus Torvalds 	   lowest MTU in the path, update the route PMTU to reflect PMTU
19671da177e4SLinus Torvalds 	   decreases; if new MTU is greater than route PMTU, and the
19681da177e4SLinus Torvalds 	   old MTU is the lowest MTU in the path, update the route PMTU
19691da177e4SLinus Torvalds 	   to reflect the increase. In this case if the other nodes' MTU
19701da177e4SLinus Torvalds 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
19711da177e4SLinus Torvalds 	   PMTU discouvery.
19721da177e4SLinus Torvalds 	 */
19731da177e4SLinus Torvalds 	if (rt->rt6i_dev == arg->dev &&
19741da177e4SLinus Torvalds 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
197523717795SJim Paris 	    (dst_mtu(&rt->u.dst) >= arg->mtu ||
19761da177e4SLinus Torvalds 	     (dst_mtu(&rt->u.dst) < arg->mtu &&
1977566cfd8fSSimon Arlott 	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
19781da177e4SLinus Torvalds 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
19795578689aSDaniel Lezcano 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
1980566cfd8fSSimon Arlott 	}
19811da177e4SLinus Torvalds 	return 0;
19821da177e4SLinus Torvalds }
19831da177e4SLinus Torvalds 
19841da177e4SLinus Torvalds void rt6_mtu_change(struct net_device *dev, unsigned mtu)
19851da177e4SLinus Torvalds {
1986c71099acSThomas Graf 	struct rt6_mtu_change_arg arg = {
1987c71099acSThomas Graf 		.dev = dev,
1988c71099acSThomas Graf 		.mtu = mtu,
1989c71099acSThomas Graf 	};
19901da177e4SLinus Torvalds 
1991c346dca1SYOSHIFUJI Hideaki 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
19921da177e4SLinus Torvalds }
19931da177e4SLinus Torvalds 
/*
 * Netlink attribute policy handed to nlmsg_parse() by the IPv6 route
 * message handlers in this file.  RTA_GATEWAY is given a length of one
 * IPv6 address; RTA_OIF, RTA_IIF and RTA_PRIORITY are 32-bit values;
 * RTA_METRICS is a nested attribute.  Attributes not listed here (such as
 * RTA_DST and RTA_SRC) get no policy entry, so their users check lengths
 * themselves.
 */
1994ef7c79edSPatrick McHardy static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
19955176f91eSThomas Graf 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
199686872cb5SThomas Graf 	[RTA_OIF]               = { .type = NLA_U32 },
1997ab364a6fSThomas Graf 	[RTA_IIF]		= { .type = NLA_U32 },
199886872cb5SThomas Graf 	[RTA_PRIORITY]          = { .type = NLA_U32 },
199986872cb5SThomas Graf 	[RTA_METRICS]           = { .type = NLA_NESTED },
200086872cb5SThomas Graf };
200186872cb5SThomas Graf 
200286872cb5SThomas Graf static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
200386872cb5SThomas Graf 			      struct fib6_config *cfg)
20041da177e4SLinus Torvalds {
200586872cb5SThomas Graf 	struct rtmsg *rtm;
200686872cb5SThomas Graf 	struct nlattr *tb[RTA_MAX+1];
200786872cb5SThomas Graf 	int err;
20081da177e4SLinus Torvalds 
200986872cb5SThomas Graf 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
201086872cb5SThomas Graf 	if (err < 0)
201186872cb5SThomas Graf 		goto errout;
20121da177e4SLinus Torvalds 
201386872cb5SThomas Graf 	err = -EINVAL;
201486872cb5SThomas Graf 	rtm = nlmsg_data(nlh);
201586872cb5SThomas Graf 	memset(cfg, 0, sizeof(*cfg));
201686872cb5SThomas Graf 
201786872cb5SThomas Graf 	cfg->fc_table = rtm->rtm_table;
201886872cb5SThomas Graf 	cfg->fc_dst_len = rtm->rtm_dst_len;
201986872cb5SThomas Graf 	cfg->fc_src_len = rtm->rtm_src_len;
202086872cb5SThomas Graf 	cfg->fc_flags = RTF_UP;
202186872cb5SThomas Graf 	cfg->fc_protocol = rtm->rtm_protocol;
202286872cb5SThomas Graf 
202386872cb5SThomas Graf 	if (rtm->rtm_type == RTN_UNREACHABLE)
202486872cb5SThomas Graf 		cfg->fc_flags |= RTF_REJECT;
202586872cb5SThomas Graf 
202686872cb5SThomas Graf 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
202786872cb5SThomas Graf 	cfg->fc_nlinfo.nlh = nlh;
20283b1e0a65SYOSHIFUJI Hideaki 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
202986872cb5SThomas Graf 
203086872cb5SThomas Graf 	if (tb[RTA_GATEWAY]) {
203186872cb5SThomas Graf 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
203286872cb5SThomas Graf 		cfg->fc_flags |= RTF_GATEWAY;
20331da177e4SLinus Torvalds 	}
203486872cb5SThomas Graf 
203586872cb5SThomas Graf 	if (tb[RTA_DST]) {
203686872cb5SThomas Graf 		int plen = (rtm->rtm_dst_len + 7) >> 3;
203786872cb5SThomas Graf 
203886872cb5SThomas Graf 		if (nla_len(tb[RTA_DST]) < plen)
203986872cb5SThomas Graf 			goto errout;
204086872cb5SThomas Graf 
204186872cb5SThomas Graf 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
20421da177e4SLinus Torvalds 	}
204386872cb5SThomas Graf 
204486872cb5SThomas Graf 	if (tb[RTA_SRC]) {
204586872cb5SThomas Graf 		int plen = (rtm->rtm_src_len + 7) >> 3;
204686872cb5SThomas Graf 
204786872cb5SThomas Graf 		if (nla_len(tb[RTA_SRC]) < plen)
204886872cb5SThomas Graf 			goto errout;
204986872cb5SThomas Graf 
205086872cb5SThomas Graf 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
20511da177e4SLinus Torvalds 	}
205286872cb5SThomas Graf 
205386872cb5SThomas Graf 	if (tb[RTA_OIF])
205486872cb5SThomas Graf 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
205586872cb5SThomas Graf 
205686872cb5SThomas Graf 	if (tb[RTA_PRIORITY])
205786872cb5SThomas Graf 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
205886872cb5SThomas Graf 
205986872cb5SThomas Graf 	if (tb[RTA_METRICS]) {
206086872cb5SThomas Graf 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
206186872cb5SThomas Graf 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
20621da177e4SLinus Torvalds 	}
206386872cb5SThomas Graf 
206486872cb5SThomas Graf 	if (tb[RTA_TABLE])
206586872cb5SThomas Graf 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
206686872cb5SThomas Graf 
206786872cb5SThomas Graf 	err = 0;
206886872cb5SThomas Graf errout:
206986872cb5SThomas Graf 	return err;
20701da177e4SLinus Torvalds }
20711da177e4SLinus Torvalds 
2072c127ea2cSThomas Graf static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
20731da177e4SLinus Torvalds {
207486872cb5SThomas Graf 	struct fib6_config cfg;
207586872cb5SThomas Graf 	int err;
20761da177e4SLinus Torvalds 
207786872cb5SThomas Graf 	err = rtm_to_fib6_config(skb, nlh, &cfg);
207886872cb5SThomas Graf 	if (err < 0)
207986872cb5SThomas Graf 		return err;
208086872cb5SThomas Graf 
208186872cb5SThomas Graf 	return ip6_route_del(&cfg);
20821da177e4SLinus Torvalds }
20831da177e4SLinus Torvalds 
2084c127ea2cSThomas Graf static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
20851da177e4SLinus Torvalds {
208686872cb5SThomas Graf 	struct fib6_config cfg;
208786872cb5SThomas Graf 	int err;
20881da177e4SLinus Torvalds 
208986872cb5SThomas Graf 	err = rtm_to_fib6_config(skb, nlh, &cfg);
209086872cb5SThomas Graf 	if (err < 0)
209186872cb5SThomas Graf 		return err;
209286872cb5SThomas Graf 
209386872cb5SThomas Graf 	return ip6_route_add(&cfg);
20941da177e4SLinus Torvalds }
20951da177e4SLinus Torvalds 
2096339bf98fSThomas Graf static inline size_t rt6_nlmsg_size(void)
2097339bf98fSThomas Graf {
2098339bf98fSThomas Graf 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2099339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_SRC */
2100339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_DST */
2101339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_GATEWAY */
2102339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_PREFSRC */
2103339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_TABLE */
2104339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_IIF */
2105339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_OIF */
2106339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_PRIORITY */
21076a2b9ce0SNoriaki TAKAMIYA 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2108339bf98fSThomas Graf 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2109339bf98fSThomas Graf }
2110339bf98fSThomas Graf 
21111da177e4SLinus Torvalds static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
21120d51aa80SJamal Hadi Salim 			 struct in6_addr *dst, struct in6_addr *src,
21130d51aa80SJamal Hadi Salim 			 int iif, int type, u32 pid, u32 seq,
21147bc570c8SYOSHIFUJI Hideaki 			 int prefix, int nowait, unsigned int flags)
21151da177e4SLinus Torvalds {
21161da177e4SLinus Torvalds 	struct rtmsg *rtm;
21171da177e4SLinus Torvalds 	struct nlmsghdr *nlh;
2118e3703b3dSThomas Graf 	long expires;
21199e762a4aSPatrick McHardy 	u32 table;
21201da177e4SLinus Torvalds 
21211da177e4SLinus Torvalds 	if (prefix) {	/* user wants prefix routes only */
21221da177e4SLinus Torvalds 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
21231da177e4SLinus Torvalds 			/* success since this is not a prefix route */
21241da177e4SLinus Torvalds 			return 1;
21251da177e4SLinus Torvalds 		}
21261da177e4SLinus Torvalds 	}
21271da177e4SLinus Torvalds 
21282d7202bfSThomas Graf 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
21292d7202bfSThomas Graf 	if (nlh == NULL)
213026932566SPatrick McHardy 		return -EMSGSIZE;
21312d7202bfSThomas Graf 
21322d7202bfSThomas Graf 	rtm = nlmsg_data(nlh);
21331da177e4SLinus Torvalds 	rtm->rtm_family = AF_INET6;
21341da177e4SLinus Torvalds 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
21351da177e4SLinus Torvalds 	rtm->rtm_src_len = rt->rt6i_src.plen;
21361da177e4SLinus Torvalds 	rtm->rtm_tos = 0;
2137c71099acSThomas Graf 	if (rt->rt6i_table)
21389e762a4aSPatrick McHardy 		table = rt->rt6i_table->tb6_id;
2139c71099acSThomas Graf 	else
21409e762a4aSPatrick McHardy 		table = RT6_TABLE_UNSPEC;
21419e762a4aSPatrick McHardy 	rtm->rtm_table = table;
21422d7202bfSThomas Graf 	NLA_PUT_U32(skb, RTA_TABLE, table);
21431da177e4SLinus Torvalds 	if (rt->rt6i_flags&RTF_REJECT)
21441da177e4SLinus Torvalds 		rtm->rtm_type = RTN_UNREACHABLE;
21451da177e4SLinus Torvalds 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
21461da177e4SLinus Torvalds 		rtm->rtm_type = RTN_LOCAL;
21471da177e4SLinus Torvalds 	else
21481da177e4SLinus Torvalds 		rtm->rtm_type = RTN_UNICAST;
21491da177e4SLinus Torvalds 	rtm->rtm_flags = 0;
21501da177e4SLinus Torvalds 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
21511da177e4SLinus Torvalds 	rtm->rtm_protocol = rt->rt6i_protocol;
21521da177e4SLinus Torvalds 	if (rt->rt6i_flags&RTF_DYNAMIC)
21531da177e4SLinus Torvalds 		rtm->rtm_protocol = RTPROT_REDIRECT;
21541da177e4SLinus Torvalds 	else if (rt->rt6i_flags & RTF_ADDRCONF)
21551da177e4SLinus Torvalds 		rtm->rtm_protocol = RTPROT_KERNEL;
21561da177e4SLinus Torvalds 	else if (rt->rt6i_flags&RTF_DEFAULT)
21571da177e4SLinus Torvalds 		rtm->rtm_protocol = RTPROT_RA;
21581da177e4SLinus Torvalds 
21591da177e4SLinus Torvalds 	if (rt->rt6i_flags&RTF_CACHE)
21601da177e4SLinus Torvalds 		rtm->rtm_flags |= RTM_F_CLONED;
21611da177e4SLinus Torvalds 
21621da177e4SLinus Torvalds 	if (dst) {
21632d7202bfSThomas Graf 		NLA_PUT(skb, RTA_DST, 16, dst);
21641da177e4SLinus Torvalds 		rtm->rtm_dst_len = 128;
21651da177e4SLinus Torvalds 	} else if (rtm->rtm_dst_len)
21662d7202bfSThomas Graf 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
21671da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
21681da177e4SLinus Torvalds 	if (src) {
21692d7202bfSThomas Graf 		NLA_PUT(skb, RTA_SRC, 16, src);
21701da177e4SLinus Torvalds 		rtm->rtm_src_len = 128;
21711da177e4SLinus Torvalds 	} else if (rtm->rtm_src_len)
21722d7202bfSThomas Graf 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
21731da177e4SLinus Torvalds #endif
21747bc570c8SYOSHIFUJI Hideaki 	if (iif) {
21757bc570c8SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_MROUTE
21767bc570c8SYOSHIFUJI Hideaki 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
21777bc570c8SYOSHIFUJI Hideaki 			int err = ip6mr_get_route(skb, rtm, nowait);
21787bc570c8SYOSHIFUJI Hideaki 			if (err <= 0) {
21797bc570c8SYOSHIFUJI Hideaki 				if (!nowait) {
21807bc570c8SYOSHIFUJI Hideaki 					if (err == 0)
21817bc570c8SYOSHIFUJI Hideaki 						return 0;
21827bc570c8SYOSHIFUJI Hideaki 					goto nla_put_failure;
21837bc570c8SYOSHIFUJI Hideaki 				} else {
21847bc570c8SYOSHIFUJI Hideaki 					if (err == -EMSGSIZE)
21857bc570c8SYOSHIFUJI Hideaki 						goto nla_put_failure;
21867bc570c8SYOSHIFUJI Hideaki 				}
21877bc570c8SYOSHIFUJI Hideaki 			}
21887bc570c8SYOSHIFUJI Hideaki 		} else
21897bc570c8SYOSHIFUJI Hideaki #endif
21902d7202bfSThomas Graf 			NLA_PUT_U32(skb, RTA_IIF, iif);
21917bc570c8SYOSHIFUJI Hideaki 	} else if (dst) {
21921da177e4SLinus Torvalds 		struct in6_addr saddr_buf;
21935e5f3f0fSYOSHIFUJI Hideaki 		if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
21947cbca67cSYOSHIFUJI Hideaki 				       dst, 0, &saddr_buf) == 0)
21952d7202bfSThomas Graf 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
21961da177e4SLinus Torvalds 	}
21972d7202bfSThomas Graf 
21981da177e4SLinus Torvalds 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
21992d7202bfSThomas Graf 		goto nla_put_failure;
22002d7202bfSThomas Graf 
22011da177e4SLinus Torvalds 	if (rt->u.dst.neighbour)
22022d7202bfSThomas Graf 		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
22032d7202bfSThomas Graf 
22041da177e4SLinus Torvalds 	if (rt->u.dst.dev)
22052d7202bfSThomas Graf 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
22062d7202bfSThomas Graf 
22072d7202bfSThomas Graf 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2208e3703b3dSThomas Graf 
220936e3deaeSYOSHIFUJI Hideaki 	if (!(rt->rt6i_flags & RTF_EXPIRES))
221036e3deaeSYOSHIFUJI Hideaki 		expires = 0;
221136e3deaeSYOSHIFUJI Hideaki 	else if (rt->rt6i_expires - jiffies < INT_MAX)
221236e3deaeSYOSHIFUJI Hideaki 		expires = rt->rt6i_expires - jiffies;
221336e3deaeSYOSHIFUJI Hideaki 	else
221436e3deaeSYOSHIFUJI Hideaki 		expires = INT_MAX;
221569cdf8f9SYOSHIFUJI Hideaki 
2216e3703b3dSThomas Graf 	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2217e3703b3dSThomas Graf 			       expires, rt->u.dst.error) < 0)
2218e3703b3dSThomas Graf 		goto nla_put_failure;
22191da177e4SLinus Torvalds 
22202d7202bfSThomas Graf 	return nlmsg_end(skb, nlh);
22212d7202bfSThomas Graf 
22222d7202bfSThomas Graf nla_put_failure:
222326932566SPatrick McHardy 	nlmsg_cancel(skb, nlh);
222426932566SPatrick McHardy 	return -EMSGSIZE;
22251da177e4SLinus Torvalds }
22261da177e4SLinus Torvalds 
22271b43af54SPatrick McHardy int rt6_dump_route(struct rt6_info *rt, void *p_arg)
22281da177e4SLinus Torvalds {
22291da177e4SLinus Torvalds 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
22301da177e4SLinus Torvalds 	int prefix;
22311da177e4SLinus Torvalds 
22322d7202bfSThomas Graf 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
22332d7202bfSThomas Graf 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
22341da177e4SLinus Torvalds 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
22351da177e4SLinus Torvalds 	} else
22361da177e4SLinus Torvalds 		prefix = 0;
22371da177e4SLinus Torvalds 
22381da177e4SLinus Torvalds 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
22391da177e4SLinus Torvalds 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
22407bc570c8SYOSHIFUJI Hideaki 		     prefix, 0, NLM_F_MULTI);
22411da177e4SLinus Torvalds }
22421da177e4SLinus Torvalds 
2243c127ea2cSThomas Graf static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
22441da177e4SLinus Torvalds {
22453b1e0a65SYOSHIFUJI Hideaki 	struct net *net = sock_net(in_skb->sk);
2246ab364a6fSThomas Graf 	struct nlattr *tb[RTA_MAX+1];
22471da177e4SLinus Torvalds 	struct rt6_info *rt;
2248ab364a6fSThomas Graf 	struct sk_buff *skb;
2249ab364a6fSThomas Graf 	struct rtmsg *rtm;
2250ab364a6fSThomas Graf 	struct flowi fl;
2251ab364a6fSThomas Graf 	int err, iif = 0;
2252ab364a6fSThomas Graf 
2253ab364a6fSThomas Graf 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2254ab364a6fSThomas Graf 	if (err < 0)
2255ab364a6fSThomas Graf 		goto errout;
2256ab364a6fSThomas Graf 
2257ab364a6fSThomas Graf 	err = -EINVAL;
2258ab364a6fSThomas Graf 	memset(&fl, 0, sizeof(fl));
2259ab364a6fSThomas Graf 
2260ab364a6fSThomas Graf 	if (tb[RTA_SRC]) {
2261ab364a6fSThomas Graf 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2262ab364a6fSThomas Graf 			goto errout;
2263ab364a6fSThomas Graf 
2264ab364a6fSThomas Graf 		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2265ab364a6fSThomas Graf 	}
2266ab364a6fSThomas Graf 
2267ab364a6fSThomas Graf 	if (tb[RTA_DST]) {
2268ab364a6fSThomas Graf 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2269ab364a6fSThomas Graf 			goto errout;
2270ab364a6fSThomas Graf 
2271ab364a6fSThomas Graf 		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2272ab364a6fSThomas Graf 	}
2273ab364a6fSThomas Graf 
2274ab364a6fSThomas Graf 	if (tb[RTA_IIF])
2275ab364a6fSThomas Graf 		iif = nla_get_u32(tb[RTA_IIF]);
2276ab364a6fSThomas Graf 
2277ab364a6fSThomas Graf 	if (tb[RTA_OIF])
2278ab364a6fSThomas Graf 		fl.oif = nla_get_u32(tb[RTA_OIF]);
2279ab364a6fSThomas Graf 
2280ab364a6fSThomas Graf 	if (iif) {
2281ab364a6fSThomas Graf 		struct net_device *dev;
22825578689aSDaniel Lezcano 		dev = __dev_get_by_index(net, iif);
2283ab364a6fSThomas Graf 		if (!dev) {
2284ab364a6fSThomas Graf 			err = -ENODEV;
2285ab364a6fSThomas Graf 			goto errout;
2286ab364a6fSThomas Graf 		}
2287ab364a6fSThomas Graf 	}
22881da177e4SLinus Torvalds 
22891da177e4SLinus Torvalds 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2290ab364a6fSThomas Graf 	if (skb == NULL) {
2291ab364a6fSThomas Graf 		err = -ENOBUFS;
2292ab364a6fSThomas Graf 		goto errout;
2293ab364a6fSThomas Graf 	}
22941da177e4SLinus Torvalds 
22951da177e4SLinus Torvalds 	/* Reserve room for dummy headers, this skb can pass
22961da177e4SLinus Torvalds 	   through good chunk of routing engine.
22971da177e4SLinus Torvalds 	 */
2298459a98edSArnaldo Carvalho de Melo 	skb_reset_mac_header(skb);
22991da177e4SLinus Torvalds 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
23001da177e4SLinus Torvalds 
23018a3edd80SDaniel Lezcano 	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
23021da177e4SLinus Torvalds 	skb->dst = &rt->u.dst;
23031da177e4SLinus Torvalds 
2304ab364a6fSThomas Graf 	err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
23051da177e4SLinus Torvalds 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
23067bc570c8SYOSHIFUJI Hideaki 			    nlh->nlmsg_seq, 0, 0, 0);
23071da177e4SLinus Torvalds 	if (err < 0) {
2308ab364a6fSThomas Graf 		kfree_skb(skb);
2309ab364a6fSThomas Graf 		goto errout;
23101da177e4SLinus Torvalds 	}
23111da177e4SLinus Torvalds 
23125578689aSDaniel Lezcano 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2313ab364a6fSThomas Graf errout:
23141da177e4SLinus Torvalds 	return err;
23151da177e4SLinus Torvalds }
23161da177e4SLinus Torvalds 
231786872cb5SThomas Graf void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
23181da177e4SLinus Torvalds {
23191da177e4SLinus Torvalds 	struct sk_buff *skb;
23205578689aSDaniel Lezcano 	struct net *net = info->nl_net;
2321528c4cebSDenis V. Lunev 	u32 seq;
2322528c4cebSDenis V. Lunev 	int err;
23230d51aa80SJamal Hadi Salim 
2324528c4cebSDenis V. Lunev 	err = -ENOBUFS;
2325528c4cebSDenis V. Lunev 	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
232686872cb5SThomas Graf 
2327339bf98fSThomas Graf 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
232821713ebcSThomas Graf 	if (skb == NULL)
232921713ebcSThomas Graf 		goto errout;
23301da177e4SLinus Torvalds 
2331528c4cebSDenis V. Lunev 	err = rt6_fill_node(skb, rt, NULL, NULL, 0,
23327bc570c8SYOSHIFUJI Hideaki 				event, info->pid, seq, 0, 0, 0);
233326932566SPatrick McHardy 	if (err < 0) {
233426932566SPatrick McHardy 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
233526932566SPatrick McHardy 		WARN_ON(err == -EMSGSIZE);
233626932566SPatrick McHardy 		kfree_skb(skb);
233726932566SPatrick McHardy 		goto errout;
233826932566SPatrick McHardy 	}
23395578689aSDaniel Lezcano 	err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
23405578689aSDaniel Lezcano 			  info->nlh, gfp_any());
234121713ebcSThomas Graf errout:
234221713ebcSThomas Graf 	if (err < 0)
23435578689aSDaniel Lezcano 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
23441da177e4SLinus Torvalds }
23451da177e4SLinus Torvalds 
23468ed67789SDaniel Lezcano static int ip6_route_dev_notify(struct notifier_block *this,
23478ed67789SDaniel Lezcano 				unsigned long event, void *data)
23488ed67789SDaniel Lezcano {
23498ed67789SDaniel Lezcano 	struct net_device *dev = (struct net_device *)data;
2350c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
23518ed67789SDaniel Lezcano 
23528ed67789SDaniel Lezcano 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
23538ed67789SDaniel Lezcano 		net->ipv6.ip6_null_entry->u.dst.dev = dev;
23548ed67789SDaniel Lezcano 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
23558ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
23568ed67789SDaniel Lezcano 		net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
23578ed67789SDaniel Lezcano 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
23588ed67789SDaniel Lezcano 		net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
23598ed67789SDaniel Lezcano 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
23608ed67789SDaniel Lezcano #endif
23618ed67789SDaniel Lezcano 	}
23628ed67789SDaniel Lezcano 
23638ed67789SDaniel Lezcano 	return NOTIFY_OK;
23648ed67789SDaniel Lezcano }
23658ed67789SDaniel Lezcano 
23661da177e4SLinus Torvalds /*
23671da177e4SLinus Torvalds  *	/proc
23681da177e4SLinus Torvalds  */
23691da177e4SLinus Torvalds 
23701da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
23711da177e4SLinus Torvalds 
/*
 * Sum of field widths for one route line.
 * NOTE(review): not referenced anywhere in the visible part of this file;
 * presumably a leftover from before the seq_file conversion - confirm
 * before removing.
 */
23721da177e4SLinus Torvalds #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
23731da177e4SLinus Torvalds 
/*
 * Buffer bookkeeping for a /proc read.
 * NOTE(review): no code in the visible part of this file uses this
 * structure (rt6_info_route() takes a struct seq_file instead); presumably
 * a leftover - confirm before removing.
 */
23741da177e4SLinus Torvalds struct rt6_proc_arg
23751da177e4SLinus Torvalds {
23761da177e4SLinus Torvalds 	char *buffer;
23771da177e4SLinus Torvalds 	int offset;
23781da177e4SLinus Torvalds 	int length;
23791da177e4SLinus Torvalds 	int skip;
23801da177e4SLinus Torvalds 	int len;
23811da177e4SLinus Torvalds };
23821da177e4SLinus Torvalds 
23831da177e4SLinus Torvalds static int rt6_info_route(struct rt6_info *rt, void *p_arg)
23841da177e4SLinus Torvalds {
238533120b30SAlexey Dobriyan 	struct seq_file *m = p_arg;
23861da177e4SLinus Torvalds 
238733120b30SAlexey Dobriyan 	seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
23881da177e4SLinus Torvalds 		   rt->rt6i_dst.plen);
23891da177e4SLinus Torvalds 
23901da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
239133120b30SAlexey Dobriyan 	seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
23921da177e4SLinus Torvalds 		   rt->rt6i_src.plen);
23931da177e4SLinus Torvalds #else
239433120b30SAlexey Dobriyan 	seq_puts(m, "00000000000000000000000000000000 00 ");
23951da177e4SLinus Torvalds #endif
23961da177e4SLinus Torvalds 
23971da177e4SLinus Torvalds 	if (rt->rt6i_nexthop) {
239833120b30SAlexey Dobriyan 		seq_printf(m, NIP6_SEQFMT,
239933e93c96SYOSHIFUJI Hideaki 			   NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
24001da177e4SLinus Torvalds 	} else {
240133120b30SAlexey Dobriyan 		seq_puts(m, "00000000000000000000000000000000");
24021da177e4SLinus Torvalds 	}
240333120b30SAlexey Dobriyan 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
24041da177e4SLinus Torvalds 		   rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
24051da177e4SLinus Torvalds 		   rt->u.dst.__use, rt->rt6i_flags,
24061da177e4SLinus Torvalds 		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
24071da177e4SLinus Torvalds 	return 0;
24081da177e4SLinus Torvalds }
24091da177e4SLinus Torvalds 
241033120b30SAlexey Dobriyan static int ipv6_route_show(struct seq_file *m, void *v)
24111da177e4SLinus Torvalds {
2412f3db4851SDaniel Lezcano 	struct net *net = (struct net *)m->private;
2413f3db4851SDaniel Lezcano 	fib6_clean_all(net, rt6_info_route, 0, m);
241433120b30SAlexey Dobriyan 	return 0;
24151da177e4SLinus Torvalds }
24161da177e4SLinus Torvalds 
241733120b30SAlexey Dobriyan static int ipv6_route_open(struct inode *inode, struct file *file)
241833120b30SAlexey Dobriyan {
2419a2333525SPavel Emelyanov 	int err;
2420f3db4851SDaniel Lezcano 	struct net *net = get_proc_net(inode);
2421f3db4851SDaniel Lezcano 	if (!net)
2422f3db4851SDaniel Lezcano 		return -ENXIO;
2423a2333525SPavel Emelyanov 
2424a2333525SPavel Emelyanov 	err = single_open(file, ipv6_route_show, net);
2425a2333525SPavel Emelyanov 	if (err < 0) {
2426a2333525SPavel Emelyanov 		put_net(net);
2427a2333525SPavel Emelyanov 		return err;
2428a2333525SPavel Emelyanov 	}
2429a2333525SPavel Emelyanov 
2430a2333525SPavel Emelyanov 	return 0;
2431f3db4851SDaniel Lezcano }
2432f3db4851SDaniel Lezcano 
2433f3db4851SDaniel Lezcano static int ipv6_route_release(struct inode *inode, struct file *file)
2434f3db4851SDaniel Lezcano {
2435f3db4851SDaniel Lezcano 	struct seq_file *seq = file->private_data;
2436f3db4851SDaniel Lezcano 	struct net *net = seq->private;
2437f3db4851SDaniel Lezcano 	put_net(net);
2438f3db4851SDaniel Lezcano 	return single_release(inode, file);
243933120b30SAlexey Dobriyan }
244033120b30SAlexey Dobriyan 
/*
 * File operations for the route listing: a single-shot seq_file whose
 * open/release pair (ipv6_route_open / ipv6_route_release) also takes and
 * drops the network namespace.
 */
244133120b30SAlexey Dobriyan static const struct file_operations ipv6_route_proc_fops = {
244233120b30SAlexey Dobriyan 	.owner		= THIS_MODULE,
244333120b30SAlexey Dobriyan 	.open		= ipv6_route_open,
244433120b30SAlexey Dobriyan 	.read		= seq_read,
244533120b30SAlexey Dobriyan 	.llseek		= seq_lseek,
2446f3db4851SDaniel Lezcano 	.release	= ipv6_route_release,
244733120b30SAlexey Dobriyan };
244833120b30SAlexey Dobriyan 
24491da177e4SLinus Torvalds static int rt6_stats_seq_show(struct seq_file *seq, void *v)
24501da177e4SLinus Torvalds {
245169ddb805SDaniel Lezcano 	struct net *net = (struct net *)seq->private;
24521da177e4SLinus Torvalds 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
245369ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_nodes,
245469ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_route_nodes,
245569ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_alloc,
245669ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_entries,
245769ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_cache,
2458f2fc6a54SBenjamin Thery 		   atomic_read(&net->ipv6.ip6_dst_ops->entries),
245969ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_discarded_routes);
24601da177e4SLinus Torvalds 
24611da177e4SLinus Torvalds 	return 0;
24621da177e4SLinus Torvalds }
24631da177e4SLinus Torvalds 
24641da177e4SLinus Torvalds static int rt6_stats_seq_open(struct inode *inode, struct file *file)
24651da177e4SLinus Torvalds {
2466a2333525SPavel Emelyanov 	int err;
246769ddb805SDaniel Lezcano 	struct net *net = get_proc_net(inode);
2468a2333525SPavel Emelyanov 	if (!net)
2469a2333525SPavel Emelyanov 		return -ENXIO;
2470a2333525SPavel Emelyanov 
2471a2333525SPavel Emelyanov 	err = single_open(file, rt6_stats_seq_show, net);
2472a2333525SPavel Emelyanov 	if (err < 0) {
2473a2333525SPavel Emelyanov 		put_net(net);
2474a2333525SPavel Emelyanov 		return err;
2475a2333525SPavel Emelyanov 	}
2476a2333525SPavel Emelyanov 
2477a2333525SPavel Emelyanov 	return 0;
247869ddb805SDaniel Lezcano }
247969ddb805SDaniel Lezcano 
248069ddb805SDaniel Lezcano static int rt6_stats_seq_release(struct inode *inode, struct file *file)
248169ddb805SDaniel Lezcano {
248269ddb805SDaniel Lezcano 	struct seq_file *seq = file->private_data;
248369ddb805SDaniel Lezcano 	struct net *net = (struct net *)seq->private;
248469ddb805SDaniel Lezcano 	put_net(net);
248569ddb805SDaniel Lezcano 	return single_release(inode, file);
24861da177e4SLinus Torvalds }
24871da177e4SLinus Torvalds 
24889a32144eSArjan van de Ven static const struct file_operations rt6_stats_seq_fops = {
24891da177e4SLinus Torvalds 	.owner	 = THIS_MODULE,
24901da177e4SLinus Torvalds 	.open	 = rt6_stats_seq_open,
24911da177e4SLinus Torvalds 	.read	 = seq_read,
24921da177e4SLinus Torvalds 	.llseek	 = seq_lseek,
249369ddb805SDaniel Lezcano 	.release = rt6_stats_seq_release,
24941da177e4SLinus Torvalds };
24951da177e4SLinus Torvalds #endif	/* CONFIG_PROC_FS */
24961da177e4SLinus Torvalds 
24971da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
24981da177e4SLinus Torvalds 
24991da177e4SLinus Torvalds static
25001da177e4SLinus Torvalds int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
25011da177e4SLinus Torvalds 			      void __user *buffer, size_t *lenp, loff_t *ppos)
25021da177e4SLinus Torvalds {
25035b7c931dSDaniel Lezcano 	struct net *net = current->nsproxy->net_ns;
25045b7c931dSDaniel Lezcano 	int delay = net->ipv6.sysctl.flush_delay;
25051da177e4SLinus Torvalds 	if (write) {
25061da177e4SLinus Torvalds 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
25075b7c931dSDaniel Lezcano 		fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
25081da177e4SLinus Torvalds 		return 0;
25091da177e4SLinus Torvalds 	} else
25101da177e4SLinus Torvalds 		return -EINVAL;
25111da177e4SLinus Torvalds }
25121da177e4SLinus Torvalds 
2513760f2d01SDaniel Lezcano ctl_table ipv6_route_table_template[] = {
25141da177e4SLinus Torvalds 	{
25151da177e4SLinus Torvalds 		.procname	=	"flush",
25164990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.flush_delay,
25171da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
251889c8b3a1SDave Jones 		.mode		=	0200,
25191da177e4SLinus Torvalds 		.proc_handler	=	&ipv6_sysctl_rtcache_flush
25201da177e4SLinus Torvalds 	},
25211da177e4SLinus Torvalds 	{
25221da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
25231da177e4SLinus Torvalds 		.procname	=	"gc_thresh",
25249a7ec3a9SDaniel Lezcano 		.data		=	&ip6_dst_ops_template.gc_thresh,
25251da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25261da177e4SLinus Torvalds 		.mode		=	0644,
25271da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec,
25281da177e4SLinus Torvalds 	},
25291da177e4SLinus Torvalds 	{
25301da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
25311da177e4SLinus Torvalds 		.procname	=	"max_size",
25324990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
25331da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25341da177e4SLinus Torvalds 		.mode		=	0644,
25351da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec,
25361da177e4SLinus Torvalds 	},
25371da177e4SLinus Torvalds 	{
25381da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
25391da177e4SLinus Torvalds 		.procname	=	"gc_min_interval",
25404990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
25411da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25421da177e4SLinus Torvalds 		.mode		=	0644,
25431da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec_jiffies,
25441da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
25451da177e4SLinus Torvalds 	},
25461da177e4SLinus Torvalds 	{
25471da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
25481da177e4SLinus Torvalds 		.procname	=	"gc_timeout",
25494990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
25501da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25511da177e4SLinus Torvalds 		.mode		=	0644,
25521da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec_jiffies,
25531da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
25541da177e4SLinus Torvalds 	},
25551da177e4SLinus Torvalds 	{
25561da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
25571da177e4SLinus Torvalds 		.procname	=	"gc_interval",
25584990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
25591da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25601da177e4SLinus Torvalds 		.mode		=	0644,
25611da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec_jiffies,
25621da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
25631da177e4SLinus Torvalds 	},
25641da177e4SLinus Torvalds 	{
25651da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
25661da177e4SLinus Torvalds 		.procname	=	"gc_elasticity",
25674990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
25681da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25691da177e4SLinus Torvalds 		.mode		=	0644,
25701da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec_jiffies,
25711da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
25721da177e4SLinus Torvalds 	},
25731da177e4SLinus Torvalds 	{
25741da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
25751da177e4SLinus Torvalds 		.procname	=	"mtu_expires",
25764990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
25771da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25781da177e4SLinus Torvalds 		.mode		=	0644,
25791da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec_jiffies,
25801da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
25811da177e4SLinus Torvalds 	},
25821da177e4SLinus Torvalds 	{
25831da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
25841da177e4SLinus Torvalds 		.procname	=	"min_adv_mss",
25854990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
25861da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25871da177e4SLinus Torvalds 		.mode		=	0644,
25881da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec_jiffies,
25891da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
25901da177e4SLinus Torvalds 	},
25911da177e4SLinus Torvalds 	{
25921da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
25931da177e4SLinus Torvalds 		.procname	=	"gc_min_interval_ms",
25944990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
25951da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25961da177e4SLinus Torvalds 		.mode		=	0644,
25971da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec_ms_jiffies,
25981da177e4SLinus Torvalds 		.strategy	=	&sysctl_ms_jiffies,
25991da177e4SLinus Torvalds 	},
26001da177e4SLinus Torvalds 	{ .ctl_name = 0 }
26011da177e4SLinus Torvalds };
26021da177e4SLinus Torvalds 
2603760f2d01SDaniel Lezcano struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2604760f2d01SDaniel Lezcano {
2605760f2d01SDaniel Lezcano 	struct ctl_table *table;
2606760f2d01SDaniel Lezcano 
2607760f2d01SDaniel Lezcano 	table = kmemdup(ipv6_route_table_template,
2608760f2d01SDaniel Lezcano 			sizeof(ipv6_route_table_template),
2609760f2d01SDaniel Lezcano 			GFP_KERNEL);
26105ee09105SYOSHIFUJI Hideaki 
26115ee09105SYOSHIFUJI Hideaki 	if (table) {
26125ee09105SYOSHIFUJI Hideaki 		table[0].data = &net->ipv6.sysctl.flush_delay;
2613f2fc6a54SBenjamin Thery 		table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh;
26145ee09105SYOSHIFUJI Hideaki 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
26155ee09105SYOSHIFUJI Hideaki 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
26165ee09105SYOSHIFUJI Hideaki 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
26175ee09105SYOSHIFUJI Hideaki 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
26185ee09105SYOSHIFUJI Hideaki 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
26195ee09105SYOSHIFUJI Hideaki 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
26205ee09105SYOSHIFUJI Hideaki 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
26215ee09105SYOSHIFUJI Hideaki 	}
26225ee09105SYOSHIFUJI Hideaki 
2623760f2d01SDaniel Lezcano 	return table;
2624760f2d01SDaniel Lezcano }
26251da177e4SLinus Torvalds #endif
26261da177e4SLinus Torvalds 
2627cdb18761SDaniel Lezcano static int ip6_route_net_init(struct net *net)
2628cdb18761SDaniel Lezcano {
2629633d424bSPavel Emelyanov 	int ret = -ENOMEM;
26308ed67789SDaniel Lezcano 
2631f2fc6a54SBenjamin Thery 	net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template,
2632f2fc6a54SBenjamin Thery 					sizeof(*net->ipv6.ip6_dst_ops),
2633f2fc6a54SBenjamin Thery 					GFP_KERNEL);
2634f2fc6a54SBenjamin Thery 	if (!net->ipv6.ip6_dst_ops)
2635f2fc6a54SBenjamin Thery 		goto out;
263648115becSDenis V. Lunev 	net->ipv6.ip6_dst_ops->dst_net = hold_net(net);
2637f2fc6a54SBenjamin Thery 
26388ed67789SDaniel Lezcano 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
26398ed67789SDaniel Lezcano 					   sizeof(*net->ipv6.ip6_null_entry),
26408ed67789SDaniel Lezcano 					   GFP_KERNEL);
26418ed67789SDaniel Lezcano 	if (!net->ipv6.ip6_null_entry)
2642f2fc6a54SBenjamin Thery 		goto out_ip6_dst_ops;
26438ed67789SDaniel Lezcano 	net->ipv6.ip6_null_entry->u.dst.path =
26448ed67789SDaniel Lezcano 		(struct dst_entry *)net->ipv6.ip6_null_entry;
2645f2fc6a54SBenjamin Thery 	net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
26468ed67789SDaniel Lezcano 
26478ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
26488ed67789SDaniel Lezcano 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
26498ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_prohibit_entry),
26508ed67789SDaniel Lezcano 					       GFP_KERNEL);
26518ed67789SDaniel Lezcano 	if (!net->ipv6.ip6_prohibit_entry) {
26528ed67789SDaniel Lezcano 		kfree(net->ipv6.ip6_null_entry);
26538ed67789SDaniel Lezcano 		goto out;
26548ed67789SDaniel Lezcano 	}
26558ed67789SDaniel Lezcano 	net->ipv6.ip6_prohibit_entry->u.dst.path =
26568ed67789SDaniel Lezcano 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2657f2fc6a54SBenjamin Thery 	net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
26588ed67789SDaniel Lezcano 
26598ed67789SDaniel Lezcano 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
26608ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
26618ed67789SDaniel Lezcano 					       GFP_KERNEL);
26628ed67789SDaniel Lezcano 	if (!net->ipv6.ip6_blk_hole_entry) {
26638ed67789SDaniel Lezcano 		kfree(net->ipv6.ip6_null_entry);
26648ed67789SDaniel Lezcano 		kfree(net->ipv6.ip6_prohibit_entry);
26658ed67789SDaniel Lezcano 		goto out;
26668ed67789SDaniel Lezcano 	}
26678ed67789SDaniel Lezcano 	net->ipv6.ip6_blk_hole_entry->u.dst.path =
26688ed67789SDaniel Lezcano 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2669f2fc6a54SBenjamin Thery 	net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
26708ed67789SDaniel Lezcano #endif
26718ed67789SDaniel Lezcano 
2672cdb18761SDaniel Lezcano #ifdef CONFIG_PROC_FS
2673cdb18761SDaniel Lezcano 	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2674cdb18761SDaniel Lezcano 	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2675cdb18761SDaniel Lezcano #endif
26766891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
26776891a346SBenjamin Thery 
26788ed67789SDaniel Lezcano 	ret = 0;
26798ed67789SDaniel Lezcano out:
26808ed67789SDaniel Lezcano 	return ret;
2681f2fc6a54SBenjamin Thery 
2682f2fc6a54SBenjamin Thery out_ip6_dst_ops:
268348115becSDenis V. Lunev 	release_net(net->ipv6.ip6_dst_ops->dst_net);
2684f2fc6a54SBenjamin Thery 	kfree(net->ipv6.ip6_dst_ops);
2685f2fc6a54SBenjamin Thery 	goto out;
2686cdb18761SDaniel Lezcano }
2687cdb18761SDaniel Lezcano 
2688cdb18761SDaniel Lezcano static void ip6_route_net_exit(struct net *net)
2689cdb18761SDaniel Lezcano {
2690cdb18761SDaniel Lezcano #ifdef CONFIG_PROC_FS
2691cdb18761SDaniel Lezcano 	proc_net_remove(net, "ipv6_route");
2692cdb18761SDaniel Lezcano 	proc_net_remove(net, "rt6_stats");
2693cdb18761SDaniel Lezcano #endif
26948ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_null_entry);
26958ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
26968ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_prohibit_entry);
26978ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_blk_hole_entry);
26988ed67789SDaniel Lezcano #endif
269948115becSDenis V. Lunev 	release_net(net->ipv6.ip6_dst_ops->dst_net);
2700f2fc6a54SBenjamin Thery 	kfree(net->ipv6.ip6_dst_ops);
2701cdb18761SDaniel Lezcano }
2702cdb18761SDaniel Lezcano 
2703cdb18761SDaniel Lezcano static struct pernet_operations ip6_route_net_ops = {
2704cdb18761SDaniel Lezcano 	.init = ip6_route_net_init,
2705cdb18761SDaniel Lezcano 	.exit = ip6_route_net_exit,
2706cdb18761SDaniel Lezcano };
2707cdb18761SDaniel Lezcano 
27088ed67789SDaniel Lezcano static struct notifier_block ip6_route_dev_notifier = {
27098ed67789SDaniel Lezcano 	.notifier_call = ip6_route_dev_notify,
27108ed67789SDaniel Lezcano 	.priority = 0,
27118ed67789SDaniel Lezcano };
27128ed67789SDaniel Lezcano 
2713433d49c3SDaniel Lezcano int __init ip6_route_init(void)
27141da177e4SLinus Torvalds {
2715433d49c3SDaniel Lezcano 	int ret;
2716433d49c3SDaniel Lezcano 
27179a7ec3a9SDaniel Lezcano 	ret = -ENOMEM;
27189a7ec3a9SDaniel Lezcano 	ip6_dst_ops_template.kmem_cachep =
27199a7ec3a9SDaniel Lezcano 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
27209a7ec3a9SDaniel Lezcano 				  SLAB_HWCACHE_ALIGN, NULL);
27219a7ec3a9SDaniel Lezcano 	if (!ip6_dst_ops_template.kmem_cachep)
2722f2fc6a54SBenjamin Thery 		goto out;;
272314e50e57SDavid S. Miller 
27248ed67789SDaniel Lezcano 	ret = register_pernet_subsys(&ip6_route_net_ops);
27258ed67789SDaniel Lezcano 	if (ret)
2726bdb3289fSDaniel Lezcano 		goto out_kmem_cache;
2727bdb3289fSDaniel Lezcano 
27288ed67789SDaniel Lezcano 	/* Registering of the loopback is done before this portion of code,
27298ed67789SDaniel Lezcano 	 * the loopback reference in rt6_info will not be taken, do it
27308ed67789SDaniel Lezcano 	 * manually for init_net */
27318ed67789SDaniel Lezcano 	init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
27328ed67789SDaniel Lezcano 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2733bdb3289fSDaniel Lezcano   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
27348ed67789SDaniel Lezcano 	init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
27358ed67789SDaniel Lezcano 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
27368ed67789SDaniel Lezcano 	init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
27378ed67789SDaniel Lezcano 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2738bdb3289fSDaniel Lezcano   #endif
2739433d49c3SDaniel Lezcano 	ret = fib6_init();
2740433d49c3SDaniel Lezcano 	if (ret)
27418ed67789SDaniel Lezcano 		goto out_register_subsys;
2742433d49c3SDaniel Lezcano 
2743433d49c3SDaniel Lezcano 	ret = xfrm6_init();
2744433d49c3SDaniel Lezcano 	if (ret)
2745cdb18761SDaniel Lezcano 		goto out_fib6_init;
2746c35b7e72SDaniel Lezcano 
2747433d49c3SDaniel Lezcano 	ret = fib6_rules_init();
2748433d49c3SDaniel Lezcano 	if (ret)
2749433d49c3SDaniel Lezcano 		goto xfrm6_init;
27507e5449c2SDaniel Lezcano 
2751433d49c3SDaniel Lezcano 	ret = -ENOBUFS;
2752433d49c3SDaniel Lezcano 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2753433d49c3SDaniel Lezcano 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2754433d49c3SDaniel Lezcano 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2755433d49c3SDaniel Lezcano 		goto fib6_rules_init;
2756433d49c3SDaniel Lezcano 
27578ed67789SDaniel Lezcano 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2758cdb18761SDaniel Lezcano 	if (ret)
2759cdb18761SDaniel Lezcano 		goto fib6_rules_init;
27608ed67789SDaniel Lezcano 
2761433d49c3SDaniel Lezcano out:
2762433d49c3SDaniel Lezcano 	return ret;
2763433d49c3SDaniel Lezcano 
2764433d49c3SDaniel Lezcano fib6_rules_init:
2765433d49c3SDaniel Lezcano 	fib6_rules_cleanup();
2766433d49c3SDaniel Lezcano xfrm6_init:
2767433d49c3SDaniel Lezcano 	xfrm6_fini();
2768433d49c3SDaniel Lezcano out_fib6_init:
2769433d49c3SDaniel Lezcano 	fib6_gc_cleanup();
27708ed67789SDaniel Lezcano out_register_subsys:
27718ed67789SDaniel Lezcano 	unregister_pernet_subsys(&ip6_route_net_ops);
2772433d49c3SDaniel Lezcano out_kmem_cache:
2773f2fc6a54SBenjamin Thery 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2774433d49c3SDaniel Lezcano 	goto out;
27751da177e4SLinus Torvalds }
27761da177e4SLinus Torvalds 
27771da177e4SLinus Torvalds void ip6_route_cleanup(void)
27781da177e4SLinus Torvalds {
27798ed67789SDaniel Lezcano 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
2780101367c2SThomas Graf 	fib6_rules_cleanup();
27811da177e4SLinus Torvalds 	xfrm6_fini();
27821da177e4SLinus Torvalds 	fib6_gc_cleanup();
27838ed67789SDaniel Lezcano 	unregister_pernet_subsys(&ip6_route_net_ops);
2784f2fc6a54SBenjamin Thery 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
27851da177e4SLinus Torvalds }
2786