xref: /openbmc/linux/net/ipv6/route.c (revision b339a47c370ec669f789c5989f54eec1d78574bb)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  *	Linux INET6 implementation
31da177e4SLinus Torvalds  *	FIB front-end.
41da177e4SLinus Torvalds  *
51da177e4SLinus Torvalds  *	Authors:
61da177e4SLinus Torvalds  *	Pedro Roque		<roque@di.fc.ul.pt>
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
91da177e4SLinus Torvalds  *      modify it under the terms of the GNU General Public License
101da177e4SLinus Torvalds  *      as published by the Free Software Foundation; either version
111da177e4SLinus Torvalds  *      2 of the License, or (at your option) any later version.
121da177e4SLinus Torvalds  */
131da177e4SLinus Torvalds 
141da177e4SLinus Torvalds /*	Changes:
151da177e4SLinus Torvalds  *
161da177e4SLinus Torvalds  *	YOSHIFUJI Hideaki @USAGI
171da177e4SLinus Torvalds  *		reworked default router selection.
181da177e4SLinus Torvalds  *		- respect outgoing interface
191da177e4SLinus Torvalds  *		- select from (probably) reachable routers (i.e.
201da177e4SLinus Torvalds  *		routers in REACHABLE, STALE, DELAY or PROBE states).
211da177e4SLinus Torvalds  *		- always select the same router if it is (probably)
221da177e4SLinus Torvalds  *		reachable.  otherwise, round-robin the list.
23c0bece9fSYOSHIFUJI Hideaki  *	Ville Nuorvala
24c0bece9fSYOSHIFUJI Hideaki  *		Fixed routing subtrees.
251da177e4SLinus Torvalds  */
261da177e4SLinus Torvalds 
274fc268d2SRandy Dunlap #include <linux/capability.h>
281da177e4SLinus Torvalds #include <linux/errno.h>
291da177e4SLinus Torvalds #include <linux/types.h>
301da177e4SLinus Torvalds #include <linux/times.h>
311da177e4SLinus Torvalds #include <linux/socket.h>
321da177e4SLinus Torvalds #include <linux/sockios.h>
331da177e4SLinus Torvalds #include <linux/net.h>
341da177e4SLinus Torvalds #include <linux/route.h>
351da177e4SLinus Torvalds #include <linux/netdevice.h>
361da177e4SLinus Torvalds #include <linux/in6.h>
377bc570c8SYOSHIFUJI Hideaki #include <linux/mroute6.h>
381da177e4SLinus Torvalds #include <linux/init.h>
391da177e4SLinus Torvalds #include <linux/if_arp.h>
401da177e4SLinus Torvalds #include <linux/proc_fs.h>
411da177e4SLinus Torvalds #include <linux/seq_file.h>
425b7c931dSDaniel Lezcano #include <linux/nsproxy.h>
43457c4cbcSEric W. Biederman #include <net/net_namespace.h>
441da177e4SLinus Torvalds #include <net/snmp.h>
451da177e4SLinus Torvalds #include <net/ipv6.h>
461da177e4SLinus Torvalds #include <net/ip6_fib.h>
471da177e4SLinus Torvalds #include <net/ip6_route.h>
481da177e4SLinus Torvalds #include <net/ndisc.h>
491da177e4SLinus Torvalds #include <net/addrconf.h>
501da177e4SLinus Torvalds #include <net/tcp.h>
511da177e4SLinus Torvalds #include <linux/rtnetlink.h>
521da177e4SLinus Torvalds #include <net/dst.h>
531da177e4SLinus Torvalds #include <net/xfrm.h>
548d71740cSTom Tucker #include <net/netevent.h>
5521713ebcSThomas Graf #include <net/netlink.h>
561da177e4SLinus Torvalds 
571da177e4SLinus Torvalds #include <asm/uaccess.h>
581da177e4SLinus Torvalds 
591da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
601da177e4SLinus Torvalds #include <linux/sysctl.h>
611da177e4SLinus Torvalds #endif
621da177e4SLinus Torvalds 
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG()
 * and RT6_TRACE() expand to printk() calls; below that both expand to
 * nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

#define CLONE_OFFLINK_ROUTE 0
751da177e4SLinus Torvalds 
761da177e4SLinus Torvalds static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
771da177e4SLinus Torvalds static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
781da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *);
791da177e4SLinus Torvalds static void		ip6_dst_destroy(struct dst_entry *);
801da177e4SLinus Torvalds static void		ip6_dst_ifdown(struct dst_entry *,
811da177e4SLinus Torvalds 				       struct net_device *dev, int how);
82569d3645SDaniel Lezcano static int		 ip6_dst_gc(struct dst_ops *ops);
831da177e4SLinus Torvalds 
841da177e4SLinus Torvalds static int		ip6_pkt_discard(struct sk_buff *skb);
851da177e4SLinus Torvalds static int		ip6_pkt_discard_out(struct sk_buff *skb);
861da177e4SLinus Torvalds static void		ip6_link_failure(struct sk_buff *skb);
871da177e4SLinus Torvalds static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
881da177e4SLinus Torvalds 
8970ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
90efa2cea0SDaniel Lezcano static struct rt6_info *rt6_add_route_info(struct net *net,
91efa2cea0SDaniel Lezcano 					   struct in6_addr *prefix, int prefixlen,
9270ceb4f5SYOSHIFUJI Hideaki 					   struct in6_addr *gwaddr, int ifindex,
9370ceb4f5SYOSHIFUJI Hideaki 					   unsigned pref);
94efa2cea0SDaniel Lezcano static struct rt6_info *rt6_get_route_info(struct net *net,
95efa2cea0SDaniel Lezcano 					   struct in6_addr *prefix, int prefixlen,
9670ceb4f5SYOSHIFUJI Hideaki 					   struct in6_addr *gwaddr, int ifindex);
9770ceb4f5SYOSHIFUJI Hideaki #endif
9870ceb4f5SYOSHIFUJI Hideaki 
999a7ec3a9SDaniel Lezcano static struct dst_ops ip6_dst_ops_template = {
1001da177e4SLinus Torvalds 	.family			=	AF_INET6,
1011da177e4SLinus Torvalds 	.protocol		=	__constant_htons(ETH_P_IPV6),
1021da177e4SLinus Torvalds 	.gc			=	ip6_dst_gc,
1031da177e4SLinus Torvalds 	.gc_thresh		=	1024,
1041da177e4SLinus Torvalds 	.check			=	ip6_dst_check,
1051da177e4SLinus Torvalds 	.destroy		=	ip6_dst_destroy,
1061da177e4SLinus Torvalds 	.ifdown			=	ip6_dst_ifdown,
1071da177e4SLinus Torvalds 	.negative_advice	=	ip6_negative_advice,
1081da177e4SLinus Torvalds 	.link_failure		=	ip6_link_failure,
1091da177e4SLinus Torvalds 	.update_pmtu		=	ip6_rt_update_pmtu,
1101ac06e03SHerbert Xu 	.local_out		=	__ip6_local_out,
1111da177e4SLinus Torvalds 	.entry_size		=	sizeof(struct rt6_info),
112e2422970SEric Dumazet 	.entries		=	ATOMIC_INIT(0),
1131da177e4SLinus Torvalds };
1141da177e4SLinus Torvalds 
11514e50e57SDavid S. Miller static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
11614e50e57SDavid S. Miller {
11714e50e57SDavid S. Miller }
11814e50e57SDavid S. Miller 
11914e50e57SDavid S. Miller static struct dst_ops ip6_dst_blackhole_ops = {
12014e50e57SDavid S. Miller 	.family			=	AF_INET6,
12114e50e57SDavid S. Miller 	.protocol		=	__constant_htons(ETH_P_IPV6),
12214e50e57SDavid S. Miller 	.destroy		=	ip6_dst_destroy,
12314e50e57SDavid S. Miller 	.check			=	ip6_dst_check,
12414e50e57SDavid S. Miller 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
12514e50e57SDavid S. Miller 	.entry_size		=	sizeof(struct rt6_info),
126e2422970SEric Dumazet 	.entries		=	ATOMIC_INIT(0),
12714e50e57SDavid S. Miller };
12814e50e57SDavid S. Miller 
129bdb3289fSDaniel Lezcano static struct rt6_info ip6_null_entry_template = {
1301da177e4SLinus Torvalds 	.u = {
1311da177e4SLinus Torvalds 		.dst = {
1321da177e4SLinus Torvalds 			.__refcnt	= ATOMIC_INIT(1),
1331da177e4SLinus Torvalds 			.__use		= 1,
1341da177e4SLinus Torvalds 			.obsolete	= -1,
1351da177e4SLinus Torvalds 			.error		= -ENETUNREACH,
1361da177e4SLinus Torvalds 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
1371da177e4SLinus Torvalds 			.input		= ip6_pkt_discard,
1381da177e4SLinus Torvalds 			.output		= ip6_pkt_discard_out,
1391da177e4SLinus Torvalds 		}
1401da177e4SLinus Torvalds 	},
1411da177e4SLinus Torvalds 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
1421da177e4SLinus Torvalds 	.rt6i_metric	= ~(u32) 0,
1431da177e4SLinus Torvalds 	.rt6i_ref	= ATOMIC_INIT(1),
1441da177e4SLinus Torvalds };
1451da177e4SLinus Torvalds 
146101367c2SThomas Graf #ifdef CONFIG_IPV6_MULTIPLE_TABLES
147101367c2SThomas Graf 
1486723ab54SDavid S. Miller static int ip6_pkt_prohibit(struct sk_buff *skb);
1496723ab54SDavid S. Miller static int ip6_pkt_prohibit_out(struct sk_buff *skb);
1506723ab54SDavid S. Miller 
151280a34c8SAdrian Bunk static struct rt6_info ip6_prohibit_entry_template = {
152101367c2SThomas Graf 	.u = {
153101367c2SThomas Graf 		.dst = {
154101367c2SThomas Graf 			.__refcnt	= ATOMIC_INIT(1),
155101367c2SThomas Graf 			.__use		= 1,
156101367c2SThomas Graf 			.obsolete	= -1,
157101367c2SThomas Graf 			.error		= -EACCES,
158101367c2SThomas Graf 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
1599ce8ade0SThomas Graf 			.input		= ip6_pkt_prohibit,
1609ce8ade0SThomas Graf 			.output		= ip6_pkt_prohibit_out,
161101367c2SThomas Graf 		}
162101367c2SThomas Graf 	},
163101367c2SThomas Graf 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
164101367c2SThomas Graf 	.rt6i_metric	= ~(u32) 0,
165101367c2SThomas Graf 	.rt6i_ref	= ATOMIC_INIT(1),
166101367c2SThomas Graf };
167101367c2SThomas Graf 
168bdb3289fSDaniel Lezcano static struct rt6_info ip6_blk_hole_entry_template = {
169101367c2SThomas Graf 	.u = {
170101367c2SThomas Graf 		.dst = {
171101367c2SThomas Graf 			.__refcnt	= ATOMIC_INIT(1),
172101367c2SThomas Graf 			.__use		= 1,
173101367c2SThomas Graf 			.obsolete	= -1,
174101367c2SThomas Graf 			.error		= -EINVAL,
175101367c2SThomas Graf 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
176352e512cSHerbert Xu 			.input		= dst_discard,
177352e512cSHerbert Xu 			.output		= dst_discard,
178101367c2SThomas Graf 		}
179101367c2SThomas Graf 	},
180101367c2SThomas Graf 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
181101367c2SThomas Graf 	.rt6i_metric	= ~(u32) 0,
182101367c2SThomas Graf 	.rt6i_ref	= ATOMIC_INIT(1),
183101367c2SThomas Graf };
184101367c2SThomas Graf 
185101367c2SThomas Graf #endif
186101367c2SThomas Graf 
/*
 * Allocate a dst entry through dst_alloc() using the given @ops and
 * return it typed as a struct rt6_info (whatever dst_alloc() returned,
 * unchecked).
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *)dst_alloc(ops);
	return rt;
}
1921da177e4SLinus Torvalds 
1931da177e4SLinus Torvalds static void ip6_dst_destroy(struct dst_entry *dst)
1941da177e4SLinus Torvalds {
1951da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
1961da177e4SLinus Torvalds 	struct inet6_dev *idev = rt->rt6i_idev;
1971da177e4SLinus Torvalds 
1981da177e4SLinus Torvalds 	if (idev != NULL) {
1991da177e4SLinus Torvalds 		rt->rt6i_idev = NULL;
2001da177e4SLinus Torvalds 		in6_dev_put(idev);
2011da177e4SLinus Torvalds 	}
2021da177e4SLinus Torvalds }
2031da177e4SLinus Torvalds 
2041da177e4SLinus Torvalds static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
2051da177e4SLinus Torvalds 			   int how)
2061da177e4SLinus Torvalds {
2071da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
2081da177e4SLinus Torvalds 	struct inet6_dev *idev = rt->rt6i_idev;
2095a3e55d6SDenis V. Lunev 	struct net_device *loopback_dev =
210c346dca1SYOSHIFUJI Hideaki 		dev_net(dev)->loopback_dev;
2111da177e4SLinus Torvalds 
2125a3e55d6SDenis V. Lunev 	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
2135a3e55d6SDenis V. Lunev 		struct inet6_dev *loopback_idev =
2145a3e55d6SDenis V. Lunev 			in6_dev_get(loopback_dev);
2151da177e4SLinus Torvalds 		if (loopback_idev != NULL) {
2161da177e4SLinus Torvalds 			rt->rt6i_idev = loopback_idev;
2171da177e4SLinus Torvalds 			in6_dev_put(idev);
2181da177e4SLinus Torvalds 		}
2191da177e4SLinus Torvalds 	}
2201da177e4SLinus Torvalds }
2211da177e4SLinus Torvalds 
2221da177e4SLinus Torvalds static __inline__ int rt6_check_expired(const struct rt6_info *rt)
2231da177e4SLinus Torvalds {
2241da177e4SLinus Torvalds 	return (rt->rt6i_flags & RTF_EXPIRES &&
2251da177e4SLinus Torvalds 		time_after(jiffies, rt->rt6i_expires));
2261da177e4SLinus Torvalds }
2271da177e4SLinus Torvalds 
228c71099acSThomas Graf static inline int rt6_need_strict(struct in6_addr *daddr)
229c71099acSThomas Graf {
230c71099acSThomas Graf 	return (ipv6_addr_type(daddr) &
2315ce83afaSYOSHIFUJI Hideaki 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
232c71099acSThomas Graf }
233c71099acSThomas Graf 
2341da177e4SLinus Torvalds /*
235c71099acSThomas Graf  *	Route lookup. Any table->tb6_lock is implied.
2361da177e4SLinus Torvalds  */
2371da177e4SLinus Torvalds 
2388ed67789SDaniel Lezcano static inline struct rt6_info *rt6_device_match(struct net *net,
2398ed67789SDaniel Lezcano 						    struct rt6_info *rt,
240dd3abc4eSYOSHIFUJI Hideaki 						    struct in6_addr *saddr,
2411da177e4SLinus Torvalds 						    int oif,
242d420895eSYOSHIFUJI Hideaki 						    int flags)
2431da177e4SLinus Torvalds {
2441da177e4SLinus Torvalds 	struct rt6_info *local = NULL;
2451da177e4SLinus Torvalds 	struct rt6_info *sprt;
2461da177e4SLinus Torvalds 
247dd3abc4eSYOSHIFUJI Hideaki 	if (!oif && ipv6_addr_any(saddr))
248dd3abc4eSYOSHIFUJI Hideaki 		goto out;
249dd3abc4eSYOSHIFUJI Hideaki 
2507cc48263SEric Dumazet 	for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
2511da177e4SLinus Torvalds 		struct net_device *dev = sprt->rt6i_dev;
252dd3abc4eSYOSHIFUJI Hideaki 
253dd3abc4eSYOSHIFUJI Hideaki 		if (oif) {
2541da177e4SLinus Torvalds 			if (dev->ifindex == oif)
2551da177e4SLinus Torvalds 				return sprt;
2561da177e4SLinus Torvalds 			if (dev->flags & IFF_LOOPBACK) {
2571da177e4SLinus Torvalds 				if (sprt->rt6i_idev == NULL ||
2581da177e4SLinus Torvalds 				    sprt->rt6i_idev->dev->ifindex != oif) {
259d420895eSYOSHIFUJI Hideaki 					if (flags & RT6_LOOKUP_F_IFACE && oif)
2601da177e4SLinus Torvalds 						continue;
2611da177e4SLinus Torvalds 					if (local && (!oif ||
2621da177e4SLinus Torvalds 						      local->rt6i_idev->dev->ifindex == oif))
2631da177e4SLinus Torvalds 						continue;
2641da177e4SLinus Torvalds 				}
2651da177e4SLinus Torvalds 				local = sprt;
2661da177e4SLinus Torvalds 			}
267dd3abc4eSYOSHIFUJI Hideaki 		} else {
268dd3abc4eSYOSHIFUJI Hideaki 			if (ipv6_chk_addr(net, saddr, dev,
269dd3abc4eSYOSHIFUJI Hideaki 					  flags & RT6_LOOKUP_F_IFACE))
270dd3abc4eSYOSHIFUJI Hideaki 				return sprt;
271dd3abc4eSYOSHIFUJI Hideaki 		}
2721da177e4SLinus Torvalds 	}
2731da177e4SLinus Torvalds 
274dd3abc4eSYOSHIFUJI Hideaki 	if (oif) {
2751da177e4SLinus Torvalds 		if (local)
2761da177e4SLinus Torvalds 			return local;
2771da177e4SLinus Torvalds 
278d420895eSYOSHIFUJI Hideaki 		if (flags & RT6_LOOKUP_F_IFACE)
2798ed67789SDaniel Lezcano 			return net->ipv6.ip6_null_entry;
2801da177e4SLinus Torvalds 	}
281dd3abc4eSYOSHIFUJI Hideaki out:
2821da177e4SLinus Torvalds 	return rt;
2831da177e4SLinus Torvalds }
2841da177e4SLinus Torvalds 
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation to the
 * next hop of @rt when its neighbour entry is not in a NUD_VALID state.
 *
 * Router Reachability Probes MUST be rate-limited; here at most one is
 * sent per rtr_probe_interval of the route's inet6_dev, measured from
 * neigh->updated.
 *
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* unlocked fast path: nothing to probe */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/*
	 * The interval check and the update of neigh->updated must be one
	 * atomic step, and the timestamp is being written, so the writer
	 * side of the lock is required: with only the reader side held,
	 * concurrent callers could all pass the check and each send a
	 * probe.
	 */
	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* solicit the next hop via its solicited-node multicast address */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
32027097255SYOSHIFUJI Hideaki 
3211da177e4SLinus Torvalds /*
322554cfb7eSYOSHIFUJI Hideaki  * Default Router Selection (RFC 2461 6.3.6)
3231da177e4SLinus Torvalds  */
324b6f99a21SDave Jones static inline int rt6_check_dev(struct rt6_info *rt, int oif)
3251da177e4SLinus Torvalds {
326554cfb7eSYOSHIFUJI Hideaki 	struct net_device *dev = rt->rt6i_dev;
327161980f4SDavid S. Miller 	if (!oif || dev->ifindex == oif)
328554cfb7eSYOSHIFUJI Hideaki 		return 2;
329161980f4SDavid S. Miller 	if ((dev->flags & IFF_LOOPBACK) &&
330161980f4SDavid S. Miller 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
331161980f4SDavid S. Miller 		return 1;
332554cfb7eSYOSHIFUJI Hideaki 	return 0;
3331da177e4SLinus Torvalds }
3341da177e4SLinus Torvalds 
335b6f99a21SDave Jones static inline int rt6_check_neigh(struct rt6_info *rt)
3361da177e4SLinus Torvalds {
337554cfb7eSYOSHIFUJI Hideaki 	struct neighbour *neigh = rt->rt6i_nexthop;
338398bcbebSYOSHIFUJI Hideaki 	int m;
3394d0c5911SYOSHIFUJI Hideaki 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
3404d0c5911SYOSHIFUJI Hideaki 	    !(rt->rt6i_flags & RTF_GATEWAY))
3414d0c5911SYOSHIFUJI Hideaki 		m = 1;
3424d0c5911SYOSHIFUJI Hideaki 	else if (neigh) {
3431da177e4SLinus Torvalds 		read_lock_bh(&neigh->lock);
344554cfb7eSYOSHIFUJI Hideaki 		if (neigh->nud_state & NUD_VALID)
3454d0c5911SYOSHIFUJI Hideaki 			m = 2;
346398bcbebSYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
347398bcbebSYOSHIFUJI Hideaki 		else if (neigh->nud_state & NUD_FAILED)
348398bcbebSYOSHIFUJI Hideaki 			m = 0;
349398bcbebSYOSHIFUJI Hideaki #endif
350398bcbebSYOSHIFUJI Hideaki 		else
351ea73ee23SYOSHIFUJI Hideaki 			m = 1;
3521da177e4SLinus Torvalds 		read_unlock_bh(&neigh->lock);
353398bcbebSYOSHIFUJI Hideaki 	} else
354398bcbebSYOSHIFUJI Hideaki 		m = 0;
355554cfb7eSYOSHIFUJI Hideaki 	return m;
3561da177e4SLinus Torvalds }
3571da177e4SLinus Torvalds 
358554cfb7eSYOSHIFUJI Hideaki static int rt6_score_route(struct rt6_info *rt, int oif,
359554cfb7eSYOSHIFUJI Hideaki 			   int strict)
360554cfb7eSYOSHIFUJI Hideaki {
3614d0c5911SYOSHIFUJI Hideaki 	int m, n;
3624d0c5911SYOSHIFUJI Hideaki 
3634d0c5911SYOSHIFUJI Hideaki 	m = rt6_check_dev(rt, oif);
36477d16f45SYOSHIFUJI Hideaki 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
365554cfb7eSYOSHIFUJI Hideaki 		return -1;
366ebacaaa0SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
367ebacaaa0SYOSHIFUJI Hideaki 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
368ebacaaa0SYOSHIFUJI Hideaki #endif
3694d0c5911SYOSHIFUJI Hideaki 	n = rt6_check_neigh(rt);
370557e92efSYOSHIFUJI Hideaki 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
371554cfb7eSYOSHIFUJI Hideaki 		return -1;
372554cfb7eSYOSHIFUJI Hideaki 	return m;
373554cfb7eSYOSHIFUJI Hideaki }
374554cfb7eSYOSHIFUJI Hideaki 
375f11e6659SDavid S. Miller static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
376f11e6659SDavid S. Miller 				   int *mpri, struct rt6_info *match)
377554cfb7eSYOSHIFUJI Hideaki {
378554cfb7eSYOSHIFUJI Hideaki 	int m;
379554cfb7eSYOSHIFUJI Hideaki 
380554cfb7eSYOSHIFUJI Hideaki 	if (rt6_check_expired(rt))
381f11e6659SDavid S. Miller 		goto out;
382554cfb7eSYOSHIFUJI Hideaki 
383554cfb7eSYOSHIFUJI Hideaki 	m = rt6_score_route(rt, oif, strict);
384554cfb7eSYOSHIFUJI Hideaki 	if (m < 0)
385f11e6659SDavid S. Miller 		goto out;
386554cfb7eSYOSHIFUJI Hideaki 
387f11e6659SDavid S. Miller 	if (m > *mpri) {
388ea659e07SYOSHIFUJI Hideaki 		if (strict & RT6_LOOKUP_F_REACHABLE)
38927097255SYOSHIFUJI Hideaki 			rt6_probe(match);
390f11e6659SDavid S. Miller 		*mpri = m;
391554cfb7eSYOSHIFUJI Hideaki 		match = rt;
392ea659e07SYOSHIFUJI Hideaki 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
39327097255SYOSHIFUJI Hideaki 		rt6_probe(rt);
3941da177e4SLinus Torvalds 	}
395f11e6659SDavid S. Miller 
396f11e6659SDavid S. Miller out:
397f11e6659SDavid S. Miller 	return match;
3981da177e4SLinus Torvalds }
3991da177e4SLinus Torvalds 
400f11e6659SDavid S. Miller static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
401f11e6659SDavid S. Miller 				     struct rt6_info *rr_head,
402f11e6659SDavid S. Miller 				     u32 metric, int oif, int strict)
403f11e6659SDavid S. Miller {
404f11e6659SDavid S. Miller 	struct rt6_info *rt, *match;
405f11e6659SDavid S. Miller 	int mpri = -1;
406f11e6659SDavid S. Miller 
407f11e6659SDavid S. Miller 	match = NULL;
408f11e6659SDavid S. Miller 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
409f11e6659SDavid S. Miller 	     rt = rt->u.dst.rt6_next)
410f11e6659SDavid S. Miller 		match = find_match(rt, oif, strict, &mpri, match);
411f11e6659SDavid S. Miller 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
412f11e6659SDavid S. Miller 	     rt = rt->u.dst.rt6_next)
413f11e6659SDavid S. Miller 		match = find_match(rt, oif, strict, &mpri, match);
414f11e6659SDavid S. Miller 
415f11e6659SDavid S. Miller 	return match;
416f11e6659SDavid S. Miller }
417f11e6659SDavid S. Miller 
418f11e6659SDavid S. Miller static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
419f11e6659SDavid S. Miller {
420f11e6659SDavid S. Miller 	struct rt6_info *match, *rt0;
4218ed67789SDaniel Lezcano 	struct net *net;
422f11e6659SDavid S. Miller 
423f11e6659SDavid S. Miller 	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
4240dc47877SHarvey Harrison 		  __func__, fn->leaf, oif);
425f11e6659SDavid S. Miller 
426f11e6659SDavid S. Miller 	rt0 = fn->rr_ptr;
427f11e6659SDavid S. Miller 	if (!rt0)
428f11e6659SDavid S. Miller 		fn->rr_ptr = rt0 = fn->leaf;
429f11e6659SDavid S. Miller 
430f11e6659SDavid S. Miller 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
431f11e6659SDavid S. Miller 
432554cfb7eSYOSHIFUJI Hideaki 	if (!match &&
433f11e6659SDavid S. Miller 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
434f11e6659SDavid S. Miller 		struct rt6_info *next = rt0->u.dst.rt6_next;
435f11e6659SDavid S. Miller 
436554cfb7eSYOSHIFUJI Hideaki 		/* no entries matched; do round-robin */
437f11e6659SDavid S. Miller 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
438f11e6659SDavid S. Miller 			next = fn->leaf;
439f11e6659SDavid S. Miller 
440f11e6659SDavid S. Miller 		if (next != rt0)
441f11e6659SDavid S. Miller 			fn->rr_ptr = next;
442554cfb7eSYOSHIFUJI Hideaki 	}
443554cfb7eSYOSHIFUJI Hideaki 
444f11e6659SDavid S. Miller 	RT6_TRACE("%s() => %p\n",
4450dc47877SHarvey Harrison 		  __func__, match);
446554cfb7eSYOSHIFUJI Hideaki 
447c346dca1SYOSHIFUJI Hideaki 	net = dev_net(rt0->rt6i_dev);
4488ed67789SDaniel Lezcano 	return (match ? match : net->ipv6.ip6_null_entry);
4491da177e4SLinus Torvalds }
4501da177e4SLinus Torvalds 
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev.
 *
 * @opt:    start of the option, interpreted as a struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router, used as the gateway
 *
 * A zero lifetime deletes an existing matching route; a non-zero
 * lifetime adds the route if it is missing, or refreshes the preference
 * of an existing one.  The expiry is then set from the lifetime, or
 * cleared when the lifetime is not finite.
 *
 * Returns 0 on success (including when the route could not be added)
 * and -EINVAL when the option is malformed.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct net *net = dev_net(dev);
	struct in6_addr masked, *prefix;
	struct rt6_info *rt;
	unsigned long lifetime;
	unsigned int pref;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length: length is limited to 3
	 * and must be large enough to hold prefix_len bits.
	 */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	/* an invalid preference value is treated as medium */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* full 16 byte prefix present: use it in place */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* shorter option: build the prefix in a local buffer
		 * (this function is safe)
		 */
		ipv6_addr_prefix(&masked,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &masked;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* lifetime 0 withdraws the route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		/* known route: refresh its preference bits */
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->u.dst);

	return 0;
}
#endif
52470ceb4f5SYOSHIFUJI Hideaki 
/*
 * BACKTRACK(__net, saddr) - retry a lookup higher up in the fib6 tree.
 *
 * Must be expanded inside a function that has local variables
 * "struct rt6_info *rt" and "struct fib6_node *fn" and the labels
 * "restart" and "out".
 *
 * If rt is the null entry of @__net, walk from fn towards the root:
 * stop with "goto out" at a node flagged RTN_TL_ROOT; otherwise move to
 * the parent, or - when the parent has a subtree other than the one we
 * came from - to the node found by looking @saddr up in that subtree.
 * As soon as the new fn is flagged RTN_RTINFO, "goto restart".
 * If rt is not the null entry the macro does nothing.
 *
 * Both arguments are parenthesised so that any expression may be
 * passed.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		while (1) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
542c71099acSThomas Graf 
5438ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_lookup(struct net *net,
5448ed67789SDaniel Lezcano 					     struct fib6_table *table,
545c71099acSThomas Graf 					     struct flowi *fl, int flags)
5461da177e4SLinus Torvalds {
5471da177e4SLinus Torvalds 	struct fib6_node *fn;
5481da177e4SLinus Torvalds 	struct rt6_info *rt;
5491da177e4SLinus Torvalds 
550c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
551c71099acSThomas Graf 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
552c71099acSThomas Graf restart:
553c71099acSThomas Graf 	rt = fn->leaf;
554dd3abc4eSYOSHIFUJI Hideaki 	rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
5558ed67789SDaniel Lezcano 	BACKTRACK(net, &fl->fl6_src);
556c71099acSThomas Graf out:
55703f49f34SPavel Emelyanov 	dst_use(&rt->u.dst, jiffies);
558c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
5591da177e4SLinus Torvalds 	return rt;
560c71099acSThomas Graf 
561c71099acSThomas Graf }
562c71099acSThomas Graf 
5639acd9f3aSYOSHIFUJI Hideaki struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
5649acd9f3aSYOSHIFUJI Hideaki 			    const struct in6_addr *saddr, int oif, int strict)
565c71099acSThomas Graf {
566c71099acSThomas Graf 	struct flowi fl = {
567c71099acSThomas Graf 		.oif = oif,
568c71099acSThomas Graf 		.nl_u = {
569c71099acSThomas Graf 			.ip6_u = {
570c71099acSThomas Graf 				.daddr = *daddr,
571c71099acSThomas Graf 			},
572c71099acSThomas Graf 		},
573c71099acSThomas Graf 	};
574c71099acSThomas Graf 	struct dst_entry *dst;
57577d16f45SYOSHIFUJI Hideaki 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
576c71099acSThomas Graf 
577adaa70bbSThomas Graf 	if (saddr) {
578adaa70bbSThomas Graf 		memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
579adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
580adaa70bbSThomas Graf 	}
581adaa70bbSThomas Graf 
582606a2b48SDaniel Lezcano 	dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
583c71099acSThomas Graf 	if (dst->error == 0)
584c71099acSThomas Graf 		return (struct rt6_info *) dst;
585c71099acSThomas Graf 
586c71099acSThomas Graf 	dst_release(dst);
587c71099acSThomas Graf 
5881da177e4SLinus Torvalds 	return NULL;
5891da177e4SLinus Torvalds }
5901da177e4SLinus Torvalds 
5917159039aSYOSHIFUJI Hideaki EXPORT_SYMBOL(rt6_lookup);
5927159039aSYOSHIFUJI Hideaki 
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold a
   reference to it, it may be destroyed.
 */
5981da177e4SLinus Torvalds 
59986872cb5SThomas Graf static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
6001da177e4SLinus Torvalds {
6011da177e4SLinus Torvalds 	int err;
602c71099acSThomas Graf 	struct fib6_table *table;
6031da177e4SLinus Torvalds 
604c71099acSThomas Graf 	table = rt->rt6i_table;
605c71099acSThomas Graf 	write_lock_bh(&table->tb6_lock);
60686872cb5SThomas Graf 	err = fib6_add(&table->tb6_root, rt, info);
607c71099acSThomas Graf 	write_unlock_bh(&table->tb6_lock);
6081da177e4SLinus Torvalds 
6091da177e4SLinus Torvalds 	return err;
6101da177e4SLinus Torvalds }
6111da177e4SLinus Torvalds 
61240e22e8fSThomas Graf int ip6_ins_rt(struct rt6_info *rt)
61340e22e8fSThomas Graf {
6144d1169c1SDenis V. Lunev 	struct nl_info info = {
615c346dca1SYOSHIFUJI Hideaki 		.nl_net = dev_net(rt->rt6i_dev),
6164d1169c1SDenis V. Lunev 	};
617528c4cebSDenis V. Lunev 	return __ip6_ins_rt(rt, &info);
61840e22e8fSThomas Graf }
61940e22e8fSThomas Graf 
62095a9a5baSYOSHIFUJI Hideaki static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
62195a9a5baSYOSHIFUJI Hideaki 				      struct in6_addr *saddr)
6221da177e4SLinus Torvalds {
6231da177e4SLinus Torvalds 	struct rt6_info *rt;
6241da177e4SLinus Torvalds 
6251da177e4SLinus Torvalds 	/*
6261da177e4SLinus Torvalds 	 *	Clone the route.
6271da177e4SLinus Torvalds 	 */
6281da177e4SLinus Torvalds 
6291da177e4SLinus Torvalds 	rt = ip6_rt_copy(ort);
6301da177e4SLinus Torvalds 
6311da177e4SLinus Torvalds 	if (rt) {
63258c4fb86SYOSHIFUJI Hideaki 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
63358c4fb86SYOSHIFUJI Hideaki 			if (rt->rt6i_dst.plen != 128 &&
63458c4fb86SYOSHIFUJI Hideaki 			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
63558c4fb86SYOSHIFUJI Hideaki 				rt->rt6i_flags |= RTF_ANYCAST;
6361da177e4SLinus Torvalds 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
63758c4fb86SYOSHIFUJI Hideaki 		}
6381da177e4SLinus Torvalds 
63958c4fb86SYOSHIFUJI Hideaki 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
6401da177e4SLinus Torvalds 		rt->rt6i_dst.plen = 128;
6411da177e4SLinus Torvalds 		rt->rt6i_flags |= RTF_CACHE;
6421da177e4SLinus Torvalds 		rt->u.dst.flags |= DST_HOST;
6431da177e4SLinus Torvalds 
6441da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
6451da177e4SLinus Torvalds 		if (rt->rt6i_src.plen && saddr) {
6461da177e4SLinus Torvalds 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
6471da177e4SLinus Torvalds 			rt->rt6i_src.plen = 128;
6481da177e4SLinus Torvalds 		}
6491da177e4SLinus Torvalds #endif
6501da177e4SLinus Torvalds 
6511da177e4SLinus Torvalds 		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
6521da177e4SLinus Torvalds 
65395a9a5baSYOSHIFUJI Hideaki 	}
6541da177e4SLinus Torvalds 
6551da177e4SLinus Torvalds 	return rt;
6561da177e4SLinus Torvalds }
65795a9a5baSYOSHIFUJI Hideaki 
658299d9939SYOSHIFUJI Hideaki static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
659299d9939SYOSHIFUJI Hideaki {
660299d9939SYOSHIFUJI Hideaki 	struct rt6_info *rt = ip6_rt_copy(ort);
661299d9939SYOSHIFUJI Hideaki 	if (rt) {
662299d9939SYOSHIFUJI Hideaki 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
663299d9939SYOSHIFUJI Hideaki 		rt->rt6i_dst.plen = 128;
664299d9939SYOSHIFUJI Hideaki 		rt->rt6i_flags |= RTF_CACHE;
665299d9939SYOSHIFUJI Hideaki 		rt->u.dst.flags |= DST_HOST;
666299d9939SYOSHIFUJI Hideaki 		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
667299d9939SYOSHIFUJI Hideaki 	}
668299d9939SYOSHIFUJI Hideaki 	return rt;
669299d9939SYOSHIFUJI Hideaki }
670299d9939SYOSHIFUJI Hideaki 
6718ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
6728ce11e6aSAdrian Bunk 				      struct flowi *fl, int flags)
6731da177e4SLinus Torvalds {
6741da177e4SLinus Torvalds 	struct fib6_node *fn;
675519fbd87SYOSHIFUJI Hideaki 	struct rt6_info *rt, *nrt;
676c71099acSThomas Graf 	int strict = 0;
6771da177e4SLinus Torvalds 	int attempts = 3;
678519fbd87SYOSHIFUJI Hideaki 	int err;
67953b7997fSYOSHIFUJI Hideaki 	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
6801da177e4SLinus Torvalds 
68177d16f45SYOSHIFUJI Hideaki 	strict |= flags & RT6_LOOKUP_F_IFACE;
6821da177e4SLinus Torvalds 
6831da177e4SLinus Torvalds relookup:
684c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
6851da177e4SLinus Torvalds 
6868238dd06SYOSHIFUJI Hideaki restart_2:
687c71099acSThomas Graf 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
6881da177e4SLinus Torvalds 
6891da177e4SLinus Torvalds restart:
6904acad72dSPavel Emelyanov 	rt = rt6_select(fn, oif, strict | reachable);
6918ed67789SDaniel Lezcano 
6928ed67789SDaniel Lezcano 	BACKTRACK(net, &fl->fl6_src);
6938ed67789SDaniel Lezcano 	if (rt == net->ipv6.ip6_null_entry ||
6948238dd06SYOSHIFUJI Hideaki 	    rt->rt6i_flags & RTF_CACHE)
6951da177e4SLinus Torvalds 		goto out;
6961da177e4SLinus Torvalds 
6971da177e4SLinus Torvalds 	dst_hold(&rt->u.dst);
698c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
6991da177e4SLinus Torvalds 
700519fbd87SYOSHIFUJI Hideaki 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
701e40cf353SYOSHIFUJI Hideaki 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
702519fbd87SYOSHIFUJI Hideaki 	else {
703519fbd87SYOSHIFUJI Hideaki #if CLONE_OFFLINK_ROUTE
704519fbd87SYOSHIFUJI Hideaki 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
705519fbd87SYOSHIFUJI Hideaki #else
706519fbd87SYOSHIFUJI Hideaki 		goto out2;
707519fbd87SYOSHIFUJI Hideaki #endif
708519fbd87SYOSHIFUJI Hideaki 	}
7091da177e4SLinus Torvalds 
7101da177e4SLinus Torvalds 	dst_release(&rt->u.dst);
7118ed67789SDaniel Lezcano 	rt = nrt ? : net->ipv6.ip6_null_entry;
7121da177e4SLinus Torvalds 
713e40cf353SYOSHIFUJI Hideaki 	dst_hold(&rt->u.dst);
714e40cf353SYOSHIFUJI Hideaki 	if (nrt) {
71540e22e8fSThomas Graf 		err = ip6_ins_rt(nrt);
716e40cf353SYOSHIFUJI Hideaki 		if (!err)
717e40cf353SYOSHIFUJI Hideaki 			goto out2;
718e40cf353SYOSHIFUJI Hideaki 	}
719e40cf353SYOSHIFUJI Hideaki 
720e40cf353SYOSHIFUJI Hideaki 	if (--attempts <= 0)
7211da177e4SLinus Torvalds 		goto out2;
7221da177e4SLinus Torvalds 
723519fbd87SYOSHIFUJI Hideaki 	/*
724c71099acSThomas Graf 	 * Race condition! In the gap, when table->tb6_lock was
725519fbd87SYOSHIFUJI Hideaki 	 * released someone could insert this route.  Relookup.
7261da177e4SLinus Torvalds 	 */
7271da177e4SLinus Torvalds 	dst_release(&rt->u.dst);
7281da177e4SLinus Torvalds 	goto relookup;
729e40cf353SYOSHIFUJI Hideaki 
730519fbd87SYOSHIFUJI Hideaki out:
7318238dd06SYOSHIFUJI Hideaki 	if (reachable) {
7328238dd06SYOSHIFUJI Hideaki 		reachable = 0;
7338238dd06SYOSHIFUJI Hideaki 		goto restart_2;
7348238dd06SYOSHIFUJI Hideaki 	}
735519fbd87SYOSHIFUJI Hideaki 	dst_hold(&rt->u.dst);
736c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
7371da177e4SLinus Torvalds out2:
7381da177e4SLinus Torvalds 	rt->u.dst.lastuse = jiffies;
7391da177e4SLinus Torvalds 	rt->u.dst.__use++;
740c71099acSThomas Graf 
741c71099acSThomas Graf 	return rt;
742c71099acSThomas Graf }
743c71099acSThomas Graf 
7448ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
7454acad72dSPavel Emelyanov 					    struct flowi *fl, int flags)
7464acad72dSPavel Emelyanov {
7478ed67789SDaniel Lezcano 	return ip6_pol_route(net, table, fl->iif, fl, flags);
7484acad72dSPavel Emelyanov }
7494acad72dSPavel Emelyanov 
750c71099acSThomas Graf void ip6_route_input(struct sk_buff *skb)
751c71099acSThomas Graf {
7520660e03fSArnaldo Carvalho de Melo 	struct ipv6hdr *iph = ipv6_hdr(skb);
753c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(skb->dev);
754adaa70bbSThomas Graf 	int flags = RT6_LOOKUP_F_HAS_SADDR;
755c71099acSThomas Graf 	struct flowi fl = {
756c71099acSThomas Graf 		.iif = skb->dev->ifindex,
757c71099acSThomas Graf 		.nl_u = {
758c71099acSThomas Graf 			.ip6_u = {
759c71099acSThomas Graf 				.daddr = iph->daddr,
760c71099acSThomas Graf 				.saddr = iph->saddr,
76190bcaf7bSAl Viro 				.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
762c71099acSThomas Graf 			},
763c71099acSThomas Graf 		},
76447dcf0cbSThomas Graf 		.mark = skb->mark,
765c71099acSThomas Graf 		.proto = iph->nexthdr,
766c71099acSThomas Graf 	};
767adaa70bbSThomas Graf 
768adaa70bbSThomas Graf 	if (rt6_need_strict(&iph->daddr))
769adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_IFACE;
770c71099acSThomas Graf 
7715578689aSDaniel Lezcano 	skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
772c71099acSThomas Graf }
773c71099acSThomas Graf 
7748ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
775c71099acSThomas Graf 					     struct flowi *fl, int flags)
776c71099acSThomas Graf {
7778ed67789SDaniel Lezcano 	return ip6_pol_route(net, table, fl->oif, fl, flags);
778c71099acSThomas Graf }
779c71099acSThomas Graf 
7804591db4fSDaniel Lezcano struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
7814591db4fSDaniel Lezcano 				    struct flowi *fl)
782c71099acSThomas Graf {
783c71099acSThomas Graf 	int flags = 0;
784c71099acSThomas Graf 
785c71099acSThomas Graf 	if (rt6_need_strict(&fl->fl6_dst))
78677d16f45SYOSHIFUJI Hideaki 		flags |= RT6_LOOKUP_F_IFACE;
787c71099acSThomas Graf 
788adaa70bbSThomas Graf 	if (!ipv6_addr_any(&fl->fl6_src))
789adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
7907cbca67cSYOSHIFUJI Hideaki 	else if (sk) {
7917cbca67cSYOSHIFUJI Hideaki 		unsigned int prefs = inet6_sk(sk)->srcprefs;
7927cbca67cSYOSHIFUJI Hideaki 		if (prefs & IPV6_PREFER_SRC_TMP)
7937cbca67cSYOSHIFUJI Hideaki 			flags |= RT6_LOOKUP_F_SRCPREF_TMP;
7947cbca67cSYOSHIFUJI Hideaki 		if (prefs & IPV6_PREFER_SRC_PUBLIC)
7957cbca67cSYOSHIFUJI Hideaki 			flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
7967cbca67cSYOSHIFUJI Hideaki 		if (prefs & IPV6_PREFER_SRC_COA)
7977cbca67cSYOSHIFUJI Hideaki 			flags |= RT6_LOOKUP_F_SRCPREF_COA;
7987cbca67cSYOSHIFUJI Hideaki 	}
799adaa70bbSThomas Graf 
8004591db4fSDaniel Lezcano 	return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
8011da177e4SLinus Torvalds }
8021da177e4SLinus Torvalds 
8037159039aSYOSHIFUJI Hideaki EXPORT_SYMBOL(ip6_route_output);
8041da177e4SLinus Torvalds 
80514e50e57SDavid S. Miller int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
80614e50e57SDavid S. Miller {
80714e50e57SDavid S. Miller 	struct rt6_info *ort = (struct rt6_info *) *dstp;
80814e50e57SDavid S. Miller 	struct rt6_info *rt = (struct rt6_info *)
80914e50e57SDavid S. Miller 		dst_alloc(&ip6_dst_blackhole_ops);
81014e50e57SDavid S. Miller 	struct dst_entry *new = NULL;
81114e50e57SDavid S. Miller 
81214e50e57SDavid S. Miller 	if (rt) {
81314e50e57SDavid S. Miller 		new = &rt->u.dst;
81414e50e57SDavid S. Miller 
81514e50e57SDavid S. Miller 		atomic_set(&new->__refcnt, 1);
81614e50e57SDavid S. Miller 		new->__use = 1;
817352e512cSHerbert Xu 		new->input = dst_discard;
818352e512cSHerbert Xu 		new->output = dst_discard;
81914e50e57SDavid S. Miller 
82014e50e57SDavid S. Miller 		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
82114e50e57SDavid S. Miller 		new->dev = ort->u.dst.dev;
82214e50e57SDavid S. Miller 		if (new->dev)
82314e50e57SDavid S. Miller 			dev_hold(new->dev);
82414e50e57SDavid S. Miller 		rt->rt6i_idev = ort->rt6i_idev;
82514e50e57SDavid S. Miller 		if (rt->rt6i_idev)
82614e50e57SDavid S. Miller 			in6_dev_hold(rt->rt6i_idev);
82714e50e57SDavid S. Miller 		rt->rt6i_expires = 0;
82814e50e57SDavid S. Miller 
82914e50e57SDavid S. Miller 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
83014e50e57SDavid S. Miller 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
83114e50e57SDavid S. Miller 		rt->rt6i_metric = 0;
83214e50e57SDavid S. Miller 
83314e50e57SDavid S. Miller 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
83414e50e57SDavid S. Miller #ifdef CONFIG_IPV6_SUBTREES
83514e50e57SDavid S. Miller 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
83614e50e57SDavid S. Miller #endif
83714e50e57SDavid S. Miller 
83814e50e57SDavid S. Miller 		dst_free(new);
83914e50e57SDavid S. Miller 	}
84014e50e57SDavid S. Miller 
84114e50e57SDavid S. Miller 	dst_release(*dstp);
84214e50e57SDavid S. Miller 	*dstp = new;
84314e50e57SDavid S. Miller 	return (new ? 0 : -ENOMEM);
84414e50e57SDavid S. Miller }
84514e50e57SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
84614e50e57SDavid S. Miller 
8471da177e4SLinus Torvalds /*
8481da177e4SLinus Torvalds  *	Destination cache support functions
8491da177e4SLinus Torvalds  */
8501da177e4SLinus Torvalds 
8511da177e4SLinus Torvalds static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
8521da177e4SLinus Torvalds {
8531da177e4SLinus Torvalds 	struct rt6_info *rt;
8541da177e4SLinus Torvalds 
8551da177e4SLinus Torvalds 	rt = (struct rt6_info *) dst;
8561da177e4SLinus Torvalds 
8571da177e4SLinus Torvalds 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
8581da177e4SLinus Torvalds 		return dst;
8591da177e4SLinus Torvalds 
8601da177e4SLinus Torvalds 	return NULL;
8611da177e4SLinus Torvalds }
8621da177e4SLinus Torvalds 
8631da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
8641da177e4SLinus Torvalds {
8651da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *) dst;
8661da177e4SLinus Torvalds 
8671da177e4SLinus Torvalds 	if (rt) {
8681da177e4SLinus Torvalds 		if (rt->rt6i_flags & RTF_CACHE)
869e0a1ad73SThomas Graf 			ip6_del_rt(rt);
8701da177e4SLinus Torvalds 		else
8711da177e4SLinus Torvalds 			dst_release(dst);
8721da177e4SLinus Torvalds 	}
8731da177e4SLinus Torvalds 	return NULL;
8741da177e4SLinus Torvalds }
8751da177e4SLinus Torvalds 
8761da177e4SLinus Torvalds static void ip6_link_failure(struct sk_buff *skb)
8771da177e4SLinus Torvalds {
8781da177e4SLinus Torvalds 	struct rt6_info *rt;
8791da177e4SLinus Torvalds 
8801da177e4SLinus Torvalds 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
8811da177e4SLinus Torvalds 
8821da177e4SLinus Torvalds 	rt = (struct rt6_info *) skb->dst;
8831da177e4SLinus Torvalds 	if (rt) {
8841da177e4SLinus Torvalds 		if (rt->rt6i_flags&RTF_CACHE) {
8851da177e4SLinus Torvalds 			dst_set_expires(&rt->u.dst, 0);
8861da177e4SLinus Torvalds 			rt->rt6i_flags |= RTF_EXPIRES;
8871da177e4SLinus Torvalds 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
8881da177e4SLinus Torvalds 			rt->rt6i_node->fn_sernum = -1;
8891da177e4SLinus Torvalds 	}
8901da177e4SLinus Torvalds }
8911da177e4SLinus Torvalds 
8921da177e4SLinus Torvalds static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
8931da177e4SLinus Torvalds {
8941da177e4SLinus Torvalds 	struct rt6_info *rt6 = (struct rt6_info*)dst;
8951da177e4SLinus Torvalds 
8961da177e4SLinus Torvalds 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
8971da177e4SLinus Torvalds 		rt6->rt6i_flags |= RTF_MODIFIED;
8981da177e4SLinus Torvalds 		if (mtu < IPV6_MIN_MTU) {
8991da177e4SLinus Torvalds 			mtu = IPV6_MIN_MTU;
9001da177e4SLinus Torvalds 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
9011da177e4SLinus Torvalds 		}
9021da177e4SLinus Torvalds 		dst->metrics[RTAX_MTU-1] = mtu;
9038d71740cSTom Tucker 		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
9041da177e4SLinus Torvalds 	}
9051da177e4SLinus Torvalds }
9061da177e4SLinus Torvalds 
9071da177e4SLinus Torvalds static int ipv6_get_mtu(struct net_device *dev);
9081da177e4SLinus Torvalds 
9095578689aSDaniel Lezcano static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
9101da177e4SLinus Torvalds {
9111da177e4SLinus Torvalds 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
9121da177e4SLinus Torvalds 
9135578689aSDaniel Lezcano 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
9145578689aSDaniel Lezcano 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
9151da177e4SLinus Torvalds 
9161da177e4SLinus Torvalds 	/*
9171da177e4SLinus Torvalds 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
9181da177e4SLinus Torvalds 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
9191da177e4SLinus Torvalds 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
9201da177e4SLinus Torvalds 	 * rely only on pmtu discovery"
9211da177e4SLinus Torvalds 	 */
9221da177e4SLinus Torvalds 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
9231da177e4SLinus Torvalds 		mtu = IPV6_MAXPLEN;
9241da177e4SLinus Torvalds 	return mtu;
9251da177e4SLinus Torvalds }
9261da177e4SLinus Torvalds 
9273b00944cSYOSHIFUJI Hideaki static struct dst_entry *icmp6_dst_gc_list;
9283b00944cSYOSHIFUJI Hideaki static DEFINE_SPINLOCK(icmp6_dst_lock);
9295d0bbeebSThomas Graf 
9303b00944cSYOSHIFUJI Hideaki struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
9311da177e4SLinus Torvalds 				  struct neighbour *neigh,
9329acd9f3aSYOSHIFUJI Hideaki 				  const struct in6_addr *addr)
9331da177e4SLinus Torvalds {
9341da177e4SLinus Torvalds 	struct rt6_info *rt;
9351da177e4SLinus Torvalds 	struct inet6_dev *idev = in6_dev_get(dev);
936c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
9371da177e4SLinus Torvalds 
9381da177e4SLinus Torvalds 	if (unlikely(idev == NULL))
9391da177e4SLinus Torvalds 		return NULL;
9401da177e4SLinus Torvalds 
941f2fc6a54SBenjamin Thery 	rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
9421da177e4SLinus Torvalds 	if (unlikely(rt == NULL)) {
9431da177e4SLinus Torvalds 		in6_dev_put(idev);
9441da177e4SLinus Torvalds 		goto out;
9451da177e4SLinus Torvalds 	}
9461da177e4SLinus Torvalds 
9471da177e4SLinus Torvalds 	dev_hold(dev);
9481da177e4SLinus Torvalds 	if (neigh)
9491da177e4SLinus Torvalds 		neigh_hold(neigh);
9501da177e4SLinus Torvalds 	else
9511da177e4SLinus Torvalds 		neigh = ndisc_get_neigh(dev, addr);
9521da177e4SLinus Torvalds 
9531da177e4SLinus Torvalds 	rt->rt6i_dev	  = dev;
9541da177e4SLinus Torvalds 	rt->rt6i_idev     = idev;
9551da177e4SLinus Torvalds 	rt->rt6i_nexthop  = neigh;
9561da177e4SLinus Torvalds 	atomic_set(&rt->u.dst.__refcnt, 1);
9571da177e4SLinus Torvalds 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
9581da177e4SLinus Torvalds 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
9595578689aSDaniel Lezcano 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
9603b00944cSYOSHIFUJI Hideaki 	rt->u.dst.output  = ip6_output;
9611da177e4SLinus Torvalds 
9621da177e4SLinus Torvalds #if 0	/* there's no chance to use these for ndisc */
9631da177e4SLinus Torvalds 	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
9641da177e4SLinus Torvalds 				? DST_HOST
9651da177e4SLinus Torvalds 				: 0;
9661da177e4SLinus Torvalds 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
9671da177e4SLinus Torvalds 	rt->rt6i_dst.plen = 128;
9681da177e4SLinus Torvalds #endif
9691da177e4SLinus Torvalds 
9703b00944cSYOSHIFUJI Hideaki 	spin_lock_bh(&icmp6_dst_lock);
9713b00944cSYOSHIFUJI Hideaki 	rt->u.dst.next = icmp6_dst_gc_list;
9723b00944cSYOSHIFUJI Hideaki 	icmp6_dst_gc_list = &rt->u.dst;
9733b00944cSYOSHIFUJI Hideaki 	spin_unlock_bh(&icmp6_dst_lock);
9741da177e4SLinus Torvalds 
9755578689aSDaniel Lezcano 	fib6_force_start_gc(net);
9761da177e4SLinus Torvalds 
9771da177e4SLinus Torvalds out:
97840aa7b90SYOSHIFUJI Hideaki 	return &rt->u.dst;
9791da177e4SLinus Torvalds }
9801da177e4SLinus Torvalds 
9813d0f24a7SStephen Hemminger int icmp6_dst_gc(void)
9821da177e4SLinus Torvalds {
9831da177e4SLinus Torvalds 	struct dst_entry *dst, *next, **pprev;
9843d0f24a7SStephen Hemminger 	int more = 0;
9851da177e4SLinus Torvalds 
9861da177e4SLinus Torvalds 	next = NULL;
9875d0bbeebSThomas Graf 
9883b00944cSYOSHIFUJI Hideaki 	spin_lock_bh(&icmp6_dst_lock);
9893b00944cSYOSHIFUJI Hideaki 	pprev = &icmp6_dst_gc_list;
9905d0bbeebSThomas Graf 
9911da177e4SLinus Torvalds 	while ((dst = *pprev) != NULL) {
9921da177e4SLinus Torvalds 		if (!atomic_read(&dst->__refcnt)) {
9931da177e4SLinus Torvalds 			*pprev = dst->next;
9941da177e4SLinus Torvalds 			dst_free(dst);
9951da177e4SLinus Torvalds 		} else {
9961da177e4SLinus Torvalds 			pprev = &dst->next;
9973d0f24a7SStephen Hemminger 			++more;
9981da177e4SLinus Torvalds 		}
9991da177e4SLinus Torvalds 	}
10001da177e4SLinus Torvalds 
10013b00944cSYOSHIFUJI Hideaki 	spin_unlock_bh(&icmp6_dst_lock);
10025d0bbeebSThomas Graf 
10033d0f24a7SStephen Hemminger 	return more;
10041da177e4SLinus Torvalds }
10051da177e4SLinus Torvalds 
10061e493d19SDavid S. Miller static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
10071e493d19SDavid S. Miller 			    void *arg)
10081e493d19SDavid S. Miller {
10091e493d19SDavid S. Miller 	struct dst_entry *dst, **pprev;
10101e493d19SDavid S. Miller 
10111e493d19SDavid S. Miller 	spin_lock_bh(&icmp6_dst_lock);
10121e493d19SDavid S. Miller 	pprev = &icmp6_dst_gc_list;
10131e493d19SDavid S. Miller 	while ((dst = *pprev) != NULL) {
10141e493d19SDavid S. Miller 		struct rt6_info *rt = (struct rt6_info *) dst;
10151e493d19SDavid S. Miller 		if (func(rt, arg)) {
10161e493d19SDavid S. Miller 			*pprev = dst->next;
10171e493d19SDavid S. Miller 			dst_free(dst);
10181e493d19SDavid S. Miller 		} else {
10191e493d19SDavid S. Miller 			pprev = &dst->next;
10201e493d19SDavid S. Miller 		}
10211e493d19SDavid S. Miller 	}
10221e493d19SDavid S. Miller 	spin_unlock_bh(&icmp6_dst_lock);
10231e493d19SDavid S. Miller }
10241e493d19SDavid S. Miller 
1025569d3645SDaniel Lezcano static int ip6_dst_gc(struct dst_ops *ops)
10261da177e4SLinus Torvalds {
10271da177e4SLinus Torvalds 	unsigned long now = jiffies;
10287019b78eSDaniel Lezcano 	struct net *net = ops->dst_net;
10297019b78eSDaniel Lezcano 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
10307019b78eSDaniel Lezcano 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
10317019b78eSDaniel Lezcano 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
10327019b78eSDaniel Lezcano 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
10337019b78eSDaniel Lezcano 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
10341da177e4SLinus Torvalds 
10357019b78eSDaniel Lezcano 	if (time_after(rt_last_gc + rt_min_interval, now) &&
10367019b78eSDaniel Lezcano 	    atomic_read(&ops->entries) <= rt_max_size)
10371da177e4SLinus Torvalds 		goto out;
10381da177e4SLinus Torvalds 
10396891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire++;
10406891a346SBenjamin Thery 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
10416891a346SBenjamin Thery 	net->ipv6.ip6_rt_last_gc = now;
10427019b78eSDaniel Lezcano 	if (atomic_read(&ops->entries) < ops->gc_thresh)
10437019b78eSDaniel Lezcano 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
10441da177e4SLinus Torvalds out:
10457019b78eSDaniel Lezcano 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
10467019b78eSDaniel Lezcano 	return (atomic_read(&ops->entries) > rt_max_size);
10471da177e4SLinus Torvalds }
10481da177e4SLinus Torvalds 
10491da177e4SLinus Torvalds /* Clean host part of a prefix. Not necessary in radix tree,
10501da177e4SLinus Torvalds    but results in cleaner routing tables.
10511da177e4SLinus Torvalds 
10521da177e4SLinus Torvalds    Remove it only when all the things will work!
10531da177e4SLinus Torvalds  */
10541da177e4SLinus Torvalds 
10551da177e4SLinus Torvalds static int ipv6_get_mtu(struct net_device *dev)
10561da177e4SLinus Torvalds {
10571da177e4SLinus Torvalds 	int mtu = IPV6_MIN_MTU;
10581da177e4SLinus Torvalds 	struct inet6_dev *idev;
10591da177e4SLinus Torvalds 
10601da177e4SLinus Torvalds 	idev = in6_dev_get(dev);
10611da177e4SLinus Torvalds 	if (idev) {
10621da177e4SLinus Torvalds 		mtu = idev->cnf.mtu6;
10631da177e4SLinus Torvalds 		in6_dev_put(idev);
10641da177e4SLinus Torvalds 	}
10651da177e4SLinus Torvalds 	return mtu;
10661da177e4SLinus Torvalds }
10671da177e4SLinus Torvalds 
10686b75d090SYOSHIFUJI Hideaki int ip6_dst_hoplimit(struct dst_entry *dst)
10691da177e4SLinus Torvalds {
10706b75d090SYOSHIFUJI Hideaki 	int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
10716b75d090SYOSHIFUJI Hideaki 	if (hoplimit < 0) {
10726b75d090SYOSHIFUJI Hideaki 		struct net_device *dev = dst->dev;
10736b75d090SYOSHIFUJI Hideaki 		struct inet6_dev *idev = in6_dev_get(dev);
10741da177e4SLinus Torvalds 		if (idev) {
10751da177e4SLinus Torvalds 			hoplimit = idev->cnf.hop_limit;
10761da177e4SLinus Torvalds 			in6_dev_put(idev);
10776b75d090SYOSHIFUJI Hideaki 		} else
107853b7997fSYOSHIFUJI Hideaki 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
10791da177e4SLinus Torvalds 	}
10801da177e4SLinus Torvalds 	return hoplimit;
10811da177e4SLinus Torvalds }
10821da177e4SLinus Torvalds 
10831da177e4SLinus Torvalds /*
10841da177e4SLinus Torvalds  *
10851da177e4SLinus Torvalds  */
10861da177e4SLinus Torvalds 
108786872cb5SThomas Graf int ip6_route_add(struct fib6_config *cfg)
10881da177e4SLinus Torvalds {
10891da177e4SLinus Torvalds 	int err;
10905578689aSDaniel Lezcano 	struct net *net = cfg->fc_nlinfo.nl_net;
10911da177e4SLinus Torvalds 	struct rt6_info *rt = NULL;
10921da177e4SLinus Torvalds 	struct net_device *dev = NULL;
10931da177e4SLinus Torvalds 	struct inet6_dev *idev = NULL;
1094c71099acSThomas Graf 	struct fib6_table *table;
10951da177e4SLinus Torvalds 	int addr_type;
10961da177e4SLinus Torvalds 
109786872cb5SThomas Graf 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
10981da177e4SLinus Torvalds 		return -EINVAL;
10991da177e4SLinus Torvalds #ifndef CONFIG_IPV6_SUBTREES
110086872cb5SThomas Graf 	if (cfg->fc_src_len)
11011da177e4SLinus Torvalds 		return -EINVAL;
11021da177e4SLinus Torvalds #endif
110386872cb5SThomas Graf 	if (cfg->fc_ifindex) {
11041da177e4SLinus Torvalds 		err = -ENODEV;
11055578689aSDaniel Lezcano 		dev = dev_get_by_index(net, cfg->fc_ifindex);
11061da177e4SLinus Torvalds 		if (!dev)
11071da177e4SLinus Torvalds 			goto out;
11081da177e4SLinus Torvalds 		idev = in6_dev_get(dev);
11091da177e4SLinus Torvalds 		if (!idev)
11101da177e4SLinus Torvalds 			goto out;
11111da177e4SLinus Torvalds 	}
11121da177e4SLinus Torvalds 
111386872cb5SThomas Graf 	if (cfg->fc_metric == 0)
111486872cb5SThomas Graf 		cfg->fc_metric = IP6_RT_PRIO_USER;
11151da177e4SLinus Torvalds 
11165578689aSDaniel Lezcano 	table = fib6_new_table(net, cfg->fc_table);
1117c71099acSThomas Graf 	if (table == NULL) {
1118c71099acSThomas Graf 		err = -ENOBUFS;
1119c71099acSThomas Graf 		goto out;
1120c71099acSThomas Graf 	}
1121c71099acSThomas Graf 
1122f2fc6a54SBenjamin Thery 	rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
11231da177e4SLinus Torvalds 
11241da177e4SLinus Torvalds 	if (rt == NULL) {
11251da177e4SLinus Torvalds 		err = -ENOMEM;
11261da177e4SLinus Torvalds 		goto out;
11271da177e4SLinus Torvalds 	}
11281da177e4SLinus Torvalds 
11291da177e4SLinus Torvalds 	rt->u.dst.obsolete = -1;
11306f704992SYOSHIFUJI Hideaki 	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
11316f704992SYOSHIFUJI Hideaki 				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
11326f704992SYOSHIFUJI Hideaki 				0;
11331da177e4SLinus Torvalds 
113486872cb5SThomas Graf 	if (cfg->fc_protocol == RTPROT_UNSPEC)
113586872cb5SThomas Graf 		cfg->fc_protocol = RTPROT_BOOT;
113686872cb5SThomas Graf 	rt->rt6i_protocol = cfg->fc_protocol;
113786872cb5SThomas Graf 
113886872cb5SThomas Graf 	addr_type = ipv6_addr_type(&cfg->fc_dst);
11391da177e4SLinus Torvalds 
11401da177e4SLinus Torvalds 	if (addr_type & IPV6_ADDR_MULTICAST)
11411da177e4SLinus Torvalds 		rt->u.dst.input = ip6_mc_input;
11421da177e4SLinus Torvalds 	else
11431da177e4SLinus Torvalds 		rt->u.dst.input = ip6_forward;
11441da177e4SLinus Torvalds 
11451da177e4SLinus Torvalds 	rt->u.dst.output = ip6_output;
11461da177e4SLinus Torvalds 
114786872cb5SThomas Graf 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
114886872cb5SThomas Graf 	rt->rt6i_dst.plen = cfg->fc_dst_len;
11491da177e4SLinus Torvalds 	if (rt->rt6i_dst.plen == 128)
11501da177e4SLinus Torvalds 	       rt->u.dst.flags = DST_HOST;
11511da177e4SLinus Torvalds 
11521da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
115386872cb5SThomas Graf 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
115486872cb5SThomas Graf 	rt->rt6i_src.plen = cfg->fc_src_len;
11551da177e4SLinus Torvalds #endif
11561da177e4SLinus Torvalds 
115786872cb5SThomas Graf 	rt->rt6i_metric = cfg->fc_metric;
11581da177e4SLinus Torvalds 
11591da177e4SLinus Torvalds 	/* We cannot add true routes via loopback here,
11601da177e4SLinus Torvalds 	   they would result in kernel looping; promote them to reject routes
11611da177e4SLinus Torvalds 	 */
116286872cb5SThomas Graf 	if ((cfg->fc_flags & RTF_REJECT) ||
11631da177e4SLinus Torvalds 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
11641da177e4SLinus Torvalds 		/* hold loopback dev/idev if we haven't done so. */
11655578689aSDaniel Lezcano 		if (dev != net->loopback_dev) {
11661da177e4SLinus Torvalds 			if (dev) {
11671da177e4SLinus Torvalds 				dev_put(dev);
11681da177e4SLinus Torvalds 				in6_dev_put(idev);
11691da177e4SLinus Torvalds 			}
11705578689aSDaniel Lezcano 			dev = net->loopback_dev;
11711da177e4SLinus Torvalds 			dev_hold(dev);
11721da177e4SLinus Torvalds 			idev = in6_dev_get(dev);
11731da177e4SLinus Torvalds 			if (!idev) {
11741da177e4SLinus Torvalds 				err = -ENODEV;
11751da177e4SLinus Torvalds 				goto out;
11761da177e4SLinus Torvalds 			}
11771da177e4SLinus Torvalds 		}
11781da177e4SLinus Torvalds 		rt->u.dst.output = ip6_pkt_discard_out;
11791da177e4SLinus Torvalds 		rt->u.dst.input = ip6_pkt_discard;
11801da177e4SLinus Torvalds 		rt->u.dst.error = -ENETUNREACH;
11811da177e4SLinus Torvalds 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
11821da177e4SLinus Torvalds 		goto install_route;
11831da177e4SLinus Torvalds 	}
11841da177e4SLinus Torvalds 
118586872cb5SThomas Graf 	if (cfg->fc_flags & RTF_GATEWAY) {
11861da177e4SLinus Torvalds 		struct in6_addr *gw_addr;
11871da177e4SLinus Torvalds 		int gwa_type;
11881da177e4SLinus Torvalds 
118986872cb5SThomas Graf 		gw_addr = &cfg->fc_gateway;
119086872cb5SThomas Graf 		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
11911da177e4SLinus Torvalds 		gwa_type = ipv6_addr_type(gw_addr);
11921da177e4SLinus Torvalds 
11931da177e4SLinus Torvalds 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
11941da177e4SLinus Torvalds 			struct rt6_info *grt;
11951da177e4SLinus Torvalds 
11961da177e4SLinus Torvalds 			/* IPv6 strictly inhibits using not link-local
11971da177e4SLinus Torvalds 			   addresses as nexthop address.
11981da177e4SLinus Torvalds 			   Otherwise, router will not able to send redirects.
11991da177e4SLinus Torvalds 			   It is very good, but in some (rare!) circumstances
12001da177e4SLinus Torvalds 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
12011da177e4SLinus Torvalds 			   some exceptions. --ANK
12021da177e4SLinus Torvalds 			 */
12031da177e4SLinus Torvalds 			err = -EINVAL;
12041da177e4SLinus Torvalds 			if (!(gwa_type&IPV6_ADDR_UNICAST))
12051da177e4SLinus Torvalds 				goto out;
12061da177e4SLinus Torvalds 
12075578689aSDaniel Lezcano 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
12081da177e4SLinus Torvalds 
12091da177e4SLinus Torvalds 			err = -EHOSTUNREACH;
12101da177e4SLinus Torvalds 			if (grt == NULL)
12111da177e4SLinus Torvalds 				goto out;
12121da177e4SLinus Torvalds 			if (dev) {
12131da177e4SLinus Torvalds 				if (dev != grt->rt6i_dev) {
12141da177e4SLinus Torvalds 					dst_release(&grt->u.dst);
12151da177e4SLinus Torvalds 					goto out;
12161da177e4SLinus Torvalds 				}
12171da177e4SLinus Torvalds 			} else {
12181da177e4SLinus Torvalds 				dev = grt->rt6i_dev;
12191da177e4SLinus Torvalds 				idev = grt->rt6i_idev;
12201da177e4SLinus Torvalds 				dev_hold(dev);
12211da177e4SLinus Torvalds 				in6_dev_hold(grt->rt6i_idev);
12221da177e4SLinus Torvalds 			}
12231da177e4SLinus Torvalds 			if (!(grt->rt6i_flags&RTF_GATEWAY))
12241da177e4SLinus Torvalds 				err = 0;
12251da177e4SLinus Torvalds 			dst_release(&grt->u.dst);
12261da177e4SLinus Torvalds 
12271da177e4SLinus Torvalds 			if (err)
12281da177e4SLinus Torvalds 				goto out;
12291da177e4SLinus Torvalds 		}
12301da177e4SLinus Torvalds 		err = -EINVAL;
12311da177e4SLinus Torvalds 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
12321da177e4SLinus Torvalds 			goto out;
12331da177e4SLinus Torvalds 	}
12341da177e4SLinus Torvalds 
12351da177e4SLinus Torvalds 	err = -ENODEV;
12361da177e4SLinus Torvalds 	if (dev == NULL)
12371da177e4SLinus Torvalds 		goto out;
12381da177e4SLinus Torvalds 
123986872cb5SThomas Graf 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
12401da177e4SLinus Torvalds 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
12411da177e4SLinus Torvalds 		if (IS_ERR(rt->rt6i_nexthop)) {
12421da177e4SLinus Torvalds 			err = PTR_ERR(rt->rt6i_nexthop);
12431da177e4SLinus Torvalds 			rt->rt6i_nexthop = NULL;
12441da177e4SLinus Torvalds 			goto out;
12451da177e4SLinus Torvalds 		}
12461da177e4SLinus Torvalds 	}
12471da177e4SLinus Torvalds 
124886872cb5SThomas Graf 	rt->rt6i_flags = cfg->fc_flags;
12491da177e4SLinus Torvalds 
12501da177e4SLinus Torvalds install_route:
125186872cb5SThomas Graf 	if (cfg->fc_mx) {
125286872cb5SThomas Graf 		struct nlattr *nla;
125386872cb5SThomas Graf 		int remaining;
12541da177e4SLinus Torvalds 
125586872cb5SThomas Graf 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
12568f4c1f9bSThomas Graf 			int type = nla_type(nla);
125786872cb5SThomas Graf 
125886872cb5SThomas Graf 			if (type) {
125986872cb5SThomas Graf 				if (type > RTAX_MAX) {
12601da177e4SLinus Torvalds 					err = -EINVAL;
12611da177e4SLinus Torvalds 					goto out;
12621da177e4SLinus Torvalds 				}
126386872cb5SThomas Graf 
126486872cb5SThomas Graf 				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
12651da177e4SLinus Torvalds 			}
12661da177e4SLinus Torvalds 		}
12671da177e4SLinus Torvalds 	}
12681da177e4SLinus Torvalds 
12695ffc02a1SSatoru SATOH 	if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
12701da177e4SLinus Torvalds 		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
12711ca615fbSRami Rosen 	if (!dst_mtu(&rt->u.dst))
12721da177e4SLinus Torvalds 		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
12735ffc02a1SSatoru SATOH 	if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
12745578689aSDaniel Lezcano 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
12751da177e4SLinus Torvalds 	rt->u.dst.dev = dev;
12761da177e4SLinus Torvalds 	rt->rt6i_idev = idev;
1277c71099acSThomas Graf 	rt->rt6i_table = table;
127863152fc0SDaniel Lezcano 
1279c346dca1SYOSHIFUJI Hideaki 	cfg->fc_nlinfo.nl_net = dev_net(dev);
128063152fc0SDaniel Lezcano 
128186872cb5SThomas Graf 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
12821da177e4SLinus Torvalds 
12831da177e4SLinus Torvalds out:
12841da177e4SLinus Torvalds 	if (dev)
12851da177e4SLinus Torvalds 		dev_put(dev);
12861da177e4SLinus Torvalds 	if (idev)
12871da177e4SLinus Torvalds 		in6_dev_put(idev);
12881da177e4SLinus Torvalds 	if (rt)
128940aa7b90SYOSHIFUJI Hideaki 		dst_free(&rt->u.dst);
12901da177e4SLinus Torvalds 	return err;
12911da177e4SLinus Torvalds }
12921da177e4SLinus Torvalds 
129386872cb5SThomas Graf static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
12941da177e4SLinus Torvalds {
12951da177e4SLinus Torvalds 	int err;
1296c71099acSThomas Graf 	struct fib6_table *table;
1297c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(rt->rt6i_dev);
12981da177e4SLinus Torvalds 
12998ed67789SDaniel Lezcano 	if (rt == net->ipv6.ip6_null_entry)
13006c813a72SPatrick McHardy 		return -ENOENT;
13016c813a72SPatrick McHardy 
1302c71099acSThomas Graf 	table = rt->rt6i_table;
1303c71099acSThomas Graf 	write_lock_bh(&table->tb6_lock);
13041da177e4SLinus Torvalds 
130586872cb5SThomas Graf 	err = fib6_del(rt, info);
13061da177e4SLinus Torvalds 	dst_release(&rt->u.dst);
13071da177e4SLinus Torvalds 
1308c71099acSThomas Graf 	write_unlock_bh(&table->tb6_lock);
13091da177e4SLinus Torvalds 
13101da177e4SLinus Torvalds 	return err;
13111da177e4SLinus Torvalds }
13121da177e4SLinus Torvalds 
1313e0a1ad73SThomas Graf int ip6_del_rt(struct rt6_info *rt)
1314e0a1ad73SThomas Graf {
13154d1169c1SDenis V. Lunev 	struct nl_info info = {
1316c346dca1SYOSHIFUJI Hideaki 		.nl_net = dev_net(rt->rt6i_dev),
13174d1169c1SDenis V. Lunev 	};
1318528c4cebSDenis V. Lunev 	return __ip6_del_rt(rt, &info);
1319e0a1ad73SThomas Graf }
1320e0a1ad73SThomas Graf 
132186872cb5SThomas Graf static int ip6_route_del(struct fib6_config *cfg)
13221da177e4SLinus Torvalds {
1323c71099acSThomas Graf 	struct fib6_table *table;
13241da177e4SLinus Torvalds 	struct fib6_node *fn;
13251da177e4SLinus Torvalds 	struct rt6_info *rt;
13261da177e4SLinus Torvalds 	int err = -ESRCH;
13271da177e4SLinus Torvalds 
13285578689aSDaniel Lezcano 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1329c71099acSThomas Graf 	if (table == NULL)
1330c71099acSThomas Graf 		return err;
13311da177e4SLinus Torvalds 
1332c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
1333c71099acSThomas Graf 
1334c71099acSThomas Graf 	fn = fib6_locate(&table->tb6_root,
133586872cb5SThomas Graf 			 &cfg->fc_dst, cfg->fc_dst_len,
133686872cb5SThomas Graf 			 &cfg->fc_src, cfg->fc_src_len);
13371da177e4SLinus Torvalds 
13381da177e4SLinus Torvalds 	if (fn) {
13397cc48263SEric Dumazet 		for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
134086872cb5SThomas Graf 			if (cfg->fc_ifindex &&
13411da177e4SLinus Torvalds 			    (rt->rt6i_dev == NULL ||
134286872cb5SThomas Graf 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
13431da177e4SLinus Torvalds 				continue;
134486872cb5SThomas Graf 			if (cfg->fc_flags & RTF_GATEWAY &&
134586872cb5SThomas Graf 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
13461da177e4SLinus Torvalds 				continue;
134786872cb5SThomas Graf 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
13481da177e4SLinus Torvalds 				continue;
13491da177e4SLinus Torvalds 			dst_hold(&rt->u.dst);
1350c71099acSThomas Graf 			read_unlock_bh(&table->tb6_lock);
13511da177e4SLinus Torvalds 
135286872cb5SThomas Graf 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
13531da177e4SLinus Torvalds 		}
13541da177e4SLinus Torvalds 	}
1355c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
13561da177e4SLinus Torvalds 
13571da177e4SLinus Torvalds 	return err;
13581da177e4SLinus Torvalds }
13591da177e4SLinus Torvalds 
13601da177e4SLinus Torvalds /*
13611da177e4SLinus Torvalds  *	Handle redirects
13621da177e4SLinus Torvalds  */
/*
 * Lookup key used while validating a redirect: an ordinary flow
 * description plus the address of the router the redirect came from.
 *
 * 'fl' must stay the first member: the structure is passed around as a
 * plain 'struct flowi *' and cast back to 'struct ip6rd_flowi *' by the
 * redirect lookup code in this file.
 */
1363a6279458SYOSHIFUJI Hideaki struct ip6rd_flowi {
1364a6279458SYOSHIFUJI Hideaki 	struct flowi fl;
1365a6279458SYOSHIFUJI Hideaki 	struct in6_addr gateway;
1366a6279458SYOSHIFUJI Hideaki };
13671da177e4SLinus Torvalds 
13688ed67789SDaniel Lezcano static struct rt6_info *__ip6_route_redirect(struct net *net,
13698ed67789SDaniel Lezcano 					     struct fib6_table *table,
1370a6279458SYOSHIFUJI Hideaki 					     struct flowi *fl,
1371a6279458SYOSHIFUJI Hideaki 					     int flags)
1372a6279458SYOSHIFUJI Hideaki {
1373a6279458SYOSHIFUJI Hideaki 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1374a6279458SYOSHIFUJI Hideaki 	struct rt6_info *rt;
1375a6279458SYOSHIFUJI Hideaki 	struct fib6_node *fn;
1376c71099acSThomas Graf 
1377e843b9e1SYOSHIFUJI Hideaki 	/*
1378e843b9e1SYOSHIFUJI Hideaki 	 * Get the "current" route for this destination and
1379e843b9e1SYOSHIFUJI Hideaki 	 * check if the redirect has come from approriate router.
1380e843b9e1SYOSHIFUJI Hideaki 	 *
1381e843b9e1SYOSHIFUJI Hideaki 	 * RFC 2461 specifies that redirects should only be
1382e843b9e1SYOSHIFUJI Hideaki 	 * accepted if they come from the nexthop to the target.
1383e843b9e1SYOSHIFUJI Hideaki 	 * Due to the way the routes are chosen, this notion
1384e843b9e1SYOSHIFUJI Hideaki 	 * is a bit fuzzy and one might need to check all possible
1385e843b9e1SYOSHIFUJI Hideaki 	 * routes.
1386e843b9e1SYOSHIFUJI Hideaki 	 */
13871da177e4SLinus Torvalds 
1388c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
1389a6279458SYOSHIFUJI Hideaki 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1390e843b9e1SYOSHIFUJI Hideaki restart:
13917cc48263SEric Dumazet 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
13921da177e4SLinus Torvalds 		/*
13931da177e4SLinus Torvalds 		 * Current route is on-link; redirect is always invalid.
13941da177e4SLinus Torvalds 		 *
13951da177e4SLinus Torvalds 		 * Seems, previous statement is not true. It could
13961da177e4SLinus Torvalds 		 * be node, which looks for us as on-link (f.e. proxy ndisc)
13971da177e4SLinus Torvalds 		 * But then router serving it might decide, that we should
13981da177e4SLinus Torvalds 		 * know truth 8)8) --ANK (980726).
13991da177e4SLinus Torvalds 		 */
1400e843b9e1SYOSHIFUJI Hideaki 		if (rt6_check_expired(rt))
1401e843b9e1SYOSHIFUJI Hideaki 			continue;
14021da177e4SLinus Torvalds 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1403e843b9e1SYOSHIFUJI Hideaki 			continue;
1404a6279458SYOSHIFUJI Hideaki 		if (fl->oif != rt->rt6i_dev->ifindex)
1405e843b9e1SYOSHIFUJI Hideaki 			continue;
1406a6279458SYOSHIFUJI Hideaki 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1407e843b9e1SYOSHIFUJI Hideaki 			continue;
1408e843b9e1SYOSHIFUJI Hideaki 		break;
1409e843b9e1SYOSHIFUJI Hideaki 	}
1410a6279458SYOSHIFUJI Hideaki 
1411cb15d9c2SYOSHIFUJI Hideaki 	if (!rt)
14128ed67789SDaniel Lezcano 		rt = net->ipv6.ip6_null_entry;
14138ed67789SDaniel Lezcano 	BACKTRACK(net, &fl->fl6_src);
1414cb15d9c2SYOSHIFUJI Hideaki out:
1415a6279458SYOSHIFUJI Hideaki 	dst_hold(&rt->u.dst);
1416a6279458SYOSHIFUJI Hideaki 
1417c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
14181da177e4SLinus Torvalds 
1419a6279458SYOSHIFUJI Hideaki 	return rt;
1420a6279458SYOSHIFUJI Hideaki };
1421a6279458SYOSHIFUJI Hideaki 
1422a6279458SYOSHIFUJI Hideaki static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1423a6279458SYOSHIFUJI Hideaki 					   struct in6_addr *src,
1424a6279458SYOSHIFUJI Hideaki 					   struct in6_addr *gateway,
1425a6279458SYOSHIFUJI Hideaki 					   struct net_device *dev)
1426a6279458SYOSHIFUJI Hideaki {
1427adaa70bbSThomas Graf 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1428c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
1429a6279458SYOSHIFUJI Hideaki 	struct ip6rd_flowi rdfl = {
1430a6279458SYOSHIFUJI Hideaki 		.fl = {
1431a6279458SYOSHIFUJI Hideaki 			.oif = dev->ifindex,
1432a6279458SYOSHIFUJI Hideaki 			.nl_u = {
1433a6279458SYOSHIFUJI Hideaki 				.ip6_u = {
1434a6279458SYOSHIFUJI Hideaki 					.daddr = *dest,
1435a6279458SYOSHIFUJI Hideaki 					.saddr = *src,
1436a6279458SYOSHIFUJI Hideaki 				},
1437a6279458SYOSHIFUJI Hideaki 			},
1438a6279458SYOSHIFUJI Hideaki 		},
1439a6279458SYOSHIFUJI Hideaki 		.gateway = *gateway,
1440a6279458SYOSHIFUJI Hideaki 	};
1441adaa70bbSThomas Graf 
1442adaa70bbSThomas Graf 	if (rt6_need_strict(dest))
1443adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_IFACE;
1444a6279458SYOSHIFUJI Hideaki 
14455578689aSDaniel Lezcano 	return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
144658f09b78SDaniel Lezcano 						   flags, __ip6_route_redirect);
1447a6279458SYOSHIFUJI Hideaki }
1448a6279458SYOSHIFUJI Hideaki 
1449a6279458SYOSHIFUJI Hideaki void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1450a6279458SYOSHIFUJI Hideaki 		  struct in6_addr *saddr,
1451a6279458SYOSHIFUJI Hideaki 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1452a6279458SYOSHIFUJI Hideaki {
1453a6279458SYOSHIFUJI Hideaki 	struct rt6_info *rt, *nrt = NULL;
1454a6279458SYOSHIFUJI Hideaki 	struct netevent_redirect netevent;
1455c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(neigh->dev);
1456a6279458SYOSHIFUJI Hideaki 
1457a6279458SYOSHIFUJI Hideaki 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1458a6279458SYOSHIFUJI Hideaki 
14598ed67789SDaniel Lezcano 	if (rt == net->ipv6.ip6_null_entry) {
14601da177e4SLinus Torvalds 		if (net_ratelimit())
14611da177e4SLinus Torvalds 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
14621da177e4SLinus Torvalds 			       "for redirect target\n");
1463a6279458SYOSHIFUJI Hideaki 		goto out;
14641da177e4SLinus Torvalds 	}
14651da177e4SLinus Torvalds 
14661da177e4SLinus Torvalds 	/*
14671da177e4SLinus Torvalds 	 *	We have finally decided to accept it.
14681da177e4SLinus Torvalds 	 */
14691da177e4SLinus Torvalds 
14701da177e4SLinus Torvalds 	neigh_update(neigh, lladdr, NUD_STALE,
14711da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
14721da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_OVERRIDE|
14731da177e4SLinus Torvalds 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
14741da177e4SLinus Torvalds 				     NEIGH_UPDATE_F_ISROUTER))
14751da177e4SLinus Torvalds 		     );
14761da177e4SLinus Torvalds 
14771da177e4SLinus Torvalds 	/*
14781da177e4SLinus Torvalds 	 * Redirect received -> path was valid.
14791da177e4SLinus Torvalds 	 * Look, redirects are sent only in response to data packets,
14801da177e4SLinus Torvalds 	 * so that this nexthop apparently is reachable. --ANK
14811da177e4SLinus Torvalds 	 */
14821da177e4SLinus Torvalds 	dst_confirm(&rt->u.dst);
14831da177e4SLinus Torvalds 
14841da177e4SLinus Torvalds 	/* Duplicate redirect: silently ignore. */
14851da177e4SLinus Torvalds 	if (neigh == rt->u.dst.neighbour)
14861da177e4SLinus Torvalds 		goto out;
14871da177e4SLinus Torvalds 
14881da177e4SLinus Torvalds 	nrt = ip6_rt_copy(rt);
14891da177e4SLinus Torvalds 	if (nrt == NULL)
14901da177e4SLinus Torvalds 		goto out;
14911da177e4SLinus Torvalds 
14921da177e4SLinus Torvalds 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
14931da177e4SLinus Torvalds 	if (on_link)
14941da177e4SLinus Torvalds 		nrt->rt6i_flags &= ~RTF_GATEWAY;
14951da177e4SLinus Torvalds 
14961da177e4SLinus Torvalds 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
14971da177e4SLinus Torvalds 	nrt->rt6i_dst.plen = 128;
14981da177e4SLinus Torvalds 	nrt->u.dst.flags |= DST_HOST;
14991da177e4SLinus Torvalds 
15001da177e4SLinus Torvalds 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
15011da177e4SLinus Torvalds 	nrt->rt6i_nexthop = neigh_clone(neigh);
15021da177e4SLinus Torvalds 	/* Reset pmtu, it may be better */
15031da177e4SLinus Torvalds 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1504c346dca1SYOSHIFUJI Hideaki 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
15055578689aSDaniel Lezcano 							dst_mtu(&nrt->u.dst));
15061da177e4SLinus Torvalds 
150740e22e8fSThomas Graf 	if (ip6_ins_rt(nrt))
15081da177e4SLinus Torvalds 		goto out;
15091da177e4SLinus Torvalds 
15108d71740cSTom Tucker 	netevent.old = &rt->u.dst;
15118d71740cSTom Tucker 	netevent.new = &nrt->u.dst;
15128d71740cSTom Tucker 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
15138d71740cSTom Tucker 
15141da177e4SLinus Torvalds 	if (rt->rt6i_flags&RTF_CACHE) {
1515e0a1ad73SThomas Graf 		ip6_del_rt(rt);
15161da177e4SLinus Torvalds 		return;
15171da177e4SLinus Torvalds 	}
15181da177e4SLinus Torvalds 
15191da177e4SLinus Torvalds out:
15201da177e4SLinus Torvalds 	dst_release(&rt->u.dst);
15211da177e4SLinus Torvalds 	return;
15221da177e4SLinus Torvalds }
15231da177e4SLinus Torvalds 
15241da177e4SLinus Torvalds /*
15251da177e4SLinus Torvalds  *	Handle ICMP "packet too big" messages
15261da177e4SLinus Torvalds  *	i.e. Path MTU discovery
15271da177e4SLinus Torvalds  */
15281da177e4SLinus Torvalds 
15291da177e4SLinus Torvalds void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
15301da177e4SLinus Torvalds 			struct net_device *dev, u32 pmtu)
15311da177e4SLinus Torvalds {
15321da177e4SLinus Torvalds 	struct rt6_info *rt, *nrt;
1533c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
15341da177e4SLinus Torvalds 	int allfrag = 0;
15351da177e4SLinus Torvalds 
15365578689aSDaniel Lezcano 	rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
15371da177e4SLinus Torvalds 	if (rt == NULL)
15381da177e4SLinus Torvalds 		return;
15391da177e4SLinus Torvalds 
15401da177e4SLinus Torvalds 	if (pmtu >= dst_mtu(&rt->u.dst))
15411da177e4SLinus Torvalds 		goto out;
15421da177e4SLinus Torvalds 
15431da177e4SLinus Torvalds 	if (pmtu < IPV6_MIN_MTU) {
15441da177e4SLinus Torvalds 		/*
15451da177e4SLinus Torvalds 		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
15461da177e4SLinus Torvalds 		 * MTU (1280) and a fragment header should always be included
15471da177e4SLinus Torvalds 		 * after a node receiving Too Big message reporting PMTU is
15481da177e4SLinus Torvalds 		 * less than the IPv6 Minimum Link MTU.
15491da177e4SLinus Torvalds 		 */
15501da177e4SLinus Torvalds 		pmtu = IPV6_MIN_MTU;
15511da177e4SLinus Torvalds 		allfrag = 1;
15521da177e4SLinus Torvalds 	}
15531da177e4SLinus Torvalds 
15541da177e4SLinus Torvalds 	/* New mtu received -> path was valid.
15551da177e4SLinus Torvalds 	   They are sent only in response to data packets,
15561da177e4SLinus Torvalds 	   so that this nexthop apparently is reachable. --ANK
15571da177e4SLinus Torvalds 	 */
15581da177e4SLinus Torvalds 	dst_confirm(&rt->u.dst);
15591da177e4SLinus Torvalds 
15601da177e4SLinus Torvalds 	/* Host route. If it is static, it would be better
15611da177e4SLinus Torvalds 	   not to override it, but add new one, so that
15621da177e4SLinus Torvalds 	   when cache entry will expire old pmtu
15631da177e4SLinus Torvalds 	   would return automatically.
15641da177e4SLinus Torvalds 	 */
15651da177e4SLinus Torvalds 	if (rt->rt6i_flags & RTF_CACHE) {
15661da177e4SLinus Torvalds 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
15671da177e4SLinus Torvalds 		if (allfrag)
15681da177e4SLinus Torvalds 			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
15695578689aSDaniel Lezcano 		dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
15701da177e4SLinus Torvalds 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
15711da177e4SLinus Torvalds 		goto out;
15721da177e4SLinus Torvalds 	}
15731da177e4SLinus Torvalds 
15741da177e4SLinus Torvalds 	/* Network route.
15751da177e4SLinus Torvalds 	   Two cases are possible:
15761da177e4SLinus Torvalds 	   1. It is connected route. Action: COW
15771da177e4SLinus Torvalds 	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
15781da177e4SLinus Torvalds 	 */
1579d5315b50SYOSHIFUJI Hideaki 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1580a1e78363SYOSHIFUJI Hideaki 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1581d5315b50SYOSHIFUJI Hideaki 	else
1582d5315b50SYOSHIFUJI Hideaki 		nrt = rt6_alloc_clone(rt, daddr);
1583a1e78363SYOSHIFUJI Hideaki 
1584d5315b50SYOSHIFUJI Hideaki 	if (nrt) {
15851da177e4SLinus Torvalds 		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
15861da177e4SLinus Torvalds 		if (allfrag)
15871da177e4SLinus Torvalds 			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1588a1e78363SYOSHIFUJI Hideaki 
15891da177e4SLinus Torvalds 		/* According to RFC 1981, detecting PMTU increase shouldn't be
1590a1e78363SYOSHIFUJI Hideaki 		 * happened within 5 mins, the recommended timer is 10 mins.
1591a1e78363SYOSHIFUJI Hideaki 		 * Here this route expiration time is set to ip6_rt_mtu_expires
1592a1e78363SYOSHIFUJI Hideaki 		 * which is 10 mins. After 10 mins the decreased pmtu is expired
1593a1e78363SYOSHIFUJI Hideaki 		 * and detecting PMTU increase will be automatically happened.
15941da177e4SLinus Torvalds 		 */
15955578689aSDaniel Lezcano 		dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
15961da177e4SLinus Torvalds 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1597a1e78363SYOSHIFUJI Hideaki 
159840e22e8fSThomas Graf 		ip6_ins_rt(nrt);
15991da177e4SLinus Torvalds 	}
16001da177e4SLinus Torvalds out:
16011da177e4SLinus Torvalds 	dst_release(&rt->u.dst);
16021da177e4SLinus Torvalds }
16031da177e4SLinus Torvalds 
16041da177e4SLinus Torvalds /*
16051da177e4SLinus Torvalds  *	Misc support functions
16061da177e4SLinus Torvalds  */
16071da177e4SLinus Torvalds 
16081da177e4SLinus Torvalds static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
16091da177e4SLinus Torvalds {
1610c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(ort->rt6i_dev);
1611f2fc6a54SBenjamin Thery 	struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
16121da177e4SLinus Torvalds 
16131da177e4SLinus Torvalds 	if (rt) {
16141da177e4SLinus Torvalds 		rt->u.dst.input = ort->u.dst.input;
16151da177e4SLinus Torvalds 		rt->u.dst.output = ort->u.dst.output;
16161da177e4SLinus Torvalds 
16171da177e4SLinus Torvalds 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
161822e1e4d8SVille Nuorvala 		rt->u.dst.error = ort->u.dst.error;
16191da177e4SLinus Torvalds 		rt->u.dst.dev = ort->u.dst.dev;
16201da177e4SLinus Torvalds 		if (rt->u.dst.dev)
16211da177e4SLinus Torvalds 			dev_hold(rt->u.dst.dev);
16221da177e4SLinus Torvalds 		rt->rt6i_idev = ort->rt6i_idev;
16231da177e4SLinus Torvalds 		if (rt->rt6i_idev)
16241da177e4SLinus Torvalds 			in6_dev_hold(rt->rt6i_idev);
16251da177e4SLinus Torvalds 		rt->u.dst.lastuse = jiffies;
16261da177e4SLinus Torvalds 		rt->rt6i_expires = 0;
16271da177e4SLinus Torvalds 
16281da177e4SLinus Torvalds 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
16291da177e4SLinus Torvalds 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
16301da177e4SLinus Torvalds 		rt->rt6i_metric = 0;
16311da177e4SLinus Torvalds 
16321da177e4SLinus Torvalds 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
16331da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
16341da177e4SLinus Torvalds 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
16351da177e4SLinus Torvalds #endif
1636c71099acSThomas Graf 		rt->rt6i_table = ort->rt6i_table;
16371da177e4SLinus Torvalds 	}
16381da177e4SLinus Torvalds 	return rt;
16391da177e4SLinus Torvalds }
16401da177e4SLinus Torvalds 
164170ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
1642efa2cea0SDaniel Lezcano static struct rt6_info *rt6_get_route_info(struct net *net,
1643efa2cea0SDaniel Lezcano 					   struct in6_addr *prefix, int prefixlen,
164470ceb4f5SYOSHIFUJI Hideaki 					   struct in6_addr *gwaddr, int ifindex)
164570ceb4f5SYOSHIFUJI Hideaki {
164670ceb4f5SYOSHIFUJI Hideaki 	struct fib6_node *fn;
164770ceb4f5SYOSHIFUJI Hideaki 	struct rt6_info *rt = NULL;
1648c71099acSThomas Graf 	struct fib6_table *table;
164970ceb4f5SYOSHIFUJI Hideaki 
1650efa2cea0SDaniel Lezcano 	table = fib6_get_table(net, RT6_TABLE_INFO);
1651c71099acSThomas Graf 	if (table == NULL)
1652c71099acSThomas Graf 		return NULL;
1653c71099acSThomas Graf 
1654c71099acSThomas Graf 	write_lock_bh(&table->tb6_lock);
1655c71099acSThomas Graf 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
165670ceb4f5SYOSHIFUJI Hideaki 	if (!fn)
165770ceb4f5SYOSHIFUJI Hideaki 		goto out;
165870ceb4f5SYOSHIFUJI Hideaki 
16597cc48263SEric Dumazet 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
166070ceb4f5SYOSHIFUJI Hideaki 		if (rt->rt6i_dev->ifindex != ifindex)
166170ceb4f5SYOSHIFUJI Hideaki 			continue;
166270ceb4f5SYOSHIFUJI Hideaki 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
166370ceb4f5SYOSHIFUJI Hideaki 			continue;
166470ceb4f5SYOSHIFUJI Hideaki 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
166570ceb4f5SYOSHIFUJI Hideaki 			continue;
166670ceb4f5SYOSHIFUJI Hideaki 		dst_hold(&rt->u.dst);
166770ceb4f5SYOSHIFUJI Hideaki 		break;
166870ceb4f5SYOSHIFUJI Hideaki 	}
166970ceb4f5SYOSHIFUJI Hideaki out:
1670c71099acSThomas Graf 	write_unlock_bh(&table->tb6_lock);
167170ceb4f5SYOSHIFUJI Hideaki 	return rt;
167270ceb4f5SYOSHIFUJI Hideaki }
167370ceb4f5SYOSHIFUJI Hideaki 
1674efa2cea0SDaniel Lezcano static struct rt6_info *rt6_add_route_info(struct net *net,
1675efa2cea0SDaniel Lezcano 					   struct in6_addr *prefix, int prefixlen,
167670ceb4f5SYOSHIFUJI Hideaki 					   struct in6_addr *gwaddr, int ifindex,
167770ceb4f5SYOSHIFUJI Hideaki 					   unsigned pref)
167870ceb4f5SYOSHIFUJI Hideaki {
167986872cb5SThomas Graf 	struct fib6_config cfg = {
168086872cb5SThomas Graf 		.fc_table	= RT6_TABLE_INFO,
1681238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
168286872cb5SThomas Graf 		.fc_ifindex	= ifindex,
168386872cb5SThomas Graf 		.fc_dst_len	= prefixlen,
168486872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
168586872cb5SThomas Graf 				  RTF_UP | RTF_PREF(pref),
1686efa2cea0SDaniel Lezcano 		.fc_nlinfo.pid = 0,
1687efa2cea0SDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
1688efa2cea0SDaniel Lezcano 		.fc_nlinfo.nl_net = net,
168986872cb5SThomas Graf 	};
169070ceb4f5SYOSHIFUJI Hideaki 
169186872cb5SThomas Graf 	ipv6_addr_copy(&cfg.fc_dst, prefix);
169286872cb5SThomas Graf 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
169386872cb5SThomas Graf 
1694e317da96SYOSHIFUJI Hideaki 	/* We should treat it as a default route if prefix length is 0. */
1695e317da96SYOSHIFUJI Hideaki 	if (!prefixlen)
169686872cb5SThomas Graf 		cfg.fc_flags |= RTF_DEFAULT;
169770ceb4f5SYOSHIFUJI Hideaki 
169886872cb5SThomas Graf 	ip6_route_add(&cfg);
169970ceb4f5SYOSHIFUJI Hideaki 
1700efa2cea0SDaniel Lezcano 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
170170ceb4f5SYOSHIFUJI Hideaki }
170270ceb4f5SYOSHIFUJI Hideaki #endif
170370ceb4f5SYOSHIFUJI Hideaki 
17041da177e4SLinus Torvalds struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
17051da177e4SLinus Torvalds {
17061da177e4SLinus Torvalds 	struct rt6_info *rt;
1707c71099acSThomas Graf 	struct fib6_table *table;
17081da177e4SLinus Torvalds 
1709c346dca1SYOSHIFUJI Hideaki 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1710c71099acSThomas Graf 	if (table == NULL)
1711c71099acSThomas Graf 		return NULL;
17121da177e4SLinus Torvalds 
1713c71099acSThomas Graf 	write_lock_bh(&table->tb6_lock);
17147cc48263SEric Dumazet 	for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
17151da177e4SLinus Torvalds 		if (dev == rt->rt6i_dev &&
1716045927ffSYOSHIFUJI Hideaki 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
17171da177e4SLinus Torvalds 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
17181da177e4SLinus Torvalds 			break;
17191da177e4SLinus Torvalds 	}
17201da177e4SLinus Torvalds 	if (rt)
17211da177e4SLinus Torvalds 		dst_hold(&rt->u.dst);
1722c71099acSThomas Graf 	write_unlock_bh(&table->tb6_lock);
17231da177e4SLinus Torvalds 	return rt;
17241da177e4SLinus Torvalds }
17251da177e4SLinus Torvalds 
17261da177e4SLinus Torvalds struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1727ebacaaa0SYOSHIFUJI Hideaki 				     struct net_device *dev,
1728ebacaaa0SYOSHIFUJI Hideaki 				     unsigned int pref)
17291da177e4SLinus Torvalds {
173086872cb5SThomas Graf 	struct fib6_config cfg = {
173186872cb5SThomas Graf 		.fc_table	= RT6_TABLE_DFLT,
1732238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
173386872cb5SThomas Graf 		.fc_ifindex	= dev->ifindex,
173486872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
173586872cb5SThomas Graf 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
17365578689aSDaniel Lezcano 		.fc_nlinfo.pid = 0,
17375578689aSDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
1738c346dca1SYOSHIFUJI Hideaki 		.fc_nlinfo.nl_net = dev_net(dev),
173986872cb5SThomas Graf 	};
17401da177e4SLinus Torvalds 
174186872cb5SThomas Graf 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
17421da177e4SLinus Torvalds 
174386872cb5SThomas Graf 	ip6_route_add(&cfg);
17441da177e4SLinus Torvalds 
17451da177e4SLinus Torvalds 	return rt6_get_dflt_router(gwaddr, dev);
17461da177e4SLinus Torvalds }
17471da177e4SLinus Torvalds 
17487b4da532SDaniel Lezcano void rt6_purge_dflt_routers(struct net *net)
17491da177e4SLinus Torvalds {
17501da177e4SLinus Torvalds 	struct rt6_info *rt;
1751c71099acSThomas Graf 	struct fib6_table *table;
1752c71099acSThomas Graf 
1753c71099acSThomas Graf 	/* NOTE: Keep consistent with rt6_get_dflt_router */
17547b4da532SDaniel Lezcano 	table = fib6_get_table(net, RT6_TABLE_DFLT);
1755c71099acSThomas Graf 	if (table == NULL)
1756c71099acSThomas Graf 		return;
17571da177e4SLinus Torvalds 
17581da177e4SLinus Torvalds restart:
1759c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
17607cc48263SEric Dumazet 	for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
17611da177e4SLinus Torvalds 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
17621da177e4SLinus Torvalds 			dst_hold(&rt->u.dst);
1763c71099acSThomas Graf 			read_unlock_bh(&table->tb6_lock);
1764e0a1ad73SThomas Graf 			ip6_del_rt(rt);
17651da177e4SLinus Torvalds 			goto restart;
17661da177e4SLinus Torvalds 		}
17671da177e4SLinus Torvalds 	}
1768c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
17691da177e4SLinus Torvalds }
17701da177e4SLinus Torvalds 
17715578689aSDaniel Lezcano static void rtmsg_to_fib6_config(struct net *net,
17725578689aSDaniel Lezcano 				 struct in6_rtmsg *rtmsg,
177386872cb5SThomas Graf 				 struct fib6_config *cfg)
177486872cb5SThomas Graf {
177586872cb5SThomas Graf 	memset(cfg, 0, sizeof(*cfg));
177686872cb5SThomas Graf 
177786872cb5SThomas Graf 	cfg->fc_table = RT6_TABLE_MAIN;
177886872cb5SThomas Graf 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
177986872cb5SThomas Graf 	cfg->fc_metric = rtmsg->rtmsg_metric;
178086872cb5SThomas Graf 	cfg->fc_expires = rtmsg->rtmsg_info;
178186872cb5SThomas Graf 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
178286872cb5SThomas Graf 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
178386872cb5SThomas Graf 	cfg->fc_flags = rtmsg->rtmsg_flags;
178486872cb5SThomas Graf 
17855578689aSDaniel Lezcano 	cfg->fc_nlinfo.nl_net = net;
1786f1243c2dSBenjamin Thery 
178786872cb5SThomas Graf 	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
178886872cb5SThomas Graf 	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
178986872cb5SThomas Graf 	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
179086872cb5SThomas Graf }
179186872cb5SThomas Graf 
17925578689aSDaniel Lezcano int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
17931da177e4SLinus Torvalds {
179486872cb5SThomas Graf 	struct fib6_config cfg;
17951da177e4SLinus Torvalds 	struct in6_rtmsg rtmsg;
17961da177e4SLinus Torvalds 	int err;
17971da177e4SLinus Torvalds 
17981da177e4SLinus Torvalds 	switch(cmd) {
17991da177e4SLinus Torvalds 	case SIOCADDRT:		/* Add a route */
18001da177e4SLinus Torvalds 	case SIOCDELRT:		/* Delete a route */
18011da177e4SLinus Torvalds 		if (!capable(CAP_NET_ADMIN))
18021da177e4SLinus Torvalds 			return -EPERM;
18031da177e4SLinus Torvalds 		err = copy_from_user(&rtmsg, arg,
18041da177e4SLinus Torvalds 				     sizeof(struct in6_rtmsg));
18051da177e4SLinus Torvalds 		if (err)
18061da177e4SLinus Torvalds 			return -EFAULT;
18071da177e4SLinus Torvalds 
18085578689aSDaniel Lezcano 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
180986872cb5SThomas Graf 
18101da177e4SLinus Torvalds 		rtnl_lock();
18111da177e4SLinus Torvalds 		switch (cmd) {
18121da177e4SLinus Torvalds 		case SIOCADDRT:
181386872cb5SThomas Graf 			err = ip6_route_add(&cfg);
18141da177e4SLinus Torvalds 			break;
18151da177e4SLinus Torvalds 		case SIOCDELRT:
181686872cb5SThomas Graf 			err = ip6_route_del(&cfg);
18171da177e4SLinus Torvalds 			break;
18181da177e4SLinus Torvalds 		default:
18191da177e4SLinus Torvalds 			err = -EINVAL;
18201da177e4SLinus Torvalds 		}
18211da177e4SLinus Torvalds 		rtnl_unlock();
18221da177e4SLinus Torvalds 
18231da177e4SLinus Torvalds 		return err;
18243ff50b79SStephen Hemminger 	}
18251da177e4SLinus Torvalds 
18261da177e4SLinus Torvalds 	return -EINVAL;
18271da177e4SLinus Torvalds }
18281da177e4SLinus Torvalds 
18291da177e4SLinus Torvalds /*
18301da177e4SLinus Torvalds  *	Drop the packet on the floor
18311da177e4SLinus Torvalds  */
18321da177e4SLinus Torvalds 
183350eb431dSIlpo Järvinen static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
18341da177e4SLinus Torvalds {
1835612f09e8SYOSHIFUJI Hideaki 	int type;
1836612f09e8SYOSHIFUJI Hideaki 	switch (ipstats_mib_noroutes) {
1837612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_INNOROUTES:
18380660e03fSArnaldo Carvalho de Melo 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1839612f09e8SYOSHIFUJI Hideaki 		if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1840a11d206dSYOSHIFUJI Hideaki 			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1841612f09e8SYOSHIFUJI Hideaki 			break;
1842612f09e8SYOSHIFUJI Hideaki 		}
1843612f09e8SYOSHIFUJI Hideaki 		/* FALLTHROUGH */
1844612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_OUTNOROUTES:
1845612f09e8SYOSHIFUJI Hideaki 		IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1846612f09e8SYOSHIFUJI Hideaki 		break;
1847612f09e8SYOSHIFUJI Hideaki 	}
18489ce8ade0SThomas Graf 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
18491da177e4SLinus Torvalds 	kfree_skb(skb);
18501da177e4SLinus Torvalds 	return 0;
18511da177e4SLinus Torvalds }
18521da177e4SLinus Torvalds 
18539ce8ade0SThomas Graf static int ip6_pkt_discard(struct sk_buff *skb)
18549ce8ade0SThomas Graf {
1855612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
18569ce8ade0SThomas Graf }
18579ce8ade0SThomas Graf 
185820380731SArnaldo Carvalho de Melo static int ip6_pkt_discard_out(struct sk_buff *skb)
18591da177e4SLinus Torvalds {
18601da177e4SLinus Torvalds 	skb->dev = skb->dst->dev;
1861612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
18621da177e4SLinus Torvalds }
18631da177e4SLinus Torvalds 
18646723ab54SDavid S. Miller #ifdef CONFIG_IPV6_MULTIPLE_TABLES
18656723ab54SDavid S. Miller 
18669ce8ade0SThomas Graf static int ip6_pkt_prohibit(struct sk_buff *skb)
18679ce8ade0SThomas Graf {
1868612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
18699ce8ade0SThomas Graf }
18709ce8ade0SThomas Graf 
18719ce8ade0SThomas Graf static int ip6_pkt_prohibit_out(struct sk_buff *skb)
18729ce8ade0SThomas Graf {
18739ce8ade0SThomas Graf 	skb->dev = skb->dst->dev;
1874612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
18759ce8ade0SThomas Graf }
18769ce8ade0SThomas Graf 
18776723ab54SDavid S. Miller #endif
18786723ab54SDavid S. Miller 
18791da177e4SLinus Torvalds /*
18801da177e4SLinus Torvalds  *	Allocate a dst for local (unicast / anycast) address.
18811da177e4SLinus Torvalds  */
18821da177e4SLinus Torvalds 
18831da177e4SLinus Torvalds struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
18841da177e4SLinus Torvalds 				    const struct in6_addr *addr,
18851da177e4SLinus Torvalds 				    int anycast)
18861da177e4SLinus Torvalds {
1887c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(idev->dev);
1888f2fc6a54SBenjamin Thery 	struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
18891da177e4SLinus Torvalds 
18901da177e4SLinus Torvalds 	if (rt == NULL)
18911da177e4SLinus Torvalds 		return ERR_PTR(-ENOMEM);
18921da177e4SLinus Torvalds 
18935578689aSDaniel Lezcano 	dev_hold(net->loopback_dev);
18941da177e4SLinus Torvalds 	in6_dev_hold(idev);
18951da177e4SLinus Torvalds 
18961da177e4SLinus Torvalds 	rt->u.dst.flags = DST_HOST;
18971da177e4SLinus Torvalds 	rt->u.dst.input = ip6_input;
18981da177e4SLinus Torvalds 	rt->u.dst.output = ip6_output;
18995578689aSDaniel Lezcano 	rt->rt6i_dev = net->loopback_dev;
19001da177e4SLinus Torvalds 	rt->rt6i_idev = idev;
19011da177e4SLinus Torvalds 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
19025578689aSDaniel Lezcano 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
19031da177e4SLinus Torvalds 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
19041da177e4SLinus Torvalds 	rt->u.dst.obsolete = -1;
19051da177e4SLinus Torvalds 
19061da177e4SLinus Torvalds 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
190758c4fb86SYOSHIFUJI Hideaki 	if (anycast)
190858c4fb86SYOSHIFUJI Hideaki 		rt->rt6i_flags |= RTF_ANYCAST;
190958c4fb86SYOSHIFUJI Hideaki 	else
19101da177e4SLinus Torvalds 		rt->rt6i_flags |= RTF_LOCAL;
19111da177e4SLinus Torvalds 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
19121da177e4SLinus Torvalds 	if (rt->rt6i_nexthop == NULL) {
191340aa7b90SYOSHIFUJI Hideaki 		dst_free(&rt->u.dst);
19141da177e4SLinus Torvalds 		return ERR_PTR(-ENOMEM);
19151da177e4SLinus Torvalds 	}
19161da177e4SLinus Torvalds 
19171da177e4SLinus Torvalds 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
19181da177e4SLinus Torvalds 	rt->rt6i_dst.plen = 128;
19195578689aSDaniel Lezcano 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
19201da177e4SLinus Torvalds 
19211da177e4SLinus Torvalds 	atomic_set(&rt->u.dst.__refcnt, 1);
19221da177e4SLinus Torvalds 
19231da177e4SLinus Torvalds 	return rt;
19241da177e4SLinus Torvalds }
19251da177e4SLinus Torvalds 
19268ed67789SDaniel Lezcano struct arg_dev_net {
19278ed67789SDaniel Lezcano 	struct net_device *dev;
19288ed67789SDaniel Lezcano 	struct net *net;
19298ed67789SDaniel Lezcano };
19308ed67789SDaniel Lezcano 
19311da177e4SLinus Torvalds static int fib6_ifdown(struct rt6_info *rt, void *arg)
19321da177e4SLinus Torvalds {
19338ed67789SDaniel Lezcano 	struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
19348ed67789SDaniel Lezcano 	struct net *net = ((struct arg_dev_net *)arg)->net;
19358ed67789SDaniel Lezcano 
19368ed67789SDaniel Lezcano 	if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
19378ed67789SDaniel Lezcano 	    rt != net->ipv6.ip6_null_entry) {
19381da177e4SLinus Torvalds 		RT6_TRACE("deleted by ifdown %p\n", rt);
19391da177e4SLinus Torvalds 		return -1;
19401da177e4SLinus Torvalds 	}
19411da177e4SLinus Torvalds 	return 0;
19421da177e4SLinus Torvalds }
19431da177e4SLinus Torvalds 
1944f3db4851SDaniel Lezcano void rt6_ifdown(struct net *net, struct net_device *dev)
19451da177e4SLinus Torvalds {
19468ed67789SDaniel Lezcano 	struct arg_dev_net adn = {
19478ed67789SDaniel Lezcano 		.dev = dev,
19488ed67789SDaniel Lezcano 		.net = net,
19498ed67789SDaniel Lezcano 	};
19508ed67789SDaniel Lezcano 
19518ed67789SDaniel Lezcano 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
19521e493d19SDavid S. Miller 	icmp6_clean_all(fib6_ifdown, &adn);
19531da177e4SLinus Torvalds }
19541da177e4SLinus Torvalds 
19551da177e4SLinus Torvalds struct rt6_mtu_change_arg
19561da177e4SLinus Torvalds {
19571da177e4SLinus Torvalds 	struct net_device *dev;
19581da177e4SLinus Torvalds 	unsigned mtu;
19591da177e4SLinus Torvalds };
19601da177e4SLinus Torvalds 
19611da177e4SLinus Torvalds static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
19621da177e4SLinus Torvalds {
19631da177e4SLinus Torvalds 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
19641da177e4SLinus Torvalds 	struct inet6_dev *idev;
1965c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(arg->dev);
19661da177e4SLinus Torvalds 
19671da177e4SLinus Torvalds 	/* In IPv6 pmtu discovery is not optional,
19681da177e4SLinus Torvalds 	   so that RTAX_MTU lock cannot disable it.
19691da177e4SLinus Torvalds 	   We still use this lock to block changes
19701da177e4SLinus Torvalds 	   caused by addrconf/ndisc.
19711da177e4SLinus Torvalds 	*/
19721da177e4SLinus Torvalds 
19731da177e4SLinus Torvalds 	idev = __in6_dev_get(arg->dev);
19741da177e4SLinus Torvalds 	if (idev == NULL)
19751da177e4SLinus Torvalds 		return 0;
19761da177e4SLinus Torvalds 
19771da177e4SLinus Torvalds 	/* For administrative MTU increase, there is no way to discover
19781da177e4SLinus Torvalds 	   IPv6 PMTU increase, so PMTU increase should be updated here.
19791da177e4SLinus Torvalds 	   Since RFC 1981 doesn't include administrative MTU increase
19801da177e4SLinus Torvalds 	   update PMTU increase is a MUST. (i.e. jumbo frame)
19811da177e4SLinus Torvalds 	 */
19821da177e4SLinus Torvalds 	/*
19831da177e4SLinus Torvalds 	   If new MTU is less than route PMTU, this new MTU will be the
19841da177e4SLinus Torvalds 	   lowest MTU in the path, update the route PMTU to reflect PMTU
19851da177e4SLinus Torvalds 	   decreases; if new MTU is greater than route PMTU, and the
19861da177e4SLinus Torvalds 	   old MTU is the lowest MTU in the path, update the route PMTU
19871da177e4SLinus Torvalds 	   to reflect the increase. In this case if the other nodes' MTU
19881da177e4SLinus Torvalds 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
19891da177e4SLinus Torvalds 	   PMTU discouvery.
19901da177e4SLinus Torvalds 	 */
19911da177e4SLinus Torvalds 	if (rt->rt6i_dev == arg->dev &&
19921da177e4SLinus Torvalds 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
199323717795SJim Paris 	    (dst_mtu(&rt->u.dst) >= arg->mtu ||
19941da177e4SLinus Torvalds 	     (dst_mtu(&rt->u.dst) < arg->mtu &&
1995566cfd8fSSimon Arlott 	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
19961da177e4SLinus Torvalds 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
19975578689aSDaniel Lezcano 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
1998566cfd8fSSimon Arlott 	}
19991da177e4SLinus Torvalds 	return 0;
20001da177e4SLinus Torvalds }
20011da177e4SLinus Torvalds 
20021da177e4SLinus Torvalds void rt6_mtu_change(struct net_device *dev, unsigned mtu)
20031da177e4SLinus Torvalds {
2004c71099acSThomas Graf 	struct rt6_mtu_change_arg arg = {
2005c71099acSThomas Graf 		.dev = dev,
2006c71099acSThomas Graf 		.mtu = mtu,
2007c71099acSThomas Graf 	};
20081da177e4SLinus Torvalds 
2009c346dca1SYOSHIFUJI Hideaki 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
20101da177e4SLinus Torvalds }
20111da177e4SLinus Torvalds 
2012ef7c79edSPatrick McHardy static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
20135176f91eSThomas Graf 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
201486872cb5SThomas Graf 	[RTA_OIF]               = { .type = NLA_U32 },
2015ab364a6fSThomas Graf 	[RTA_IIF]		= { .type = NLA_U32 },
201686872cb5SThomas Graf 	[RTA_PRIORITY]          = { .type = NLA_U32 },
201786872cb5SThomas Graf 	[RTA_METRICS]           = { .type = NLA_NESTED },
201886872cb5SThomas Graf };
201986872cb5SThomas Graf 
202086872cb5SThomas Graf static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
202186872cb5SThomas Graf 			      struct fib6_config *cfg)
20221da177e4SLinus Torvalds {
202386872cb5SThomas Graf 	struct rtmsg *rtm;
202486872cb5SThomas Graf 	struct nlattr *tb[RTA_MAX+1];
202586872cb5SThomas Graf 	int err;
20261da177e4SLinus Torvalds 
202786872cb5SThomas Graf 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
202886872cb5SThomas Graf 	if (err < 0)
202986872cb5SThomas Graf 		goto errout;
20301da177e4SLinus Torvalds 
203186872cb5SThomas Graf 	err = -EINVAL;
203286872cb5SThomas Graf 	rtm = nlmsg_data(nlh);
203386872cb5SThomas Graf 	memset(cfg, 0, sizeof(*cfg));
203486872cb5SThomas Graf 
203586872cb5SThomas Graf 	cfg->fc_table = rtm->rtm_table;
203686872cb5SThomas Graf 	cfg->fc_dst_len = rtm->rtm_dst_len;
203786872cb5SThomas Graf 	cfg->fc_src_len = rtm->rtm_src_len;
203886872cb5SThomas Graf 	cfg->fc_flags = RTF_UP;
203986872cb5SThomas Graf 	cfg->fc_protocol = rtm->rtm_protocol;
204086872cb5SThomas Graf 
204186872cb5SThomas Graf 	if (rtm->rtm_type == RTN_UNREACHABLE)
204286872cb5SThomas Graf 		cfg->fc_flags |= RTF_REJECT;
204386872cb5SThomas Graf 
204486872cb5SThomas Graf 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
204586872cb5SThomas Graf 	cfg->fc_nlinfo.nlh = nlh;
20463b1e0a65SYOSHIFUJI Hideaki 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
204786872cb5SThomas Graf 
204886872cb5SThomas Graf 	if (tb[RTA_GATEWAY]) {
204986872cb5SThomas Graf 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
205086872cb5SThomas Graf 		cfg->fc_flags |= RTF_GATEWAY;
20511da177e4SLinus Torvalds 	}
205286872cb5SThomas Graf 
205386872cb5SThomas Graf 	if (tb[RTA_DST]) {
205486872cb5SThomas Graf 		int plen = (rtm->rtm_dst_len + 7) >> 3;
205586872cb5SThomas Graf 
205686872cb5SThomas Graf 		if (nla_len(tb[RTA_DST]) < plen)
205786872cb5SThomas Graf 			goto errout;
205886872cb5SThomas Graf 
205986872cb5SThomas Graf 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
20601da177e4SLinus Torvalds 	}
206186872cb5SThomas Graf 
206286872cb5SThomas Graf 	if (tb[RTA_SRC]) {
206386872cb5SThomas Graf 		int plen = (rtm->rtm_src_len + 7) >> 3;
206486872cb5SThomas Graf 
206586872cb5SThomas Graf 		if (nla_len(tb[RTA_SRC]) < plen)
206686872cb5SThomas Graf 			goto errout;
206786872cb5SThomas Graf 
206886872cb5SThomas Graf 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
20691da177e4SLinus Torvalds 	}
207086872cb5SThomas Graf 
207186872cb5SThomas Graf 	if (tb[RTA_OIF])
207286872cb5SThomas Graf 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
207386872cb5SThomas Graf 
207486872cb5SThomas Graf 	if (tb[RTA_PRIORITY])
207586872cb5SThomas Graf 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
207686872cb5SThomas Graf 
207786872cb5SThomas Graf 	if (tb[RTA_METRICS]) {
207886872cb5SThomas Graf 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
207986872cb5SThomas Graf 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
20801da177e4SLinus Torvalds 	}
208186872cb5SThomas Graf 
208286872cb5SThomas Graf 	if (tb[RTA_TABLE])
208386872cb5SThomas Graf 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
208486872cb5SThomas Graf 
208586872cb5SThomas Graf 	err = 0;
208686872cb5SThomas Graf errout:
208786872cb5SThomas Graf 	return err;
20881da177e4SLinus Torvalds }
20891da177e4SLinus Torvalds 
2090c127ea2cSThomas Graf static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
20911da177e4SLinus Torvalds {
209286872cb5SThomas Graf 	struct fib6_config cfg;
209386872cb5SThomas Graf 	int err;
20941da177e4SLinus Torvalds 
209586872cb5SThomas Graf 	err = rtm_to_fib6_config(skb, nlh, &cfg);
209686872cb5SThomas Graf 	if (err < 0)
209786872cb5SThomas Graf 		return err;
209886872cb5SThomas Graf 
209986872cb5SThomas Graf 	return ip6_route_del(&cfg);
21001da177e4SLinus Torvalds }
21011da177e4SLinus Torvalds 
2102c127ea2cSThomas Graf static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
21031da177e4SLinus Torvalds {
210486872cb5SThomas Graf 	struct fib6_config cfg;
210586872cb5SThomas Graf 	int err;
21061da177e4SLinus Torvalds 
210786872cb5SThomas Graf 	err = rtm_to_fib6_config(skb, nlh, &cfg);
210886872cb5SThomas Graf 	if (err < 0)
210986872cb5SThomas Graf 		return err;
211086872cb5SThomas Graf 
211186872cb5SThomas Graf 	return ip6_route_add(&cfg);
21121da177e4SLinus Torvalds }
21131da177e4SLinus Torvalds 
2114339bf98fSThomas Graf static inline size_t rt6_nlmsg_size(void)
2115339bf98fSThomas Graf {
2116339bf98fSThomas Graf 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2117339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_SRC */
2118339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_DST */
2119339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_GATEWAY */
2120339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_PREFSRC */
2121339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_TABLE */
2122339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_IIF */
2123339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_OIF */
2124339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_PRIORITY */
21256a2b9ce0SNoriaki TAKAMIYA 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2126339bf98fSThomas Graf 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2127339bf98fSThomas Graf }
2128339bf98fSThomas Graf 
2129191cd582SBrian Haley static int rt6_fill_node(struct net *net,
2130191cd582SBrian Haley 			 struct sk_buff *skb, struct rt6_info *rt,
21310d51aa80SJamal Hadi Salim 			 struct in6_addr *dst, struct in6_addr *src,
21320d51aa80SJamal Hadi Salim 			 int iif, int type, u32 pid, u32 seq,
21337bc570c8SYOSHIFUJI Hideaki 			 int prefix, int nowait, unsigned int flags)
21341da177e4SLinus Torvalds {
21351da177e4SLinus Torvalds 	struct rtmsg *rtm;
21361da177e4SLinus Torvalds 	struct nlmsghdr *nlh;
2137e3703b3dSThomas Graf 	long expires;
21389e762a4aSPatrick McHardy 	u32 table;
21391da177e4SLinus Torvalds 
21401da177e4SLinus Torvalds 	if (prefix) {	/* user wants prefix routes only */
21411da177e4SLinus Torvalds 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
21421da177e4SLinus Torvalds 			/* success since this is not a prefix route */
21431da177e4SLinus Torvalds 			return 1;
21441da177e4SLinus Torvalds 		}
21451da177e4SLinus Torvalds 	}
21461da177e4SLinus Torvalds 
21472d7202bfSThomas Graf 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
21482d7202bfSThomas Graf 	if (nlh == NULL)
214926932566SPatrick McHardy 		return -EMSGSIZE;
21502d7202bfSThomas Graf 
21512d7202bfSThomas Graf 	rtm = nlmsg_data(nlh);
21521da177e4SLinus Torvalds 	rtm->rtm_family = AF_INET6;
21531da177e4SLinus Torvalds 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
21541da177e4SLinus Torvalds 	rtm->rtm_src_len = rt->rt6i_src.plen;
21551da177e4SLinus Torvalds 	rtm->rtm_tos = 0;
2156c71099acSThomas Graf 	if (rt->rt6i_table)
21579e762a4aSPatrick McHardy 		table = rt->rt6i_table->tb6_id;
2158c71099acSThomas Graf 	else
21599e762a4aSPatrick McHardy 		table = RT6_TABLE_UNSPEC;
21609e762a4aSPatrick McHardy 	rtm->rtm_table = table;
21612d7202bfSThomas Graf 	NLA_PUT_U32(skb, RTA_TABLE, table);
21621da177e4SLinus Torvalds 	if (rt->rt6i_flags&RTF_REJECT)
21631da177e4SLinus Torvalds 		rtm->rtm_type = RTN_UNREACHABLE;
21641da177e4SLinus Torvalds 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
21651da177e4SLinus Torvalds 		rtm->rtm_type = RTN_LOCAL;
21661da177e4SLinus Torvalds 	else
21671da177e4SLinus Torvalds 		rtm->rtm_type = RTN_UNICAST;
21681da177e4SLinus Torvalds 	rtm->rtm_flags = 0;
21691da177e4SLinus Torvalds 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
21701da177e4SLinus Torvalds 	rtm->rtm_protocol = rt->rt6i_protocol;
21711da177e4SLinus Torvalds 	if (rt->rt6i_flags&RTF_DYNAMIC)
21721da177e4SLinus Torvalds 		rtm->rtm_protocol = RTPROT_REDIRECT;
21731da177e4SLinus Torvalds 	else if (rt->rt6i_flags & RTF_ADDRCONF)
21741da177e4SLinus Torvalds 		rtm->rtm_protocol = RTPROT_KERNEL;
21751da177e4SLinus Torvalds 	else if (rt->rt6i_flags&RTF_DEFAULT)
21761da177e4SLinus Torvalds 		rtm->rtm_protocol = RTPROT_RA;
21771da177e4SLinus Torvalds 
21781da177e4SLinus Torvalds 	if (rt->rt6i_flags&RTF_CACHE)
21791da177e4SLinus Torvalds 		rtm->rtm_flags |= RTM_F_CLONED;
21801da177e4SLinus Torvalds 
21811da177e4SLinus Torvalds 	if (dst) {
21822d7202bfSThomas Graf 		NLA_PUT(skb, RTA_DST, 16, dst);
21831da177e4SLinus Torvalds 		rtm->rtm_dst_len = 128;
21841da177e4SLinus Torvalds 	} else if (rtm->rtm_dst_len)
21852d7202bfSThomas Graf 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
21861da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
21871da177e4SLinus Torvalds 	if (src) {
21882d7202bfSThomas Graf 		NLA_PUT(skb, RTA_SRC, 16, src);
21891da177e4SLinus Torvalds 		rtm->rtm_src_len = 128;
21901da177e4SLinus Torvalds 	} else if (rtm->rtm_src_len)
21912d7202bfSThomas Graf 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
21921da177e4SLinus Torvalds #endif
21937bc570c8SYOSHIFUJI Hideaki 	if (iif) {
21947bc570c8SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_MROUTE
21957bc570c8SYOSHIFUJI Hideaki 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
21967bc570c8SYOSHIFUJI Hideaki 			int err = ip6mr_get_route(skb, rtm, nowait);
21977bc570c8SYOSHIFUJI Hideaki 			if (err <= 0) {
21987bc570c8SYOSHIFUJI Hideaki 				if (!nowait) {
21997bc570c8SYOSHIFUJI Hideaki 					if (err == 0)
22007bc570c8SYOSHIFUJI Hideaki 						return 0;
22017bc570c8SYOSHIFUJI Hideaki 					goto nla_put_failure;
22027bc570c8SYOSHIFUJI Hideaki 				} else {
22037bc570c8SYOSHIFUJI Hideaki 					if (err == -EMSGSIZE)
22047bc570c8SYOSHIFUJI Hideaki 						goto nla_put_failure;
22057bc570c8SYOSHIFUJI Hideaki 				}
22067bc570c8SYOSHIFUJI Hideaki 			}
22077bc570c8SYOSHIFUJI Hideaki 		} else
22087bc570c8SYOSHIFUJI Hideaki #endif
22092d7202bfSThomas Graf 			NLA_PUT_U32(skb, RTA_IIF, iif);
22107bc570c8SYOSHIFUJI Hideaki 	} else if (dst) {
22115e0115e5SBrian Haley 		struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
22121da177e4SLinus Torvalds 		struct in6_addr saddr_buf;
2213191cd582SBrian Haley 		if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
22147cbca67cSYOSHIFUJI Hideaki 				       dst, 0, &saddr_buf) == 0)
22152d7202bfSThomas Graf 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
22161da177e4SLinus Torvalds 	}
22172d7202bfSThomas Graf 
22181da177e4SLinus Torvalds 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
22192d7202bfSThomas Graf 		goto nla_put_failure;
22202d7202bfSThomas Graf 
22211da177e4SLinus Torvalds 	if (rt->u.dst.neighbour)
22222d7202bfSThomas Graf 		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
22232d7202bfSThomas Graf 
22241da177e4SLinus Torvalds 	if (rt->u.dst.dev)
22252d7202bfSThomas Graf 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
22262d7202bfSThomas Graf 
22272d7202bfSThomas Graf 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2228e3703b3dSThomas Graf 
222936e3deaeSYOSHIFUJI Hideaki 	if (!(rt->rt6i_flags & RTF_EXPIRES))
223036e3deaeSYOSHIFUJI Hideaki 		expires = 0;
223136e3deaeSYOSHIFUJI Hideaki 	else if (rt->rt6i_expires - jiffies < INT_MAX)
223236e3deaeSYOSHIFUJI Hideaki 		expires = rt->rt6i_expires - jiffies;
223336e3deaeSYOSHIFUJI Hideaki 	else
223436e3deaeSYOSHIFUJI Hideaki 		expires = INT_MAX;
223569cdf8f9SYOSHIFUJI Hideaki 
2236e3703b3dSThomas Graf 	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2237e3703b3dSThomas Graf 			       expires, rt->u.dst.error) < 0)
2238e3703b3dSThomas Graf 		goto nla_put_failure;
22391da177e4SLinus Torvalds 
22402d7202bfSThomas Graf 	return nlmsg_end(skb, nlh);
22412d7202bfSThomas Graf 
22422d7202bfSThomas Graf nla_put_failure:
224326932566SPatrick McHardy 	nlmsg_cancel(skb, nlh);
224426932566SPatrick McHardy 	return -EMSGSIZE;
22451da177e4SLinus Torvalds }
22461da177e4SLinus Torvalds 
22471b43af54SPatrick McHardy int rt6_dump_route(struct rt6_info *rt, void *p_arg)
22481da177e4SLinus Torvalds {
22491da177e4SLinus Torvalds 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
22501da177e4SLinus Torvalds 	int prefix;
22511da177e4SLinus Torvalds 
22522d7202bfSThomas Graf 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
22532d7202bfSThomas Graf 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
22541da177e4SLinus Torvalds 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
22551da177e4SLinus Torvalds 	} else
22561da177e4SLinus Torvalds 		prefix = 0;
22571da177e4SLinus Torvalds 
2258191cd582SBrian Haley 	return rt6_fill_node(arg->net,
2259191cd582SBrian Haley 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
22601da177e4SLinus Torvalds 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
22617bc570c8SYOSHIFUJI Hideaki 		     prefix, 0, NLM_F_MULTI);
22621da177e4SLinus Torvalds }
22631da177e4SLinus Torvalds 
2264c127ea2cSThomas Graf static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
22651da177e4SLinus Torvalds {
22663b1e0a65SYOSHIFUJI Hideaki 	struct net *net = sock_net(in_skb->sk);
2267ab364a6fSThomas Graf 	struct nlattr *tb[RTA_MAX+1];
22681da177e4SLinus Torvalds 	struct rt6_info *rt;
2269ab364a6fSThomas Graf 	struct sk_buff *skb;
2270ab364a6fSThomas Graf 	struct rtmsg *rtm;
2271ab364a6fSThomas Graf 	struct flowi fl;
2272ab364a6fSThomas Graf 	int err, iif = 0;
2273ab364a6fSThomas Graf 
2274ab364a6fSThomas Graf 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2275ab364a6fSThomas Graf 	if (err < 0)
2276ab364a6fSThomas Graf 		goto errout;
2277ab364a6fSThomas Graf 
2278ab364a6fSThomas Graf 	err = -EINVAL;
2279ab364a6fSThomas Graf 	memset(&fl, 0, sizeof(fl));
2280ab364a6fSThomas Graf 
2281ab364a6fSThomas Graf 	if (tb[RTA_SRC]) {
2282ab364a6fSThomas Graf 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2283ab364a6fSThomas Graf 			goto errout;
2284ab364a6fSThomas Graf 
2285ab364a6fSThomas Graf 		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2286ab364a6fSThomas Graf 	}
2287ab364a6fSThomas Graf 
2288ab364a6fSThomas Graf 	if (tb[RTA_DST]) {
2289ab364a6fSThomas Graf 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2290ab364a6fSThomas Graf 			goto errout;
2291ab364a6fSThomas Graf 
2292ab364a6fSThomas Graf 		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2293ab364a6fSThomas Graf 	}
2294ab364a6fSThomas Graf 
2295ab364a6fSThomas Graf 	if (tb[RTA_IIF])
2296ab364a6fSThomas Graf 		iif = nla_get_u32(tb[RTA_IIF]);
2297ab364a6fSThomas Graf 
2298ab364a6fSThomas Graf 	if (tb[RTA_OIF])
2299ab364a6fSThomas Graf 		fl.oif = nla_get_u32(tb[RTA_OIF]);
2300ab364a6fSThomas Graf 
2301ab364a6fSThomas Graf 	if (iif) {
2302ab364a6fSThomas Graf 		struct net_device *dev;
23035578689aSDaniel Lezcano 		dev = __dev_get_by_index(net, iif);
2304ab364a6fSThomas Graf 		if (!dev) {
2305ab364a6fSThomas Graf 			err = -ENODEV;
2306ab364a6fSThomas Graf 			goto errout;
2307ab364a6fSThomas Graf 		}
2308ab364a6fSThomas Graf 	}
23091da177e4SLinus Torvalds 
23101da177e4SLinus Torvalds 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2311ab364a6fSThomas Graf 	if (skb == NULL) {
2312ab364a6fSThomas Graf 		err = -ENOBUFS;
2313ab364a6fSThomas Graf 		goto errout;
2314ab364a6fSThomas Graf 	}
23151da177e4SLinus Torvalds 
23161da177e4SLinus Torvalds 	/* Reserve room for dummy headers, this skb can pass
23171da177e4SLinus Torvalds 	   through good chunk of routing engine.
23181da177e4SLinus Torvalds 	 */
2319459a98edSArnaldo Carvalho de Melo 	skb_reset_mac_header(skb);
23201da177e4SLinus Torvalds 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
23211da177e4SLinus Torvalds 
23228a3edd80SDaniel Lezcano 	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
23231da177e4SLinus Torvalds 	skb->dst = &rt->u.dst;
23241da177e4SLinus Torvalds 
2325191cd582SBrian Haley 	err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
23261da177e4SLinus Torvalds 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
23277bc570c8SYOSHIFUJI Hideaki 			    nlh->nlmsg_seq, 0, 0, 0);
23281da177e4SLinus Torvalds 	if (err < 0) {
2329ab364a6fSThomas Graf 		kfree_skb(skb);
2330ab364a6fSThomas Graf 		goto errout;
23311da177e4SLinus Torvalds 	}
23321da177e4SLinus Torvalds 
23335578689aSDaniel Lezcano 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2334ab364a6fSThomas Graf errout:
23351da177e4SLinus Torvalds 	return err;
23361da177e4SLinus Torvalds }
23371da177e4SLinus Torvalds 
233886872cb5SThomas Graf void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
23391da177e4SLinus Torvalds {
23401da177e4SLinus Torvalds 	struct sk_buff *skb;
23415578689aSDaniel Lezcano 	struct net *net = info->nl_net;
2342528c4cebSDenis V. Lunev 	u32 seq;
2343528c4cebSDenis V. Lunev 	int err;
23440d51aa80SJamal Hadi Salim 
2345528c4cebSDenis V. Lunev 	err = -ENOBUFS;
2346528c4cebSDenis V. Lunev 	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
234786872cb5SThomas Graf 
2348339bf98fSThomas Graf 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
234921713ebcSThomas Graf 	if (skb == NULL)
235021713ebcSThomas Graf 		goto errout;
23511da177e4SLinus Torvalds 
2352191cd582SBrian Haley 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
23537bc570c8SYOSHIFUJI Hideaki 				event, info->pid, seq, 0, 0, 0);
235426932566SPatrick McHardy 	if (err < 0) {
235526932566SPatrick McHardy 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
235626932566SPatrick McHardy 		WARN_ON(err == -EMSGSIZE);
235726932566SPatrick McHardy 		kfree_skb(skb);
235826932566SPatrick McHardy 		goto errout;
235926932566SPatrick McHardy 	}
23605578689aSDaniel Lezcano 	err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
23615578689aSDaniel Lezcano 			  info->nlh, gfp_any());
236221713ebcSThomas Graf errout:
236321713ebcSThomas Graf 	if (err < 0)
23645578689aSDaniel Lezcano 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
23651da177e4SLinus Torvalds }
23661da177e4SLinus Torvalds 
23678ed67789SDaniel Lezcano static int ip6_route_dev_notify(struct notifier_block *this,
23688ed67789SDaniel Lezcano 				unsigned long event, void *data)
23698ed67789SDaniel Lezcano {
23708ed67789SDaniel Lezcano 	struct net_device *dev = (struct net_device *)data;
2371c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
23728ed67789SDaniel Lezcano 
23738ed67789SDaniel Lezcano 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
23748ed67789SDaniel Lezcano 		net->ipv6.ip6_null_entry->u.dst.dev = dev;
23758ed67789SDaniel Lezcano 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
23768ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
23778ed67789SDaniel Lezcano 		net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
23788ed67789SDaniel Lezcano 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
23798ed67789SDaniel Lezcano 		net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
23808ed67789SDaniel Lezcano 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
23818ed67789SDaniel Lezcano #endif
23828ed67789SDaniel Lezcano 	}
23838ed67789SDaniel Lezcano 
23848ed67789SDaniel Lezcano 	return NOTIFY_OK;
23858ed67789SDaniel Lezcano }
23868ed67789SDaniel Lezcano 
23871da177e4SLinus Torvalds /*
23881da177e4SLinus Torvalds  *	/proc
23891da177e4SLinus Torvalds  */
23901da177e4SLinus Torvalds 
23911da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
23921da177e4SLinus Torvalds 
/* NOTE(review): looks like the per-line length of /proc/net/ipv6_route - confirm */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/*
 * NOTE(review): no user of this struct is visible in this part of the
 * file - check whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
24031da177e4SLinus Torvalds 
24041da177e4SLinus Torvalds static int rt6_info_route(struct rt6_info *rt, void *p_arg)
24051da177e4SLinus Torvalds {
240633120b30SAlexey Dobriyan 	struct seq_file *m = p_arg;
24071da177e4SLinus Torvalds 
240833120b30SAlexey Dobriyan 	seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
24091da177e4SLinus Torvalds 		   rt->rt6i_dst.plen);
24101da177e4SLinus Torvalds 
24111da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
241233120b30SAlexey Dobriyan 	seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
24131da177e4SLinus Torvalds 		   rt->rt6i_src.plen);
24141da177e4SLinus Torvalds #else
241533120b30SAlexey Dobriyan 	seq_puts(m, "00000000000000000000000000000000 00 ");
24161da177e4SLinus Torvalds #endif
24171da177e4SLinus Torvalds 
24181da177e4SLinus Torvalds 	if (rt->rt6i_nexthop) {
241933120b30SAlexey Dobriyan 		seq_printf(m, NIP6_SEQFMT,
242033e93c96SYOSHIFUJI Hideaki 			   NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
24211da177e4SLinus Torvalds 	} else {
242233120b30SAlexey Dobriyan 		seq_puts(m, "00000000000000000000000000000000");
24231da177e4SLinus Torvalds 	}
242433120b30SAlexey Dobriyan 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
24251da177e4SLinus Torvalds 		   rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
24261da177e4SLinus Torvalds 		   rt->u.dst.__use, rt->rt6i_flags,
24271da177e4SLinus Torvalds 		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
24281da177e4SLinus Torvalds 	return 0;
24291da177e4SLinus Torvalds }
24301da177e4SLinus Torvalds 
243133120b30SAlexey Dobriyan static int ipv6_route_show(struct seq_file *m, void *v)
24321da177e4SLinus Torvalds {
2433f3db4851SDaniel Lezcano 	struct net *net = (struct net *)m->private;
2434f3db4851SDaniel Lezcano 	fib6_clean_all(net, rt6_info_route, 0, m);
243533120b30SAlexey Dobriyan 	return 0;
24361da177e4SLinus Torvalds }
24371da177e4SLinus Torvalds 
243833120b30SAlexey Dobriyan static int ipv6_route_open(struct inode *inode, struct file *file)
243933120b30SAlexey Dobriyan {
2440de05c557SPavel Emelyanov 	return single_open_net(inode, file, ipv6_route_show);
2441f3db4851SDaniel Lezcano }
2442f3db4851SDaniel Lezcano 
244333120b30SAlexey Dobriyan static const struct file_operations ipv6_route_proc_fops = {
244433120b30SAlexey Dobriyan 	.owner		= THIS_MODULE,
244533120b30SAlexey Dobriyan 	.open		= ipv6_route_open,
244633120b30SAlexey Dobriyan 	.read		= seq_read,
244733120b30SAlexey Dobriyan 	.llseek		= seq_lseek,
2448b6fcbdb4SPavel Emelyanov 	.release	= single_release_net,
244933120b30SAlexey Dobriyan };
245033120b30SAlexey Dobriyan 
24511da177e4SLinus Torvalds static int rt6_stats_seq_show(struct seq_file *seq, void *v)
24521da177e4SLinus Torvalds {
245369ddb805SDaniel Lezcano 	struct net *net = (struct net *)seq->private;
24541da177e4SLinus Torvalds 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
245569ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_nodes,
245669ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_route_nodes,
245769ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_alloc,
245869ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_entries,
245969ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_cache,
2460f2fc6a54SBenjamin Thery 		   atomic_read(&net->ipv6.ip6_dst_ops->entries),
246169ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_discarded_routes);
24621da177e4SLinus Torvalds 
24631da177e4SLinus Torvalds 	return 0;
24641da177e4SLinus Torvalds }
24651da177e4SLinus Torvalds 
24661da177e4SLinus Torvalds static int rt6_stats_seq_open(struct inode *inode, struct file *file)
24671da177e4SLinus Torvalds {
2468de05c557SPavel Emelyanov 	return single_open_net(inode, file, rt6_stats_seq_show);
246969ddb805SDaniel Lezcano }
247069ddb805SDaniel Lezcano 
24719a32144eSArjan van de Ven static const struct file_operations rt6_stats_seq_fops = {
24721da177e4SLinus Torvalds 	.owner	 = THIS_MODULE,
24731da177e4SLinus Torvalds 	.open	 = rt6_stats_seq_open,
24741da177e4SLinus Torvalds 	.read	 = seq_read,
24751da177e4SLinus Torvalds 	.llseek	 = seq_lseek,
2476b6fcbdb4SPavel Emelyanov 	.release = single_release_net,
24771da177e4SLinus Torvalds };
24781da177e4SLinus Torvalds #endif	/* CONFIG_PROC_FS */
24791da177e4SLinus Torvalds 
24801da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
24811da177e4SLinus Torvalds 
24821da177e4SLinus Torvalds static
24831da177e4SLinus Torvalds int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
24841da177e4SLinus Torvalds 			      void __user *buffer, size_t *lenp, loff_t *ppos)
24851da177e4SLinus Torvalds {
24865b7c931dSDaniel Lezcano 	struct net *net = current->nsproxy->net_ns;
24875b7c931dSDaniel Lezcano 	int delay = net->ipv6.sysctl.flush_delay;
24881da177e4SLinus Torvalds 	if (write) {
24891da177e4SLinus Torvalds 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
24905b7c931dSDaniel Lezcano 		fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
24911da177e4SLinus Torvalds 		return 0;
24921da177e4SLinus Torvalds 	} else
24931da177e4SLinus Torvalds 		return -EINVAL;
24941da177e4SLinus Torvalds }
24951da177e4SLinus Torvalds 
2496760f2d01SDaniel Lezcano ctl_table ipv6_route_table_template[] = {
24971da177e4SLinus Torvalds 	{
24981da177e4SLinus Torvalds 		.procname	=	"flush",
24994990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.flush_delay,
25001da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
250189c8b3a1SDave Jones 		.mode		=	0200,
25021da177e4SLinus Torvalds 		.proc_handler	=	&ipv6_sysctl_rtcache_flush
25031da177e4SLinus Torvalds 	},
25041da177e4SLinus Torvalds 	{
25051da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
25061da177e4SLinus Torvalds 		.procname	=	"gc_thresh",
25079a7ec3a9SDaniel Lezcano 		.data		=	&ip6_dst_ops_template.gc_thresh,
25081da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25091da177e4SLinus Torvalds 		.mode		=	0644,
25101da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec,
25111da177e4SLinus Torvalds 	},
25121da177e4SLinus Torvalds 	{
25131da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
25141da177e4SLinus Torvalds 		.procname	=	"max_size",
25154990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
25161da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25171da177e4SLinus Torvalds 		.mode		=	0644,
25181da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec,
25191da177e4SLinus Torvalds 	},
25201da177e4SLinus Torvalds 	{
25211da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
25221da177e4SLinus Torvalds 		.procname	=	"gc_min_interval",
25234990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
25241da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25251da177e4SLinus Torvalds 		.mode		=	0644,
25261da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec_jiffies,
25271da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
25281da177e4SLinus Torvalds 	},
25291da177e4SLinus Torvalds 	{
25301da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
25311da177e4SLinus Torvalds 		.procname	=	"gc_timeout",
25324990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
25331da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25341da177e4SLinus Torvalds 		.mode		=	0644,
25351da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec_jiffies,
25361da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
25371da177e4SLinus Torvalds 	},
25381da177e4SLinus Torvalds 	{
25391da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
25401da177e4SLinus Torvalds 		.procname	=	"gc_interval",
25414990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
25421da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25431da177e4SLinus Torvalds 		.mode		=	0644,
25441da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec_jiffies,
25451da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
25461da177e4SLinus Torvalds 	},
25471da177e4SLinus Torvalds 	{
25481da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
25491da177e4SLinus Torvalds 		.procname	=	"gc_elasticity",
25504990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
25511da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25521da177e4SLinus Torvalds 		.mode		=	0644,
25531da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec_jiffies,
25541da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
25551da177e4SLinus Torvalds 	},
25561da177e4SLinus Torvalds 	{
25571da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
25581da177e4SLinus Torvalds 		.procname	=	"mtu_expires",
25594990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
25601da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25611da177e4SLinus Torvalds 		.mode		=	0644,
25621da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec_jiffies,
25631da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
25641da177e4SLinus Torvalds 	},
25651da177e4SLinus Torvalds 	{
25661da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
25671da177e4SLinus Torvalds 		.procname	=	"min_adv_mss",
25684990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
25691da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25701da177e4SLinus Torvalds 		.mode		=	0644,
25711da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec_jiffies,
25721da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
25731da177e4SLinus Torvalds 	},
25741da177e4SLinus Torvalds 	{
25751da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
25761da177e4SLinus Torvalds 		.procname	=	"gc_min_interval_ms",
25774990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
25781da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
25791da177e4SLinus Torvalds 		.mode		=	0644,
25801da177e4SLinus Torvalds 		.proc_handler	=	&proc_dointvec_ms_jiffies,
25811da177e4SLinus Torvalds 		.strategy	=	&sysctl_ms_jiffies,
25821da177e4SLinus Torvalds 	},
25831da177e4SLinus Torvalds 	{ .ctl_name = 0 }
25841da177e4SLinus Torvalds };
25851da177e4SLinus Torvalds 
2586760f2d01SDaniel Lezcano struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2587760f2d01SDaniel Lezcano {
2588760f2d01SDaniel Lezcano 	struct ctl_table *table;
2589760f2d01SDaniel Lezcano 
2590760f2d01SDaniel Lezcano 	table = kmemdup(ipv6_route_table_template,
2591760f2d01SDaniel Lezcano 			sizeof(ipv6_route_table_template),
2592760f2d01SDaniel Lezcano 			GFP_KERNEL);
25935ee09105SYOSHIFUJI Hideaki 
25945ee09105SYOSHIFUJI Hideaki 	if (table) {
25955ee09105SYOSHIFUJI Hideaki 		table[0].data = &net->ipv6.sysctl.flush_delay;
2596f2fc6a54SBenjamin Thery 		table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh;
25975ee09105SYOSHIFUJI Hideaki 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
25985ee09105SYOSHIFUJI Hideaki 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
25995ee09105SYOSHIFUJI Hideaki 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
26005ee09105SYOSHIFUJI Hideaki 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
26015ee09105SYOSHIFUJI Hideaki 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
26025ee09105SYOSHIFUJI Hideaki 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
26035ee09105SYOSHIFUJI Hideaki 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
26045ee09105SYOSHIFUJI Hideaki 	}
26055ee09105SYOSHIFUJI Hideaki 
2606760f2d01SDaniel Lezcano 	return table;
2607760f2d01SDaniel Lezcano }
26081da177e4SLinus Torvalds #endif
26091da177e4SLinus Torvalds 
2610cdb18761SDaniel Lezcano static int ip6_route_net_init(struct net *net)
2611cdb18761SDaniel Lezcano {
2612633d424bSPavel Emelyanov 	int ret = -ENOMEM;
26138ed67789SDaniel Lezcano 
2614f2fc6a54SBenjamin Thery 	net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template,
2615f2fc6a54SBenjamin Thery 					sizeof(*net->ipv6.ip6_dst_ops),
2616f2fc6a54SBenjamin Thery 					GFP_KERNEL);
2617f2fc6a54SBenjamin Thery 	if (!net->ipv6.ip6_dst_ops)
2618f2fc6a54SBenjamin Thery 		goto out;
261948115becSDenis V. Lunev 	net->ipv6.ip6_dst_ops->dst_net = hold_net(net);
2620f2fc6a54SBenjamin Thery 
26218ed67789SDaniel Lezcano 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
26228ed67789SDaniel Lezcano 					   sizeof(*net->ipv6.ip6_null_entry),
26238ed67789SDaniel Lezcano 					   GFP_KERNEL);
26248ed67789SDaniel Lezcano 	if (!net->ipv6.ip6_null_entry)
2625f2fc6a54SBenjamin Thery 		goto out_ip6_dst_ops;
26268ed67789SDaniel Lezcano 	net->ipv6.ip6_null_entry->u.dst.path =
26278ed67789SDaniel Lezcano 		(struct dst_entry *)net->ipv6.ip6_null_entry;
2628f2fc6a54SBenjamin Thery 	net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
26298ed67789SDaniel Lezcano 
26308ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
26318ed67789SDaniel Lezcano 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
26328ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_prohibit_entry),
26338ed67789SDaniel Lezcano 					       GFP_KERNEL);
263468fffc67SPeter Zijlstra 	if (!net->ipv6.ip6_prohibit_entry)
263568fffc67SPeter Zijlstra 		goto out_ip6_null_entry;
26368ed67789SDaniel Lezcano 	net->ipv6.ip6_prohibit_entry->u.dst.path =
26378ed67789SDaniel Lezcano 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2638f2fc6a54SBenjamin Thery 	net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
26398ed67789SDaniel Lezcano 
26408ed67789SDaniel Lezcano 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
26418ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
26428ed67789SDaniel Lezcano 					       GFP_KERNEL);
264368fffc67SPeter Zijlstra 	if (!net->ipv6.ip6_blk_hole_entry)
264468fffc67SPeter Zijlstra 		goto out_ip6_prohibit_entry;
26458ed67789SDaniel Lezcano 	net->ipv6.ip6_blk_hole_entry->u.dst.path =
26468ed67789SDaniel Lezcano 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2647f2fc6a54SBenjamin Thery 	net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
26488ed67789SDaniel Lezcano #endif
26498ed67789SDaniel Lezcano 
2650*b339a47cSPeter Zijlstra 	net->ipv6.sysctl.flush_delay = 0;
2651*b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2652*b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2653*b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2654*b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2655*b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2656*b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2657*b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2658*b339a47cSPeter Zijlstra 
2659cdb18761SDaniel Lezcano #ifdef CONFIG_PROC_FS
2660cdb18761SDaniel Lezcano 	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2661cdb18761SDaniel Lezcano 	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2662cdb18761SDaniel Lezcano #endif
26636891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
26646891a346SBenjamin Thery 
26658ed67789SDaniel Lezcano 	ret = 0;
26668ed67789SDaniel Lezcano out:
26678ed67789SDaniel Lezcano 	return ret;
2668f2fc6a54SBenjamin Thery 
266968fffc67SPeter Zijlstra #ifdef CONFIG_IPV6_MULTIPLE_TABLES
267068fffc67SPeter Zijlstra out_ip6_prohibit_entry:
267168fffc67SPeter Zijlstra 	kfree(net->ipv6.ip6_prohibit_entry);
267268fffc67SPeter Zijlstra out_ip6_null_entry:
267368fffc67SPeter Zijlstra 	kfree(net->ipv6.ip6_null_entry);
267468fffc67SPeter Zijlstra #endif
2675f2fc6a54SBenjamin Thery out_ip6_dst_ops:
267648115becSDenis V. Lunev 	release_net(net->ipv6.ip6_dst_ops->dst_net);
2677f2fc6a54SBenjamin Thery 	kfree(net->ipv6.ip6_dst_ops);
2678f2fc6a54SBenjamin Thery 	goto out;
2679cdb18761SDaniel Lezcano }
2680cdb18761SDaniel Lezcano 
2681cdb18761SDaniel Lezcano static void ip6_route_net_exit(struct net *net)
2682cdb18761SDaniel Lezcano {
2683cdb18761SDaniel Lezcano #ifdef CONFIG_PROC_FS
2684cdb18761SDaniel Lezcano 	proc_net_remove(net, "ipv6_route");
2685cdb18761SDaniel Lezcano 	proc_net_remove(net, "rt6_stats");
2686cdb18761SDaniel Lezcano #endif
26878ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_null_entry);
26888ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
26898ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_prohibit_entry);
26908ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_blk_hole_entry);
26918ed67789SDaniel Lezcano #endif
269248115becSDenis V. Lunev 	release_net(net->ipv6.ip6_dst_ops->dst_net);
2693f2fc6a54SBenjamin Thery 	kfree(net->ipv6.ip6_dst_ops);
2694cdb18761SDaniel Lezcano }
2695cdb18761SDaniel Lezcano 
2696cdb18761SDaniel Lezcano static struct pernet_operations ip6_route_net_ops = {
2697cdb18761SDaniel Lezcano 	.init = ip6_route_net_init,
2698cdb18761SDaniel Lezcano 	.exit = ip6_route_net_exit,
2699cdb18761SDaniel Lezcano };
2700cdb18761SDaniel Lezcano 
27018ed67789SDaniel Lezcano static struct notifier_block ip6_route_dev_notifier = {
27028ed67789SDaniel Lezcano 	.notifier_call = ip6_route_dev_notify,
27038ed67789SDaniel Lezcano 	.priority = 0,
27048ed67789SDaniel Lezcano };
27058ed67789SDaniel Lezcano 
2706433d49c3SDaniel Lezcano int __init ip6_route_init(void)
27071da177e4SLinus Torvalds {
2708433d49c3SDaniel Lezcano 	int ret;
2709433d49c3SDaniel Lezcano 
27109a7ec3a9SDaniel Lezcano 	ret = -ENOMEM;
27119a7ec3a9SDaniel Lezcano 	ip6_dst_ops_template.kmem_cachep =
27129a7ec3a9SDaniel Lezcano 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
27139a7ec3a9SDaniel Lezcano 				  SLAB_HWCACHE_ALIGN, NULL);
27149a7ec3a9SDaniel Lezcano 	if (!ip6_dst_ops_template.kmem_cachep)
2715f2fc6a54SBenjamin Thery 		goto out;;
271614e50e57SDavid S. Miller 
27178ed67789SDaniel Lezcano 	ret = register_pernet_subsys(&ip6_route_net_ops);
27188ed67789SDaniel Lezcano 	if (ret)
2719bdb3289fSDaniel Lezcano 		goto out_kmem_cache;
2720bdb3289fSDaniel Lezcano 
27215dc121e9SArnaud Ebalard 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
27225dc121e9SArnaud Ebalard 
27238ed67789SDaniel Lezcano 	/* Registering of the loopback is done before this portion of code,
27248ed67789SDaniel Lezcano 	 * the loopback reference in rt6_info will not be taken, do it
27258ed67789SDaniel Lezcano 	 * manually for init_net */
27268ed67789SDaniel Lezcano 	init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
27278ed67789SDaniel Lezcano 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2728bdb3289fSDaniel Lezcano   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
27298ed67789SDaniel Lezcano 	init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
27308ed67789SDaniel Lezcano 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
27318ed67789SDaniel Lezcano 	init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
27328ed67789SDaniel Lezcano 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2733bdb3289fSDaniel Lezcano   #endif
2734433d49c3SDaniel Lezcano 	ret = fib6_init();
2735433d49c3SDaniel Lezcano 	if (ret)
27368ed67789SDaniel Lezcano 		goto out_register_subsys;
2737433d49c3SDaniel Lezcano 
2738433d49c3SDaniel Lezcano 	ret = xfrm6_init();
2739433d49c3SDaniel Lezcano 	if (ret)
2740cdb18761SDaniel Lezcano 		goto out_fib6_init;
2741c35b7e72SDaniel Lezcano 
2742433d49c3SDaniel Lezcano 	ret = fib6_rules_init();
2743433d49c3SDaniel Lezcano 	if (ret)
2744433d49c3SDaniel Lezcano 		goto xfrm6_init;
27457e5449c2SDaniel Lezcano 
2746433d49c3SDaniel Lezcano 	ret = -ENOBUFS;
2747433d49c3SDaniel Lezcano 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2748433d49c3SDaniel Lezcano 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2749433d49c3SDaniel Lezcano 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2750433d49c3SDaniel Lezcano 		goto fib6_rules_init;
2751433d49c3SDaniel Lezcano 
27528ed67789SDaniel Lezcano 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2753cdb18761SDaniel Lezcano 	if (ret)
2754cdb18761SDaniel Lezcano 		goto fib6_rules_init;
27558ed67789SDaniel Lezcano 
2756433d49c3SDaniel Lezcano out:
2757433d49c3SDaniel Lezcano 	return ret;
2758433d49c3SDaniel Lezcano 
2759433d49c3SDaniel Lezcano fib6_rules_init:
2760433d49c3SDaniel Lezcano 	fib6_rules_cleanup();
2761433d49c3SDaniel Lezcano xfrm6_init:
2762433d49c3SDaniel Lezcano 	xfrm6_fini();
2763433d49c3SDaniel Lezcano out_fib6_init:
2764433d49c3SDaniel Lezcano 	fib6_gc_cleanup();
27658ed67789SDaniel Lezcano out_register_subsys:
27668ed67789SDaniel Lezcano 	unregister_pernet_subsys(&ip6_route_net_ops);
2767433d49c3SDaniel Lezcano out_kmem_cache:
2768f2fc6a54SBenjamin Thery 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2769433d49c3SDaniel Lezcano 	goto out;
27701da177e4SLinus Torvalds }
27711da177e4SLinus Torvalds 
27721da177e4SLinus Torvalds void ip6_route_cleanup(void)
27731da177e4SLinus Torvalds {
27748ed67789SDaniel Lezcano 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
2775101367c2SThomas Graf 	fib6_rules_cleanup();
27761da177e4SLinus Torvalds 	xfrm6_fini();
27771da177e4SLinus Torvalds 	fib6_gc_cleanup();
27788ed67789SDaniel Lezcano 	unregister_pernet_subsys(&ip6_route_net_ops);
2779f2fc6a54SBenjamin Thery 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
27801da177e4SLinus Torvalds }
2781