xref: /openbmc/linux/net/ipv6/route.c (revision 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2)
1*1da177e4SLinus Torvalds /*
2*1da177e4SLinus Torvalds  *	Linux INET6 implementation
3*1da177e4SLinus Torvalds  *	FIB front-end.
4*1da177e4SLinus Torvalds  *
5*1da177e4SLinus Torvalds  *	Authors:
6*1da177e4SLinus Torvalds  *	Pedro Roque		<roque@di.fc.ul.pt>
7*1da177e4SLinus Torvalds  *
8*1da177e4SLinus Torvalds  *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9*1da177e4SLinus Torvalds  *
10*1da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
11*1da177e4SLinus Torvalds  *      modify it under the terms of the GNU General Public License
12*1da177e4SLinus Torvalds  *      as published by the Free Software Foundation; either version
13*1da177e4SLinus Torvalds  *      2 of the License, or (at your option) any later version.
14*1da177e4SLinus Torvalds  */
15*1da177e4SLinus Torvalds 
16*1da177e4SLinus Torvalds /*	Changes:
17*1da177e4SLinus Torvalds  *
18*1da177e4SLinus Torvalds  *	YOSHIFUJI Hideaki @USAGI
19*1da177e4SLinus Torvalds  *		reworked default router selection.
20*1da177e4SLinus Torvalds  *		- respect outgoing interface
21*1da177e4SLinus Torvalds  *		- select from (probably) reachable routers (i.e.
22*1da177e4SLinus Torvalds  *		routers in REACHABLE, STALE, DELAY or PROBE states).
23*1da177e4SLinus Torvalds  *		- always select the same router if it is (probably)
24*1da177e4SLinus Torvalds  *		reachable.  otherwise, round-robin the list.
25*1da177e4SLinus Torvalds  */
26*1da177e4SLinus Torvalds 
27*1da177e4SLinus Torvalds #include <linux/config.h>
28*1da177e4SLinus Torvalds #include <linux/errno.h>
29*1da177e4SLinus Torvalds #include <linux/types.h>
30*1da177e4SLinus Torvalds #include <linux/times.h>
31*1da177e4SLinus Torvalds #include <linux/socket.h>
32*1da177e4SLinus Torvalds #include <linux/sockios.h>
33*1da177e4SLinus Torvalds #include <linux/net.h>
34*1da177e4SLinus Torvalds #include <linux/route.h>
35*1da177e4SLinus Torvalds #include <linux/netdevice.h>
36*1da177e4SLinus Torvalds #include <linux/in6.h>
37*1da177e4SLinus Torvalds #include <linux/init.h>
38*1da177e4SLinus Torvalds #include <linux/netlink.h>
39*1da177e4SLinus Torvalds #include <linux/if_arp.h>
40*1da177e4SLinus Torvalds 
41*1da177e4SLinus Torvalds #ifdef 	CONFIG_PROC_FS
42*1da177e4SLinus Torvalds #include <linux/proc_fs.h>
43*1da177e4SLinus Torvalds #include <linux/seq_file.h>
44*1da177e4SLinus Torvalds #endif
45*1da177e4SLinus Torvalds 
46*1da177e4SLinus Torvalds #include <net/snmp.h>
47*1da177e4SLinus Torvalds #include <net/ipv6.h>
48*1da177e4SLinus Torvalds #include <net/ip6_fib.h>
49*1da177e4SLinus Torvalds #include <net/ip6_route.h>
50*1da177e4SLinus Torvalds #include <net/ndisc.h>
51*1da177e4SLinus Torvalds #include <net/addrconf.h>
52*1da177e4SLinus Torvalds #include <net/tcp.h>
53*1da177e4SLinus Torvalds #include <linux/rtnetlink.h>
54*1da177e4SLinus Torvalds #include <net/dst.h>
55*1da177e4SLinus Torvalds #include <net/xfrm.h>
56*1da177e4SLinus Torvalds 
57*1da177e4SLinus Torvalds #include <asm/uaccess.h>
58*1da177e4SLinus Torvalds 
59*1da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
60*1da177e4SLinus Torvalds #include <linux/sysctl.h>
61*1da177e4SLinus Torvalds #endif
62*1da177e4SLinus Torvalds 
/*
 * Debug switches for this file.  With RT6_DEBUG at 3 or above, RDBG() and
 * RT6_TRACE() expand to printk() calls; otherwise RDBG() expands to nothing
 * and RT6_TRACE() to an empty statement, so trace call sites compile away.
 */
63*1da177e4SLinus Torvalds /* Set to 3 to get tracing. */
64*1da177e4SLinus Torvalds #define RT6_DEBUG 2
65*1da177e4SLinus Torvalds 
66*1da177e4SLinus Torvalds #if RT6_DEBUG >= 3
/* RDBG takes one fully parenthesised printk argument list: RDBG(("fmt", a)) */
67*1da177e4SLinus Torvalds #define RDBG(x) printk x
68*1da177e4SLinus Torvalds #define RT6_TRACE(x...) printk(KERN_DEBUG x)
69*1da177e4SLinus Torvalds #else
70*1da177e4SLinus Torvalds #define RDBG(x)
71*1da177e4SLinus Torvalds #define RT6_TRACE(x...) do { ; } while (0)
72*1da177e4SLinus Torvalds #endif
73*1da177e4SLinus Torvalds 
74*1da177e4SLinus Torvalds 
/*
 * Route cache tunables.  Values written in terms of HZ are time spans in
 * jiffies.  ip6_rt_min_advmss is the lower clamp applied by ipv6_advmss();
 * ip6_rt_gc_interval is the only one with external linkage.
 * NOTE(review): the remaining consumers (GC, sysctl) are not visible in
 * this section of the file - confirm before changing defaults.
 */
75*1da177e4SLinus Torvalds static int ip6_rt_max_size = 4096;
76*1da177e4SLinus Torvalds static int ip6_rt_gc_min_interval = HZ / 2;
77*1da177e4SLinus Torvalds static int ip6_rt_gc_timeout = 60*HZ;
78*1da177e4SLinus Torvalds int ip6_rt_gc_interval = 30*HZ;
79*1da177e4SLinus Torvalds static int ip6_rt_gc_elasticity = 9;
80*1da177e4SLinus Torvalds static int ip6_rt_mtu_expires = 10*60*HZ;
81*1da177e4SLinus Torvalds static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
82*1da177e4SLinus Torvalds 
83*1da177e4SLinus Torvalds static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84*1da177e4SLinus Torvalds static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
85*1da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86*1da177e4SLinus Torvalds static void		ip6_dst_destroy(struct dst_entry *);
87*1da177e4SLinus Torvalds static void		ip6_dst_ifdown(struct dst_entry *,
88*1da177e4SLinus Torvalds 				       struct net_device *dev, int how);
89*1da177e4SLinus Torvalds static int		 ip6_dst_gc(void);
90*1da177e4SLinus Torvalds 
91*1da177e4SLinus Torvalds static int		ip6_pkt_discard(struct sk_buff *skb);
92*1da177e4SLinus Torvalds static int		ip6_pkt_discard_out(struct sk_buff *skb);
93*1da177e4SLinus Torvalds static void		ip6_link_failure(struct sk_buff *skb);
94*1da177e4SLinus Torvalds static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
95*1da177e4SLinus Torvalds 
96*1da177e4SLinus Torvalds static struct dst_ops ip6_dst_ops = {
97*1da177e4SLinus Torvalds 	.family			=	AF_INET6,
98*1da177e4SLinus Torvalds 	.protocol		=	__constant_htons(ETH_P_IPV6),
99*1da177e4SLinus Torvalds 	.gc			=	ip6_dst_gc,
100*1da177e4SLinus Torvalds 	.gc_thresh		=	1024,
101*1da177e4SLinus Torvalds 	.check			=	ip6_dst_check,
102*1da177e4SLinus Torvalds 	.destroy		=	ip6_dst_destroy,
103*1da177e4SLinus Torvalds 	.ifdown			=	ip6_dst_ifdown,
104*1da177e4SLinus Torvalds 	.negative_advice	=	ip6_negative_advice,
105*1da177e4SLinus Torvalds 	.link_failure		=	ip6_link_failure,
106*1da177e4SLinus Torvalds 	.update_pmtu		=	ip6_rt_update_pmtu,
107*1da177e4SLinus Torvalds 	.entry_size		=	sizeof(struct rt6_info),
108*1da177e4SLinus Torvalds };
109*1da177e4SLinus Torvalds 
110*1da177e4SLinus Torvalds struct rt6_info ip6_null_entry = {
111*1da177e4SLinus Torvalds 	.u = {
112*1da177e4SLinus Torvalds 		.dst = {
113*1da177e4SLinus Torvalds 			.__refcnt	= ATOMIC_INIT(1),
114*1da177e4SLinus Torvalds 			.__use		= 1,
115*1da177e4SLinus Torvalds 			.dev		= &loopback_dev,
116*1da177e4SLinus Torvalds 			.obsolete	= -1,
117*1da177e4SLinus Torvalds 			.error		= -ENETUNREACH,
118*1da177e4SLinus Torvalds 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
119*1da177e4SLinus Torvalds 			.input		= ip6_pkt_discard,
120*1da177e4SLinus Torvalds 			.output		= ip6_pkt_discard_out,
121*1da177e4SLinus Torvalds 			.ops		= &ip6_dst_ops,
122*1da177e4SLinus Torvalds 			.path		= (struct dst_entry*)&ip6_null_entry,
123*1da177e4SLinus Torvalds 		}
124*1da177e4SLinus Torvalds 	},
125*1da177e4SLinus Torvalds 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
126*1da177e4SLinus Torvalds 	.rt6i_metric	= ~(u32) 0,
127*1da177e4SLinus Torvalds 	.rt6i_ref	= ATOMIC_INIT(1),
128*1da177e4SLinus Torvalds };
129*1da177e4SLinus Torvalds 
130*1da177e4SLinus Torvalds struct fib6_node ip6_routing_table = {
131*1da177e4SLinus Torvalds 	.leaf		= &ip6_null_entry,
132*1da177e4SLinus Torvalds 	.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
133*1da177e4SLinus Torvalds };
134*1da177e4SLinus Torvalds 
135*1da177e4SLinus Torvalds /* Protects all the ip6 fib */
136*1da177e4SLinus Torvalds 
137*1da177e4SLinus Torvalds DEFINE_RWLOCK(rt6_lock);
138*1da177e4SLinus Torvalds 
139*1da177e4SLinus Torvalds 
140*1da177e4SLinus Torvalds /* allocate dst with ip6_dst_ops */
141*1da177e4SLinus Torvalds static __inline__ struct rt6_info *ip6_dst_alloc(void)
142*1da177e4SLinus Torvalds {
143*1da177e4SLinus Torvalds 	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
144*1da177e4SLinus Torvalds }
145*1da177e4SLinus Torvalds 
146*1da177e4SLinus Torvalds static void ip6_dst_destroy(struct dst_entry *dst)
147*1da177e4SLinus Torvalds {
148*1da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
149*1da177e4SLinus Torvalds 	struct inet6_dev *idev = rt->rt6i_idev;
150*1da177e4SLinus Torvalds 
151*1da177e4SLinus Torvalds 	if (idev != NULL) {
152*1da177e4SLinus Torvalds 		rt->rt6i_idev = NULL;
153*1da177e4SLinus Torvalds 		in6_dev_put(idev);
154*1da177e4SLinus Torvalds 	}
155*1da177e4SLinus Torvalds }
156*1da177e4SLinus Torvalds 
157*1da177e4SLinus Torvalds static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
158*1da177e4SLinus Torvalds 			   int how)
159*1da177e4SLinus Torvalds {
160*1da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
161*1da177e4SLinus Torvalds 	struct inet6_dev *idev = rt->rt6i_idev;
162*1da177e4SLinus Torvalds 
163*1da177e4SLinus Torvalds 	if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
164*1da177e4SLinus Torvalds 		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
165*1da177e4SLinus Torvalds 		if (loopback_idev != NULL) {
166*1da177e4SLinus Torvalds 			rt->rt6i_idev = loopback_idev;
167*1da177e4SLinus Torvalds 			in6_dev_put(idev);
168*1da177e4SLinus Torvalds 		}
169*1da177e4SLinus Torvalds 	}
170*1da177e4SLinus Torvalds }
171*1da177e4SLinus Torvalds 
172*1da177e4SLinus Torvalds static __inline__ int rt6_check_expired(const struct rt6_info *rt)
173*1da177e4SLinus Torvalds {
174*1da177e4SLinus Torvalds 	return (rt->rt6i_flags & RTF_EXPIRES &&
175*1da177e4SLinus Torvalds 		time_after(jiffies, rt->rt6i_expires));
176*1da177e4SLinus Torvalds }
177*1da177e4SLinus Torvalds 
178*1da177e4SLinus Torvalds /*
179*1da177e4SLinus Torvalds  *	Route lookup. Any rt6_lock is implied.
180*1da177e4SLinus Torvalds  */
181*1da177e4SLinus Torvalds 
182*1da177e4SLinus Torvalds static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
183*1da177e4SLinus Torvalds 						    int oif,
184*1da177e4SLinus Torvalds 						    int strict)
185*1da177e4SLinus Torvalds {
186*1da177e4SLinus Torvalds 	struct rt6_info *local = NULL;
187*1da177e4SLinus Torvalds 	struct rt6_info *sprt;
188*1da177e4SLinus Torvalds 
189*1da177e4SLinus Torvalds 	if (oif) {
190*1da177e4SLinus Torvalds 		for (sprt = rt; sprt; sprt = sprt->u.next) {
191*1da177e4SLinus Torvalds 			struct net_device *dev = sprt->rt6i_dev;
192*1da177e4SLinus Torvalds 			if (dev->ifindex == oif)
193*1da177e4SLinus Torvalds 				return sprt;
194*1da177e4SLinus Torvalds 			if (dev->flags & IFF_LOOPBACK) {
195*1da177e4SLinus Torvalds 				if (sprt->rt6i_idev == NULL ||
196*1da177e4SLinus Torvalds 				    sprt->rt6i_idev->dev->ifindex != oif) {
197*1da177e4SLinus Torvalds 					if (strict && oif)
198*1da177e4SLinus Torvalds 						continue;
199*1da177e4SLinus Torvalds 					if (local && (!oif ||
200*1da177e4SLinus Torvalds 						      local->rt6i_idev->dev->ifindex == oif))
201*1da177e4SLinus Torvalds 						continue;
202*1da177e4SLinus Torvalds 				}
203*1da177e4SLinus Torvalds 				local = sprt;
204*1da177e4SLinus Torvalds 			}
205*1da177e4SLinus Torvalds 		}
206*1da177e4SLinus Torvalds 
207*1da177e4SLinus Torvalds 		if (local)
208*1da177e4SLinus Torvalds 			return local;
209*1da177e4SLinus Torvalds 
210*1da177e4SLinus Torvalds 		if (strict)
211*1da177e4SLinus Torvalds 			return &ip6_null_entry;
212*1da177e4SLinus Torvalds 	}
213*1da177e4SLinus Torvalds 	return rt;
214*1da177e4SLinus Torvalds }
215*1da177e4SLinus Torvalds 
/*
 * rt6_dflt_pointer remembers the entry last selected by rt6_best_dflt();
 * rt6_dflt_lock serialises updates to it.  rt6_reset_dflt_pointer() takes
 * the lock with the _bh variant, while rt6_best_dflt() uses plain
 * spin_lock().
 * NOTE(review): the plain spin_lock() presumably relies on callers having
 * bottom halves disabled already (ip6_route_output() calls it under
 * read_lock_bh) - confirm for any new caller.
 */
216*1da177e4SLinus Torvalds /*
217*1da177e4SLinus Torvalds  *	pointer to the last default router chosen. BH is disabled locally.
218*1da177e4SLinus Torvalds  */
219*1da177e4SLinus Torvalds static struct rt6_info *rt6_dflt_pointer;
220*1da177e4SLinus Torvalds static DEFINE_SPINLOCK(rt6_dflt_lock);
221*1da177e4SLinus Torvalds 
222*1da177e4SLinus Torvalds void rt6_reset_dflt_pointer(struct rt6_info *rt)
223*1da177e4SLinus Torvalds {
224*1da177e4SLinus Torvalds 	spin_lock_bh(&rt6_dflt_lock);
225*1da177e4SLinus Torvalds 	if (rt == NULL || rt == rt6_dflt_pointer) {
226*1da177e4SLinus Torvalds 		RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
227*1da177e4SLinus Torvalds 		rt6_dflt_pointer = NULL;
228*1da177e4SLinus Torvalds 	}
229*1da177e4SLinus Torvalds 	spin_unlock_bh(&rt6_dflt_lock);
230*1da177e4SLinus Torvalds }
231*1da177e4SLinus Torvalds 
/*
 * Choose a default route from the sibling list starting at @rt for
 * outgoing interface @oif (0 = any).
 *
 * Pass 1 scores every non-expired entry that has a nexthop neighbour:
 *   +8  no @oif given, or the entry's device has ifindex @oif
 *   +4  the entry is the one remembered in rt6_dflt_pointer
 *   +3 / +2 / +1 depending on the neighbour's NUD state (see switch)
 * Entries without a neighbour, or whose neighbour is in any other state,
 * are skipped.  The highest score wins; a score of 12 or more ends the
 * scan immediately.
 *
 * Pass 2 (only if pass 1 found nothing and a router is remembered) walks
 * the list round-robin from the entry after rt6_dflt_pointer, taking the
 * first entry that is not obsolete, has no error and is not expired.
 *
 * A match from pass 1 or 2 is stored in rt6_dflt_pointer.  If there is
 * still no match, the leaf list of ip6_routing_table is searched for a
 * non-expired RTF_DEFAULT route on @oif; failing that, &ip6_null_entry is
 * returned.  The result is never NULL and no reference is taken on it.
 */
232*1da177e4SLinus Torvalds /* Default Router Selection (RFC 2461 6.3.6) */
233*1da177e4SLinus Torvalds static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
234*1da177e4SLinus Torvalds {
235*1da177e4SLinus Torvalds 	struct rt6_info *match = NULL;
236*1da177e4SLinus Torvalds 	struct rt6_info *sprt;
237*1da177e4SLinus Torvalds 	int mpri = 0;
238*1da177e4SLinus Torvalds 
239*1da177e4SLinus Torvalds 	for (sprt = rt; sprt; sprt = sprt->u.next) {
240*1da177e4SLinus Torvalds 		struct neighbour *neigh;
241*1da177e4SLinus Torvalds 		int m = 0;
242*1da177e4SLinus Torvalds 
243*1da177e4SLinus Torvalds 		if (!oif ||
244*1da177e4SLinus Torvalds 		    (sprt->rt6i_dev &&
245*1da177e4SLinus Torvalds 		     sprt->rt6i_dev->ifindex == oif))
246*1da177e4SLinus Torvalds 			m += 8;
247*1da177e4SLinus Torvalds 
248*1da177e4SLinus Torvalds 		if (rt6_check_expired(sprt))
249*1da177e4SLinus Torvalds 			continue;
250*1da177e4SLinus Torvalds 
		/* note: rt6_dflt_pointer is read here without rt6_dflt_lock */
251*1da177e4SLinus Torvalds 		if (sprt == rt6_dflt_pointer)
252*1da177e4SLinus Torvalds 			m += 4;
253*1da177e4SLinus Torvalds 
254*1da177e4SLinus Torvalds 		if ((neigh = sprt->rt6i_nexthop) != NULL) {
255*1da177e4SLinus Torvalds 			read_lock_bh(&neigh->lock);
256*1da177e4SLinus Torvalds 			switch (neigh->nud_state) {
257*1da177e4SLinus Torvalds 			case NUD_REACHABLE:
258*1da177e4SLinus Torvalds 				m += 3;
259*1da177e4SLinus Torvalds 				break;
260*1da177e4SLinus Torvalds 
261*1da177e4SLinus Torvalds 			case NUD_STALE:
262*1da177e4SLinus Torvalds 			case NUD_DELAY:
263*1da177e4SLinus Torvalds 			case NUD_PROBE:
264*1da177e4SLinus Torvalds 				m += 2;
265*1da177e4SLinus Torvalds 				break;
266*1da177e4SLinus Torvalds 
267*1da177e4SLinus Torvalds 			case NUD_NOARP:
268*1da177e4SLinus Torvalds 			case NUD_PERMANENT:
269*1da177e4SLinus Torvalds 				m += 1;
270*1da177e4SLinus Torvalds 				break;
271*1da177e4SLinus Torvalds 
272*1da177e4SLinus Torvalds 			case NUD_INCOMPLETE:
273*1da177e4SLinus Torvalds 			default:
				/* unusable neighbour: unlock and skip this entry */
274*1da177e4SLinus Torvalds 				read_unlock_bh(&neigh->lock);
275*1da177e4SLinus Torvalds 				continue;
276*1da177e4SLinus Torvalds 			}
277*1da177e4SLinus Torvalds 			read_unlock_bh(&neigh->lock);
278*1da177e4SLinus Torvalds 		} else {
279*1da177e4SLinus Torvalds 			continue;
280*1da177e4SLinus Torvalds 		}
281*1da177e4SLinus Torvalds 
		/*
		 * 12 can only be reached with both the interface bonus (8)
		 * and the remembered-router bonus (4), since the neighbour
		 * bonus is at most 3.
		 */
282*1da177e4SLinus Torvalds 		if (m > mpri || m >= 12) {
283*1da177e4SLinus Torvalds 			match = sprt;
284*1da177e4SLinus Torvalds 			mpri = m;
285*1da177e4SLinus Torvalds 			if (m >= 12) {
286*1da177e4SLinus Torvalds 				/* we choose the last default router if it
287*1da177e4SLinus Torvalds 				 * is in (probably) reachable state.
288*1da177e4SLinus Torvalds 				 * If route changed, we should do pmtu
289*1da177e4SLinus Torvalds 				 * discovery. --yoshfuji
290*1da177e4SLinus Torvalds 				 */
291*1da177e4SLinus Torvalds 				break;
292*1da177e4SLinus Torvalds 			}
293*1da177e4SLinus Torvalds 		}
294*1da177e4SLinus Torvalds 	}
295*1da177e4SLinus Torvalds 
296*1da177e4SLinus Torvalds 	spin_lock(&rt6_dflt_lock);
297*1da177e4SLinus Torvalds 	if (!match) {
298*1da177e4SLinus Torvalds 		/*
299*1da177e4SLinus Torvalds 		 *	No default routers are known to be reachable.
300*1da177e4SLinus Torvalds 		 *	SHOULD round robin
301*1da177e4SLinus Torvalds 		 */
302*1da177e4SLinus Torvalds 		if (rt6_dflt_pointer) {
			/* first the entries after the remembered router ... */
303*1da177e4SLinus Torvalds 			for (sprt = rt6_dflt_pointer->u.next;
304*1da177e4SLinus Torvalds 			     sprt; sprt = sprt->u.next) {
305*1da177e4SLinus Torvalds 				if (sprt->u.dst.obsolete <= 0 &&
306*1da177e4SLinus Torvalds 				    sprt->u.dst.error == 0 &&
307*1da177e4SLinus Torvalds 				    !rt6_check_expired(sprt)) {
308*1da177e4SLinus Torvalds 					match = sprt;
309*1da177e4SLinus Torvalds 					break;
310*1da177e4SLinus Torvalds 				}
311*1da177e4SLinus Torvalds 			}
			/* ... then wrap around from the head up to and including it */
312*1da177e4SLinus Torvalds 			for (sprt = rt;
313*1da177e4SLinus Torvalds 			     !match && sprt;
314*1da177e4SLinus Torvalds 			     sprt = sprt->u.next) {
315*1da177e4SLinus Torvalds 				if (sprt->u.dst.obsolete <= 0 &&
316*1da177e4SLinus Torvalds 				    sprt->u.dst.error == 0 &&
317*1da177e4SLinus Torvalds 				    !rt6_check_expired(sprt)) {
318*1da177e4SLinus Torvalds 					match = sprt;
319*1da177e4SLinus Torvalds 					break;
320*1da177e4SLinus Torvalds 				}
321*1da177e4SLinus Torvalds 				if (sprt == rt6_dflt_pointer)
322*1da177e4SLinus Torvalds 					break;
323*1da177e4SLinus Torvalds 			}
324*1da177e4SLinus Torvalds 		}
325*1da177e4SLinus Torvalds 	}
326*1da177e4SLinus Torvalds 
327*1da177e4SLinus Torvalds 	if (match) {
328*1da177e4SLinus Torvalds 		if (rt6_dflt_pointer != match)
329*1da177e4SLinus Torvalds 			RT6_TRACE("changed default router: %p->%p\n",
330*1da177e4SLinus Torvalds 				  rt6_dflt_pointer, match);
331*1da177e4SLinus Torvalds 		rt6_dflt_pointer = match;
332*1da177e4SLinus Torvalds 	}
333*1da177e4SLinus Torvalds 	spin_unlock(&rt6_dflt_lock);
334*1da177e4SLinus Torvalds 
335*1da177e4SLinus Torvalds 	if (!match) {
336*1da177e4SLinus Torvalds 		/*
337*1da177e4SLinus Torvalds 		 * Last Resort: if no default routers found,
338*1da177e4SLinus Torvalds 		 * use addrconf default route.
339*1da177e4SLinus Torvalds 		 * We don't record this route.
340*1da177e4SLinus Torvalds 		 */
341*1da177e4SLinus Torvalds 		for (sprt = ip6_routing_table.leaf;
342*1da177e4SLinus Torvalds 		     sprt; sprt = sprt->u.next) {
343*1da177e4SLinus Torvalds 			if (!rt6_check_expired(sprt) &&
344*1da177e4SLinus Torvalds 			    (sprt->rt6i_flags & RTF_DEFAULT) &&
345*1da177e4SLinus Torvalds 			    (!oif ||
346*1da177e4SLinus Torvalds 			     (sprt->rt6i_dev &&
347*1da177e4SLinus Torvalds 			      sprt->rt6i_dev->ifindex == oif))) {
348*1da177e4SLinus Torvalds 				match = sprt;
349*1da177e4SLinus Torvalds 				break;
350*1da177e4SLinus Torvalds 			}
351*1da177e4SLinus Torvalds 		}
352*1da177e4SLinus Torvalds 		if (!match) {
353*1da177e4SLinus Torvalds 			/* no default route.  give up. */
354*1da177e4SLinus Torvalds 			match = &ip6_null_entry;
355*1da177e4SLinus Torvalds 		}
356*1da177e4SLinus Torvalds 	}
357*1da177e4SLinus Torvalds 
358*1da177e4SLinus Torvalds 	return match;
359*1da177e4SLinus Torvalds }
360*1da177e4SLinus Torvalds 
361*1da177e4SLinus Torvalds struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
362*1da177e4SLinus Torvalds 			    int oif, int strict)
363*1da177e4SLinus Torvalds {
364*1da177e4SLinus Torvalds 	struct fib6_node *fn;
365*1da177e4SLinus Torvalds 	struct rt6_info *rt;
366*1da177e4SLinus Torvalds 
367*1da177e4SLinus Torvalds 	read_lock_bh(&rt6_lock);
368*1da177e4SLinus Torvalds 	fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
369*1da177e4SLinus Torvalds 	rt = rt6_device_match(fn->leaf, oif, strict);
370*1da177e4SLinus Torvalds 	dst_hold(&rt->u.dst);
371*1da177e4SLinus Torvalds 	rt->u.dst.__use++;
372*1da177e4SLinus Torvalds 	read_unlock_bh(&rt6_lock);
373*1da177e4SLinus Torvalds 
374*1da177e4SLinus Torvalds 	rt->u.dst.lastuse = jiffies;
375*1da177e4SLinus Torvalds 	if (rt->u.dst.error == 0)
376*1da177e4SLinus Torvalds 		return rt;
377*1da177e4SLinus Torvalds 	dst_release(&rt->u.dst);
378*1da177e4SLinus Torvalds 	return NULL;
379*1da177e4SLinus Torvalds }
380*1da177e4SLinus Torvalds 
381*1da177e4SLinus Torvalds /* ip6_ins_rt is called with FREE rt6_lock.
382*1da177e4SLinus Torvalds    It takes new route entry, the addition fails by any reason the
383*1da177e4SLinus Torvalds    route is freed. In any case, if caller does not hold it, it may
384*1da177e4SLinus Torvalds    be destroyed.
385*1da177e4SLinus Torvalds  */
386*1da177e4SLinus Torvalds 
387*1da177e4SLinus Torvalds int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
388*1da177e4SLinus Torvalds {
389*1da177e4SLinus Torvalds 	int err;
390*1da177e4SLinus Torvalds 
391*1da177e4SLinus Torvalds 	write_lock_bh(&rt6_lock);
392*1da177e4SLinus Torvalds 	err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
393*1da177e4SLinus Torvalds 	write_unlock_bh(&rt6_lock);
394*1da177e4SLinus Torvalds 
395*1da177e4SLinus Torvalds 	return err;
396*1da177e4SLinus Torvalds }
397*1da177e4SLinus Torvalds 
398*1da177e4SLinus Torvalds /* No rt6_lock! If COW failed, the function returns dead route entry
399*1da177e4SLinus Torvalds    with dst->error set to errno value.
400*1da177e4SLinus Torvalds  */
401*1da177e4SLinus Torvalds 
402*1da177e4SLinus Torvalds static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
403*1da177e4SLinus Torvalds 				struct in6_addr *saddr)
404*1da177e4SLinus Torvalds {
405*1da177e4SLinus Torvalds 	int err;
406*1da177e4SLinus Torvalds 	struct rt6_info *rt;
407*1da177e4SLinus Torvalds 
408*1da177e4SLinus Torvalds 	/*
409*1da177e4SLinus Torvalds 	 *	Clone the route.
410*1da177e4SLinus Torvalds 	 */
411*1da177e4SLinus Torvalds 
412*1da177e4SLinus Torvalds 	rt = ip6_rt_copy(ort);
413*1da177e4SLinus Torvalds 
414*1da177e4SLinus Torvalds 	if (rt) {
415*1da177e4SLinus Torvalds 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
416*1da177e4SLinus Torvalds 
417*1da177e4SLinus Torvalds 		if (!(rt->rt6i_flags&RTF_GATEWAY))
418*1da177e4SLinus Torvalds 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
419*1da177e4SLinus Torvalds 
420*1da177e4SLinus Torvalds 		rt->rt6i_dst.plen = 128;
421*1da177e4SLinus Torvalds 		rt->rt6i_flags |= RTF_CACHE;
422*1da177e4SLinus Torvalds 		rt->u.dst.flags |= DST_HOST;
423*1da177e4SLinus Torvalds 
424*1da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
425*1da177e4SLinus Torvalds 		if (rt->rt6i_src.plen && saddr) {
426*1da177e4SLinus Torvalds 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
427*1da177e4SLinus Torvalds 			rt->rt6i_src.plen = 128;
428*1da177e4SLinus Torvalds 		}
429*1da177e4SLinus Torvalds #endif
430*1da177e4SLinus Torvalds 
431*1da177e4SLinus Torvalds 		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
432*1da177e4SLinus Torvalds 
433*1da177e4SLinus Torvalds 		dst_hold(&rt->u.dst);
434*1da177e4SLinus Torvalds 
435*1da177e4SLinus Torvalds 		err = ip6_ins_rt(rt, NULL, NULL);
436*1da177e4SLinus Torvalds 		if (err == 0)
437*1da177e4SLinus Torvalds 			return rt;
438*1da177e4SLinus Torvalds 
439*1da177e4SLinus Torvalds 		rt->u.dst.error = err;
440*1da177e4SLinus Torvalds 
441*1da177e4SLinus Torvalds 		return rt;
442*1da177e4SLinus Torvalds 	}
443*1da177e4SLinus Torvalds 	dst_hold(&ip6_null_entry.u.dst);
444*1da177e4SLinus Torvalds 	return &ip6_null_entry;
445*1da177e4SLinus Torvalds }
446*1da177e4SLinus Torvalds 
/*
 * BACKTRACK() - climb the FIB towards the root after a strict device
 * match came back empty.
 *
 * Expects these names in the expanding function: locals `rt`, `fn` and
 * `strict`, and labels `restart` and `out`.  When `rt` is the null entry
 * and `strict` is set, it walks fn->parent upwards:
 *  - on reaching a node flagged RTN_ROOT it takes a reference on `rt`
 *    and jumps to `out`;
 *  - on reaching a node flagged RTN_RTINFO it jumps to `restart` with
 *    `fn` pointing at that node.
 * Otherwise it falls through.
 *
 * Wrapped in do { } while (0) so that `BACKTRACK();` is exactly one
 * statement and cannot capture a following `else`.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
458*1da177e4SLinus Torvalds 
459*1da177e4SLinus Torvalds 
/*
 * Attach a route to an incoming packet: looks up the packet's
 * destination/source pair in the FIB and stores the chosen entry, with a
 * reference held, in skb->dst.
 *
 * `strict` is non-zero for multicast and link-local destinations.  If the
 * selected route has no nexthop and is not flagged RTF_NONEXTHOP, a cache
 * clone is created with rt6_cow(); since rt6_lock has to be dropped for
 * that, an -EEXIST result (someone else inserted the clone meanwhile) is
 * retried through `relookup`, bounded by `attempts`.
 *
 * Labels: `restart` re-evaluates at the current fn (target of BACKTRACK),
 * `out` is reached with rt6_lock held, `out2` with it already released.
 */
460*1da177e4SLinus Torvalds void ip6_route_input(struct sk_buff *skb)
461*1da177e4SLinus Torvalds {
462*1da177e4SLinus Torvalds 	struct fib6_node *fn;
463*1da177e4SLinus Torvalds 	struct rt6_info *rt;
464*1da177e4SLinus Torvalds 	int strict;
465*1da177e4SLinus Torvalds 	int attempts = 3;
466*1da177e4SLinus Torvalds 
467*1da177e4SLinus Torvalds 	strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
468*1da177e4SLinus Torvalds 
469*1da177e4SLinus Torvalds relookup:
470*1da177e4SLinus Torvalds 	read_lock_bh(&rt6_lock);
471*1da177e4SLinus Torvalds 
472*1da177e4SLinus Torvalds 	fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
473*1da177e4SLinus Torvalds 			 &skb->nh.ipv6h->saddr);
474*1da177e4SLinus Torvalds 
475*1da177e4SLinus Torvalds restart:
476*1da177e4SLinus Torvalds 	rt = fn->leaf;
477*1da177e4SLinus Torvalds 
	/* leaf list headed by a cache entry: use it directly, no cloning */
478*1da177e4SLinus Torvalds 	if ((rt->rt6i_flags & RTF_CACHE)) {
479*1da177e4SLinus Torvalds 		rt = rt6_device_match(rt, skb->dev->ifindex, strict);
480*1da177e4SLinus Torvalds 		BACKTRACK();
481*1da177e4SLinus Torvalds 		dst_hold(&rt->u.dst);
482*1da177e4SLinus Torvalds 		goto out;
483*1da177e4SLinus Torvalds 	}
484*1da177e4SLinus Torvalds 
	/* device match is non-strict here; BACKTRACK still tests `strict` */
485*1da177e4SLinus Torvalds 	rt = rt6_device_match(rt, skb->dev->ifindex, 0);
486*1da177e4SLinus Torvalds 	BACKTRACK();
487*1da177e4SLinus Torvalds 
488*1da177e4SLinus Torvalds 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
489*1da177e4SLinus Torvalds 		struct rt6_info *nrt;
		/* pin the template route while rt6_lock is dropped for the clone */
490*1da177e4SLinus Torvalds 		dst_hold(&rt->u.dst);
491*1da177e4SLinus Torvalds 		read_unlock_bh(&rt6_lock);
492*1da177e4SLinus Torvalds 
493*1da177e4SLinus Torvalds 		nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
494*1da177e4SLinus Torvalds 			      &skb->nh.ipv6h->saddr);
495*1da177e4SLinus Torvalds 
496*1da177e4SLinus Torvalds 		dst_release(&rt->u.dst);
497*1da177e4SLinus Torvalds 		rt = nrt;
498*1da177e4SLinus Torvalds 
		/* success, a non-EEXIST error, or retries exhausted: use nrt as is */
499*1da177e4SLinus Torvalds 		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
500*1da177e4SLinus Torvalds 			goto out2;
501*1da177e4SLinus Torvalds 
502*1da177e4SLinus Torvalds 		/* Race condition! In the gap, when rt6_lock was
503*1da177e4SLinus Torvalds 		   released someone could insert this route.  Relookup.
504*1da177e4SLinus Torvalds 		*/
505*1da177e4SLinus Torvalds 		dst_release(&rt->u.dst);
506*1da177e4SLinus Torvalds 		goto relookup;
507*1da177e4SLinus Torvalds 	}
508*1da177e4SLinus Torvalds 	dst_hold(&rt->u.dst);
509*1da177e4SLinus Torvalds 
510*1da177e4SLinus Torvalds out:
511*1da177e4SLinus Torvalds 	read_unlock_bh(&rt6_lock);
512*1da177e4SLinus Torvalds out2:
513*1da177e4SLinus Torvalds 	rt->u.dst.lastuse = jiffies;
514*1da177e4SLinus Torvalds 	rt->u.dst.__use++;
515*1da177e4SLinus Torvalds 	skb->dst = (struct dst_entry *) rt;
516*1da177e4SLinus Torvalds }
517*1da177e4SLinus Torvalds 
/*
 * Resolve the route for an outgoing flow @fl (destination fl6_dst, source
 * fl6_src, optional outgoing interface fl->oif) and return its dst entry
 * with a reference held.  @sk is not used in this function.
 *
 * The result is never NULL; a failed lookup yields an entry whose
 * dst.error is set (e.g. the null entry or a failed clone from rt6_cow()).
 *
 * Same structure as ip6_route_input(), with one extra step: when the leaf
 * is a default route (RTF_DEFAULT) whose metric is at least
 * IP6_RT_PRIO_ADDRCONF, the router is chosen by rt6_best_dflt() instead of
 * by device matching.
 *
 * Labels: `restart` re-evaluates at the current fn (target of BACKTRACK),
 * `out` is reached with rt6_lock held, `out2` with it already released.
 */
518*1da177e4SLinus Torvalds struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
519*1da177e4SLinus Torvalds {
520*1da177e4SLinus Torvalds 	struct fib6_node *fn;
521*1da177e4SLinus Torvalds 	struct rt6_info *rt;
522*1da177e4SLinus Torvalds 	int strict;
523*1da177e4SLinus Torvalds 	int attempts = 3;
524*1da177e4SLinus Torvalds 
525*1da177e4SLinus Torvalds 	strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
526*1da177e4SLinus Torvalds 
527*1da177e4SLinus Torvalds relookup:
528*1da177e4SLinus Torvalds 	read_lock_bh(&rt6_lock);
529*1da177e4SLinus Torvalds 
530*1da177e4SLinus Torvalds 	fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
531*1da177e4SLinus Torvalds 
532*1da177e4SLinus Torvalds restart:
533*1da177e4SLinus Torvalds 	rt = fn->leaf;
534*1da177e4SLinus Torvalds 
	/* leaf list headed by a cache entry: use it directly, no cloning */
535*1da177e4SLinus Torvalds 	if ((rt->rt6i_flags & RTF_CACHE)) {
536*1da177e4SLinus Torvalds 		rt = rt6_device_match(rt, fl->oif, strict);
537*1da177e4SLinus Torvalds 		BACKTRACK();
538*1da177e4SLinus Torvalds 		dst_hold(&rt->u.dst);
539*1da177e4SLinus Torvalds 		goto out;
540*1da177e4SLinus Torvalds 	}
541*1da177e4SLinus Torvalds 	if (rt->rt6i_flags & RTF_DEFAULT) {
		/* default routes with a lower metric are used as found */
542*1da177e4SLinus Torvalds 		if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
543*1da177e4SLinus Torvalds 			rt = rt6_best_dflt(rt, fl->oif);
544*1da177e4SLinus Torvalds 	} else {
545*1da177e4SLinus Torvalds 		rt = rt6_device_match(rt, fl->oif, strict);
546*1da177e4SLinus Torvalds 		BACKTRACK();
547*1da177e4SLinus Torvalds 	}
548*1da177e4SLinus Torvalds 
549*1da177e4SLinus Torvalds 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
550*1da177e4SLinus Torvalds 		struct rt6_info *nrt;
		/* pin the template route while rt6_lock is dropped for the clone */
551*1da177e4SLinus Torvalds 		dst_hold(&rt->u.dst);
552*1da177e4SLinus Torvalds 		read_unlock_bh(&rt6_lock);
553*1da177e4SLinus Torvalds 
554*1da177e4SLinus Torvalds 		nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
555*1da177e4SLinus Torvalds 
556*1da177e4SLinus Torvalds 		dst_release(&rt->u.dst);
557*1da177e4SLinus Torvalds 		rt = nrt;
558*1da177e4SLinus Torvalds 
		/* success, a non-EEXIST error, or retries exhausted: use nrt as is */
559*1da177e4SLinus Torvalds 		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
560*1da177e4SLinus Torvalds 			goto out2;
561*1da177e4SLinus Torvalds 
562*1da177e4SLinus Torvalds 		/* Race condition! In the gap, when rt6_lock was
563*1da177e4SLinus Torvalds 		   released someone could insert this route.  Relookup.
564*1da177e4SLinus Torvalds 		*/
565*1da177e4SLinus Torvalds 		dst_release(&rt->u.dst);
566*1da177e4SLinus Torvalds 		goto relookup;
567*1da177e4SLinus Torvalds 	}
568*1da177e4SLinus Torvalds 	dst_hold(&rt->u.dst);
569*1da177e4SLinus Torvalds 
570*1da177e4SLinus Torvalds out:
571*1da177e4SLinus Torvalds 	read_unlock_bh(&rt6_lock);
572*1da177e4SLinus Torvalds out2:
573*1da177e4SLinus Torvalds 	rt->u.dst.lastuse = jiffies;
574*1da177e4SLinus Torvalds 	rt->u.dst.__use++;
575*1da177e4SLinus Torvalds 	return &rt->u.dst;
576*1da177e4SLinus Torvalds }
577*1da177e4SLinus Torvalds 
578*1da177e4SLinus Torvalds 
579*1da177e4SLinus Torvalds /*
580*1da177e4SLinus Torvalds  *	Destination cache support functions
581*1da177e4SLinus Torvalds  */
582*1da177e4SLinus Torvalds 
583*1da177e4SLinus Torvalds static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
584*1da177e4SLinus Torvalds {
585*1da177e4SLinus Torvalds 	struct rt6_info *rt;
586*1da177e4SLinus Torvalds 
587*1da177e4SLinus Torvalds 	rt = (struct rt6_info *) dst;
588*1da177e4SLinus Torvalds 
589*1da177e4SLinus Torvalds 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
590*1da177e4SLinus Torvalds 		return dst;
591*1da177e4SLinus Torvalds 
592*1da177e4SLinus Torvalds 	return NULL;
593*1da177e4SLinus Torvalds }
594*1da177e4SLinus Torvalds 
595*1da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
596*1da177e4SLinus Torvalds {
597*1da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *) dst;
598*1da177e4SLinus Torvalds 
599*1da177e4SLinus Torvalds 	if (rt) {
600*1da177e4SLinus Torvalds 		if (rt->rt6i_flags & RTF_CACHE)
601*1da177e4SLinus Torvalds 			ip6_del_rt(rt, NULL, NULL);
602*1da177e4SLinus Torvalds 		else
603*1da177e4SLinus Torvalds 			dst_release(dst);
604*1da177e4SLinus Torvalds 	}
605*1da177e4SLinus Torvalds 	return NULL;
606*1da177e4SLinus Torvalds }
607*1da177e4SLinus Torvalds 
608*1da177e4SLinus Torvalds static void ip6_link_failure(struct sk_buff *skb)
609*1da177e4SLinus Torvalds {
610*1da177e4SLinus Torvalds 	struct rt6_info *rt;
611*1da177e4SLinus Torvalds 
612*1da177e4SLinus Torvalds 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
613*1da177e4SLinus Torvalds 
614*1da177e4SLinus Torvalds 	rt = (struct rt6_info *) skb->dst;
615*1da177e4SLinus Torvalds 	if (rt) {
616*1da177e4SLinus Torvalds 		if (rt->rt6i_flags&RTF_CACHE) {
617*1da177e4SLinus Torvalds 			dst_set_expires(&rt->u.dst, 0);
618*1da177e4SLinus Torvalds 			rt->rt6i_flags |= RTF_EXPIRES;
619*1da177e4SLinus Torvalds 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
620*1da177e4SLinus Torvalds 			rt->rt6i_node->fn_sernum = -1;
621*1da177e4SLinus Torvalds 	}
622*1da177e4SLinus Torvalds }
623*1da177e4SLinus Torvalds 
624*1da177e4SLinus Torvalds static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
625*1da177e4SLinus Torvalds {
626*1da177e4SLinus Torvalds 	struct rt6_info *rt6 = (struct rt6_info*)dst;
627*1da177e4SLinus Torvalds 
628*1da177e4SLinus Torvalds 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
629*1da177e4SLinus Torvalds 		rt6->rt6i_flags |= RTF_MODIFIED;
630*1da177e4SLinus Torvalds 		if (mtu < IPV6_MIN_MTU) {
631*1da177e4SLinus Torvalds 			mtu = IPV6_MIN_MTU;
632*1da177e4SLinus Torvalds 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
633*1da177e4SLinus Torvalds 		}
634*1da177e4SLinus Torvalds 		dst->metrics[RTAX_MTU-1] = mtu;
635*1da177e4SLinus Torvalds 	}
636*1da177e4SLinus Torvalds }
637*1da177e4SLinus Torvalds 
/*
 * ndisc_dst_gc_list: head of a file-local list of dst entries.
 * NOTE(review): presumably the entries handed out by ndisc_dst_alloc()
 * below - the code that links them is not visible in this section.
 * ipv6_get_mtu(): forward declaration; the definition is further down
 * the file.
 */
638*1da177e4SLinus Torvalds /* Protected by rt6_lock.  */
639*1da177e4SLinus Torvalds static struct dst_entry *ndisc_dst_gc_list;
640*1da177e4SLinus Torvalds static int ipv6_get_mtu(struct net_device *dev);
641*1da177e4SLinus Torvalds 
642*1da177e4SLinus Torvalds static inline unsigned int ipv6_advmss(unsigned int mtu)
643*1da177e4SLinus Torvalds {
644*1da177e4SLinus Torvalds 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
645*1da177e4SLinus Torvalds 
646*1da177e4SLinus Torvalds 	if (mtu < ip6_rt_min_advmss)
647*1da177e4SLinus Torvalds 		mtu = ip6_rt_min_advmss;
648*1da177e4SLinus Torvalds 
649*1da177e4SLinus Torvalds 	/*
650*1da177e4SLinus Torvalds 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
651*1da177e4SLinus Torvalds 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
652*1da177e4SLinus Torvalds 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
653*1da177e4SLinus Torvalds 	 * rely only on pmtu discovery"
654*1da177e4SLinus Torvalds 	 */
655*1da177e4SLinus Torvalds 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
656*1da177e4SLinus Torvalds 		mtu = IPV6_MAXPLEN;
657*1da177e4SLinus Torvalds 	return mtu;
658*1da177e4SLinus Torvalds }
659*1da177e4SLinus Torvalds 
660*1da177e4SLinus Torvalds struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
661*1da177e4SLinus Torvalds 				  struct neighbour *neigh,
662*1da177e4SLinus Torvalds 				  struct in6_addr *addr,
663*1da177e4SLinus Torvalds 				  int (*output)(struct sk_buff *))
664*1da177e4SLinus Torvalds {
665*1da177e4SLinus Torvalds 	struct rt6_info *rt;
666*1da177e4SLinus Torvalds 	struct inet6_dev *idev = in6_dev_get(dev);
667*1da177e4SLinus Torvalds 
668*1da177e4SLinus Torvalds 	if (unlikely(idev == NULL))
669*1da177e4SLinus Torvalds 		return NULL;
670*1da177e4SLinus Torvalds 
671*1da177e4SLinus Torvalds 	rt = ip6_dst_alloc();
672*1da177e4SLinus Torvalds 	if (unlikely(rt == NULL)) {
673*1da177e4SLinus Torvalds 		in6_dev_put(idev);
674*1da177e4SLinus Torvalds 		goto out;
675*1da177e4SLinus Torvalds 	}
676*1da177e4SLinus Torvalds 
677*1da177e4SLinus Torvalds 	dev_hold(dev);
678*1da177e4SLinus Torvalds 	if (neigh)
679*1da177e4SLinus Torvalds 		neigh_hold(neigh);
680*1da177e4SLinus Torvalds 	else
681*1da177e4SLinus Torvalds 		neigh = ndisc_get_neigh(dev, addr);
682*1da177e4SLinus Torvalds 
683*1da177e4SLinus Torvalds 	rt->rt6i_dev	  = dev;
684*1da177e4SLinus Torvalds 	rt->rt6i_idev     = idev;
685*1da177e4SLinus Torvalds 	rt->rt6i_nexthop  = neigh;
686*1da177e4SLinus Torvalds 	atomic_set(&rt->u.dst.__refcnt, 1);
687*1da177e4SLinus Torvalds 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
688*1da177e4SLinus Torvalds 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
689*1da177e4SLinus Torvalds 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
690*1da177e4SLinus Torvalds 	rt->u.dst.output  = output;
691*1da177e4SLinus Torvalds 
692*1da177e4SLinus Torvalds #if 0	/* there's no chance to use these for ndisc */
693*1da177e4SLinus Torvalds 	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
694*1da177e4SLinus Torvalds 				? DST_HOST
695*1da177e4SLinus Torvalds 				: 0;
696*1da177e4SLinus Torvalds 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
697*1da177e4SLinus Torvalds 	rt->rt6i_dst.plen = 128;
698*1da177e4SLinus Torvalds #endif
699*1da177e4SLinus Torvalds 
700*1da177e4SLinus Torvalds 	write_lock_bh(&rt6_lock);
701*1da177e4SLinus Torvalds 	rt->u.dst.next = ndisc_dst_gc_list;
702*1da177e4SLinus Torvalds 	ndisc_dst_gc_list = &rt->u.dst;
703*1da177e4SLinus Torvalds 	write_unlock_bh(&rt6_lock);
704*1da177e4SLinus Torvalds 
705*1da177e4SLinus Torvalds 	fib6_force_start_gc();
706*1da177e4SLinus Torvalds 
707*1da177e4SLinus Torvalds out:
708*1da177e4SLinus Torvalds 	return (struct dst_entry *)rt;
709*1da177e4SLinus Torvalds }
710*1da177e4SLinus Torvalds 
711*1da177e4SLinus Torvalds int ndisc_dst_gc(int *more)
712*1da177e4SLinus Torvalds {
713*1da177e4SLinus Torvalds 	struct dst_entry *dst, *next, **pprev;
714*1da177e4SLinus Torvalds 	int freed;
715*1da177e4SLinus Torvalds 
716*1da177e4SLinus Torvalds 	next = NULL;
717*1da177e4SLinus Torvalds 	pprev = &ndisc_dst_gc_list;
718*1da177e4SLinus Torvalds 	freed = 0;
719*1da177e4SLinus Torvalds 	while ((dst = *pprev) != NULL) {
720*1da177e4SLinus Torvalds 		if (!atomic_read(&dst->__refcnt)) {
721*1da177e4SLinus Torvalds 			*pprev = dst->next;
722*1da177e4SLinus Torvalds 			dst_free(dst);
723*1da177e4SLinus Torvalds 			freed++;
724*1da177e4SLinus Torvalds 		} else {
725*1da177e4SLinus Torvalds 			pprev = &dst->next;
726*1da177e4SLinus Torvalds 			(*more)++;
727*1da177e4SLinus Torvalds 		}
728*1da177e4SLinus Torvalds 	}
729*1da177e4SLinus Torvalds 
730*1da177e4SLinus Torvalds 	return freed;
731*1da177e4SLinus Torvalds }
732*1da177e4SLinus Torvalds 
733*1da177e4SLinus Torvalds static int ip6_dst_gc(void)
734*1da177e4SLinus Torvalds {
735*1da177e4SLinus Torvalds 	static unsigned expire = 30*HZ;
736*1da177e4SLinus Torvalds 	static unsigned long last_gc;
737*1da177e4SLinus Torvalds 	unsigned long now = jiffies;
738*1da177e4SLinus Torvalds 
739*1da177e4SLinus Torvalds 	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
740*1da177e4SLinus Torvalds 	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
741*1da177e4SLinus Torvalds 		goto out;
742*1da177e4SLinus Torvalds 
743*1da177e4SLinus Torvalds 	expire++;
744*1da177e4SLinus Torvalds 	fib6_run_gc(expire);
745*1da177e4SLinus Torvalds 	last_gc = now;
746*1da177e4SLinus Torvalds 	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
747*1da177e4SLinus Torvalds 		expire = ip6_rt_gc_timeout>>1;
748*1da177e4SLinus Torvalds 
749*1da177e4SLinus Torvalds out:
750*1da177e4SLinus Torvalds 	expire -= expire>>ip6_rt_gc_elasticity;
751*1da177e4SLinus Torvalds 	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
752*1da177e4SLinus Torvalds }
753*1da177e4SLinus Torvalds 
/* Clean host part of a prefix. Not necessary in radix tree,
   but results in cleaner routing tables.

   Remove it only when all the things will work!

   NOTE(review): this comment does not describe ipv6_get_mtu(), which
   follows it.  It appears to refer to masking a route prefix (compare
   the ipv6_addr_prefix() calls in ip6_route_add()) -- confirm and
   relocate or drop.
 */
759*1da177e4SLinus Torvalds 
760*1da177e4SLinus Torvalds static int ipv6_get_mtu(struct net_device *dev)
761*1da177e4SLinus Torvalds {
762*1da177e4SLinus Torvalds 	int mtu = IPV6_MIN_MTU;
763*1da177e4SLinus Torvalds 	struct inet6_dev *idev;
764*1da177e4SLinus Torvalds 
765*1da177e4SLinus Torvalds 	idev = in6_dev_get(dev);
766*1da177e4SLinus Torvalds 	if (idev) {
767*1da177e4SLinus Torvalds 		mtu = idev->cnf.mtu6;
768*1da177e4SLinus Torvalds 		in6_dev_put(idev);
769*1da177e4SLinus Torvalds 	}
770*1da177e4SLinus Torvalds 	return mtu;
771*1da177e4SLinus Torvalds }
772*1da177e4SLinus Torvalds 
773*1da177e4SLinus Torvalds int ipv6_get_hoplimit(struct net_device *dev)
774*1da177e4SLinus Torvalds {
775*1da177e4SLinus Torvalds 	int hoplimit = ipv6_devconf.hop_limit;
776*1da177e4SLinus Torvalds 	struct inet6_dev *idev;
777*1da177e4SLinus Torvalds 
778*1da177e4SLinus Torvalds 	idev = in6_dev_get(dev);
779*1da177e4SLinus Torvalds 	if (idev) {
780*1da177e4SLinus Torvalds 		hoplimit = idev->cnf.hop_limit;
781*1da177e4SLinus Torvalds 		in6_dev_put(idev);
782*1da177e4SLinus Torvalds 	}
783*1da177e4SLinus Torvalds 	return hoplimit;
784*1da177e4SLinus Torvalds }
785*1da177e4SLinus Torvalds 
786*1da177e4SLinus Torvalds /*
787*1da177e4SLinus Torvalds  *
788*1da177e4SLinus Torvalds  */
789*1da177e4SLinus Torvalds 
790*1da177e4SLinus Torvalds int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
791*1da177e4SLinus Torvalds {
792*1da177e4SLinus Torvalds 	int err;
793*1da177e4SLinus Torvalds 	struct rtmsg *r;
794*1da177e4SLinus Torvalds 	struct rtattr **rta;
795*1da177e4SLinus Torvalds 	struct rt6_info *rt = NULL;
796*1da177e4SLinus Torvalds 	struct net_device *dev = NULL;
797*1da177e4SLinus Torvalds 	struct inet6_dev *idev = NULL;
798*1da177e4SLinus Torvalds 	int addr_type;
799*1da177e4SLinus Torvalds 
800*1da177e4SLinus Torvalds 	rta = (struct rtattr **) _rtattr;
801*1da177e4SLinus Torvalds 
802*1da177e4SLinus Torvalds 	if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
803*1da177e4SLinus Torvalds 		return -EINVAL;
804*1da177e4SLinus Torvalds #ifndef CONFIG_IPV6_SUBTREES
805*1da177e4SLinus Torvalds 	if (rtmsg->rtmsg_src_len)
806*1da177e4SLinus Torvalds 		return -EINVAL;
807*1da177e4SLinus Torvalds #endif
808*1da177e4SLinus Torvalds 	if (rtmsg->rtmsg_ifindex) {
809*1da177e4SLinus Torvalds 		err = -ENODEV;
810*1da177e4SLinus Torvalds 		dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
811*1da177e4SLinus Torvalds 		if (!dev)
812*1da177e4SLinus Torvalds 			goto out;
813*1da177e4SLinus Torvalds 		idev = in6_dev_get(dev);
814*1da177e4SLinus Torvalds 		if (!idev)
815*1da177e4SLinus Torvalds 			goto out;
816*1da177e4SLinus Torvalds 	}
817*1da177e4SLinus Torvalds 
818*1da177e4SLinus Torvalds 	if (rtmsg->rtmsg_metric == 0)
819*1da177e4SLinus Torvalds 		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
820*1da177e4SLinus Torvalds 
821*1da177e4SLinus Torvalds 	rt = ip6_dst_alloc();
822*1da177e4SLinus Torvalds 
823*1da177e4SLinus Torvalds 	if (rt == NULL) {
824*1da177e4SLinus Torvalds 		err = -ENOMEM;
825*1da177e4SLinus Torvalds 		goto out;
826*1da177e4SLinus Torvalds 	}
827*1da177e4SLinus Torvalds 
828*1da177e4SLinus Torvalds 	rt->u.dst.obsolete = -1;
829*1da177e4SLinus Torvalds 	rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
830*1da177e4SLinus Torvalds 	if (nlh && (r = NLMSG_DATA(nlh))) {
831*1da177e4SLinus Torvalds 		rt->rt6i_protocol = r->rtm_protocol;
832*1da177e4SLinus Torvalds 	} else {
833*1da177e4SLinus Torvalds 		rt->rt6i_protocol = RTPROT_BOOT;
834*1da177e4SLinus Torvalds 	}
835*1da177e4SLinus Torvalds 
836*1da177e4SLinus Torvalds 	addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
837*1da177e4SLinus Torvalds 
838*1da177e4SLinus Torvalds 	if (addr_type & IPV6_ADDR_MULTICAST)
839*1da177e4SLinus Torvalds 		rt->u.dst.input = ip6_mc_input;
840*1da177e4SLinus Torvalds 	else
841*1da177e4SLinus Torvalds 		rt->u.dst.input = ip6_forward;
842*1da177e4SLinus Torvalds 
843*1da177e4SLinus Torvalds 	rt->u.dst.output = ip6_output;
844*1da177e4SLinus Torvalds 
845*1da177e4SLinus Torvalds 	ipv6_addr_prefix(&rt->rt6i_dst.addr,
846*1da177e4SLinus Torvalds 			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
847*1da177e4SLinus Torvalds 	rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
848*1da177e4SLinus Torvalds 	if (rt->rt6i_dst.plen == 128)
849*1da177e4SLinus Torvalds 	       rt->u.dst.flags = DST_HOST;
850*1da177e4SLinus Torvalds 
851*1da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
852*1da177e4SLinus Torvalds 	ipv6_addr_prefix(&rt->rt6i_src.addr,
853*1da177e4SLinus Torvalds 			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
854*1da177e4SLinus Torvalds 	rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
855*1da177e4SLinus Torvalds #endif
856*1da177e4SLinus Torvalds 
857*1da177e4SLinus Torvalds 	rt->rt6i_metric = rtmsg->rtmsg_metric;
858*1da177e4SLinus Torvalds 
859*1da177e4SLinus Torvalds 	/* We cannot add true routes via loopback here,
860*1da177e4SLinus Torvalds 	   they would result in kernel looping; promote them to reject routes
861*1da177e4SLinus Torvalds 	 */
862*1da177e4SLinus Torvalds 	if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
863*1da177e4SLinus Torvalds 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
864*1da177e4SLinus Torvalds 		/* hold loopback dev/idev if we haven't done so. */
865*1da177e4SLinus Torvalds 		if (dev != &loopback_dev) {
866*1da177e4SLinus Torvalds 			if (dev) {
867*1da177e4SLinus Torvalds 				dev_put(dev);
868*1da177e4SLinus Torvalds 				in6_dev_put(idev);
869*1da177e4SLinus Torvalds 			}
870*1da177e4SLinus Torvalds 			dev = &loopback_dev;
871*1da177e4SLinus Torvalds 			dev_hold(dev);
872*1da177e4SLinus Torvalds 			idev = in6_dev_get(dev);
873*1da177e4SLinus Torvalds 			if (!idev) {
874*1da177e4SLinus Torvalds 				err = -ENODEV;
875*1da177e4SLinus Torvalds 				goto out;
876*1da177e4SLinus Torvalds 			}
877*1da177e4SLinus Torvalds 		}
878*1da177e4SLinus Torvalds 		rt->u.dst.output = ip6_pkt_discard_out;
879*1da177e4SLinus Torvalds 		rt->u.dst.input = ip6_pkt_discard;
880*1da177e4SLinus Torvalds 		rt->u.dst.error = -ENETUNREACH;
881*1da177e4SLinus Torvalds 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
882*1da177e4SLinus Torvalds 		goto install_route;
883*1da177e4SLinus Torvalds 	}
884*1da177e4SLinus Torvalds 
885*1da177e4SLinus Torvalds 	if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
886*1da177e4SLinus Torvalds 		struct in6_addr *gw_addr;
887*1da177e4SLinus Torvalds 		int gwa_type;
888*1da177e4SLinus Torvalds 
889*1da177e4SLinus Torvalds 		gw_addr = &rtmsg->rtmsg_gateway;
890*1da177e4SLinus Torvalds 		ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
891*1da177e4SLinus Torvalds 		gwa_type = ipv6_addr_type(gw_addr);
892*1da177e4SLinus Torvalds 
893*1da177e4SLinus Torvalds 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
894*1da177e4SLinus Torvalds 			struct rt6_info *grt;
895*1da177e4SLinus Torvalds 
896*1da177e4SLinus Torvalds 			/* IPv6 strictly inhibits using not link-local
897*1da177e4SLinus Torvalds 			   addresses as nexthop address.
898*1da177e4SLinus Torvalds 			   Otherwise, router will not able to send redirects.
899*1da177e4SLinus Torvalds 			   It is very good, but in some (rare!) circumstances
900*1da177e4SLinus Torvalds 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
901*1da177e4SLinus Torvalds 			   some exceptions. --ANK
902*1da177e4SLinus Torvalds 			 */
903*1da177e4SLinus Torvalds 			err = -EINVAL;
904*1da177e4SLinus Torvalds 			if (!(gwa_type&IPV6_ADDR_UNICAST))
905*1da177e4SLinus Torvalds 				goto out;
906*1da177e4SLinus Torvalds 
907*1da177e4SLinus Torvalds 			grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
908*1da177e4SLinus Torvalds 
909*1da177e4SLinus Torvalds 			err = -EHOSTUNREACH;
910*1da177e4SLinus Torvalds 			if (grt == NULL)
911*1da177e4SLinus Torvalds 				goto out;
912*1da177e4SLinus Torvalds 			if (dev) {
913*1da177e4SLinus Torvalds 				if (dev != grt->rt6i_dev) {
914*1da177e4SLinus Torvalds 					dst_release(&grt->u.dst);
915*1da177e4SLinus Torvalds 					goto out;
916*1da177e4SLinus Torvalds 				}
917*1da177e4SLinus Torvalds 			} else {
918*1da177e4SLinus Torvalds 				dev = grt->rt6i_dev;
919*1da177e4SLinus Torvalds 				idev = grt->rt6i_idev;
920*1da177e4SLinus Torvalds 				dev_hold(dev);
921*1da177e4SLinus Torvalds 				in6_dev_hold(grt->rt6i_idev);
922*1da177e4SLinus Torvalds 			}
923*1da177e4SLinus Torvalds 			if (!(grt->rt6i_flags&RTF_GATEWAY))
924*1da177e4SLinus Torvalds 				err = 0;
925*1da177e4SLinus Torvalds 			dst_release(&grt->u.dst);
926*1da177e4SLinus Torvalds 
927*1da177e4SLinus Torvalds 			if (err)
928*1da177e4SLinus Torvalds 				goto out;
929*1da177e4SLinus Torvalds 		}
930*1da177e4SLinus Torvalds 		err = -EINVAL;
931*1da177e4SLinus Torvalds 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
932*1da177e4SLinus Torvalds 			goto out;
933*1da177e4SLinus Torvalds 	}
934*1da177e4SLinus Torvalds 
935*1da177e4SLinus Torvalds 	err = -ENODEV;
936*1da177e4SLinus Torvalds 	if (dev == NULL)
937*1da177e4SLinus Torvalds 		goto out;
938*1da177e4SLinus Torvalds 
939*1da177e4SLinus Torvalds 	if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
940*1da177e4SLinus Torvalds 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
941*1da177e4SLinus Torvalds 		if (IS_ERR(rt->rt6i_nexthop)) {
942*1da177e4SLinus Torvalds 			err = PTR_ERR(rt->rt6i_nexthop);
943*1da177e4SLinus Torvalds 			rt->rt6i_nexthop = NULL;
944*1da177e4SLinus Torvalds 			goto out;
945*1da177e4SLinus Torvalds 		}
946*1da177e4SLinus Torvalds 	}
947*1da177e4SLinus Torvalds 
948*1da177e4SLinus Torvalds 	rt->rt6i_flags = rtmsg->rtmsg_flags;
949*1da177e4SLinus Torvalds 
950*1da177e4SLinus Torvalds install_route:
951*1da177e4SLinus Torvalds 	if (rta && rta[RTA_METRICS-1]) {
952*1da177e4SLinus Torvalds 		int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
953*1da177e4SLinus Torvalds 		struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
954*1da177e4SLinus Torvalds 
955*1da177e4SLinus Torvalds 		while (RTA_OK(attr, attrlen)) {
956*1da177e4SLinus Torvalds 			unsigned flavor = attr->rta_type;
957*1da177e4SLinus Torvalds 			if (flavor) {
958*1da177e4SLinus Torvalds 				if (flavor > RTAX_MAX) {
959*1da177e4SLinus Torvalds 					err = -EINVAL;
960*1da177e4SLinus Torvalds 					goto out;
961*1da177e4SLinus Torvalds 				}
962*1da177e4SLinus Torvalds 				rt->u.dst.metrics[flavor-1] =
963*1da177e4SLinus Torvalds 					*(u32 *)RTA_DATA(attr);
964*1da177e4SLinus Torvalds 			}
965*1da177e4SLinus Torvalds 			attr = RTA_NEXT(attr, attrlen);
966*1da177e4SLinus Torvalds 		}
967*1da177e4SLinus Torvalds 	}
968*1da177e4SLinus Torvalds 
969*1da177e4SLinus Torvalds 	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
970*1da177e4SLinus Torvalds 		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
971*1da177e4SLinus Torvalds 	if (!rt->u.dst.metrics[RTAX_MTU-1])
972*1da177e4SLinus Torvalds 		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
973*1da177e4SLinus Torvalds 	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
974*1da177e4SLinus Torvalds 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
975*1da177e4SLinus Torvalds 	rt->u.dst.dev = dev;
976*1da177e4SLinus Torvalds 	rt->rt6i_idev = idev;
977*1da177e4SLinus Torvalds 	return ip6_ins_rt(rt, nlh, _rtattr);
978*1da177e4SLinus Torvalds 
979*1da177e4SLinus Torvalds out:
980*1da177e4SLinus Torvalds 	if (dev)
981*1da177e4SLinus Torvalds 		dev_put(dev);
982*1da177e4SLinus Torvalds 	if (idev)
983*1da177e4SLinus Torvalds 		in6_dev_put(idev);
984*1da177e4SLinus Torvalds 	if (rt)
985*1da177e4SLinus Torvalds 		dst_free((struct dst_entry *) rt);
986*1da177e4SLinus Torvalds 	return err;
987*1da177e4SLinus Torvalds }
988*1da177e4SLinus Torvalds 
989*1da177e4SLinus Torvalds int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
990*1da177e4SLinus Torvalds {
991*1da177e4SLinus Torvalds 	int err;
992*1da177e4SLinus Torvalds 
993*1da177e4SLinus Torvalds 	write_lock_bh(&rt6_lock);
994*1da177e4SLinus Torvalds 
995*1da177e4SLinus Torvalds 	rt6_reset_dflt_pointer(NULL);
996*1da177e4SLinus Torvalds 
997*1da177e4SLinus Torvalds 	err = fib6_del(rt, nlh, _rtattr);
998*1da177e4SLinus Torvalds 	dst_release(&rt->u.dst);
999*1da177e4SLinus Torvalds 
1000*1da177e4SLinus Torvalds 	write_unlock_bh(&rt6_lock);
1001*1da177e4SLinus Torvalds 
1002*1da177e4SLinus Torvalds 	return err;
1003*1da177e4SLinus Torvalds }
1004*1da177e4SLinus Torvalds 
1005*1da177e4SLinus Torvalds static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
1006*1da177e4SLinus Torvalds {
1007*1da177e4SLinus Torvalds 	struct fib6_node *fn;
1008*1da177e4SLinus Torvalds 	struct rt6_info *rt;
1009*1da177e4SLinus Torvalds 	int err = -ESRCH;
1010*1da177e4SLinus Torvalds 
1011*1da177e4SLinus Torvalds 	read_lock_bh(&rt6_lock);
1012*1da177e4SLinus Torvalds 
1013*1da177e4SLinus Torvalds 	fn = fib6_locate(&ip6_routing_table,
1014*1da177e4SLinus Torvalds 			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1015*1da177e4SLinus Torvalds 			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1016*1da177e4SLinus Torvalds 
1017*1da177e4SLinus Torvalds 	if (fn) {
1018*1da177e4SLinus Torvalds 		for (rt = fn->leaf; rt; rt = rt->u.next) {
1019*1da177e4SLinus Torvalds 			if (rtmsg->rtmsg_ifindex &&
1020*1da177e4SLinus Torvalds 			    (rt->rt6i_dev == NULL ||
1021*1da177e4SLinus Torvalds 			     rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1022*1da177e4SLinus Torvalds 				continue;
1023*1da177e4SLinus Torvalds 			if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1024*1da177e4SLinus Torvalds 			    !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1025*1da177e4SLinus Torvalds 				continue;
1026*1da177e4SLinus Torvalds 			if (rtmsg->rtmsg_metric &&
1027*1da177e4SLinus Torvalds 			    rtmsg->rtmsg_metric != rt->rt6i_metric)
1028*1da177e4SLinus Torvalds 				continue;
1029*1da177e4SLinus Torvalds 			dst_hold(&rt->u.dst);
1030*1da177e4SLinus Torvalds 			read_unlock_bh(&rt6_lock);
1031*1da177e4SLinus Torvalds 
1032*1da177e4SLinus Torvalds 			return ip6_del_rt(rt, nlh, _rtattr);
1033*1da177e4SLinus Torvalds 		}
1034*1da177e4SLinus Torvalds 	}
1035*1da177e4SLinus Torvalds 	read_unlock_bh(&rt6_lock);
1036*1da177e4SLinus Torvalds 
1037*1da177e4SLinus Torvalds 	return err;
1038*1da177e4SLinus Torvalds }
1039*1da177e4SLinus Torvalds 
1040*1da177e4SLinus Torvalds /*
1041*1da177e4SLinus Torvalds  *	Handle redirects
1042*1da177e4SLinus Torvalds  */
1043*1da177e4SLinus Torvalds void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1044*1da177e4SLinus Torvalds 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1045*1da177e4SLinus Torvalds {
1046*1da177e4SLinus Torvalds 	struct rt6_info *rt, *nrt;
1047*1da177e4SLinus Torvalds 
1048*1da177e4SLinus Torvalds 	/* Locate old route to this destination. */
1049*1da177e4SLinus Torvalds 	rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1050*1da177e4SLinus Torvalds 
1051*1da177e4SLinus Torvalds 	if (rt == NULL)
1052*1da177e4SLinus Torvalds 		return;
1053*1da177e4SLinus Torvalds 
1054*1da177e4SLinus Torvalds 	if (neigh->dev != rt->rt6i_dev)
1055*1da177e4SLinus Torvalds 		goto out;
1056*1da177e4SLinus Torvalds 
1057*1da177e4SLinus Torvalds 	/*
1058*1da177e4SLinus Torvalds 	 * Current route is on-link; redirect is always invalid.
1059*1da177e4SLinus Torvalds 	 *
1060*1da177e4SLinus Torvalds 	 * Seems, previous statement is not true. It could
1061*1da177e4SLinus Torvalds 	 * be node, which looks for us as on-link (f.e. proxy ndisc)
1062*1da177e4SLinus Torvalds 	 * But then router serving it might decide, that we should
1063*1da177e4SLinus Torvalds 	 * know truth 8)8) --ANK (980726).
1064*1da177e4SLinus Torvalds 	 */
1065*1da177e4SLinus Torvalds 	if (!(rt->rt6i_flags&RTF_GATEWAY))
1066*1da177e4SLinus Torvalds 		goto out;
1067*1da177e4SLinus Torvalds 
1068*1da177e4SLinus Torvalds 	/*
1069*1da177e4SLinus Torvalds 	 *	RFC 2461 specifies that redirects should only be
1070*1da177e4SLinus Torvalds 	 *	accepted if they come from the nexthop to the target.
1071*1da177e4SLinus Torvalds 	 *	Due to the way default routers are chosen, this notion
1072*1da177e4SLinus Torvalds 	 *	is a bit fuzzy and one might need to check all default
1073*1da177e4SLinus Torvalds 	 *	routers.
1074*1da177e4SLinus Torvalds 	 */
1075*1da177e4SLinus Torvalds 	if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1076*1da177e4SLinus Torvalds 		if (rt->rt6i_flags & RTF_DEFAULT) {
1077*1da177e4SLinus Torvalds 			struct rt6_info *rt1;
1078*1da177e4SLinus Torvalds 
1079*1da177e4SLinus Torvalds 			read_lock(&rt6_lock);
1080*1da177e4SLinus Torvalds 			for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1081*1da177e4SLinus Torvalds 				if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1082*1da177e4SLinus Torvalds 					dst_hold(&rt1->u.dst);
1083*1da177e4SLinus Torvalds 					dst_release(&rt->u.dst);
1084*1da177e4SLinus Torvalds 					read_unlock(&rt6_lock);
1085*1da177e4SLinus Torvalds 					rt = rt1;
1086*1da177e4SLinus Torvalds 					goto source_ok;
1087*1da177e4SLinus Torvalds 				}
1088*1da177e4SLinus Torvalds 			}
1089*1da177e4SLinus Torvalds 			read_unlock(&rt6_lock);
1090*1da177e4SLinus Torvalds 		}
1091*1da177e4SLinus Torvalds 		if (net_ratelimit())
1092*1da177e4SLinus Torvalds 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1093*1da177e4SLinus Torvalds 			       "for redirect target\n");
1094*1da177e4SLinus Torvalds 		goto out;
1095*1da177e4SLinus Torvalds 	}
1096*1da177e4SLinus Torvalds 
1097*1da177e4SLinus Torvalds source_ok:
1098*1da177e4SLinus Torvalds 
1099*1da177e4SLinus Torvalds 	/*
1100*1da177e4SLinus Torvalds 	 *	We have finally decided to accept it.
1101*1da177e4SLinus Torvalds 	 */
1102*1da177e4SLinus Torvalds 
1103*1da177e4SLinus Torvalds 	neigh_update(neigh, lladdr, NUD_STALE,
1104*1da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1105*1da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_OVERRIDE|
1106*1da177e4SLinus Torvalds 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1107*1da177e4SLinus Torvalds 				     NEIGH_UPDATE_F_ISROUTER))
1108*1da177e4SLinus Torvalds 		     );
1109*1da177e4SLinus Torvalds 
1110*1da177e4SLinus Torvalds 	/*
1111*1da177e4SLinus Torvalds 	 * Redirect received -> path was valid.
1112*1da177e4SLinus Torvalds 	 * Look, redirects are sent only in response to data packets,
1113*1da177e4SLinus Torvalds 	 * so that this nexthop apparently is reachable. --ANK
1114*1da177e4SLinus Torvalds 	 */
1115*1da177e4SLinus Torvalds 	dst_confirm(&rt->u.dst);
1116*1da177e4SLinus Torvalds 
1117*1da177e4SLinus Torvalds 	/* Duplicate redirect: silently ignore. */
1118*1da177e4SLinus Torvalds 	if (neigh == rt->u.dst.neighbour)
1119*1da177e4SLinus Torvalds 		goto out;
1120*1da177e4SLinus Torvalds 
1121*1da177e4SLinus Torvalds 	nrt = ip6_rt_copy(rt);
1122*1da177e4SLinus Torvalds 	if (nrt == NULL)
1123*1da177e4SLinus Torvalds 		goto out;
1124*1da177e4SLinus Torvalds 
1125*1da177e4SLinus Torvalds 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1126*1da177e4SLinus Torvalds 	if (on_link)
1127*1da177e4SLinus Torvalds 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1128*1da177e4SLinus Torvalds 
1129*1da177e4SLinus Torvalds 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1130*1da177e4SLinus Torvalds 	nrt->rt6i_dst.plen = 128;
1131*1da177e4SLinus Torvalds 	nrt->u.dst.flags |= DST_HOST;
1132*1da177e4SLinus Torvalds 
1133*1da177e4SLinus Torvalds 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1134*1da177e4SLinus Torvalds 	nrt->rt6i_nexthop = neigh_clone(neigh);
1135*1da177e4SLinus Torvalds 	/* Reset pmtu, it may be better */
1136*1da177e4SLinus Torvalds 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1137*1da177e4SLinus Torvalds 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1138*1da177e4SLinus Torvalds 
1139*1da177e4SLinus Torvalds 	if (ip6_ins_rt(nrt, NULL, NULL))
1140*1da177e4SLinus Torvalds 		goto out;
1141*1da177e4SLinus Torvalds 
1142*1da177e4SLinus Torvalds 	if (rt->rt6i_flags&RTF_CACHE) {
1143*1da177e4SLinus Torvalds 		ip6_del_rt(rt, NULL, NULL);
1144*1da177e4SLinus Torvalds 		return;
1145*1da177e4SLinus Torvalds 	}
1146*1da177e4SLinus Torvalds 
1147*1da177e4SLinus Torvalds out:
1148*1da177e4SLinus Torvalds         dst_release(&rt->u.dst);
1149*1da177e4SLinus Torvalds 	return;
1150*1da177e4SLinus Torvalds }
1151*1da177e4SLinus Torvalds 
1152*1da177e4SLinus Torvalds /*
1153*1da177e4SLinus Torvalds  *	Handle ICMP "packet too big" messages
1154*1da177e4SLinus Torvalds  *	i.e. Path MTU discovery
1155*1da177e4SLinus Torvalds  */
1156*1da177e4SLinus Torvalds 
1157*1da177e4SLinus Torvalds void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1158*1da177e4SLinus Torvalds 			struct net_device *dev, u32 pmtu)
1159*1da177e4SLinus Torvalds {
1160*1da177e4SLinus Torvalds 	struct rt6_info *rt, *nrt;
1161*1da177e4SLinus Torvalds 	int allfrag = 0;
1162*1da177e4SLinus Torvalds 
1163*1da177e4SLinus Torvalds 	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1164*1da177e4SLinus Torvalds 	if (rt == NULL)
1165*1da177e4SLinus Torvalds 		return;
1166*1da177e4SLinus Torvalds 
1167*1da177e4SLinus Torvalds 	if (pmtu >= dst_mtu(&rt->u.dst))
1168*1da177e4SLinus Torvalds 		goto out;
1169*1da177e4SLinus Torvalds 
1170*1da177e4SLinus Torvalds 	if (pmtu < IPV6_MIN_MTU) {
1171*1da177e4SLinus Torvalds 		/*
1172*1da177e4SLinus Torvalds 		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1173*1da177e4SLinus Torvalds 		 * MTU (1280) and a fragment header should always be included
1174*1da177e4SLinus Torvalds 		 * after a node receiving Too Big message reporting PMTU is
1175*1da177e4SLinus Torvalds 		 * less than the IPv6 Minimum Link MTU.
1176*1da177e4SLinus Torvalds 		 */
1177*1da177e4SLinus Torvalds 		pmtu = IPV6_MIN_MTU;
1178*1da177e4SLinus Torvalds 		allfrag = 1;
1179*1da177e4SLinus Torvalds 	}
1180*1da177e4SLinus Torvalds 
1181*1da177e4SLinus Torvalds 	/* New mtu received -> path was valid.
1182*1da177e4SLinus Torvalds 	   They are sent only in response to data packets,
1183*1da177e4SLinus Torvalds 	   so that this nexthop apparently is reachable. --ANK
1184*1da177e4SLinus Torvalds 	 */
1185*1da177e4SLinus Torvalds 	dst_confirm(&rt->u.dst);
1186*1da177e4SLinus Torvalds 
1187*1da177e4SLinus Torvalds 	/* Host route. If it is static, it would be better
1188*1da177e4SLinus Torvalds 	   not to override it, but add new one, so that
1189*1da177e4SLinus Torvalds 	   when cache entry will expire old pmtu
1190*1da177e4SLinus Torvalds 	   would return automatically.
1191*1da177e4SLinus Torvalds 	 */
1192*1da177e4SLinus Torvalds 	if (rt->rt6i_flags & RTF_CACHE) {
1193*1da177e4SLinus Torvalds 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1194*1da177e4SLinus Torvalds 		if (allfrag)
1195*1da177e4SLinus Torvalds 			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1196*1da177e4SLinus Torvalds 		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1197*1da177e4SLinus Torvalds 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1198*1da177e4SLinus Torvalds 		goto out;
1199*1da177e4SLinus Torvalds 	}
1200*1da177e4SLinus Torvalds 
1201*1da177e4SLinus Torvalds 	/* Network route.
1202*1da177e4SLinus Torvalds 	   Two cases are possible:
1203*1da177e4SLinus Torvalds 	   1. It is connected route. Action: COW
1204*1da177e4SLinus Torvalds 	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1205*1da177e4SLinus Torvalds 	 */
1206*1da177e4SLinus Torvalds 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1207*1da177e4SLinus Torvalds 		nrt = rt6_cow(rt, daddr, saddr);
1208*1da177e4SLinus Torvalds 		if (!nrt->u.dst.error) {
1209*1da177e4SLinus Torvalds 			nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1210*1da177e4SLinus Torvalds 			if (allfrag)
1211*1da177e4SLinus Torvalds 				nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1212*1da177e4SLinus Torvalds 			/* According to RFC 1981, detecting PMTU increase shouldn't be
1213*1da177e4SLinus Torvalds 			   happened within 5 mins, the recommended timer is 10 mins.
1214*1da177e4SLinus Torvalds 			   Here this route expiration time is set to ip6_rt_mtu_expires
1215*1da177e4SLinus Torvalds 			   which is 10 mins. After 10 mins the decreased pmtu is expired
1216*1da177e4SLinus Torvalds 			   and detecting PMTU increase will be automatically happened.
1217*1da177e4SLinus Torvalds 			 */
1218*1da177e4SLinus Torvalds 			dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1219*1da177e4SLinus Torvalds 			nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1220*1da177e4SLinus Torvalds 		}
1221*1da177e4SLinus Torvalds 		dst_release(&nrt->u.dst);
1222*1da177e4SLinus Torvalds 	} else {
1223*1da177e4SLinus Torvalds 		nrt = ip6_rt_copy(rt);
1224*1da177e4SLinus Torvalds 		if (nrt == NULL)
1225*1da177e4SLinus Torvalds 			goto out;
1226*1da177e4SLinus Torvalds 		ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1227*1da177e4SLinus Torvalds 		nrt->rt6i_dst.plen = 128;
1228*1da177e4SLinus Torvalds 		nrt->u.dst.flags |= DST_HOST;
1229*1da177e4SLinus Torvalds 		nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1230*1da177e4SLinus Torvalds 		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1231*1da177e4SLinus Torvalds 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1232*1da177e4SLinus Torvalds 		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1233*1da177e4SLinus Torvalds 		if (allfrag)
1234*1da177e4SLinus Torvalds 			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1235*1da177e4SLinus Torvalds 		ip6_ins_rt(nrt, NULL, NULL);
1236*1da177e4SLinus Torvalds 	}
1237*1da177e4SLinus Torvalds 
1238*1da177e4SLinus Torvalds out:
1239*1da177e4SLinus Torvalds 	dst_release(&rt->u.dst);
1240*1da177e4SLinus Torvalds }
1241*1da177e4SLinus Torvalds 
1242*1da177e4SLinus Torvalds /*
1243*1da177e4SLinus Torvalds  *	Misc support functions
1244*1da177e4SLinus Torvalds  */
1245*1da177e4SLinus Torvalds 
1246*1da177e4SLinus Torvalds static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1247*1da177e4SLinus Torvalds {
1248*1da177e4SLinus Torvalds 	struct rt6_info *rt = ip6_dst_alloc();
1249*1da177e4SLinus Torvalds 
1250*1da177e4SLinus Torvalds 	if (rt) {
1251*1da177e4SLinus Torvalds 		rt->u.dst.input = ort->u.dst.input;
1252*1da177e4SLinus Torvalds 		rt->u.dst.output = ort->u.dst.output;
1253*1da177e4SLinus Torvalds 
1254*1da177e4SLinus Torvalds 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1255*1da177e4SLinus Torvalds 		rt->u.dst.dev = ort->u.dst.dev;
1256*1da177e4SLinus Torvalds 		if (rt->u.dst.dev)
1257*1da177e4SLinus Torvalds 			dev_hold(rt->u.dst.dev);
1258*1da177e4SLinus Torvalds 		rt->rt6i_idev = ort->rt6i_idev;
1259*1da177e4SLinus Torvalds 		if (rt->rt6i_idev)
1260*1da177e4SLinus Torvalds 			in6_dev_hold(rt->rt6i_idev);
1261*1da177e4SLinus Torvalds 		rt->u.dst.lastuse = jiffies;
1262*1da177e4SLinus Torvalds 		rt->rt6i_expires = 0;
1263*1da177e4SLinus Torvalds 
1264*1da177e4SLinus Torvalds 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1265*1da177e4SLinus Torvalds 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1266*1da177e4SLinus Torvalds 		rt->rt6i_metric = 0;
1267*1da177e4SLinus Torvalds 
1268*1da177e4SLinus Torvalds 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1269*1da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
1270*1da177e4SLinus Torvalds 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1271*1da177e4SLinus Torvalds #endif
1272*1da177e4SLinus Torvalds 	}
1273*1da177e4SLinus Torvalds 	return rt;
1274*1da177e4SLinus Torvalds }
1275*1da177e4SLinus Torvalds 
1276*1da177e4SLinus Torvalds struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1277*1da177e4SLinus Torvalds {
1278*1da177e4SLinus Torvalds 	struct rt6_info *rt;
1279*1da177e4SLinus Torvalds 	struct fib6_node *fn;
1280*1da177e4SLinus Torvalds 
1281*1da177e4SLinus Torvalds 	fn = &ip6_routing_table;
1282*1da177e4SLinus Torvalds 
1283*1da177e4SLinus Torvalds 	write_lock_bh(&rt6_lock);
1284*1da177e4SLinus Torvalds 	for (rt = fn->leaf; rt; rt=rt->u.next) {
1285*1da177e4SLinus Torvalds 		if (dev == rt->rt6i_dev &&
1286*1da177e4SLinus Torvalds 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1287*1da177e4SLinus Torvalds 			break;
1288*1da177e4SLinus Torvalds 	}
1289*1da177e4SLinus Torvalds 	if (rt)
1290*1da177e4SLinus Torvalds 		dst_hold(&rt->u.dst);
1291*1da177e4SLinus Torvalds 	write_unlock_bh(&rt6_lock);
1292*1da177e4SLinus Torvalds 	return rt;
1293*1da177e4SLinus Torvalds }
1294*1da177e4SLinus Torvalds 
1295*1da177e4SLinus Torvalds struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1296*1da177e4SLinus Torvalds 				     struct net_device *dev)
1297*1da177e4SLinus Torvalds {
1298*1da177e4SLinus Torvalds 	struct in6_rtmsg rtmsg;
1299*1da177e4SLinus Torvalds 
1300*1da177e4SLinus Torvalds 	memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1301*1da177e4SLinus Torvalds 	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1302*1da177e4SLinus Torvalds 	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1303*1da177e4SLinus Torvalds 	rtmsg.rtmsg_metric = 1024;
1304*1da177e4SLinus Torvalds 	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1305*1da177e4SLinus Torvalds 
1306*1da177e4SLinus Torvalds 	rtmsg.rtmsg_ifindex = dev->ifindex;
1307*1da177e4SLinus Torvalds 
1308*1da177e4SLinus Torvalds 	ip6_route_add(&rtmsg, NULL, NULL);
1309*1da177e4SLinus Torvalds 	return rt6_get_dflt_router(gwaddr, dev);
1310*1da177e4SLinus Torvalds }
1311*1da177e4SLinus Torvalds 
1312*1da177e4SLinus Torvalds void rt6_purge_dflt_routers(void)
1313*1da177e4SLinus Torvalds {
1314*1da177e4SLinus Torvalds 	struct rt6_info *rt;
1315*1da177e4SLinus Torvalds 
1316*1da177e4SLinus Torvalds restart:
1317*1da177e4SLinus Torvalds 	read_lock_bh(&rt6_lock);
1318*1da177e4SLinus Torvalds 	for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1319*1da177e4SLinus Torvalds 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1320*1da177e4SLinus Torvalds 			dst_hold(&rt->u.dst);
1321*1da177e4SLinus Torvalds 
1322*1da177e4SLinus Torvalds 			rt6_reset_dflt_pointer(NULL);
1323*1da177e4SLinus Torvalds 
1324*1da177e4SLinus Torvalds 			read_unlock_bh(&rt6_lock);
1325*1da177e4SLinus Torvalds 
1326*1da177e4SLinus Torvalds 			ip6_del_rt(rt, NULL, NULL);
1327*1da177e4SLinus Torvalds 
1328*1da177e4SLinus Torvalds 			goto restart;
1329*1da177e4SLinus Torvalds 		}
1330*1da177e4SLinus Torvalds 	}
1331*1da177e4SLinus Torvalds 	read_unlock_bh(&rt6_lock);
1332*1da177e4SLinus Torvalds }
1333*1da177e4SLinus Torvalds 
1334*1da177e4SLinus Torvalds int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1335*1da177e4SLinus Torvalds {
1336*1da177e4SLinus Torvalds 	struct in6_rtmsg rtmsg;
1337*1da177e4SLinus Torvalds 	int err;
1338*1da177e4SLinus Torvalds 
1339*1da177e4SLinus Torvalds 	switch(cmd) {
1340*1da177e4SLinus Torvalds 	case SIOCADDRT:		/* Add a route */
1341*1da177e4SLinus Torvalds 	case SIOCDELRT:		/* Delete a route */
1342*1da177e4SLinus Torvalds 		if (!capable(CAP_NET_ADMIN))
1343*1da177e4SLinus Torvalds 			return -EPERM;
1344*1da177e4SLinus Torvalds 		err = copy_from_user(&rtmsg, arg,
1345*1da177e4SLinus Torvalds 				     sizeof(struct in6_rtmsg));
1346*1da177e4SLinus Torvalds 		if (err)
1347*1da177e4SLinus Torvalds 			return -EFAULT;
1348*1da177e4SLinus Torvalds 
1349*1da177e4SLinus Torvalds 		rtnl_lock();
1350*1da177e4SLinus Torvalds 		switch (cmd) {
1351*1da177e4SLinus Torvalds 		case SIOCADDRT:
1352*1da177e4SLinus Torvalds 			err = ip6_route_add(&rtmsg, NULL, NULL);
1353*1da177e4SLinus Torvalds 			break;
1354*1da177e4SLinus Torvalds 		case SIOCDELRT:
1355*1da177e4SLinus Torvalds 			err = ip6_route_del(&rtmsg, NULL, NULL);
1356*1da177e4SLinus Torvalds 			break;
1357*1da177e4SLinus Torvalds 		default:
1358*1da177e4SLinus Torvalds 			err = -EINVAL;
1359*1da177e4SLinus Torvalds 		}
1360*1da177e4SLinus Torvalds 		rtnl_unlock();
1361*1da177e4SLinus Torvalds 
1362*1da177e4SLinus Torvalds 		return err;
1363*1da177e4SLinus Torvalds 	};
1364*1da177e4SLinus Torvalds 
1365*1da177e4SLinus Torvalds 	return -EINVAL;
1366*1da177e4SLinus Torvalds }
1367*1da177e4SLinus Torvalds 
1368*1da177e4SLinus Torvalds /*
1369*1da177e4SLinus Torvalds  *	Drop the packet on the floor
1370*1da177e4SLinus Torvalds  */
1371*1da177e4SLinus Torvalds 
1372*1da177e4SLinus Torvalds int ip6_pkt_discard(struct sk_buff *skb)
1373*1da177e4SLinus Torvalds {
1374*1da177e4SLinus Torvalds 	IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1375*1da177e4SLinus Torvalds 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1376*1da177e4SLinus Torvalds 	kfree_skb(skb);
1377*1da177e4SLinus Torvalds 	return 0;
1378*1da177e4SLinus Torvalds }
1379*1da177e4SLinus Torvalds 
1380*1da177e4SLinus Torvalds int ip6_pkt_discard_out(struct sk_buff *skb)
1381*1da177e4SLinus Torvalds {
1382*1da177e4SLinus Torvalds 	skb->dev = skb->dst->dev;
1383*1da177e4SLinus Torvalds 	return ip6_pkt_discard(skb);
1384*1da177e4SLinus Torvalds }
1385*1da177e4SLinus Torvalds 
1386*1da177e4SLinus Torvalds /*
1387*1da177e4SLinus Torvalds  *	Allocate a dst for local (unicast / anycast) address.
1388*1da177e4SLinus Torvalds  */
1389*1da177e4SLinus Torvalds 
1390*1da177e4SLinus Torvalds struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1391*1da177e4SLinus Torvalds 				    const struct in6_addr *addr,
1392*1da177e4SLinus Torvalds 				    int anycast)
1393*1da177e4SLinus Torvalds {
1394*1da177e4SLinus Torvalds 	struct rt6_info *rt = ip6_dst_alloc();
1395*1da177e4SLinus Torvalds 
1396*1da177e4SLinus Torvalds 	if (rt == NULL)
1397*1da177e4SLinus Torvalds 		return ERR_PTR(-ENOMEM);
1398*1da177e4SLinus Torvalds 
1399*1da177e4SLinus Torvalds 	dev_hold(&loopback_dev);
1400*1da177e4SLinus Torvalds 	in6_dev_hold(idev);
1401*1da177e4SLinus Torvalds 
1402*1da177e4SLinus Torvalds 	rt->u.dst.flags = DST_HOST;
1403*1da177e4SLinus Torvalds 	rt->u.dst.input = ip6_input;
1404*1da177e4SLinus Torvalds 	rt->u.dst.output = ip6_output;
1405*1da177e4SLinus Torvalds 	rt->rt6i_dev = &loopback_dev;
1406*1da177e4SLinus Torvalds 	rt->rt6i_idev = idev;
1407*1da177e4SLinus Torvalds 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1408*1da177e4SLinus Torvalds 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1409*1da177e4SLinus Torvalds 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1410*1da177e4SLinus Torvalds 	rt->u.dst.obsolete = -1;
1411*1da177e4SLinus Torvalds 
1412*1da177e4SLinus Torvalds 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1413*1da177e4SLinus Torvalds 	if (!anycast)
1414*1da177e4SLinus Torvalds 		rt->rt6i_flags |= RTF_LOCAL;
1415*1da177e4SLinus Torvalds 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1416*1da177e4SLinus Torvalds 	if (rt->rt6i_nexthop == NULL) {
1417*1da177e4SLinus Torvalds 		dst_free((struct dst_entry *) rt);
1418*1da177e4SLinus Torvalds 		return ERR_PTR(-ENOMEM);
1419*1da177e4SLinus Torvalds 	}
1420*1da177e4SLinus Torvalds 
1421*1da177e4SLinus Torvalds 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1422*1da177e4SLinus Torvalds 	rt->rt6i_dst.plen = 128;
1423*1da177e4SLinus Torvalds 
1424*1da177e4SLinus Torvalds 	atomic_set(&rt->u.dst.__refcnt, 1);
1425*1da177e4SLinus Torvalds 
1426*1da177e4SLinus Torvalds 	return rt;
1427*1da177e4SLinus Torvalds }
1428*1da177e4SLinus Torvalds 
1429*1da177e4SLinus Torvalds static int fib6_ifdown(struct rt6_info *rt, void *arg)
1430*1da177e4SLinus Torvalds {
1431*1da177e4SLinus Torvalds 	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1432*1da177e4SLinus Torvalds 	    rt != &ip6_null_entry) {
1433*1da177e4SLinus Torvalds 		RT6_TRACE("deleted by ifdown %p\n", rt);
1434*1da177e4SLinus Torvalds 		return -1;
1435*1da177e4SLinus Torvalds 	}
1436*1da177e4SLinus Torvalds 	return 0;
1437*1da177e4SLinus Torvalds }
1438*1da177e4SLinus Torvalds 
1439*1da177e4SLinus Torvalds void rt6_ifdown(struct net_device *dev)
1440*1da177e4SLinus Torvalds {
1441*1da177e4SLinus Torvalds 	write_lock_bh(&rt6_lock);
1442*1da177e4SLinus Torvalds 	fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1443*1da177e4SLinus Torvalds 	write_unlock_bh(&rt6_lock);
1444*1da177e4SLinus Torvalds }
1445*1da177e4SLinus Torvalds 
/*
 *	Argument block handed to rt6_mtu_change_route() by rt6_mtu_change().
 */
struct rt6_mtu_change_arg
{
	struct net_device *dev;		/* device whose MTU changed */
	unsigned mtu;			/* the new MTU value */
};
1451*1da177e4SLinus Torvalds 
1452*1da177e4SLinus Torvalds static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1453*1da177e4SLinus Torvalds {
1454*1da177e4SLinus Torvalds 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1455*1da177e4SLinus Torvalds 	struct inet6_dev *idev;
1456*1da177e4SLinus Torvalds 
1457*1da177e4SLinus Torvalds 	/* In IPv6 pmtu discovery is not optional,
1458*1da177e4SLinus Torvalds 	   so that RTAX_MTU lock cannot disable it.
1459*1da177e4SLinus Torvalds 	   We still use this lock to block changes
1460*1da177e4SLinus Torvalds 	   caused by addrconf/ndisc.
1461*1da177e4SLinus Torvalds 	*/
1462*1da177e4SLinus Torvalds 
1463*1da177e4SLinus Torvalds 	idev = __in6_dev_get(arg->dev);
1464*1da177e4SLinus Torvalds 	if (idev == NULL)
1465*1da177e4SLinus Torvalds 		return 0;
1466*1da177e4SLinus Torvalds 
1467*1da177e4SLinus Torvalds 	/* For administrative MTU increase, there is no way to discover
1468*1da177e4SLinus Torvalds 	   IPv6 PMTU increase, so PMTU increase should be updated here.
1469*1da177e4SLinus Torvalds 	   Since RFC 1981 doesn't include administrative MTU increase
1470*1da177e4SLinus Torvalds 	   update PMTU increase is a MUST. (i.e. jumbo frame)
1471*1da177e4SLinus Torvalds 	 */
1472*1da177e4SLinus Torvalds 	/*
1473*1da177e4SLinus Torvalds 	   If new MTU is less than route PMTU, this new MTU will be the
1474*1da177e4SLinus Torvalds 	   lowest MTU in the path, update the route PMTU to reflect PMTU
1475*1da177e4SLinus Torvalds 	   decreases; if new MTU is greater than route PMTU, and the
1476*1da177e4SLinus Torvalds 	   old MTU is the lowest MTU in the path, update the route PMTU
1477*1da177e4SLinus Torvalds 	   to reflect the increase. In this case if the other nodes' MTU
1478*1da177e4SLinus Torvalds 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
1479*1da177e4SLinus Torvalds 	   PMTU discouvery.
1480*1da177e4SLinus Torvalds 	 */
1481*1da177e4SLinus Torvalds 	if (rt->rt6i_dev == arg->dev &&
1482*1da177e4SLinus Torvalds 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1483*1da177e4SLinus Torvalds             (dst_mtu(&rt->u.dst) > arg->mtu ||
1484*1da177e4SLinus Torvalds              (dst_mtu(&rt->u.dst) < arg->mtu &&
1485*1da177e4SLinus Torvalds 	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1486*1da177e4SLinus Torvalds 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1487*1da177e4SLinus Torvalds 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1488*1da177e4SLinus Torvalds 	return 0;
1489*1da177e4SLinus Torvalds }
1490*1da177e4SLinus Torvalds 
1491*1da177e4SLinus Torvalds void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1492*1da177e4SLinus Torvalds {
1493*1da177e4SLinus Torvalds 	struct rt6_mtu_change_arg arg;
1494*1da177e4SLinus Torvalds 
1495*1da177e4SLinus Torvalds 	arg.dev = dev;
1496*1da177e4SLinus Torvalds 	arg.mtu = mtu;
1497*1da177e4SLinus Torvalds 	read_lock_bh(&rt6_lock);
1498*1da177e4SLinus Torvalds 	fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1499*1da177e4SLinus Torvalds 	read_unlock_bh(&rt6_lock);
1500*1da177e4SLinus Torvalds }
1501*1da177e4SLinus Torvalds 
1502*1da177e4SLinus Torvalds static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1503*1da177e4SLinus Torvalds 			      struct in6_rtmsg *rtmsg)
1504*1da177e4SLinus Torvalds {
1505*1da177e4SLinus Torvalds 	memset(rtmsg, 0, sizeof(*rtmsg));
1506*1da177e4SLinus Torvalds 
1507*1da177e4SLinus Torvalds 	rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1508*1da177e4SLinus Torvalds 	rtmsg->rtmsg_src_len = r->rtm_src_len;
1509*1da177e4SLinus Torvalds 	rtmsg->rtmsg_flags = RTF_UP;
1510*1da177e4SLinus Torvalds 	if (r->rtm_type == RTN_UNREACHABLE)
1511*1da177e4SLinus Torvalds 		rtmsg->rtmsg_flags |= RTF_REJECT;
1512*1da177e4SLinus Torvalds 
1513*1da177e4SLinus Torvalds 	if (rta[RTA_GATEWAY-1]) {
1514*1da177e4SLinus Torvalds 		if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1515*1da177e4SLinus Torvalds 			return -EINVAL;
1516*1da177e4SLinus Torvalds 		memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1517*1da177e4SLinus Torvalds 		rtmsg->rtmsg_flags |= RTF_GATEWAY;
1518*1da177e4SLinus Torvalds 	}
1519*1da177e4SLinus Torvalds 	if (rta[RTA_DST-1]) {
1520*1da177e4SLinus Torvalds 		if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1521*1da177e4SLinus Torvalds 			return -EINVAL;
1522*1da177e4SLinus Torvalds 		memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1523*1da177e4SLinus Torvalds 	}
1524*1da177e4SLinus Torvalds 	if (rta[RTA_SRC-1]) {
1525*1da177e4SLinus Torvalds 		if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1526*1da177e4SLinus Torvalds 			return -EINVAL;
1527*1da177e4SLinus Torvalds 		memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1528*1da177e4SLinus Torvalds 	}
1529*1da177e4SLinus Torvalds 	if (rta[RTA_OIF-1]) {
1530*1da177e4SLinus Torvalds 		if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1531*1da177e4SLinus Torvalds 			return -EINVAL;
1532*1da177e4SLinus Torvalds 		memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1533*1da177e4SLinus Torvalds 	}
1534*1da177e4SLinus Torvalds 	if (rta[RTA_PRIORITY-1]) {
1535*1da177e4SLinus Torvalds 		if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1536*1da177e4SLinus Torvalds 			return -EINVAL;
1537*1da177e4SLinus Torvalds 		memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1538*1da177e4SLinus Torvalds 	}
1539*1da177e4SLinus Torvalds 	return 0;
1540*1da177e4SLinus Torvalds }
1541*1da177e4SLinus Torvalds 
1542*1da177e4SLinus Torvalds int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1543*1da177e4SLinus Torvalds {
1544*1da177e4SLinus Torvalds 	struct rtmsg *r = NLMSG_DATA(nlh);
1545*1da177e4SLinus Torvalds 	struct in6_rtmsg rtmsg;
1546*1da177e4SLinus Torvalds 
1547*1da177e4SLinus Torvalds 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1548*1da177e4SLinus Torvalds 		return -EINVAL;
1549*1da177e4SLinus Torvalds 	return ip6_route_del(&rtmsg, nlh, arg);
1550*1da177e4SLinus Torvalds }
1551*1da177e4SLinus Torvalds 
1552*1da177e4SLinus Torvalds int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1553*1da177e4SLinus Torvalds {
1554*1da177e4SLinus Torvalds 	struct rtmsg *r = NLMSG_DATA(nlh);
1555*1da177e4SLinus Torvalds 	struct in6_rtmsg rtmsg;
1556*1da177e4SLinus Torvalds 
1557*1da177e4SLinus Torvalds 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1558*1da177e4SLinus Torvalds 		return -EINVAL;
1559*1da177e4SLinus Torvalds 	return ip6_route_add(&rtmsg, nlh, arg);
1560*1da177e4SLinus Torvalds }
1561*1da177e4SLinus Torvalds 
/*
 *	Per-call context for a route table dump; set up by inet6_dump_fib()
 *	and read by rt6_dump_route().
 */
struct rt6_rtnl_dump_arg
{
	struct sk_buff *skb;		/* buffer the dump is written into */
	struct netlink_callback *cb;	/* netlink dump callback state */
};
1567*1da177e4SLinus Torvalds 
1568*1da177e4SLinus Torvalds static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1569*1da177e4SLinus Torvalds 			 struct in6_addr *dst,
1570*1da177e4SLinus Torvalds 			 struct in6_addr *src,
1571*1da177e4SLinus Torvalds 			 int iif,
1572*1da177e4SLinus Torvalds 			 int type, u32 pid, u32 seq,
1573*1da177e4SLinus Torvalds 			 struct nlmsghdr *in_nlh, int prefix)
1574*1da177e4SLinus Torvalds {
1575*1da177e4SLinus Torvalds 	struct rtmsg *rtm;
1576*1da177e4SLinus Torvalds 	struct nlmsghdr  *nlh;
1577*1da177e4SLinus Torvalds 	unsigned char	 *b = skb->tail;
1578*1da177e4SLinus Torvalds 	struct rta_cacheinfo ci;
1579*1da177e4SLinus Torvalds 
1580*1da177e4SLinus Torvalds 	if (prefix) {	/* user wants prefix routes only */
1581*1da177e4SLinus Torvalds 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1582*1da177e4SLinus Torvalds 			/* success since this is not a prefix route */
1583*1da177e4SLinus Torvalds 			return 1;
1584*1da177e4SLinus Torvalds 		}
1585*1da177e4SLinus Torvalds 	}
1586*1da177e4SLinus Torvalds 
1587*1da177e4SLinus Torvalds 	if (!pid && in_nlh) {
1588*1da177e4SLinus Torvalds 		pid = in_nlh->nlmsg_pid;
1589*1da177e4SLinus Torvalds 	}
1590*1da177e4SLinus Torvalds 
1591*1da177e4SLinus Torvalds 	nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1592*1da177e4SLinus Torvalds 	rtm = NLMSG_DATA(nlh);
1593*1da177e4SLinus Torvalds 	rtm->rtm_family = AF_INET6;
1594*1da177e4SLinus Torvalds 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
1595*1da177e4SLinus Torvalds 	rtm->rtm_src_len = rt->rt6i_src.plen;
1596*1da177e4SLinus Torvalds 	rtm->rtm_tos = 0;
1597*1da177e4SLinus Torvalds 	rtm->rtm_table = RT_TABLE_MAIN;
1598*1da177e4SLinus Torvalds 	if (rt->rt6i_flags&RTF_REJECT)
1599*1da177e4SLinus Torvalds 		rtm->rtm_type = RTN_UNREACHABLE;
1600*1da177e4SLinus Torvalds 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1601*1da177e4SLinus Torvalds 		rtm->rtm_type = RTN_LOCAL;
1602*1da177e4SLinus Torvalds 	else
1603*1da177e4SLinus Torvalds 		rtm->rtm_type = RTN_UNICAST;
1604*1da177e4SLinus Torvalds 	rtm->rtm_flags = 0;
1605*1da177e4SLinus Torvalds 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1606*1da177e4SLinus Torvalds 	rtm->rtm_protocol = rt->rt6i_protocol;
1607*1da177e4SLinus Torvalds 	if (rt->rt6i_flags&RTF_DYNAMIC)
1608*1da177e4SLinus Torvalds 		rtm->rtm_protocol = RTPROT_REDIRECT;
1609*1da177e4SLinus Torvalds 	else if (rt->rt6i_flags & RTF_ADDRCONF)
1610*1da177e4SLinus Torvalds 		rtm->rtm_protocol = RTPROT_KERNEL;
1611*1da177e4SLinus Torvalds 	else if (rt->rt6i_flags&RTF_DEFAULT)
1612*1da177e4SLinus Torvalds 		rtm->rtm_protocol = RTPROT_RA;
1613*1da177e4SLinus Torvalds 
1614*1da177e4SLinus Torvalds 	if (rt->rt6i_flags&RTF_CACHE)
1615*1da177e4SLinus Torvalds 		rtm->rtm_flags |= RTM_F_CLONED;
1616*1da177e4SLinus Torvalds 
1617*1da177e4SLinus Torvalds 	if (dst) {
1618*1da177e4SLinus Torvalds 		RTA_PUT(skb, RTA_DST, 16, dst);
1619*1da177e4SLinus Torvalds 	        rtm->rtm_dst_len = 128;
1620*1da177e4SLinus Torvalds 	} else if (rtm->rtm_dst_len)
1621*1da177e4SLinus Torvalds 		RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1622*1da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
1623*1da177e4SLinus Torvalds 	if (src) {
1624*1da177e4SLinus Torvalds 		RTA_PUT(skb, RTA_SRC, 16, src);
1625*1da177e4SLinus Torvalds 	        rtm->rtm_src_len = 128;
1626*1da177e4SLinus Torvalds 	} else if (rtm->rtm_src_len)
1627*1da177e4SLinus Torvalds 		RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1628*1da177e4SLinus Torvalds #endif
1629*1da177e4SLinus Torvalds 	if (iif)
1630*1da177e4SLinus Torvalds 		RTA_PUT(skb, RTA_IIF, 4, &iif);
1631*1da177e4SLinus Torvalds 	else if (dst) {
1632*1da177e4SLinus Torvalds 		struct in6_addr saddr_buf;
1633*1da177e4SLinus Torvalds 		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1634*1da177e4SLinus Torvalds 			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1635*1da177e4SLinus Torvalds 	}
1636*1da177e4SLinus Torvalds 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1637*1da177e4SLinus Torvalds 		goto rtattr_failure;
1638*1da177e4SLinus Torvalds 	if (rt->u.dst.neighbour)
1639*1da177e4SLinus Torvalds 		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1640*1da177e4SLinus Torvalds 	if (rt->u.dst.dev)
1641*1da177e4SLinus Torvalds 		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1642*1da177e4SLinus Torvalds 	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1643*1da177e4SLinus Torvalds 	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1644*1da177e4SLinus Torvalds 	if (rt->rt6i_expires)
1645*1da177e4SLinus Torvalds 		ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1646*1da177e4SLinus Torvalds 	else
1647*1da177e4SLinus Torvalds 		ci.rta_expires = 0;
1648*1da177e4SLinus Torvalds 	ci.rta_used = rt->u.dst.__use;
1649*1da177e4SLinus Torvalds 	ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1650*1da177e4SLinus Torvalds 	ci.rta_error = rt->u.dst.error;
1651*1da177e4SLinus Torvalds 	ci.rta_id = 0;
1652*1da177e4SLinus Torvalds 	ci.rta_ts = 0;
1653*1da177e4SLinus Torvalds 	ci.rta_tsage = 0;
1654*1da177e4SLinus Torvalds 	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1655*1da177e4SLinus Torvalds 	nlh->nlmsg_len = skb->tail - b;
1656*1da177e4SLinus Torvalds 	return skb->len;
1657*1da177e4SLinus Torvalds 
1658*1da177e4SLinus Torvalds nlmsg_failure:
1659*1da177e4SLinus Torvalds rtattr_failure:
1660*1da177e4SLinus Torvalds 	skb_trim(skb, b - skb->data);
1661*1da177e4SLinus Torvalds 	return -1;
1662*1da177e4SLinus Torvalds }
1663*1da177e4SLinus Torvalds 
1664*1da177e4SLinus Torvalds static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1665*1da177e4SLinus Torvalds {
1666*1da177e4SLinus Torvalds 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1667*1da177e4SLinus Torvalds 	int prefix;
1668*1da177e4SLinus Torvalds 
1669*1da177e4SLinus Torvalds 	if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1670*1da177e4SLinus Torvalds 		struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1671*1da177e4SLinus Torvalds 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1672*1da177e4SLinus Torvalds 	} else
1673*1da177e4SLinus Torvalds 		prefix = 0;
1674*1da177e4SLinus Torvalds 
1675*1da177e4SLinus Torvalds 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1676*1da177e4SLinus Torvalds 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1677*1da177e4SLinus Torvalds 		     NULL, prefix);
1678*1da177e4SLinus Torvalds }
1679*1da177e4SLinus Torvalds 
1680*1da177e4SLinus Torvalds static int fib6_dump_node(struct fib6_walker_t *w)
1681*1da177e4SLinus Torvalds {
1682*1da177e4SLinus Torvalds 	int res;
1683*1da177e4SLinus Torvalds 	struct rt6_info *rt;
1684*1da177e4SLinus Torvalds 
1685*1da177e4SLinus Torvalds 	for (rt = w->leaf; rt; rt = rt->u.next) {
1686*1da177e4SLinus Torvalds 		res = rt6_dump_route(rt, w->args);
1687*1da177e4SLinus Torvalds 		if (res < 0) {
1688*1da177e4SLinus Torvalds 			/* Frame is full, suspend walking */
1689*1da177e4SLinus Torvalds 			w->leaf = rt;
1690*1da177e4SLinus Torvalds 			return 1;
1691*1da177e4SLinus Torvalds 		}
1692*1da177e4SLinus Torvalds 		BUG_TRAP(res!=0);
1693*1da177e4SLinus Torvalds 	}
1694*1da177e4SLinus Torvalds 	w->leaf = NULL;
1695*1da177e4SLinus Torvalds 	return 0;
1696*1da177e4SLinus Torvalds }
1697*1da177e4SLinus Torvalds 
1698*1da177e4SLinus Torvalds static void fib6_dump_end(struct netlink_callback *cb)
1699*1da177e4SLinus Torvalds {
1700*1da177e4SLinus Torvalds 	struct fib6_walker_t *w = (void*)cb->args[0];
1701*1da177e4SLinus Torvalds 
1702*1da177e4SLinus Torvalds 	if (w) {
1703*1da177e4SLinus Torvalds 		cb->args[0] = 0;
1704*1da177e4SLinus Torvalds 		fib6_walker_unlink(w);
1705*1da177e4SLinus Torvalds 		kfree(w);
1706*1da177e4SLinus Torvalds 	}
1707*1da177e4SLinus Torvalds 	if (cb->args[1]) {
1708*1da177e4SLinus Torvalds 		cb->done = (void*)cb->args[1];
1709*1da177e4SLinus Torvalds 		cb->args[1] = 0;
1710*1da177e4SLinus Torvalds 	}
1711*1da177e4SLinus Torvalds }
1712*1da177e4SLinus Torvalds 
1713*1da177e4SLinus Torvalds static int fib6_dump_done(struct netlink_callback *cb)
1714*1da177e4SLinus Torvalds {
1715*1da177e4SLinus Torvalds 	fib6_dump_end(cb);
1716*1da177e4SLinus Torvalds 	return cb->done(cb);
1717*1da177e4SLinus Torvalds }
1718*1da177e4SLinus Torvalds 
1719*1da177e4SLinus Torvalds int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1720*1da177e4SLinus Torvalds {
1721*1da177e4SLinus Torvalds 	struct rt6_rtnl_dump_arg arg;
1722*1da177e4SLinus Torvalds 	struct fib6_walker_t *w;
1723*1da177e4SLinus Torvalds 	int res;
1724*1da177e4SLinus Torvalds 
1725*1da177e4SLinus Torvalds 	arg.skb = skb;
1726*1da177e4SLinus Torvalds 	arg.cb = cb;
1727*1da177e4SLinus Torvalds 
1728*1da177e4SLinus Torvalds 	w = (void*)cb->args[0];
1729*1da177e4SLinus Torvalds 	if (w == NULL) {
1730*1da177e4SLinus Torvalds 		/* New dump:
1731*1da177e4SLinus Torvalds 		 *
1732*1da177e4SLinus Torvalds 		 * 1. hook callback destructor.
1733*1da177e4SLinus Torvalds 		 */
1734*1da177e4SLinus Torvalds 		cb->args[1] = (long)cb->done;
1735*1da177e4SLinus Torvalds 		cb->done = fib6_dump_done;
1736*1da177e4SLinus Torvalds 
1737*1da177e4SLinus Torvalds 		/*
1738*1da177e4SLinus Torvalds 		 * 2. allocate and initialize walker.
1739*1da177e4SLinus Torvalds 		 */
1740*1da177e4SLinus Torvalds 		w = kmalloc(sizeof(*w), GFP_ATOMIC);
1741*1da177e4SLinus Torvalds 		if (w == NULL)
1742*1da177e4SLinus Torvalds 			return -ENOMEM;
1743*1da177e4SLinus Torvalds 		RT6_TRACE("dump<%p", w);
1744*1da177e4SLinus Torvalds 		memset(w, 0, sizeof(*w));
1745*1da177e4SLinus Torvalds 		w->root = &ip6_routing_table;
1746*1da177e4SLinus Torvalds 		w->func = fib6_dump_node;
1747*1da177e4SLinus Torvalds 		w->args = &arg;
1748*1da177e4SLinus Torvalds 		cb->args[0] = (long)w;
1749*1da177e4SLinus Torvalds 		read_lock_bh(&rt6_lock);
1750*1da177e4SLinus Torvalds 		res = fib6_walk(w);
1751*1da177e4SLinus Torvalds 		read_unlock_bh(&rt6_lock);
1752*1da177e4SLinus Torvalds 	} else {
1753*1da177e4SLinus Torvalds 		w->args = &arg;
1754*1da177e4SLinus Torvalds 		read_lock_bh(&rt6_lock);
1755*1da177e4SLinus Torvalds 		res = fib6_walk_continue(w);
1756*1da177e4SLinus Torvalds 		read_unlock_bh(&rt6_lock);
1757*1da177e4SLinus Torvalds 	}
1758*1da177e4SLinus Torvalds #if RT6_DEBUG >= 3
1759*1da177e4SLinus Torvalds 	if (res <= 0 && skb->len == 0)
1760*1da177e4SLinus Torvalds 		RT6_TRACE("%p>dump end\n", w);
1761*1da177e4SLinus Torvalds #endif
1762*1da177e4SLinus Torvalds 	res = res < 0 ? res : skb->len;
1763*1da177e4SLinus Torvalds 	/* res < 0 is an error. (really, impossible)
1764*1da177e4SLinus Torvalds 	   res == 0 means that dump is complete, but skb still can contain data.
1765*1da177e4SLinus Torvalds 	   res > 0 dump is not complete, but frame is full.
1766*1da177e4SLinus Torvalds 	 */
1767*1da177e4SLinus Torvalds 	/* Destroy walker, if dump of this table is complete. */
1768*1da177e4SLinus Torvalds 	if (res <= 0)
1769*1da177e4SLinus Torvalds 		fib6_dump_end(cb);
1770*1da177e4SLinus Torvalds 	return res;
1771*1da177e4SLinus Torvalds }
1772*1da177e4SLinus Torvalds 
1773*1da177e4SLinus Torvalds int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1774*1da177e4SLinus Torvalds {
1775*1da177e4SLinus Torvalds 	struct rtattr **rta = arg;
1776*1da177e4SLinus Torvalds 	int iif = 0;
1777*1da177e4SLinus Torvalds 	int err = -ENOBUFS;
1778*1da177e4SLinus Torvalds 	struct sk_buff *skb;
1779*1da177e4SLinus Torvalds 	struct flowi fl;
1780*1da177e4SLinus Torvalds 	struct rt6_info *rt;
1781*1da177e4SLinus Torvalds 
1782*1da177e4SLinus Torvalds 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1783*1da177e4SLinus Torvalds 	if (skb == NULL)
1784*1da177e4SLinus Torvalds 		goto out;
1785*1da177e4SLinus Torvalds 
1786*1da177e4SLinus Torvalds 	/* Reserve room for dummy headers, this skb can pass
1787*1da177e4SLinus Torvalds 	   through good chunk of routing engine.
1788*1da177e4SLinus Torvalds 	 */
1789*1da177e4SLinus Torvalds 	skb->mac.raw = skb->data;
1790*1da177e4SLinus Torvalds 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1791*1da177e4SLinus Torvalds 
1792*1da177e4SLinus Torvalds 	memset(&fl, 0, sizeof(fl));
1793*1da177e4SLinus Torvalds 	if (rta[RTA_SRC-1])
1794*1da177e4SLinus Torvalds 		ipv6_addr_copy(&fl.fl6_src,
1795*1da177e4SLinus Torvalds 			       (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1796*1da177e4SLinus Torvalds 	if (rta[RTA_DST-1])
1797*1da177e4SLinus Torvalds 		ipv6_addr_copy(&fl.fl6_dst,
1798*1da177e4SLinus Torvalds 			       (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1799*1da177e4SLinus Torvalds 
1800*1da177e4SLinus Torvalds 	if (rta[RTA_IIF-1])
1801*1da177e4SLinus Torvalds 		memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1802*1da177e4SLinus Torvalds 
1803*1da177e4SLinus Torvalds 	if (iif) {
1804*1da177e4SLinus Torvalds 		struct net_device *dev;
1805*1da177e4SLinus Torvalds 		dev = __dev_get_by_index(iif);
1806*1da177e4SLinus Torvalds 		if (!dev) {
1807*1da177e4SLinus Torvalds 			err = -ENODEV;
1808*1da177e4SLinus Torvalds 			goto out_free;
1809*1da177e4SLinus Torvalds 		}
1810*1da177e4SLinus Torvalds 	}
1811*1da177e4SLinus Torvalds 
1812*1da177e4SLinus Torvalds 	fl.oif = 0;
1813*1da177e4SLinus Torvalds 	if (rta[RTA_OIF-1])
1814*1da177e4SLinus Torvalds 		memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1815*1da177e4SLinus Torvalds 
1816*1da177e4SLinus Torvalds 	rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1817*1da177e4SLinus Torvalds 
1818*1da177e4SLinus Torvalds 	skb->dst = &rt->u.dst;
1819*1da177e4SLinus Torvalds 
1820*1da177e4SLinus Torvalds 	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1821*1da177e4SLinus Torvalds 	err = rt6_fill_node(skb, rt,
1822*1da177e4SLinus Torvalds 			    &fl.fl6_dst, &fl.fl6_src,
1823*1da177e4SLinus Torvalds 			    iif,
1824*1da177e4SLinus Torvalds 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1825*1da177e4SLinus Torvalds 			    nlh->nlmsg_seq, nlh, 0);
1826*1da177e4SLinus Torvalds 	if (err < 0) {
1827*1da177e4SLinus Torvalds 		err = -EMSGSIZE;
1828*1da177e4SLinus Torvalds 		goto out_free;
1829*1da177e4SLinus Torvalds 	}
1830*1da177e4SLinus Torvalds 
1831*1da177e4SLinus Torvalds 	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1832*1da177e4SLinus Torvalds 	if (err > 0)
1833*1da177e4SLinus Torvalds 		err = 0;
1834*1da177e4SLinus Torvalds out:
1835*1da177e4SLinus Torvalds 	return err;
1836*1da177e4SLinus Torvalds out_free:
1837*1da177e4SLinus Torvalds 	kfree_skb(skb);
1838*1da177e4SLinus Torvalds 	goto out;
1839*1da177e4SLinus Torvalds }
1840*1da177e4SLinus Torvalds 
1841*1da177e4SLinus Torvalds void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1842*1da177e4SLinus Torvalds {
1843*1da177e4SLinus Torvalds 	struct sk_buff *skb;
1844*1da177e4SLinus Torvalds 	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1845*1da177e4SLinus Torvalds 
1846*1da177e4SLinus Torvalds 	skb = alloc_skb(size, gfp_any());
1847*1da177e4SLinus Torvalds 	if (!skb) {
1848*1da177e4SLinus Torvalds 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1849*1da177e4SLinus Torvalds 		return;
1850*1da177e4SLinus Torvalds 	}
1851*1da177e4SLinus Torvalds 	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1852*1da177e4SLinus Torvalds 		kfree_skb(skb);
1853*1da177e4SLinus Torvalds 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1854*1da177e4SLinus Torvalds 		return;
1855*1da177e4SLinus Torvalds 	}
1856*1da177e4SLinus Torvalds 	NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1857*1da177e4SLinus Torvalds 	netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1858*1da177e4SLinus Torvalds }
1859*1da177e4SLinus Torvalds 
1860*1da177e4SLinus Torvalds /*
1861*1da177e4SLinus Torvalds  *	/proc
1862*1da177e4SLinus Torvalds  */
1863*1da177e4SLinus Torvalds 
1864*1da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
1865*1da177e4SLinus Torvalds 
/*
 *	Size in bytes of one record emitted by rt6_info_route():
 *	three 32-digit hex addresses, two 4-byte prefix-length fields,
 *	four " %08x" words (36), " %8s" (9) and the trailing newline.
 *	The figure only holds while the device name fits in 8 characters.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/*
 *	State carried through one rt6_proc_info() read while the routing
 *	tree is walked.
 */
struct rt6_proc_arg {
	char *buffer;	/* destination buffer for the formatted records */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* routes skipped so far to reach @offset */
	int len;	/* bytes written into @buffer so far */
};
1876*1da177e4SLinus Torvalds 
1877*1da177e4SLinus Torvalds static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1878*1da177e4SLinus Torvalds {
1879*1da177e4SLinus Torvalds 	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1880*1da177e4SLinus Torvalds 	int i;
1881*1da177e4SLinus Torvalds 
1882*1da177e4SLinus Torvalds 	if (arg->skip < arg->offset / RT6_INFO_LEN) {
1883*1da177e4SLinus Torvalds 		arg->skip++;
1884*1da177e4SLinus Torvalds 		return 0;
1885*1da177e4SLinus Torvalds 	}
1886*1da177e4SLinus Torvalds 
1887*1da177e4SLinus Torvalds 	if (arg->len >= arg->length)
1888*1da177e4SLinus Torvalds 		return 0;
1889*1da177e4SLinus Torvalds 
1890*1da177e4SLinus Torvalds 	for (i=0; i<16; i++) {
1891*1da177e4SLinus Torvalds 		sprintf(arg->buffer + arg->len, "%02x",
1892*1da177e4SLinus Torvalds 			rt->rt6i_dst.addr.s6_addr[i]);
1893*1da177e4SLinus Torvalds 		arg->len += 2;
1894*1da177e4SLinus Torvalds 	}
1895*1da177e4SLinus Torvalds 	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1896*1da177e4SLinus Torvalds 			    rt->rt6i_dst.plen);
1897*1da177e4SLinus Torvalds 
1898*1da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
1899*1da177e4SLinus Torvalds 	for (i=0; i<16; i++) {
1900*1da177e4SLinus Torvalds 		sprintf(arg->buffer + arg->len, "%02x",
1901*1da177e4SLinus Torvalds 			rt->rt6i_src.addr.s6_addr[i]);
1902*1da177e4SLinus Torvalds 		arg->len += 2;
1903*1da177e4SLinus Torvalds 	}
1904*1da177e4SLinus Torvalds 	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1905*1da177e4SLinus Torvalds 			    rt->rt6i_src.plen);
1906*1da177e4SLinus Torvalds #else
1907*1da177e4SLinus Torvalds 	sprintf(arg->buffer + arg->len,
1908*1da177e4SLinus Torvalds 		"00000000000000000000000000000000 00 ");
1909*1da177e4SLinus Torvalds 	arg->len += 36;
1910*1da177e4SLinus Torvalds #endif
1911*1da177e4SLinus Torvalds 
1912*1da177e4SLinus Torvalds 	if (rt->rt6i_nexthop) {
1913*1da177e4SLinus Torvalds 		for (i=0; i<16; i++) {
1914*1da177e4SLinus Torvalds 			sprintf(arg->buffer + arg->len, "%02x",
1915*1da177e4SLinus Torvalds 				rt->rt6i_nexthop->primary_key[i]);
1916*1da177e4SLinus Torvalds 			arg->len += 2;
1917*1da177e4SLinus Torvalds 		}
1918*1da177e4SLinus Torvalds 	} else {
1919*1da177e4SLinus Torvalds 		sprintf(arg->buffer + arg->len,
1920*1da177e4SLinus Torvalds 			"00000000000000000000000000000000");
1921*1da177e4SLinus Torvalds 		arg->len += 32;
1922*1da177e4SLinus Torvalds 	}
1923*1da177e4SLinus Torvalds 	arg->len += sprintf(arg->buffer + arg->len,
1924*1da177e4SLinus Torvalds 			    " %08x %08x %08x %08x %8s\n",
1925*1da177e4SLinus Torvalds 			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1926*1da177e4SLinus Torvalds 			    rt->u.dst.__use, rt->rt6i_flags,
1927*1da177e4SLinus Torvalds 			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
1928*1da177e4SLinus Torvalds 	return 0;
1929*1da177e4SLinus Torvalds }
1930*1da177e4SLinus Torvalds 
1931*1da177e4SLinus Torvalds static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1932*1da177e4SLinus Torvalds {
1933*1da177e4SLinus Torvalds 	struct rt6_proc_arg arg;
1934*1da177e4SLinus Torvalds 	arg.buffer = buffer;
1935*1da177e4SLinus Torvalds 	arg.offset = offset;
1936*1da177e4SLinus Torvalds 	arg.length = length;
1937*1da177e4SLinus Torvalds 	arg.skip = 0;
1938*1da177e4SLinus Torvalds 	arg.len = 0;
1939*1da177e4SLinus Torvalds 
1940*1da177e4SLinus Torvalds 	read_lock_bh(&rt6_lock);
1941*1da177e4SLinus Torvalds 	fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1942*1da177e4SLinus Torvalds 	read_unlock_bh(&rt6_lock);
1943*1da177e4SLinus Torvalds 
1944*1da177e4SLinus Torvalds 	*start = buffer;
1945*1da177e4SLinus Torvalds 	if (offset)
1946*1da177e4SLinus Torvalds 		*start += offset % RT6_INFO_LEN;
1947*1da177e4SLinus Torvalds 
1948*1da177e4SLinus Torvalds 	arg.len -= offset % RT6_INFO_LEN;
1949*1da177e4SLinus Torvalds 
1950*1da177e4SLinus Torvalds 	if (arg.len > length)
1951*1da177e4SLinus Torvalds 		arg.len = length;
1952*1da177e4SLinus Torvalds 	if (arg.len < 0)
1953*1da177e4SLinus Torvalds 		arg.len = 0;
1954*1da177e4SLinus Torvalds 
1955*1da177e4SLinus Torvalds 	return arg.len;
1956*1da177e4SLinus Torvalds }
1957*1da177e4SLinus Torvalds 
1958*1da177e4SLinus Torvalds extern struct rt6_statistics rt6_stats;
1959*1da177e4SLinus Torvalds 
1960*1da177e4SLinus Torvalds static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1961*1da177e4SLinus Torvalds {
1962*1da177e4SLinus Torvalds 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1963*1da177e4SLinus Torvalds 		      rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1964*1da177e4SLinus Torvalds 		      rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1965*1da177e4SLinus Torvalds 		      rt6_stats.fib_rt_cache,
1966*1da177e4SLinus Torvalds 		      atomic_read(&ip6_dst_ops.entries),
1967*1da177e4SLinus Torvalds 		      rt6_stats.fib_discarded_routes);
1968*1da177e4SLinus Torvalds 
1969*1da177e4SLinus Torvalds 	return 0;
1970*1da177e4SLinus Torvalds }
1971*1da177e4SLinus Torvalds 
1972*1da177e4SLinus Torvalds static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1973*1da177e4SLinus Torvalds {
1974*1da177e4SLinus Torvalds 	return single_open(file, rt6_stats_seq_show, NULL);
1975*1da177e4SLinus Torvalds }
1976*1da177e4SLinus Torvalds 
1977*1da177e4SLinus Torvalds static struct file_operations rt6_stats_seq_fops = {
1978*1da177e4SLinus Torvalds 	.owner	 = THIS_MODULE,
1979*1da177e4SLinus Torvalds 	.open	 = rt6_stats_seq_open,
1980*1da177e4SLinus Torvalds 	.read	 = seq_read,
1981*1da177e4SLinus Torvalds 	.llseek	 = seq_lseek,
1982*1da177e4SLinus Torvalds 	.release = single_release,
1983*1da177e4SLinus Torvalds };
1984*1da177e4SLinus Torvalds #endif	/* CONFIG_PROC_FS */
1985*1da177e4SLinus Torvalds 
1986*1da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
1987*1da177e4SLinus Torvalds 
1988*1da177e4SLinus Torvalds static int flush_delay;
1989*1da177e4SLinus Torvalds 
1990*1da177e4SLinus Torvalds static
1991*1da177e4SLinus Torvalds int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1992*1da177e4SLinus Torvalds 			      void __user *buffer, size_t *lenp, loff_t *ppos)
1993*1da177e4SLinus Torvalds {
1994*1da177e4SLinus Torvalds 	if (write) {
1995*1da177e4SLinus Torvalds 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1996*1da177e4SLinus Torvalds 		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
1997*1da177e4SLinus Torvalds 		return 0;
1998*1da177e4SLinus Torvalds 	} else
1999*1da177e4SLinus Torvalds 		return -EINVAL;
2000*1da177e4SLinus Torvalds }
2001*1da177e4SLinus Torvalds 
2002*1da177e4SLinus Torvalds ctl_table ipv6_route_table[] = {
2003*1da177e4SLinus Torvalds         {
2004*1da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_FLUSH,
2005*1da177e4SLinus Torvalds 		.procname	=	"flush",
2006*1da177e4SLinus Torvalds          	.data		=	&flush_delay,
2007*1da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
2008*1da177e4SLinus Torvalds 		.mode		=	0644,
2009*1da177e4SLinus Torvalds          	.proc_handler	=	&ipv6_sysctl_rtcache_flush
2010*1da177e4SLinus Torvalds 	},
2011*1da177e4SLinus Torvalds 	{
2012*1da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
2013*1da177e4SLinus Torvalds 		.procname	=	"gc_thresh",
2014*1da177e4SLinus Torvalds          	.data		=	&ip6_dst_ops.gc_thresh,
2015*1da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
2016*1da177e4SLinus Torvalds 		.mode		=	0644,
2017*1da177e4SLinus Torvalds          	.proc_handler	=	&proc_dointvec,
2018*1da177e4SLinus Torvalds 	},
2019*1da177e4SLinus Torvalds 	{
2020*1da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
2021*1da177e4SLinus Torvalds 		.procname	=	"max_size",
2022*1da177e4SLinus Torvalds          	.data		=	&ip6_rt_max_size,
2023*1da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
2024*1da177e4SLinus Torvalds 		.mode		=	0644,
2025*1da177e4SLinus Torvalds          	.proc_handler	=	&proc_dointvec,
2026*1da177e4SLinus Torvalds 	},
2027*1da177e4SLinus Torvalds 	{
2028*1da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2029*1da177e4SLinus Torvalds 		.procname	=	"gc_min_interval",
2030*1da177e4SLinus Torvalds          	.data		=	&ip6_rt_gc_min_interval,
2031*1da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
2032*1da177e4SLinus Torvalds 		.mode		=	0644,
2033*1da177e4SLinus Torvalds          	.proc_handler	=	&proc_dointvec_jiffies,
2034*1da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
2035*1da177e4SLinus Torvalds 	},
2036*1da177e4SLinus Torvalds 	{
2037*1da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
2038*1da177e4SLinus Torvalds 		.procname	=	"gc_timeout",
2039*1da177e4SLinus Torvalds          	.data		=	&ip6_rt_gc_timeout,
2040*1da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
2041*1da177e4SLinus Torvalds 		.mode		=	0644,
2042*1da177e4SLinus Torvalds          	.proc_handler	=	&proc_dointvec_jiffies,
2043*1da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
2044*1da177e4SLinus Torvalds 	},
2045*1da177e4SLinus Torvalds 	{
2046*1da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
2047*1da177e4SLinus Torvalds 		.procname	=	"gc_interval",
2048*1da177e4SLinus Torvalds          	.data		=	&ip6_rt_gc_interval,
2049*1da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
2050*1da177e4SLinus Torvalds 		.mode		=	0644,
2051*1da177e4SLinus Torvalds          	.proc_handler	=	&proc_dointvec_jiffies,
2052*1da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
2053*1da177e4SLinus Torvalds 	},
2054*1da177e4SLinus Torvalds 	{
2055*1da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
2056*1da177e4SLinus Torvalds 		.procname	=	"gc_elasticity",
2057*1da177e4SLinus Torvalds          	.data		=	&ip6_rt_gc_elasticity,
2058*1da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
2059*1da177e4SLinus Torvalds 		.mode		=	0644,
2060*1da177e4SLinus Torvalds          	.proc_handler	=	&proc_dointvec_jiffies,
2061*1da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
2062*1da177e4SLinus Torvalds 	},
2063*1da177e4SLinus Torvalds 	{
2064*1da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
2065*1da177e4SLinus Torvalds 		.procname	=	"mtu_expires",
2066*1da177e4SLinus Torvalds          	.data		=	&ip6_rt_mtu_expires,
2067*1da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
2068*1da177e4SLinus Torvalds 		.mode		=	0644,
2069*1da177e4SLinus Torvalds          	.proc_handler	=	&proc_dointvec_jiffies,
2070*1da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
2071*1da177e4SLinus Torvalds 	},
2072*1da177e4SLinus Torvalds 	{
2073*1da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
2074*1da177e4SLinus Torvalds 		.procname	=	"min_adv_mss",
2075*1da177e4SLinus Torvalds          	.data		=	&ip6_rt_min_advmss,
2076*1da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
2077*1da177e4SLinus Torvalds 		.mode		=	0644,
2078*1da177e4SLinus Torvalds          	.proc_handler	=	&proc_dointvec_jiffies,
2079*1da177e4SLinus Torvalds 		.strategy	=	&sysctl_jiffies,
2080*1da177e4SLinus Torvalds 	},
2081*1da177e4SLinus Torvalds 	{
2082*1da177e4SLinus Torvalds 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2083*1da177e4SLinus Torvalds 		.procname	=	"gc_min_interval_ms",
2084*1da177e4SLinus Torvalds          	.data		=	&ip6_rt_gc_min_interval,
2085*1da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
2086*1da177e4SLinus Torvalds 		.mode		=	0644,
2087*1da177e4SLinus Torvalds          	.proc_handler	=	&proc_dointvec_ms_jiffies,
2088*1da177e4SLinus Torvalds 		.strategy	=	&sysctl_ms_jiffies,
2089*1da177e4SLinus Torvalds 	},
2090*1da177e4SLinus Torvalds 	{ .ctl_name = 0 }
2091*1da177e4SLinus Torvalds };
2092*1da177e4SLinus Torvalds 
2093*1da177e4SLinus Torvalds #endif
2094*1da177e4SLinus Torvalds 
2095*1da177e4SLinus Torvalds void __init ip6_route_init(void)
2096*1da177e4SLinus Torvalds {
2097*1da177e4SLinus Torvalds 	struct proc_dir_entry *p;
2098*1da177e4SLinus Torvalds 
2099*1da177e4SLinus Torvalds 	ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2100*1da177e4SLinus Torvalds 						     sizeof(struct rt6_info),
2101*1da177e4SLinus Torvalds 						     0, SLAB_HWCACHE_ALIGN,
2102*1da177e4SLinus Torvalds 						     NULL, NULL);
2103*1da177e4SLinus Torvalds 	if (!ip6_dst_ops.kmem_cachep)
2104*1da177e4SLinus Torvalds 		panic("cannot create ip6_dst_cache");
2105*1da177e4SLinus Torvalds 
2106*1da177e4SLinus Torvalds 	fib6_init();
2107*1da177e4SLinus Torvalds #ifdef 	CONFIG_PROC_FS
2108*1da177e4SLinus Torvalds 	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2109*1da177e4SLinus Torvalds 	if (p)
2110*1da177e4SLinus Torvalds 		p->owner = THIS_MODULE;
2111*1da177e4SLinus Torvalds 
2112*1da177e4SLinus Torvalds 	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2113*1da177e4SLinus Torvalds #endif
2114*1da177e4SLinus Torvalds #ifdef CONFIG_XFRM
2115*1da177e4SLinus Torvalds 	xfrm6_init();
2116*1da177e4SLinus Torvalds #endif
2117*1da177e4SLinus Torvalds }
2118*1da177e4SLinus Torvalds 
2119*1da177e4SLinus Torvalds void ip6_route_cleanup(void)
2120*1da177e4SLinus Torvalds {
2121*1da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
2122*1da177e4SLinus Torvalds 	proc_net_remove("ipv6_route");
2123*1da177e4SLinus Torvalds 	proc_net_remove("rt6_stats");
2124*1da177e4SLinus Torvalds #endif
2125*1da177e4SLinus Torvalds #ifdef CONFIG_XFRM
2126*1da177e4SLinus Torvalds 	xfrm6_fini();
2127*1da177e4SLinus Torvalds #endif
2128*1da177e4SLinus Torvalds 	rt6_ifdown(NULL);
2129*1da177e4SLinus Torvalds 	fib6_gc_cleanup();
2130*1da177e4SLinus Torvalds 	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2131*1da177e4SLinus Torvalds }
2132