xref: /openbmc/linux/net/ipv6/route.c (revision 1da177e4)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *	This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15 
16 /*	Changes:
17  *
18  *	YOSHIFUJI Hideaki @USAGI
19  *		reworked default router selection.
20  *		- respect outgoing interface
21  *		- select from (probably) reachable routers (i.e.
22  *		routers in REACHABLE, STALE, DELAY or PROBE states).
23  *		- always select the same router if it is (probably)
24  *		reachable.  otherwise, round-robin the list.
25  */
26 
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
40 
41 #ifdef 	CONFIG_PROC_FS
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
44 #endif
45 
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56 
57 #include <asm/uaccess.h>
58 
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62 
/*
 * Debug verbosity for this file.  RDBG() and RT6_TRACE() expand to printk
 * calls only when RT6_DEBUG is 3 or higher; with the value below they
 * compile to nothing.
 */
63 /* Set to 3 to get tracing. */
64 #define RT6_DEBUG 2
65 
66 #if RT6_DEBUG >= 3
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
69 #else
70 #define RDBG(x)
71 #define RT6_TRACE(x...) do { ; } while (0)
72 #endif
73 
74 
/*
 * Tunables for the route cache.  Time values are in jiffies.
 *   ip6_rt_max_size        - entry count above which ip6_dst_gc() reports
 *                            the cache as over limit
 *   ip6_rt_gc_min_interval - minimum spacing between two GC runs while the
 *                            cache is not over ip6_rt_max_size
 *   ip6_rt_gc_timeout      - half of this becomes the GC expiry once the
 *                            cache drops below gc_thresh
 *   ip6_rt_gc_interval     - not referenced in the visible part of the file
 *   ip6_rt_gc_elasticity   - shift used to decay the GC expiry on each call
 *   ip6_rt_mtu_expires     - lifetime given to routes created/updated by
 *                            path MTU discovery
 *   ip6_rt_min_advmss      - lower bound applied by ipv6_advmss()
 */
75 static int ip6_rt_max_size = 4096;
76 static int ip6_rt_gc_min_interval = HZ / 2;
77 static int ip6_rt_gc_timeout = 60*HZ;
78 int ip6_rt_gc_interval = 30*HZ;
79 static int ip6_rt_gc_elasticity = 9;
80 static int ip6_rt_mtu_expires = 10*60*HZ;
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
82 
/*
 * Forward declarations: these functions are referenced by the ip6_dst_ops
 * and ip6_null_entry initializers (or by code) before their definitions.
 */
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void		ip6_dst_destroy(struct dst_entry *);
87 static void		ip6_dst_ifdown(struct dst_entry *,
88 				       struct net_device *dev, int how);
89 static int		 ip6_dst_gc(void);
90 
91 static int		ip6_pkt_discard(struct sk_buff *skb);
92 static int		ip6_pkt_discard_out(struct sk_buff *skb);
93 static void		ip6_link_failure(struct sk_buff *skb);
94 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
95 
/*
 * Destination-cache operations for IPv6 routes.  Every rt6_info is
 * allocated against this table (see ip6_dst_alloc()), so entry_size is
 * sizeof(struct rt6_info) and the callbacks below are the ones defined in
 * this file.  gc_thresh is the entry count that ip6_dst_gc() compares
 * against when deciding whether to reset its expiry.
 */
96 static struct dst_ops ip6_dst_ops = {
97 	.family			=	AF_INET6,
98 	.protocol		=	__constant_htons(ETH_P_IPV6),
99 	.gc			=	ip6_dst_gc,
100 	.gc_thresh		=	1024,
101 	.check			=	ip6_dst_check,
102 	.destroy		=	ip6_dst_destroy,
103 	.ifdown			=	ip6_dst_ifdown,
104 	.negative_advice	=	ip6_negative_advice,
105 	.link_failure		=	ip6_link_failure,
106 	.update_pmtu		=	ip6_rt_update_pmtu,
107 	.entry_size		=	sizeof(struct rt6_info),
108 };
109 
/*
 * Statically allocated "no route" entry.  Lookups in this file return it
 * instead of NULL when nothing matches (see rt6_device_match(),
 * rt6_best_dflt() and rt6_cow()).  It is a reject route bound to the
 * loopback device: dst.error is -ENETUNREACH and both input and output
 * discard the packet.  Its reference counts start at 1, and its metric is
 * the largest u32 value.
 */
110 struct rt6_info ip6_null_entry = {
111 	.u = {
112 		.dst = {
113 			.__refcnt	= ATOMIC_INIT(1),
114 			.__use		= 1,
115 			.dev		= &loopback_dev,
116 			.obsolete	= -1,
117 			.error		= -ENETUNREACH,
118 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
119 			.input		= ip6_pkt_discard,
120 			.output		= ip6_pkt_discard_out,
121 			.ops		= &ip6_dst_ops,
122 			.path		= (struct dst_entry*)&ip6_null_entry,
123 		}
124 	},
125 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
126 	.rt6i_metric	= ~(u32) 0,
127 	.rt6i_ref	= ATOMIC_INIT(1),
128 };
129 
/*
 * Root node of the IPv6 routing tree.  Its leaf initially holds only
 * ip6_null_entry; all lookups and insertions in this file start here.
 */
130 struct fib6_node ip6_routing_table = {
131 	.leaf		= &ip6_null_entry,
132 	.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
133 };
134 
135 /* Protects all the ip6 fib */
136 
/*
 * Reader/writer lock around the routing tree.  Lookups in this file take
 * it with read_lock_bh() (rt6_redirect() uses plain read_lock());
 * insertion and deletion take it with write_lock_bh().
 */
137 DEFINE_RWLOCK(rt6_lock);
138 
139 
140 /* allocate dst with ip6_dst_ops */
141 static __inline__ struct rt6_info *ip6_dst_alloc(void)
142 {
143 	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
144 }
145 
146 static void ip6_dst_destroy(struct dst_entry *dst)
147 {
148 	struct rt6_info *rt = (struct rt6_info *)dst;
149 	struct inet6_dev *idev = rt->rt6i_idev;
150 
151 	if (idev != NULL) {
152 		rt->rt6i_idev = NULL;
153 		in6_dev_put(idev);
154 	}
155 }
156 
157 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
158 			   int how)
159 {
160 	struct rt6_info *rt = (struct rt6_info *)dst;
161 	struct inet6_dev *idev = rt->rt6i_idev;
162 
163 	if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
164 		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
165 		if (loopback_idev != NULL) {
166 			rt->rt6i_idev = loopback_idev;
167 			in6_dev_put(idev);
168 		}
169 	}
170 }
171 
172 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
173 {
174 	return (rt->rt6i_flags & RTF_EXPIRES &&
175 		time_after(jiffies, rt->rt6i_expires));
176 }
177 
178 /*
179  *	Route lookup. Any rt6_lock is implied.
180  */
181 
182 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
183 						    int oif,
184 						    int strict)
185 {
186 	struct rt6_info *local = NULL;
187 	struct rt6_info *sprt;
188 
189 	if (oif) {
190 		for (sprt = rt; sprt; sprt = sprt->u.next) {
191 			struct net_device *dev = sprt->rt6i_dev;
192 			if (dev->ifindex == oif)
193 				return sprt;
194 			if (dev->flags & IFF_LOOPBACK) {
195 				if (sprt->rt6i_idev == NULL ||
196 				    sprt->rt6i_idev->dev->ifindex != oif) {
197 					if (strict && oif)
198 						continue;
199 					if (local && (!oif ||
200 						      local->rt6i_idev->dev->ifindex == oif))
201 						continue;
202 				}
203 				local = sprt;
204 			}
205 		}
206 
207 		if (local)
208 			return local;
209 
210 		if (strict)
211 			return &ip6_null_entry;
212 	}
213 	return rt;
214 }
215 
216 /*
217  *	pointer to the last default router chosen. BH is disabled locally.
218  */
/*
 * rt6_dflt_pointer remembers the default router picked by the most recent
 * successful selection in rt6_best_dflt(); rt6_reset_dflt_pointer()
 * clears it.  Updates are serialized by rt6_dflt_lock.
 */
219 static struct rt6_info *rt6_dflt_pointer;
220 static DEFINE_SPINLOCK(rt6_dflt_lock);
221 
222 void rt6_reset_dflt_pointer(struct rt6_info *rt)
223 {
224 	spin_lock_bh(&rt6_dflt_lock);
225 	if (rt == NULL || rt == rt6_dflt_pointer) {
226 		RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
227 		rt6_dflt_pointer = NULL;
228 	}
229 	spin_unlock_bh(&rt6_dflt_lock);
230 }
231 
232 /* Default Router Selection (RFC 2461 6.3.6) */
/*
 * rt6_best_dflt - pick the preferred default router from the list at @rt.
 * @rt:  head of the candidate list (walked through u.next)
 * @oif: requested outgoing interface index, or 0 for "any"
 *
 * Every candidate that is not expired and has a neighbour entry is scored:
 *   +8  @oif is 0, or the route's device has ifindex @oif
 *   +4  it is the router remembered in rt6_dflt_pointer
 *   +3  neighbour state is NUD_REACHABLE
 *   +2  neighbour state is NUD_STALE, NUD_DELAY or NUD_PROBE
 *   +1  neighbour state is NUD_NOARP or NUD_PERMANENT
 * Candidates in any other neighbour state (NUD_INCOMPLETE included) are
 * skipped.  The highest score wins and a score of 12 or more ends the scan
 * immediately.
 *
 * If nothing scored and a router is remembered, the list is searched
 * round-robin starting after rt6_dflt_pointer.  A match from either step
 * is stored in rt6_dflt_pointer.  Otherwise the first unexpired
 * RTF_DEFAULT route on the root leaf that fits @oif is returned without
 * being recorded, and as a final fallback &ip6_null_entry.
 *
 * Never returns NULL.  No reference is taken on the returned route.
 * rt6_dflt_lock is taken with plain spin_lock(); the comment above
 * rt6_dflt_pointer states that BH is disabled locally.
 */
233 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
234 {
235 	struct rt6_info *match = NULL;
236 	struct rt6_info *sprt;
237 	int mpri = 0;
238 
239 	for (sprt = rt; sprt; sprt = sprt->u.next) {
240 		struct neighbour *neigh;
241 		int m = 0;
242 
243 		if (!oif ||
244 		    (sprt->rt6i_dev &&
245 		     sprt->rt6i_dev->ifindex == oif))
246 			m += 8;
247 
248 		if (rt6_check_expired(sprt))
249 			continue;
250 
251 		if (sprt == rt6_dflt_pointer)
252 			m += 4;
253 
254 		if ((neigh = sprt->rt6i_nexthop) != NULL) {
			/* nud_state is sampled under the neighbour's own lock. */
255 			read_lock_bh(&neigh->lock);
256 			switch (neigh->nud_state) {
257 			case NUD_REACHABLE:
258 				m += 3;
259 				break;
260 
261 			case NUD_STALE:
262 			case NUD_DELAY:
263 			case NUD_PROBE:
264 				m += 2;
265 				break;
266 
267 			case NUD_NOARP:
268 			case NUD_PERMANENT:
269 				m += 1;
270 				break;
271 
272 			case NUD_INCOMPLETE:
273 			default:
274 				read_unlock_bh(&neigh->lock);
275 				continue;
276 			}
277 			read_unlock_bh(&neigh->lock);
278 		} else {
279 			continue;
280 		}
281 
		/* m >= 12 requires both the +8 and the +4 bonus plus a usable neighbour. */
282 		if (m > mpri || m >= 12) {
283 			match = sprt;
284 			mpri = m;
285 			if (m >= 12) {
286 				/* we choose the last default router if it
287 				 * is in (probably) reachable state.
288 				 * If route changed, we should do pmtu
289 				 * discovery. --yoshfuji
290 				 */
291 				break;
292 			}
293 		}
294 	}
295 
296 	spin_lock(&rt6_dflt_lock);
297 	if (!match) {
298 		/*
299 		 *	No default routers are known to be reachable.
300 		 *	SHOULD round robin
301 		 */
302 		if (rt6_dflt_pointer) {
			/* First pass: entries after the remembered router. */
303 			for (sprt = rt6_dflt_pointer->u.next;
304 			     sprt; sprt = sprt->u.next) {
305 				if (sprt->u.dst.obsolete <= 0 &&
306 				    sprt->u.dst.error == 0 &&
307 				    !rt6_check_expired(sprt)) {
308 					match = sprt;
309 					break;
310 				}
311 			}
			/* Second pass: wrap around from the head up to the remembered router. */
312 			for (sprt = rt;
313 			     !match && sprt;
314 			     sprt = sprt->u.next) {
315 				if (sprt->u.dst.obsolete <= 0 &&
316 				    sprt->u.dst.error == 0 &&
317 				    !rt6_check_expired(sprt)) {
318 					match = sprt;
319 					break;
320 				}
321 				if (sprt == rt6_dflt_pointer)
322 					break;
323 			}
324 		}
325 	}
326 
327 	if (match) {
328 		if (rt6_dflt_pointer != match)
329 			RT6_TRACE("changed default router: %p->%p\n",
330 				  rt6_dflt_pointer, match);
331 		rt6_dflt_pointer = match;
332 	}
333 	spin_unlock(&rt6_dflt_lock);
334 
335 	if (!match) {
336 		/*
337 		 * Last Resort: if no default routers found,
338 		 * use addrconf default route.
339 		 * We don't record this route.
340 		 */
341 		for (sprt = ip6_routing_table.leaf;
342 		     sprt; sprt = sprt->u.next) {
343 			if (!rt6_check_expired(sprt) &&
344 			    (sprt->rt6i_flags & RTF_DEFAULT) &&
345 			    (!oif ||
346 			     (sprt->rt6i_dev &&
347 			      sprt->rt6i_dev->ifindex == oif))) {
348 				match = sprt;
349 				break;
350 			}
351 		}
352 		if (!match) {
353 			/* no default route.  give up. */
354 			match = &ip6_null_entry;
355 		}
356 	}
357 
358 	return match;
359 }
360 
361 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
362 			    int oif, int strict)
363 {
364 	struct fib6_node *fn;
365 	struct rt6_info *rt;
366 
367 	read_lock_bh(&rt6_lock);
368 	fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
369 	rt = rt6_device_match(fn->leaf, oif, strict);
370 	dst_hold(&rt->u.dst);
371 	rt->u.dst.__use++;
372 	read_unlock_bh(&rt6_lock);
373 
374 	rt->u.dst.lastuse = jiffies;
375 	if (rt->u.dst.error == 0)
376 		return rt;
377 	dst_release(&rt->u.dst);
378 	return NULL;
379 }
380 
381 /* ip6_ins_rt is called with FREE rt6_lock.
382    It takes new route entry, the addition fails by any reason the
383    route is freed. In any case, if caller does not hold it, it may
384    be destroyed.
385  */
386 
387 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
388 {
389 	int err;
390 
391 	write_lock_bh(&rt6_lock);
392 	err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
393 	write_unlock_bh(&rt6_lock);
394 
395 	return err;
396 }
397 
398 /* No rt6_lock! If COW failed, the function returns dead route entry
399    with dst->error set to errno value.
400  */
401 
402 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
403 				struct in6_addr *saddr)
404 {
405 	int err;
406 	struct rt6_info *rt;
407 
408 	/*
409 	 *	Clone the route.
410 	 */
411 
412 	rt = ip6_rt_copy(ort);
413 
414 	if (rt) {
415 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
416 
417 		if (!(rt->rt6i_flags&RTF_GATEWAY))
418 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
419 
420 		rt->rt6i_dst.plen = 128;
421 		rt->rt6i_flags |= RTF_CACHE;
422 		rt->u.dst.flags |= DST_HOST;
423 
424 #ifdef CONFIG_IPV6_SUBTREES
425 		if (rt->rt6i_src.plen && saddr) {
426 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
427 			rt->rt6i_src.plen = 128;
428 		}
429 #endif
430 
431 		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
432 
433 		dst_hold(&rt->u.dst);
434 
435 		err = ip6_ins_rt(rt, NULL, NULL);
436 		if (err == 0)
437 			return rt;
438 
439 		rt->u.dst.error = err;
440 
441 		return rt;
442 	}
443 	dst_hold(&ip6_null_entry.u.dst);
444 	return &ip6_null_entry;
445 }
446 
/*
 * BACKTRACK - climb the tree after a strict lookup produced no route.
 *
 * Expands in place inside ip6_route_input()/ip6_route_output() and relies
 * on the caller's locals `rt`, `fn` and `strict` and on its `out` and
 * `restart` labels.  When rt is &ip6_null_entry and strict is set, it walks
 * fn->parent upwards:
 *   - on reaching a node flagged RTN_ROOT it takes a reference on rt and
 *     jumps to `out`;
 *   - on reaching a node flagged RTN_RTINFO it jumps to `restart` so the
 *     caller retries with that node's leaf.
 * If the parent chain ends without either, execution falls through.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is a single statement
 * and cannot capture a following `else`.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
458 
459 
/*
 * ip6_route_input - route a received packet and attach the result to
 * skb->dst.
 *
 * The lookup key is the packet's destination and source address and the
 * constraint is the receiving device's ifindex.  "strict" is non-zero for
 * multicast and link-local destinations.
 *
 * Under the rt6_lock read lock:
 *   - if the leaf is an RTF_CACHE entry it is device-matched with the
 *     strict setting and used as is;
 *   - otherwise it is device-matched non-strictly; if the chosen route has
 *     no next hop and is not RTF_NONEXTHOP, the lock is dropped and a host
 *     clone is created with rt6_cow().  If that clone fails with -EEXIST
 *     the whole lookup is retried, at most `attempts` (3) times in total.
 * BACKTRACK() may jump to `restart` or `out`.
 *
 * skb->dst is always set, with a reference held, possibly to an error
 * route such as ip6_null_entry.
 */
460 void ip6_route_input(struct sk_buff *skb)
461 {
462 	struct fib6_node *fn;
463 	struct rt6_info *rt;
464 	int strict;
465 	int attempts = 3;
466 
467 	strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
468 
469 relookup:
470 	read_lock_bh(&rt6_lock);
471 
472 	fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
473 			 &skb->nh.ipv6h->saddr);
474 
475 restart:
476 	rt = fn->leaf;
477 
478 	if ((rt->rt6i_flags & RTF_CACHE)) {
479 		rt = rt6_device_match(rt, skb->dev->ifindex, strict);
480 		BACKTRACK();
481 		dst_hold(&rt->u.dst);
482 		goto out;
483 	}
484 
485 	rt = rt6_device_match(rt, skb->dev->ifindex, 0);
486 	BACKTRACK();
487 
488 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
489 		struct rt6_info *nrt;
		/* Pin the template route across the unlocked clone + insert. */
490 		dst_hold(&rt->u.dst);
491 		read_unlock_bh(&rt6_lock);
492 
493 		nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
494 			      &skb->nh.ipv6h->saddr);
495 
496 		dst_release(&rt->u.dst);
497 		rt = nrt;
498 
		/* out2 skips the unlock: the lock was already released above. */
499 		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
500 			goto out2;
501 
502 		/* Race condition! In the gap, when rt6_lock was
503 		   released someone could insert this route.  Relookup.
504 		*/
505 		dst_release(&rt->u.dst);
506 		goto relookup;
507 	}
508 	dst_hold(&rt->u.dst);
509 
510 out:
511 	read_unlock_bh(&rt6_lock);
512 out2:
513 	rt->u.dst.lastuse = jiffies;
514 	rt->u.dst.__use++;
515 	skb->dst = (struct dst_entry *) rt;
516 }
517 
/*
 * ip6_route_output - find the route for an outgoing flow.
 * @sk: not referenced in this function
 * @fl: flow key; fl6_dst, fl6_src and oif are used
 *
 * "strict" is non-zero for multicast and link-local destinations.
 * Under the rt6_lock read lock:
 *   - an RTF_CACHE leaf is device-matched against fl->oif and returned;
 *   - an RTF_DEFAULT leaf whose metric is at least IP6_RT_PRIO_ADDRCONF
 *     goes through default router selection (rt6_best_dflt());
 *   - any other non-default leaf is device-matched against fl->oif.
 * If the chosen route has no next hop and is not RTF_NONEXTHOP, the lock
 * is dropped and a host clone is created with rt6_cow(); a clone failing
 * with -EEXIST triggers a fresh lookup, at most `attempts` (3) in total.
 *
 * Always returns a dst with a reference held; it may be an error route
 * such as ip6_null_entry, so callers need to inspect dst->error.
 */
518 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
519 {
520 	struct fib6_node *fn;
521 	struct rt6_info *rt;
522 	int strict;
523 	int attempts = 3;
524 
525 	strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
526 
527 relookup:
528 	read_lock_bh(&rt6_lock);
529 
530 	fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
531 
532 restart:
533 	rt = fn->leaf;
534 
535 	if ((rt->rt6i_flags & RTF_CACHE)) {
536 		rt = rt6_device_match(rt, fl->oif, strict);
537 		BACKTRACK();
538 		dst_hold(&rt->u.dst);
539 		goto out;
540 	}
541 	if (rt->rt6i_flags & RTF_DEFAULT) {
542 		if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
543 			rt = rt6_best_dflt(rt, fl->oif);
544 	} else {
545 		rt = rt6_device_match(rt, fl->oif, strict);
546 		BACKTRACK();
547 	}
548 
549 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
550 		struct rt6_info *nrt;
		/* Pin the template route across the unlocked clone + insert. */
551 		dst_hold(&rt->u.dst);
552 		read_unlock_bh(&rt6_lock);
553 
554 		nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
555 
556 		dst_release(&rt->u.dst);
557 		rt = nrt;
558 
		/* out2 skips the unlock: the lock was already released above. */
559 		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
560 			goto out2;
561 
562 		/* Race condition! In the gap, when rt6_lock was
563 		   released someone could insert this route.  Relookup.
564 		*/
565 		dst_release(&rt->u.dst);
566 		goto relookup;
567 	}
568 	dst_hold(&rt->u.dst);
569 
570 out:
571 	read_unlock_bh(&rt6_lock);
572 out2:
573 	rt->u.dst.lastuse = jiffies;
574 	rt->u.dst.__use++;
575 	return &rt->u.dst;
576 }
577 
578 
579 /*
580  *	Destination cache support functions
581  */
582 
583 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
584 {
585 	struct rt6_info *rt;
586 
587 	rt = (struct rt6_info *) dst;
588 
589 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
590 		return dst;
591 
592 	return NULL;
593 }
594 
595 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
596 {
597 	struct rt6_info *rt = (struct rt6_info *) dst;
598 
599 	if (rt) {
600 		if (rt->rt6i_flags & RTF_CACHE)
601 			ip6_del_rt(rt, NULL, NULL);
602 		else
603 			dst_release(dst);
604 	}
605 	return NULL;
606 }
607 
608 static void ip6_link_failure(struct sk_buff *skb)
609 {
610 	struct rt6_info *rt;
611 
612 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
613 
614 	rt = (struct rt6_info *) skb->dst;
615 	if (rt) {
616 		if (rt->rt6i_flags&RTF_CACHE) {
617 			dst_set_expires(&rt->u.dst, 0);
618 			rt->rt6i_flags |= RTF_EXPIRES;
619 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
620 			rt->rt6i_node->fn_sernum = -1;
621 	}
622 }
623 
624 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
625 {
626 	struct rt6_info *rt6 = (struct rt6_info*)dst;
627 
628 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
629 		rt6->rt6i_flags |= RTF_MODIFIED;
630 		if (mtu < IPV6_MIN_MTU) {
631 			mtu = IPV6_MIN_MTU;
632 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
633 		}
634 		dst->metrics[RTAX_MTU-1] = mtu;
635 	}
636 }
637 
638 /* Protected by rt6_lock.  */
/*
 * ndisc_dst_gc_list: singly linked list (through dst.next) of the entries
 * created by ndisc_dst_alloc(); ndisc_dst_gc() frees the unreferenced ones.
 * ipv6_get_mtu() is declared here because it is used before its definition.
 */
639 static struct dst_entry *ndisc_dst_gc_list;
640 static int ipv6_get_mtu(struct net_device *dev);
641 
642 static inline unsigned int ipv6_advmss(unsigned int mtu)
643 {
644 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
645 
646 	if (mtu < ip6_rt_min_advmss)
647 		mtu = ip6_rt_min_advmss;
648 
649 	/*
650 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
651 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
652 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
653 	 * rely only on pmtu discovery"
654 	 */
655 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
656 		mtu = IPV6_MAXPLEN;
657 	return mtu;
658 }
659 
660 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
661 				  struct neighbour *neigh,
662 				  struct in6_addr *addr,
663 				  int (*output)(struct sk_buff *))
664 {
665 	struct rt6_info *rt;
666 	struct inet6_dev *idev = in6_dev_get(dev);
667 
668 	if (unlikely(idev == NULL))
669 		return NULL;
670 
671 	rt = ip6_dst_alloc();
672 	if (unlikely(rt == NULL)) {
673 		in6_dev_put(idev);
674 		goto out;
675 	}
676 
677 	dev_hold(dev);
678 	if (neigh)
679 		neigh_hold(neigh);
680 	else
681 		neigh = ndisc_get_neigh(dev, addr);
682 
683 	rt->rt6i_dev	  = dev;
684 	rt->rt6i_idev     = idev;
685 	rt->rt6i_nexthop  = neigh;
686 	atomic_set(&rt->u.dst.__refcnt, 1);
687 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
688 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
689 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
690 	rt->u.dst.output  = output;
691 
692 #if 0	/* there's no chance to use these for ndisc */
693 	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
694 				? DST_HOST
695 				: 0;
696 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
697 	rt->rt6i_dst.plen = 128;
698 #endif
699 
700 	write_lock_bh(&rt6_lock);
701 	rt->u.dst.next = ndisc_dst_gc_list;
702 	ndisc_dst_gc_list = &rt->u.dst;
703 	write_unlock_bh(&rt6_lock);
704 
705 	fib6_force_start_gc();
706 
707 out:
708 	return (struct dst_entry *)rt;
709 }
710 
711 int ndisc_dst_gc(int *more)
712 {
713 	struct dst_entry *dst, *next, **pprev;
714 	int freed;
715 
716 	next = NULL;
717 	pprev = &ndisc_dst_gc_list;
718 	freed = 0;
719 	while ((dst = *pprev) != NULL) {
720 		if (!atomic_read(&dst->__refcnt)) {
721 			*pprev = dst->next;
722 			dst_free(dst);
723 			freed++;
724 		} else {
725 			pprev = &dst->next;
726 			(*more)++;
727 		}
728 	}
729 
730 	return freed;
731 }
732 
733 static int ip6_dst_gc(void)
734 {
735 	static unsigned expire = 30*HZ;
736 	static unsigned long last_gc;
737 	unsigned long now = jiffies;
738 
739 	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
740 	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
741 		goto out;
742 
743 	expire++;
744 	fib6_run_gc(expire);
745 	last_gc = now;
746 	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
747 		expire = ip6_rt_gc_timeout>>1;
748 
749 out:
750 	expire -= expire>>ip6_rt_gc_elasticity;
751 	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
752 }
753 
754 /* Clean host part of a prefix. Not necessary in radix tree,
755    but results in cleaner routing tables.
756 
757    Remove it only when all the things will work!
758  */
759 
760 static int ipv6_get_mtu(struct net_device *dev)
761 {
762 	int mtu = IPV6_MIN_MTU;
763 	struct inet6_dev *idev;
764 
765 	idev = in6_dev_get(dev);
766 	if (idev) {
767 		mtu = idev->cnf.mtu6;
768 		in6_dev_put(idev);
769 	}
770 	return mtu;
771 }
772 
773 int ipv6_get_hoplimit(struct net_device *dev)
774 {
775 	int hoplimit = ipv6_devconf.hop_limit;
776 	struct inet6_dev *idev;
777 
778 	idev = in6_dev_get(dev);
779 	if (idev) {
780 		hoplimit = idev->cnf.hop_limit;
781 		in6_dev_put(idev);
782 	}
783 	return hoplimit;
784 }
785 
786 /*
787  *
788  */
789 
/*
 * ip6_route_add - build a route from @rtmsg and insert it into the table.
 * @rtmsg:   route description; rtmsg_metric is rewritten in place to
 *           IP6_RT_PRIO_USER when it is 0
 * @nlh:     originating netlink header, or NULL; when present its payload
 *           supplies the route protocol, otherwise RTPROT_BOOT is used
 * @_rtattr: array of struct rtattr pointers, or NULL; only the
 *           RTA_METRICS slot is read here
 *
 * Returns the result of ip6_ins_rt() on the normal path, or a negative
 * errno:
 *   -EINVAL       prefix length > 128, a source prefix without
 *                 CONFIG_IPV6_SUBTREES, a non-unicast gateway, a gateway
 *                 route without a device or on loopback, or a metric type
 *                 above RTAX_MAX
 *   -ENODEV       unknown ifindex, device without inet6_dev, or no device
 *                 could be determined
 *   -ENOMEM       route allocation failed
 *   -EHOSTUNREACH a non-link-local gateway is not reachable through a
 *                 suitable non-gateway route on the same device
 *   or the error from the neighbour lookup.
 *
 * Reference handling: the dev and idev references gathered here are
 * handed over to the route just before insertion; on every error path
 * they are dropped at `out` together with the half-built route.
 */
790 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
791 {
792 	int err;
793 	struct rtmsg *r;
794 	struct rtattr **rta;
795 	struct rt6_info *rt = NULL;
796 	struct net_device *dev = NULL;
797 	struct inet6_dev *idev = NULL;
798 	int addr_type;
799 
800 	rta = (struct rtattr **) _rtattr;
801 
802 	if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
803 		return -EINVAL;
804 #ifndef CONFIG_IPV6_SUBTREES
805 	if (rtmsg->rtmsg_src_len)
806 		return -EINVAL;
807 #endif
808 	if (rtmsg->rtmsg_ifindex) {
809 		err = -ENODEV;
810 		dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
811 		if (!dev)
812 			goto out;
813 		idev = in6_dev_get(dev);
814 		if (!idev)
815 			goto out;
816 	}
817 
818 	if (rtmsg->rtmsg_metric == 0)
819 		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
820 
821 	rt = ip6_dst_alloc();
822 
823 	if (rt == NULL) {
824 		err = -ENOMEM;
825 		goto out;
826 	}
827 
828 	rt->u.dst.obsolete = -1;
829 	rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
830 	if (nlh && (r = NLMSG_DATA(nlh))) {
831 		rt->rt6i_protocol = r->rtm_protocol;
832 	} else {
833 		rt->rt6i_protocol = RTPROT_BOOT;
834 	}
835 
836 	addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
837 
838 	if (addr_type & IPV6_ADDR_MULTICAST)
839 		rt->u.dst.input = ip6_mc_input;
840 	else
841 		rt->u.dst.input = ip6_forward;
842 
843 	rt->u.dst.output = ip6_output;
844 
845 	ipv6_addr_prefix(&rt->rt6i_dst.addr,
846 			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
847 	rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
848 	if (rt->rt6i_dst.plen == 128)
849 	       rt->u.dst.flags = DST_HOST;
850 
851 #ifdef CONFIG_IPV6_SUBTREES
852 	ipv6_addr_prefix(&rt->rt6i_src.addr,
853 			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
854 	rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
855 #endif
856 
857 	rt->rt6i_metric = rtmsg->rtmsg_metric;
858 
859 	/* We cannot add true routes via loopback here,
860 	   they would result in kernel looping; promote them to reject routes
861 	 */
862 	if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
863 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
864 		/* hold loopback dev/idev if we haven't done so. */
865 		if (dev != &loopback_dev) {
866 			if (dev) {
867 				dev_put(dev);
868 				in6_dev_put(idev);
869 			}
870 			dev = &loopback_dev;
871 			dev_hold(dev);
872 			idev = in6_dev_get(dev);
873 			if (!idev) {
874 				err = -ENODEV;
875 				goto out;
876 			}
877 		}
878 		rt->u.dst.output = ip6_pkt_discard_out;
879 		rt->u.dst.input = ip6_pkt_discard;
880 		rt->u.dst.error = -ENETUNREACH;
881 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
882 		goto install_route;
883 	}
884 
885 	if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
886 		struct in6_addr *gw_addr;
887 		int gwa_type;
888 
889 		gw_addr = &rtmsg->rtmsg_gateway;
890 		ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
891 		gwa_type = ipv6_addr_type(gw_addr);
892 
893 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
894 			struct rt6_info *grt;
895 
896 			/* IPv6 strictly inhibits using not link-local
897 			   addresses as nexthop address.
898 			   Otherwise, router will not able to send redirects.
899 			   It is very good, but in some (rare!) circumstances
900 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
901 			   some exceptions. --ANK
902 			 */
903 			err = -EINVAL;
904 			if (!(gwa_type&IPV6_ADDR_UNICAST))
905 				goto out;
906 
			/* The gateway itself must be reachable via an existing route. */
907 			grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
908 
909 			err = -EHOSTUNREACH;
910 			if (grt == NULL)
911 				goto out;
912 			if (dev) {
913 				if (dev != grt->rt6i_dev) {
914 					dst_release(&grt->u.dst);
915 					goto out;
916 				}
917 			} else {
				/* No device given: inherit it from the route to the gateway. */
918 				dev = grt->rt6i_dev;
919 				idev = grt->rt6i_idev;
920 				dev_hold(dev);
921 				in6_dev_hold(grt->rt6i_idev);
922 			}
			/* Only a route that is not itself gatewayed is acceptable. */
923 			if (!(grt->rt6i_flags&RTF_GATEWAY))
924 				err = 0;
925 			dst_release(&grt->u.dst);
926 
927 			if (err)
928 				goto out;
929 		}
930 		err = -EINVAL;
931 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
932 			goto out;
933 	}
934 
935 	err = -ENODEV;
936 	if (dev == NULL)
937 		goto out;
938 
939 	if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
940 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
941 		if (IS_ERR(rt->rt6i_nexthop)) {
942 			err = PTR_ERR(rt->rt6i_nexthop);
943 			rt->rt6i_nexthop = NULL;
944 			goto out;
945 		}
946 	}
947 
948 	rt->rt6i_flags = rtmsg->rtmsg_flags;
949 
950 install_route:
	/* Copy caller-supplied metrics; type 0 is ignored, types above RTAX_MAX are rejected. */
951 	if (rta && rta[RTA_METRICS-1]) {
952 		int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
953 		struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
954 
955 		while (RTA_OK(attr, attrlen)) {
956 			unsigned flavor = attr->rta_type;
957 			if (flavor) {
958 				if (flavor > RTAX_MAX) {
959 					err = -EINVAL;
960 					goto out;
961 				}
962 				rt->u.dst.metrics[flavor-1] =
963 					*(u32 *)RTA_DATA(attr);
964 			}
965 			attr = RTA_NEXT(attr, attrlen);
966 		}
967 	}
968 
	/* Fill in defaults for metrics that were not supplied. */
969 	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
970 		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
971 	if (!rt->u.dst.metrics[RTAX_MTU-1])
972 		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
973 	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
974 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
975 	rt->u.dst.dev = dev;
976 	rt->rt6i_idev = idev;
977 	return ip6_ins_rt(rt, nlh, _rtattr);
978 
979 out:
980 	if (dev)
981 		dev_put(dev);
982 	if (idev)
983 		in6_dev_put(idev);
984 	if (rt)
985 		dst_free((struct dst_entry *) rt);
986 	return err;
987 }
988 
989 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
990 {
991 	int err;
992 
993 	write_lock_bh(&rt6_lock);
994 
995 	rt6_reset_dflt_pointer(NULL);
996 
997 	err = fib6_del(rt, nlh, _rtattr);
998 	dst_release(&rt->u.dst);
999 
1000 	write_unlock_bh(&rt6_lock);
1001 
1002 	return err;
1003 }
1004 
1005 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
1006 {
1007 	struct fib6_node *fn;
1008 	struct rt6_info *rt;
1009 	int err = -ESRCH;
1010 
1011 	read_lock_bh(&rt6_lock);
1012 
1013 	fn = fib6_locate(&ip6_routing_table,
1014 			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1015 			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1016 
1017 	if (fn) {
1018 		for (rt = fn->leaf; rt; rt = rt->u.next) {
1019 			if (rtmsg->rtmsg_ifindex &&
1020 			    (rt->rt6i_dev == NULL ||
1021 			     rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1022 				continue;
1023 			if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1024 			    !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1025 				continue;
1026 			if (rtmsg->rtmsg_metric &&
1027 			    rtmsg->rtmsg_metric != rt->rt6i_metric)
1028 				continue;
1029 			dst_hold(&rt->u.dst);
1030 			read_unlock_bh(&rt6_lock);
1031 
1032 			return ip6_del_rt(rt, nlh, _rtattr);
1033 		}
1034 	}
1035 	read_unlock_bh(&rt6_lock);
1036 
1037 	return err;
1038 }
1039 
1040 /*
1041  *	Handle redirects
1042  */
/*
 * rt6_redirect - process a validated redirect for @dest.
 * @dest:    destination the redirect applies to
 * @saddr:   source address of the redirect (the router that sent it)
 * @neigh:   neighbour entry of the new next hop
 * @lladdr:  link-layer address passed on to neigh_update()
 * @on_link: non-zero if the destination itself is the new next hop
 *
 * The redirect is accepted only if the current route to @dest uses the
 * same device as @neigh, is a gateway route, and @saddr is either that
 * route's gateway or, for a default route, the gateway of some route on
 * the root leaf.  On acceptance the neighbour is updated, the old route
 * is confirmed, and a /128 RTF_CACHE|RTF_DYNAMIC clone pointing at @neigh
 * is inserted.  If the old route was itself a cache entry it is deleted.
 *
 * All references taken here are dropped before returning.
 */
1043 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1044 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1045 {
1046 	struct rt6_info *rt, *nrt;
1047 
1048 	/* Locate old route to this destination. */
1049 	rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1050 
1051 	if (rt == NULL)
1052 		return;
1053 
1054 	if (neigh->dev != rt->rt6i_dev)
1055 		goto out;
1056 
1057 	/*
1058 	 * Current route is on-link; redirect is always invalid.
1059 	 *
1060 	 * Seems, previous statement is not true. It could
1061 	 * be node, which looks for us as on-link (f.e. proxy ndisc)
1062 	 * But then router serving it might decide, that we should
1063 	 * know truth 8)8) --ANK (980726).
1064 	 */
1065 	if (!(rt->rt6i_flags&RTF_GATEWAY))
1066 		goto out;
1067 
1068 	/*
1069 	 *	RFC 2461 specifies that redirects should only be
1070 	 *	accepted if they come from the nexthop to the target.
1071 	 *	Due to the way default routers are chosen, this notion
1072 	 *	is a bit fuzzy and one might need to check all default
1073 	 *	routers.
1074 	 */
1075 	if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1076 		if (rt->rt6i_flags & RTF_DEFAULT) {
1077 			struct rt6_info *rt1;
1078 
			/*
			 * NOTE(review): plain read_lock() here, unlike the _bh
			 * variants used elsewhere in this file - presumably the
			 * caller already runs with BH disabled; confirm.
			 */
1079 			read_lock(&rt6_lock);
1080 			for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1081 				if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
					/* Swap our reference from rt over to rt1. */
1082 					dst_hold(&rt1->u.dst);
1083 					dst_release(&rt->u.dst);
1084 					read_unlock(&rt6_lock);
1085 					rt = rt1;
1086 					goto source_ok;
1087 				}
1088 			}
1089 			read_unlock(&rt6_lock);
1090 		}
1091 		if (net_ratelimit())
1092 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1093 			       "for redirect target\n");
1094 		goto out;
1095 	}
1096 
1097 source_ok:
1098 
1099 	/*
1100 	 *	We have finally decided to accept it.
1101 	 */
1102 
1103 	neigh_update(neigh, lladdr, NUD_STALE,
1104 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1105 		     NEIGH_UPDATE_F_OVERRIDE|
1106 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1107 				     NEIGH_UPDATE_F_ISROUTER))
1108 		     );
1109 
1110 	/*
1111 	 * Redirect received -> path was valid.
1112 	 * Look, redirects are sent only in response to data packets,
1113 	 * so that this nexthop apparently is reachable. --ANK
1114 	 */
1115 	dst_confirm(&rt->u.dst);
1116 
1117 	/* Duplicate redirect: silently ignore. */
1118 	if (neigh == rt->u.dst.neighbour)
1119 		goto out;
1120 
1121 	nrt = ip6_rt_copy(rt);
1122 	if (nrt == NULL)
1123 		goto out;
1124 
1125 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1126 	if (on_link)
1127 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1128 
1129 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1130 	nrt->rt6i_dst.plen = 128;
1131 	nrt->u.dst.flags |= DST_HOST;
1132 
1133 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1134 	nrt->rt6i_nexthop = neigh_clone(neigh);
1135 	/* Reset pmtu, it may be better */
1136 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1137 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1138 
1139 	if (ip6_ins_rt(nrt, NULL, NULL))
1140 		goto out;
1141 
	/* ip6_del_rt() drops our reference on rt, hence the direct return. */
1142 	if (rt->rt6i_flags&RTF_CACHE) {
1143 		ip6_del_rt(rt, NULL, NULL);
1144 		return;
1145 	}
1146 
1147 out:
1148         dst_release(&rt->u.dst);
1149 	return;
1150 }
1151 
1152 /*
1153  *	Handle ICMP "packet too big" messages
1154  *	i.e. Path MTU discovery
1155  */
1156 
/*
 * rt6_pmtu_discovery - apply the MTU reported by a "packet too big" message.
 * @daddr, @saddr: destination and source of the affected flow
 * @dev:           device used as the lookup's interface hint (non-strict)
 * @pmtu:          reported path MTU
 *
 * Nothing is done if no route is found or @pmtu is not smaller than the
 * route's current MTU.  Values below IPV6_MIN_MTU are raised to
 * IPV6_MIN_MTU and the ALLFRAG feature bit is set on the affected route.
 *
 * The reduced MTU is stored on a host cache route that expires after
 * ip6_rt_mtu_expires:
 *   - an existing RTF_CACHE route is updated in place;
 *   - a route without next hop (and not RTF_NONEXTHOP) is cloned through
 *     rt6_cow();
 *   - any other route is copied with ip6_rt_copy() and inserted; the
 *     return value of that ip6_ins_rt() call is not checked.
 */
1157 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1158 			struct net_device *dev, u32 pmtu)
1159 {
1160 	struct rt6_info *rt, *nrt;
1161 	int allfrag = 0;
1162 
1163 	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1164 	if (rt == NULL)
1165 		return;
1166 
1167 	if (pmtu >= dst_mtu(&rt->u.dst))
1168 		goto out;
1169 
1170 	if (pmtu < IPV6_MIN_MTU) {
1171 		/*
1172 		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1173 		 * MTU (1280) and a fragment header should always be included
1174 		 * after a node receiving Too Big message reporting PMTU is
1175 		 * less than the IPv6 Minimum Link MTU.
1176 		 */
1177 		pmtu = IPV6_MIN_MTU;
1178 		allfrag = 1;
1179 	}
1180 
1181 	/* New mtu received -> path was valid.
1182 	   They are sent only in response to data packets,
1183 	   so that this nexthop apparently is reachable. --ANK
1184 	 */
1185 	dst_confirm(&rt->u.dst);
1186 
1187 	/* Host route. If it is static, it would be better
1188 	   not to override it, but add new one, so that
1189 	   when cache entry will expire old pmtu
1190 	   would return automatically.
1191 	 */
1192 	if (rt->rt6i_flags & RTF_CACHE) {
1193 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1194 		if (allfrag)
1195 			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1196 		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1197 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1198 		goto out;
1199 	}
1200 
1201 	/* Network route.
1202 	   Two cases are possible:
1203 	   1. It is connected route. Action: COW
1204 	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1205 	 */
1206 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1207 		nrt = rt6_cow(rt, daddr, saddr);
		/* rt6_cow() reports failure through dst.error on the returned route. */
1208 		if (!nrt->u.dst.error) {
1209 			nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1210 			if (allfrag)
1211 				nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1212 			/* According to RFC 1981, detecting PMTU increase shouldn't be
1213 			   happened within 5 mins, the recommended timer is 10 mins.
1214 			   Here this route expiration time is set to ip6_rt_mtu_expires
1215 			   which is 10 mins. After 10 mins the decreased pmtu is expired
1216 			   and detecting PMTU increase will be automatically happened.
1217 			 */
1218 			dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1219 			nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1220 		}
		/* Drop the reference rt6_cow() handed back. */
1221 		dst_release(&nrt->u.dst);
1222 	} else {
1223 		nrt = ip6_rt_copy(rt);
1224 		if (nrt == NULL)
1225 			goto out;
1226 		ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1227 		nrt->rt6i_dst.plen = 128;
1228 		nrt->u.dst.flags |= DST_HOST;
1229 		nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1230 		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1231 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1232 		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1233 		if (allfrag)
1234 			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1235 		ip6_ins_rt(nrt, NULL, NULL);
1236 	}
1237 
1238 out:
1239 	dst_release(&rt->u.dst);
1240 }
1241 
1242 /*
1243  *	Misc support functions
1244  */
1245 
1246 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1247 {
1248 	struct rt6_info *rt = ip6_dst_alloc();
1249 
1250 	if (rt) {
1251 		rt->u.dst.input = ort->u.dst.input;
1252 		rt->u.dst.output = ort->u.dst.output;
1253 
1254 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1255 		rt->u.dst.dev = ort->u.dst.dev;
1256 		if (rt->u.dst.dev)
1257 			dev_hold(rt->u.dst.dev);
1258 		rt->rt6i_idev = ort->rt6i_idev;
1259 		if (rt->rt6i_idev)
1260 			in6_dev_hold(rt->rt6i_idev);
1261 		rt->u.dst.lastuse = jiffies;
1262 		rt->rt6i_expires = 0;
1263 
1264 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1265 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1266 		rt->rt6i_metric = 0;
1267 
1268 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1269 #ifdef CONFIG_IPV6_SUBTREES
1270 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1271 #endif
1272 	}
1273 	return rt;
1274 }
1275 
1276 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1277 {
1278 	struct rt6_info *rt;
1279 	struct fib6_node *fn;
1280 
1281 	fn = &ip6_routing_table;
1282 
1283 	write_lock_bh(&rt6_lock);
1284 	for (rt = fn->leaf; rt; rt=rt->u.next) {
1285 		if (dev == rt->rt6i_dev &&
1286 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1287 			break;
1288 	}
1289 	if (rt)
1290 		dst_hold(&rt->u.dst);
1291 	write_unlock_bh(&rt6_lock);
1292 	return rt;
1293 }
1294 
1295 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1296 				     struct net_device *dev)
1297 {
1298 	struct in6_rtmsg rtmsg;
1299 
1300 	memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1301 	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1302 	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1303 	rtmsg.rtmsg_metric = 1024;
1304 	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1305 
1306 	rtmsg.rtmsg_ifindex = dev->ifindex;
1307 
1308 	ip6_route_add(&rtmsg, NULL, NULL);
1309 	return rt6_get_dflt_router(gwaddr, dev);
1310 }
1311 
1312 void rt6_purge_dflt_routers(void)
1313 {
1314 	struct rt6_info *rt;
1315 
1316 restart:
1317 	read_lock_bh(&rt6_lock);
1318 	for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1319 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1320 			dst_hold(&rt->u.dst);
1321 
1322 			rt6_reset_dflt_pointer(NULL);
1323 
1324 			read_unlock_bh(&rt6_lock);
1325 
1326 			ip6_del_rt(rt, NULL, NULL);
1327 
1328 			goto restart;
1329 		}
1330 	}
1331 	read_unlock_bh(&rt6_lock);
1332 }
1333 
1334 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1335 {
1336 	struct in6_rtmsg rtmsg;
1337 	int err;
1338 
1339 	switch(cmd) {
1340 	case SIOCADDRT:		/* Add a route */
1341 	case SIOCDELRT:		/* Delete a route */
1342 		if (!capable(CAP_NET_ADMIN))
1343 			return -EPERM;
1344 		err = copy_from_user(&rtmsg, arg,
1345 				     sizeof(struct in6_rtmsg));
1346 		if (err)
1347 			return -EFAULT;
1348 
1349 		rtnl_lock();
1350 		switch (cmd) {
1351 		case SIOCADDRT:
1352 			err = ip6_route_add(&rtmsg, NULL, NULL);
1353 			break;
1354 		case SIOCDELRT:
1355 			err = ip6_route_del(&rtmsg, NULL, NULL);
1356 			break;
1357 		default:
1358 			err = -EINVAL;
1359 		}
1360 		rtnl_unlock();
1361 
1362 		return err;
1363 	};
1364 
1365 	return -EINVAL;
1366 }
1367 
1368 /*
1369  *	Drop the packet on the floor
1370  */
1371 
1372 int ip6_pkt_discard(struct sk_buff *skb)
1373 {
1374 	IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1375 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1376 	kfree_skb(skb);
1377 	return 0;
1378 }
1379 
1380 int ip6_pkt_discard_out(struct sk_buff *skb)
1381 {
1382 	skb->dev = skb->dst->dev;
1383 	return ip6_pkt_discard(skb);
1384 }
1385 
1386 /*
1387  *	Allocate a dst for local (unicast / anycast) address.
1388  */
1389 
1390 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1391 				    const struct in6_addr *addr,
1392 				    int anycast)
1393 {
1394 	struct rt6_info *rt = ip6_dst_alloc();
1395 
1396 	if (rt == NULL)
1397 		return ERR_PTR(-ENOMEM);
1398 
1399 	dev_hold(&loopback_dev);
1400 	in6_dev_hold(idev);
1401 
1402 	rt->u.dst.flags = DST_HOST;
1403 	rt->u.dst.input = ip6_input;
1404 	rt->u.dst.output = ip6_output;
1405 	rt->rt6i_dev = &loopback_dev;
1406 	rt->rt6i_idev = idev;
1407 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1408 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1409 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1410 	rt->u.dst.obsolete = -1;
1411 
1412 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1413 	if (!anycast)
1414 		rt->rt6i_flags |= RTF_LOCAL;
1415 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1416 	if (rt->rt6i_nexthop == NULL) {
1417 		dst_free((struct dst_entry *) rt);
1418 		return ERR_PTR(-ENOMEM);
1419 	}
1420 
1421 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1422 	rt->rt6i_dst.plen = 128;
1423 
1424 	atomic_set(&rt->u.dst.__refcnt, 1);
1425 
1426 	return rt;
1427 }
1428 
1429 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1430 {
1431 	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1432 	    rt != &ip6_null_entry) {
1433 		RT6_TRACE("deleted by ifdown %p\n", rt);
1434 		return -1;
1435 	}
1436 	return 0;
1437 }
1438 
1439 void rt6_ifdown(struct net_device *dev)
1440 {
1441 	write_lock_bh(&rt6_lock);
1442 	fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1443 	write_unlock_bh(&rt6_lock);
1444 }
1445 
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* its new MTU */
};
1451 
1452 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1453 {
1454 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1455 	struct inet6_dev *idev;
1456 
1457 	/* In IPv6 pmtu discovery is not optional,
1458 	   so that RTAX_MTU lock cannot disable it.
1459 	   We still use this lock to block changes
1460 	   caused by addrconf/ndisc.
1461 	*/
1462 
1463 	idev = __in6_dev_get(arg->dev);
1464 	if (idev == NULL)
1465 		return 0;
1466 
1467 	/* For administrative MTU increase, there is no way to discover
1468 	   IPv6 PMTU increase, so PMTU increase should be updated here.
1469 	   Since RFC 1981 doesn't include administrative MTU increase
1470 	   update PMTU increase is a MUST. (i.e. jumbo frame)
1471 	 */
1472 	/*
1473 	   If new MTU is less than route PMTU, this new MTU will be the
1474 	   lowest MTU in the path, update the route PMTU to reflect PMTU
1475 	   decreases; if new MTU is greater than route PMTU, and the
1476 	   old MTU is the lowest MTU in the path, update the route PMTU
1477 	   to reflect the increase. In this case if the other nodes' MTU
1478 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
1479 	   PMTU discouvery.
1480 	 */
1481 	if (rt->rt6i_dev == arg->dev &&
1482 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1483             (dst_mtu(&rt->u.dst) > arg->mtu ||
1484              (dst_mtu(&rt->u.dst) < arg->mtu &&
1485 	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1486 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1487 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1488 	return 0;
1489 }
1490 
1491 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1492 {
1493 	struct rt6_mtu_change_arg arg;
1494 
1495 	arg.dev = dev;
1496 	arg.mtu = mtu;
1497 	read_lock_bh(&rt6_lock);
1498 	fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1499 	read_unlock_bh(&rt6_lock);
1500 }
1501 
1502 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1503 			      struct in6_rtmsg *rtmsg)
1504 {
1505 	memset(rtmsg, 0, sizeof(*rtmsg));
1506 
1507 	rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1508 	rtmsg->rtmsg_src_len = r->rtm_src_len;
1509 	rtmsg->rtmsg_flags = RTF_UP;
1510 	if (r->rtm_type == RTN_UNREACHABLE)
1511 		rtmsg->rtmsg_flags |= RTF_REJECT;
1512 
1513 	if (rta[RTA_GATEWAY-1]) {
1514 		if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1515 			return -EINVAL;
1516 		memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1517 		rtmsg->rtmsg_flags |= RTF_GATEWAY;
1518 	}
1519 	if (rta[RTA_DST-1]) {
1520 		if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1521 			return -EINVAL;
1522 		memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1523 	}
1524 	if (rta[RTA_SRC-1]) {
1525 		if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1526 			return -EINVAL;
1527 		memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1528 	}
1529 	if (rta[RTA_OIF-1]) {
1530 		if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1531 			return -EINVAL;
1532 		memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1533 	}
1534 	if (rta[RTA_PRIORITY-1]) {
1535 		if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1536 			return -EINVAL;
1537 		memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1538 	}
1539 	return 0;
1540 }
1541 
1542 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1543 {
1544 	struct rtmsg *r = NLMSG_DATA(nlh);
1545 	struct in6_rtmsg rtmsg;
1546 
1547 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1548 		return -EINVAL;
1549 	return ip6_route_del(&rtmsg, nlh, arg);
1550 }
1551 
1552 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1553 {
1554 	struct rtmsg *r = NLMSG_DATA(nlh);
1555 	struct in6_rtmsg rtmsg;
1556 
1557 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1558 		return -EINVAL;
1559 	return ip6_route_add(&rtmsg, nlh, arg);
1560 }
1561 
/* Per-call state handed to rt6_dump_route() through the tree walker. */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* message buffer being filled */
	struct netlink_callback *cb;	/* netlink dump callback state */
};
1567 
/*
 * Append one rtnetlink route message describing @rt to @skb.
 *
 * @dst/@src: when non-NULL, reported as /128 destination/source instead
 *            of the route's own keys (source only with CONFIG_IPV6_SUBTREES)
 * @iif:      when non-zero, reported as RTA_IIF
 * @type:     netlink message type to emit
 * @pid/@seq: netlink header values; a zero @pid is replaced by the pid
 *            of @in_nlh when that is given
 * @prefix:   when non-zero, routes without RTF_PREFIX_RT are skipped
 *
 * Returns skb->len on success, 1 when the route was skipped because of
 * @prefix, and -1 after trimming @skb back to its original length when
 * the message could not be built.
 */
static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst,
			 struct in6_addr *src,
			 int iif,
			 int type, u32 pid, u32 seq,
			 struct nlmsghdr *in_nlh, int prefix)
{
	struct rtmsg *rtm;
	struct nlmsghdr  *nlh;
	/* Remember where the message starts so a failure can be undone. */
	unsigned char	 *b = skb->tail;
	struct rta_cacheinfo ci;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	if (!pid && in_nlh) {
		pid = in_nlh->nlmsg_pid;
	}

	/*
	 * NOTE(review): NLMSG_PUT and RTA_PUT presumably jump to the
	 * nlmsg_failure / rtattr_failure labels at the end of this
	 * function when the skb has no room left - confirm against the
	 * macro definitions.
	 */
	nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
	rtm = NLMSG_DATA(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	rtm->rtm_table = RT_TABLE_MAIN;
	/* Route type: reject routes first, then loopback-device routes. */
	if (rt->rt6i_flags&RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	/*
	 * The stored protocol is overridden according to the route flags.
	 * NOTE(review): RTF_ADDRCONF is tested before RTF_DEFAULT, so a
	 * route carrying both flags is reported as RTPROT_KERNEL rather
	 * than RTPROT_RA - confirm this is intended.
	 */
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags&RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags&RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags&RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* An explicit @dst overrides the route key and is always a /128. */
	if (dst) {
		RTA_PUT(skb, RTA_DST, 16, dst);
	        rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		RTA_PUT(skb, RTA_SRC, 16, src);
	        rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	/*
	 * With an input interface, report it; otherwise, when a
	 * destination was supplied, report the source address that
	 * ipv6_get_saddr() picks for it (if that lookup succeeds).
	 */
	if (iif)
		RTA_PUT(skb, RTA_IIF, 4, &iif);
	else if (dst) {
		struct in6_addr saddr_buf;
		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}
	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
		goto rtattr_failure;
	/* The gateway is taken from the attached neighbour entry's key. */
	if (rt->u.dst.neighbour)
		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
	if (rt->u.dst.dev)
		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
	/* Cache info: times are converted from jiffies to clock ticks. */
	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
	if (rt->rt6i_expires)
		ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
	else
		ci.rta_expires = 0;
	ci.rta_used = rt->u.dst.__use;
	ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
	ci.rta_error = rt->u.dst.error;
	ci.rta_id = 0;
	ci.rta_ts = 0;
	ci.rta_tsage = 0;
	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
	/* Final message length: everything appended since entry. */
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	/* Drop the partially built message again. */
	skb_trim(skb, b - skb->data);
	return -1;
}
1663 
1664 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1665 {
1666 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1667 	int prefix;
1668 
1669 	if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1670 		struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1671 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1672 	} else
1673 		prefix = 0;
1674 
1675 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1676 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1677 		     NULL, prefix);
1678 }
1679 
1680 static int fib6_dump_node(struct fib6_walker_t *w)
1681 {
1682 	int res;
1683 	struct rt6_info *rt;
1684 
1685 	for (rt = w->leaf; rt; rt = rt->u.next) {
1686 		res = rt6_dump_route(rt, w->args);
1687 		if (res < 0) {
1688 			/* Frame is full, suspend walking */
1689 			w->leaf = rt;
1690 			return 1;
1691 		}
1692 		BUG_TRAP(res!=0);
1693 	}
1694 	w->leaf = NULL;
1695 	return 0;
1696 }
1697 
1698 static void fib6_dump_end(struct netlink_callback *cb)
1699 {
1700 	struct fib6_walker_t *w = (void*)cb->args[0];
1701 
1702 	if (w) {
1703 		cb->args[0] = 0;
1704 		fib6_walker_unlink(w);
1705 		kfree(w);
1706 	}
1707 	if (cb->args[1]) {
1708 		cb->done = (void*)cb->args[1];
1709 		cb->args[1] = 0;
1710 	}
1711 }
1712 
1713 static int fib6_dump_done(struct netlink_callback *cb)
1714 {
1715 	fib6_dump_end(cb);
1716 	return cb->done(cb);
1717 }
1718 
/*
 * Netlink dump entry point for the IPv6 routing table.
 *
 * The tree walker is kept in cb->args[0] between calls and the
 * caller's original done callback in cb->args[1].  The first call
 * allocates and starts the walker; later calls continue it.
 *
 * Returns a negative value on error, otherwise skb->len.
 */
int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct rt6_rtnl_dump_arg arg;
	struct fib6_walker_t *w;
	int res;

	arg.skb = skb;
	arg.cb = cb;

	w = (void*)cb->args[0];
	if (w == NULL) {
		/* New dump:
		 *
		 * 1. hook callback destructor.
		 */
		cb->args[1] = (long)cb->done;
		cb->done = fib6_dump_done;

		/*
		 * 2. allocate and initialize walker.
		 */
		w = kmalloc(sizeof(*w), GFP_ATOMIC);
		if (w == NULL)
			return -ENOMEM;
		RT6_TRACE("dump<%p", w);
		memset(w, 0, sizeof(*w));
		w->root = &ip6_routing_table;
		w->func = fib6_dump_node;
		/* arg lives on this stack frame, so it is set again on every call. */
		w->args = &arg;
		cb->args[0] = (long)w;
		read_lock_bh(&rt6_lock);
		res = fib6_walk(w);
		read_unlock_bh(&rt6_lock);
	} else {
		/* Continuation: resume the walker saved by an earlier call. */
		w->args = &arg;
		read_lock_bh(&rt6_lock);
		res = fib6_walk_continue(w);
		read_unlock_bh(&rt6_lock);
	}
#if RT6_DEBUG >= 3
	if (res <= 0 && skb->len == 0)
		RT6_TRACE("%p>dump end\n", w);
#endif
	/* A non-negative walk result is replaced by the amount of data queued. */
	res = res < 0 ? res : skb->len;
	/* res < 0 is an error. (really, impossible)
	   res == 0 means that dump is complete, but skb still can contain data.
	   res > 0 dump is not complete, but frame is full.
	 */
	/* Destroy walker, if dump of this table is complete. */
	if (res <= 0)
		fib6_dump_end(cb);
	return res;
}
1772 
1773 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1774 {
1775 	struct rtattr **rta = arg;
1776 	int iif = 0;
1777 	int err = -ENOBUFS;
1778 	struct sk_buff *skb;
1779 	struct flowi fl;
1780 	struct rt6_info *rt;
1781 
1782 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1783 	if (skb == NULL)
1784 		goto out;
1785 
1786 	/* Reserve room for dummy headers, this skb can pass
1787 	   through good chunk of routing engine.
1788 	 */
1789 	skb->mac.raw = skb->data;
1790 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1791 
1792 	memset(&fl, 0, sizeof(fl));
1793 	if (rta[RTA_SRC-1])
1794 		ipv6_addr_copy(&fl.fl6_src,
1795 			       (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1796 	if (rta[RTA_DST-1])
1797 		ipv6_addr_copy(&fl.fl6_dst,
1798 			       (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1799 
1800 	if (rta[RTA_IIF-1])
1801 		memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1802 
1803 	if (iif) {
1804 		struct net_device *dev;
1805 		dev = __dev_get_by_index(iif);
1806 		if (!dev) {
1807 			err = -ENODEV;
1808 			goto out_free;
1809 		}
1810 	}
1811 
1812 	fl.oif = 0;
1813 	if (rta[RTA_OIF-1])
1814 		memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1815 
1816 	rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1817 
1818 	skb->dst = &rt->u.dst;
1819 
1820 	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1821 	err = rt6_fill_node(skb, rt,
1822 			    &fl.fl6_dst, &fl.fl6_src,
1823 			    iif,
1824 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1825 			    nlh->nlmsg_seq, nlh, 0);
1826 	if (err < 0) {
1827 		err = -EMSGSIZE;
1828 		goto out_free;
1829 	}
1830 
1831 	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1832 	if (err > 0)
1833 		err = 0;
1834 out:
1835 	return err;
1836 out_free:
1837 	kfree_skb(skb);
1838 	goto out;
1839 }
1840 
1841 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1842 {
1843 	struct sk_buff *skb;
1844 	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1845 
1846 	skb = alloc_skb(size, gfp_any());
1847 	if (!skb) {
1848 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1849 		return;
1850 	}
1851 	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
1852 		kfree_skb(skb);
1853 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1854 		return;
1855 	}
1856 	NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1857 	netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1858 }
1859 
1860 /*
1861  *	/proc
1862  */
1863 
1864 #ifdef CONFIG_PROC_FS
1865 
/* Length of one record, used to map a read offset onto whole records. */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested */
	int skip;	/* records skipped so far */
	int len;	/* bytes written to buffer so far */
};
1876 
1877 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1878 {
1879 	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1880 	int i;
1881 
1882 	if (arg->skip < arg->offset / RT6_INFO_LEN) {
1883 		arg->skip++;
1884 		return 0;
1885 	}
1886 
1887 	if (arg->len >= arg->length)
1888 		return 0;
1889 
1890 	for (i=0; i<16; i++) {
1891 		sprintf(arg->buffer + arg->len, "%02x",
1892 			rt->rt6i_dst.addr.s6_addr[i]);
1893 		arg->len += 2;
1894 	}
1895 	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1896 			    rt->rt6i_dst.plen);
1897 
1898 #ifdef CONFIG_IPV6_SUBTREES
1899 	for (i=0; i<16; i++) {
1900 		sprintf(arg->buffer + arg->len, "%02x",
1901 			rt->rt6i_src.addr.s6_addr[i]);
1902 		arg->len += 2;
1903 	}
1904 	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1905 			    rt->rt6i_src.plen);
1906 #else
1907 	sprintf(arg->buffer + arg->len,
1908 		"00000000000000000000000000000000 00 ");
1909 	arg->len += 36;
1910 #endif
1911 
1912 	if (rt->rt6i_nexthop) {
1913 		for (i=0; i<16; i++) {
1914 			sprintf(arg->buffer + arg->len, "%02x",
1915 				rt->rt6i_nexthop->primary_key[i]);
1916 			arg->len += 2;
1917 		}
1918 	} else {
1919 		sprintf(arg->buffer + arg->len,
1920 			"00000000000000000000000000000000");
1921 		arg->len += 32;
1922 	}
1923 	arg->len += sprintf(arg->buffer + arg->len,
1924 			    " %08x %08x %08x %08x %8s\n",
1925 			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1926 			    rt->u.dst.__use, rt->rt6i_flags,
1927 			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
1928 	return 0;
1929 }
1930 
1931 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1932 {
1933 	struct rt6_proc_arg arg;
1934 	arg.buffer = buffer;
1935 	arg.offset = offset;
1936 	arg.length = length;
1937 	arg.skip = 0;
1938 	arg.len = 0;
1939 
1940 	read_lock_bh(&rt6_lock);
1941 	fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1942 	read_unlock_bh(&rt6_lock);
1943 
1944 	*start = buffer;
1945 	if (offset)
1946 		*start += offset % RT6_INFO_LEN;
1947 
1948 	arg.len -= offset % RT6_INFO_LEN;
1949 
1950 	if (arg.len > length)
1951 		arg.len = length;
1952 	if (arg.len < 0)
1953 		arg.len = 0;
1954 
1955 	return arg.len;
1956 }
1957 
1958 extern struct rt6_statistics rt6_stats;
1959 
1960 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1961 {
1962 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1963 		      rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1964 		      rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1965 		      rt6_stats.fib_rt_cache,
1966 		      atomic_read(&ip6_dst_ops.entries),
1967 		      rt6_stats.fib_discarded_routes);
1968 
1969 	return 0;
1970 }
1971 
/*
 * Open handler for the statistics file: bind rt6_stats_seq_show() to
 * @file through single_open(), with no private data.
 */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt6_stats_seq_show, NULL);
}
1976 
1977 static struct file_operations rt6_stats_seq_fops = {
1978 	.owner	 = THIS_MODULE,
1979 	.open	 = rt6_stats_seq_open,
1980 	.read	 = seq_read,
1981 	.llseek	 = seq_lseek,
1982 	.release = single_release,
1983 };
1984 #endif	/* CONFIG_PROC_FS */
1985 
1986 #ifdef CONFIG_SYSCTL
1987 
1988 static int flush_delay;
1989 
1990 static
1991 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1992 			      void __user *buffer, size_t *lenp, loff_t *ppos)
1993 {
1994 	if (write) {
1995 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1996 		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
1997 		return 0;
1998 	} else
1999 		return -EINVAL;
2000 }
2001 
2002 ctl_table ipv6_route_table[] = {
2003         {
2004 		.ctl_name	=	NET_IPV6_ROUTE_FLUSH,
2005 		.procname	=	"flush",
2006          	.data		=	&flush_delay,
2007 		.maxlen		=	sizeof(int),
2008 		.mode		=	0644,
2009          	.proc_handler	=	&ipv6_sysctl_rtcache_flush
2010 	},
2011 	{
2012 		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
2013 		.procname	=	"gc_thresh",
2014          	.data		=	&ip6_dst_ops.gc_thresh,
2015 		.maxlen		=	sizeof(int),
2016 		.mode		=	0644,
2017          	.proc_handler	=	&proc_dointvec,
2018 	},
2019 	{
2020 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
2021 		.procname	=	"max_size",
2022          	.data		=	&ip6_rt_max_size,
2023 		.maxlen		=	sizeof(int),
2024 		.mode		=	0644,
2025          	.proc_handler	=	&proc_dointvec,
2026 	},
2027 	{
2028 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2029 		.procname	=	"gc_min_interval",
2030          	.data		=	&ip6_rt_gc_min_interval,
2031 		.maxlen		=	sizeof(int),
2032 		.mode		=	0644,
2033          	.proc_handler	=	&proc_dointvec_jiffies,
2034 		.strategy	=	&sysctl_jiffies,
2035 	},
2036 	{
2037 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
2038 		.procname	=	"gc_timeout",
2039          	.data		=	&ip6_rt_gc_timeout,
2040 		.maxlen		=	sizeof(int),
2041 		.mode		=	0644,
2042          	.proc_handler	=	&proc_dointvec_jiffies,
2043 		.strategy	=	&sysctl_jiffies,
2044 	},
2045 	{
2046 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
2047 		.procname	=	"gc_interval",
2048          	.data		=	&ip6_rt_gc_interval,
2049 		.maxlen		=	sizeof(int),
2050 		.mode		=	0644,
2051          	.proc_handler	=	&proc_dointvec_jiffies,
2052 		.strategy	=	&sysctl_jiffies,
2053 	},
2054 	{
2055 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
2056 		.procname	=	"gc_elasticity",
2057          	.data		=	&ip6_rt_gc_elasticity,
2058 		.maxlen		=	sizeof(int),
2059 		.mode		=	0644,
2060          	.proc_handler	=	&proc_dointvec_jiffies,
2061 		.strategy	=	&sysctl_jiffies,
2062 	},
2063 	{
2064 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
2065 		.procname	=	"mtu_expires",
2066          	.data		=	&ip6_rt_mtu_expires,
2067 		.maxlen		=	sizeof(int),
2068 		.mode		=	0644,
2069          	.proc_handler	=	&proc_dointvec_jiffies,
2070 		.strategy	=	&sysctl_jiffies,
2071 	},
2072 	{
2073 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
2074 		.procname	=	"min_adv_mss",
2075          	.data		=	&ip6_rt_min_advmss,
2076 		.maxlen		=	sizeof(int),
2077 		.mode		=	0644,
2078          	.proc_handler	=	&proc_dointvec_jiffies,
2079 		.strategy	=	&sysctl_jiffies,
2080 	},
2081 	{
2082 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2083 		.procname	=	"gc_min_interval_ms",
2084          	.data		=	&ip6_rt_gc_min_interval,
2085 		.maxlen		=	sizeof(int),
2086 		.mode		=	0644,
2087          	.proc_handler	=	&proc_dointvec_ms_jiffies,
2088 		.strategy	=	&sysctl_ms_jiffies,
2089 	},
2090 	{ .ctl_name = 0 }
2091 };
2092 
2093 #endif
2094 
2095 void __init ip6_route_init(void)
2096 {
2097 	struct proc_dir_entry *p;
2098 
2099 	ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2100 						     sizeof(struct rt6_info),
2101 						     0, SLAB_HWCACHE_ALIGN,
2102 						     NULL, NULL);
2103 	if (!ip6_dst_ops.kmem_cachep)
2104 		panic("cannot create ip6_dst_cache");
2105 
2106 	fib6_init();
2107 #ifdef 	CONFIG_PROC_FS
2108 	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2109 	if (p)
2110 		p->owner = THIS_MODULE;
2111 
2112 	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2113 #endif
2114 #ifdef CONFIG_XFRM
2115 	xfrm6_init();
2116 #endif
2117 }
2118 
/*
 * Undo ip6_route_init(): remove the proc files, shut down the IPv6 xfrm
 * code when configured, purge the routing table, stop FIB garbage
 * collection and finally destroy the dst slab cache.
 */
void ip6_route_cleanup(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove("ipv6_route");
	proc_net_remove("rt6_stats");
#endif
#ifdef CONFIG_XFRM
	xfrm6_fini();
#endif
	/* NULL selects the routes of every device (see fib6_ifdown()). */
	rt6_ifdown(NULL);
	fib6_gc_cleanup();
	/* Last step: the cache is destroyed only after the purge above. */
	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
}
2132