xref: /openbmc/linux/net/ipv6/route.c (revision d5cb9783536a41df9f9cba5b0a1d78047ed787f7)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *	This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15 
16 /*	Changes:
17  *
18  *	YOSHIFUJI Hideaki @USAGI
19  *		reworked default router selection.
20  *		- respect outgoing interface
21  *		- select from (probably) reachable routers (i.e.
22  *		routers in REACHABLE, STALE, DELAY or PROBE states).
23  *		- always select the same router if it is (probably)
24  *		reachable.  otherwise, round-robin the list.
25  */
26 
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
40 
41 #ifdef 	CONFIG_PROC_FS
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
44 #endif
45 
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56 
57 #include <asm/uaccess.h>
58 
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62 
63 /* Set to 3 to get tracing. */
64 #define RT6_DEBUG 2
65 
66 #if RT6_DEBUG >= 3
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
69 #else
70 #define RDBG(x)
71 #define RT6_TRACE(x...) do { ; } while (0)
72 #endif
73 
74 
/* Routing-cache tunables.  Several feed the garbage collector in
 * ip6_dst_gc() below; presumably some are also exposed via sysctl
 * elsewhere — TODO confirm against the sysctl table. */
/* Force a GC pass once the dst count exceeds this (see ip6_dst_gc). */
75 static int ip6_rt_max_size = 4096;
/* Minimum spacing between two GC passes (jiffies). */
76 static int ip6_rt_gc_min_interval = HZ / 2;
/* Expiry horizon snaps back to half of this after a successful pass. */
77 static int ip6_rt_gc_timeout = 60*HZ;
/* Non-static: exported for use outside this file (no in-file user here). */
78 int ip6_rt_gc_interval = 30*HZ;
/* Each ip6_dst_gc() call decays 'expire' by expire >> elasticity. */
79 static int ip6_rt_gc_elasticity = 9;
/* Lifetime of clones created only to remember a learned path MTU. */
80 static int ip6_rt_mtu_expires = 10*60*HZ;
/* Floor for the advertised-MSS metric computed in ipv6_advmss(). */
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
82 
/* Forward declarations for the dst_ops callbacks and helpers defined below. */
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void		ip6_dst_destroy(struct dst_entry *);
87 static void		ip6_dst_ifdown(struct dst_entry *,
88 				       struct net_device *dev, int how);
89 static int		 ip6_dst_gc(void);
90 
91 static int		ip6_pkt_discard(struct sk_buff *skb);
92 static int		ip6_pkt_discard_out(struct sk_buff *skb);
93 static void		ip6_link_failure(struct sk_buff *skb);
94 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
95 
/* Hooks wiring IPv6 routes into the generic dst (destination cache)
 * layer; all callbacks are implemented later in this file. */
96 static struct dst_ops ip6_dst_ops = {
97 	.family			=	AF_INET6,
98 	.protocol		=	__constant_htons(ETH_P_IPV6),
99 	.gc			=	ip6_dst_gc,
100 	.gc_thresh		=	1024,
101 	.check			=	ip6_dst_check,
102 	.destroy		=	ip6_dst_destroy,
103 	.ifdown			=	ip6_dst_ifdown,
104 	.negative_advice	=	ip6_negative_advice,
105 	.link_failure		=	ip6_link_failure,
106 	.update_pmtu		=	ip6_rt_update_pmtu,
107 	.entry_size		=	sizeof(struct rt6_info),
108 };
109 
/*
 * The "null" route: a permanently referenced reject entry returned when
 * a lookup finds nothing usable.  Its input/output hooks discard the
 * packet and dst.error carries -ENETUNREACH back to the caller.
 */
110 struct rt6_info ip6_null_entry = {
111 	.u = {
112 		.dst = {
113 			.__refcnt	= ATOMIC_INIT(1),
114 			.__use		= 1,
115 			.dev		= &loopback_dev,
116 			.obsolete	= -1,
117 			.error		= -ENETUNREACH,
118 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
119 			.input		= ip6_pkt_discard,
120 			.output		= ip6_pkt_discard_out,
121 			.ops		= &ip6_dst_ops,
122 			.path		= (struct dst_entry*)&ip6_null_entry,
123 		}
124 	},
125 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
126 	.rt6i_metric	= ~(u32) 0,
127 	.rt6i_ref	= ATOMIC_INIT(1),
128 };
129 
/* Root of the (single) IPv6 routing tree.  Its leaf falls back to the
 * null/reject entry so a lookup always yields some rt6_info. */
130 struct fib6_node ip6_routing_table = {
131 	.leaf		= &ip6_null_entry,
132 	.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
133 };
134 
135 /* Protects all the ip6 fib */
136 
137 DEFINE_RWLOCK(rt6_lock);
138 
139 
140 /* allocate dst with ip6_dst_ops */
/* Hand out a fresh rt6_info whose embedded dst is bound to ip6_dst_ops. */
141 static __inline__ struct rt6_info *ip6_dst_alloc(void)
142 {
143 	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
144 }
145 
146 static void ip6_dst_destroy(struct dst_entry *dst)
147 {
148 	struct rt6_info *rt = (struct rt6_info *)dst;
149 	struct inet6_dev *idev = rt->rt6i_idev;
150 
151 	if (idev != NULL) {
152 		rt->rt6i_idev = NULL;
153 		in6_dev_put(idev);
154 	}
155 }
156 
/*
 * dst_ops .ifdown hook: @dev is going away.  If this route still holds
 * @dev's inet6_dev, re-point rt6i_idev at the loopback device's idev so
 * the reference never dangles.  Nothing to do when @dev is loopback
 * itself or the route references a different device.
 */
157 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
158 			   int how)
159 {
160 	struct rt6_info *rt = (struct rt6_info *)dst;
161 	struct inet6_dev *idev = rt->rt6i_idev;
162 
163 	if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
164 		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
165 		if (loopback_idev != NULL) {
166 			rt->rt6i_idev = loopback_idev;
			/* drop the ref on the departing device's idev */
167 			in6_dev_put(idev);
168 		}
169 	}
170 }
171 
172 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
173 {
174 	return (rt->rt6i_flags & RTF_EXPIRES &&
175 		time_after(jiffies, rt->rt6i_expires));
176 }
177 
178 /*
179  *	Route lookup. Any rt6_lock is implied.
180  */
181 
/*
 * Walk the rt6_info chain headed by @rt and pick the entry matching the
 * requested output interface @oif.
 *
 * With @oif set: an exact ifindex match wins immediately; a route via a
 * loopback device is remembered as a fallback.  If only loopback
 * candidates exist the remembered one is returned; with @strict set and
 * no candidate at all, the null (reject) entry is returned.  With
 * @oif == 0 the chain head is returned unchanged.
 * Any rt6_lock is implied (see comment above in the file).
 */
182 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
183 						    int oif,
184 						    int strict)
185 {
186 	struct rt6_info *local = NULL;
187 	struct rt6_info *sprt;
188 
189 	if (oif) {
190 		for (sprt = rt; sprt; sprt = sprt->u.next) {
191 			struct net_device *dev = sprt->rt6i_dev;
192 			if (dev->ifindex == oif)
193 				return sprt;
194 			if (dev->flags & IFF_LOOPBACK) {
				/* loopback route whose idev doesn't match @oif:
				 * only usable as a non-strict fallback */
195 				if (sprt->rt6i_idev == NULL ||
196 				    sprt->rt6i_idev->dev->ifindex != oif) {
197 					if (strict && oif)
198 						continue;
199 					if (local && (!oif ||
200 						      local->rt6i_idev->dev->ifindex == oif))
201 						continue;
202 				}
203 				local = sprt;
204 			}
205 		}
206 
207 		if (local)
208 			return local;
209 
210 		if (strict)
211 			return &ip6_null_entry;
212 	}
213 	return rt;
214 }
215 
216 /*
217  *	pointer to the last default router chosen. BH is disabled locally.
218  */
219 static struct rt6_info *rt6_dflt_pointer;
220 static DEFINE_SPINLOCK(rt6_dflt_lock);
221 
/*
 * Forget the cached default-router choice.  With @rt == NULL the cache
 * is cleared unconditionally; otherwise only when it currently points
 * at @rt (e.g. because @rt is about to be deleted).
 */
222 void rt6_reset_dflt_pointer(struct rt6_info *rt)
223 {
224 	spin_lock_bh(&rt6_dflt_lock);
225 	if (rt == NULL || rt == rt6_dflt_pointer) {
226 		RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
227 		rt6_dflt_pointer = NULL;
228 	}
229 	spin_unlock_bh(&rt6_dflt_lock);
230 }
231 
232 /* Default Router Selection (RFC 2461 6.3.6) */
/*
 * Score every default router on the chain headed by @rt and return the
 * best one.  Scoring: +8 matching (or unconstrained) @oif, +4 being the
 * previously chosen router, +3/+2/+1 by neighbour reachability state;
 * expired routes and INCOMPLETE neighbours are skipped.  A score >= 12
 * (oif match + previous choice + at least STALE) wins outright.  With
 * no reachable candidate the routers are round-robined; as a last
 * resort an addrconf RTF_DEFAULT leaf is used, else the null entry.
 * The winner is remembered in rt6_dflt_pointer under rt6_dflt_lock.
 */
233 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
234 {
235 	struct rt6_info *match = NULL;
236 	struct rt6_info *sprt;
237 	int mpri = 0;
238 
239 	for (sprt = rt; sprt; sprt = sprt->u.next) {
240 		struct neighbour *neigh;
241 		int m = 0;
242 
243 		if (!oif ||
244 		    (sprt->rt6i_dev &&
245 		     sprt->rt6i_dev->ifindex == oif))
246 			m += 8;
247 
248 		if (rt6_check_expired(sprt))
249 			continue;
250 
251 		if (sprt == rt6_dflt_pointer)
252 			m += 4;
253 
254 		if ((neigh = sprt->rt6i_nexthop) != NULL) {
255 			read_lock_bh(&neigh->lock);
256 			switch (neigh->nud_state) {
257 			case NUD_REACHABLE:
258 				m += 3;
259 				break;
260 
261 			case NUD_STALE:
262 			case NUD_DELAY:
263 			case NUD_PROBE:
264 				m += 2;
265 				break;
266 
267 			case NUD_NOARP:
268 			case NUD_PERMANENT:
269 				m += 1;
270 				break;
271 
272 			case NUD_INCOMPLETE:
273 			default:
274 				read_unlock_bh(&neigh->lock);
275 				continue;
276 			}
277 			read_unlock_bh(&neigh->lock);
278 		} else {
279 			continue;
280 		}
281 
282 		if (m > mpri || m >= 12) {
283 			match = sprt;
284 			mpri = m;
285 			if (m >= 12) {
286 				/* we choose the last default router if it
287 				 * is in (probably) reachable state.
288 				 * If route changed, we should do pmtu
289 				 * discovery. --yoshfuji
290 				 */
291 				break;
292 			}
293 		}
294 	}
295 
296 	spin_lock(&rt6_dflt_lock);
297 	if (!match) {
298 		/*
299 		 *	No default routers are known to be reachable.
300 		 *	SHOULD round robin
301 		 */
302 		if (rt6_dflt_pointer) {
			/* first try the routers after the previous choice,
			 * then wrap around to the head of the chain */
303 			for (sprt = rt6_dflt_pointer->u.next;
304 			     sprt; sprt = sprt->u.next) {
305 				if (sprt->u.dst.obsolete <= 0 &&
306 				    sprt->u.dst.error == 0 &&
307 				    !rt6_check_expired(sprt)) {
308 					match = sprt;
309 					break;
310 				}
311 			}
312 			for (sprt = rt;
313 			     !match && sprt;
314 			     sprt = sprt->u.next) {
315 				if (sprt->u.dst.obsolete <= 0 &&
316 				    sprt->u.dst.error == 0 &&
317 				    !rt6_check_expired(sprt)) {
318 					match = sprt;
319 					break;
320 				}
321 				if (sprt == rt6_dflt_pointer)
322 					break;
323 			}
324 		}
325 	}
326 
327 	if (match) {
328 		if (rt6_dflt_pointer != match)
329 			RT6_TRACE("changed default router: %p->%p\n",
330 				  rt6_dflt_pointer, match);
331 		rt6_dflt_pointer = match;
332 	}
333 	spin_unlock(&rt6_dflt_lock);
334 
335 	if (!match) {
336 		/*
337 		 * Last Resort: if no default routers found,
338 		 * use addrconf default route.
339 		 * We don't record this route.
340 		 */
341 		for (sprt = ip6_routing_table.leaf;
342 		     sprt; sprt = sprt->u.next) {
343 			if (!rt6_check_expired(sprt) &&
344 			    (sprt->rt6i_flags & RTF_DEFAULT) &&
345 			    (!oif ||
346 			     (sprt->rt6i_dev &&
347 			      sprt->rt6i_dev->ifindex == oif))) {
348 				match = sprt;
349 				break;
350 			}
351 		}
352 		if (!match) {
353 			/* no default route.  give up. */
354 			match = &ip6_null_entry;
355 		}
356 	}
357 
358 	return match;
359 }
360 
/*
 * Public lookup entry point: find the best route for (@daddr, @saddr,
 * @oif) under the reader side of rt6_lock, take a reference and bump
 * the usage counters.  Returns NULL (after dropping the reference) when
 * the chosen entry carries a dst error — e.g. the null/reject route.
 */
361 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
362 			    int oif, int strict)
363 {
364 	struct fib6_node *fn;
365 	struct rt6_info *rt;
366 
367 	read_lock_bh(&rt6_lock);
368 	fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
369 	rt = rt6_device_match(fn->leaf, oif, strict);
370 	dst_hold(&rt->u.dst);
371 	rt->u.dst.__use++;
372 	read_unlock_bh(&rt6_lock);
373 
374 	rt->u.dst.lastuse = jiffies;
375 	if (rt->u.dst.error == 0)
376 		return rt;
377 	dst_release(&rt->u.dst);
378 	return NULL;
379 }
380 
381 /* ip6_ins_rt is called with FREE rt6_lock.
382    It takes new route entry, the addition fails by any reason the
383    route is freed. In any case, if caller does not hold it, it may
384    be destroyed.
385  */
386 
/* Insert @rt into the fib under the writer side of rt6_lock and return
 * fib6_add()'s result. */
387 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
388 		void *_rtattr, struct netlink_skb_parms *req)
389 {
390 	int err;
391 
392 	write_lock_bh(&rt6_lock);
393 	err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
394 	write_unlock_bh(&rt6_lock);
395 
396 	return err;
397 }
398 
399 /* No rt6_lock! If COW failed, the function returns dead route entry
400    with dst->error set to errno value.
401  */
402 
/*
 * Clone @ort into a host (/128) RTF_CACHE entry for @daddr, resolve its
 * nexthop neighbour and insert it into the tree.  Never returns NULL:
 * if the clone cannot be allocated, the permanently-held null entry is
 * returned (with an extra reference); if insertion fails, the clone is
 * returned with dst.error set (see comment above).  The caller owns one
 * reference on whatever is returned.
 */
403 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
404 				struct in6_addr *saddr, struct netlink_skb_parms *req)
405 {
406 	int err;
407 	struct rt6_info *rt;
408 
409 	/*
410 	 *	Clone the route.
411 	 */
412 
413 	rt = ip6_rt_copy(ort);
414 
415 	if (rt) {
416 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
417 
		/* on-link route: the destination itself is the "gateway" */
418 		if (!(rt->rt6i_flags&RTF_GATEWAY))
419 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
420 
421 		rt->rt6i_dst.plen = 128;
422 		rt->rt6i_flags |= RTF_CACHE;
423 		rt->u.dst.flags |= DST_HOST;
424 
425 #ifdef CONFIG_IPV6_SUBTREES
426 		if (rt->rt6i_src.plen && saddr) {
427 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
428 			rt->rt6i_src.plen = 128;
429 		}
430 #endif
431 
432 		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
433 
		/* hold the reference we hand back to the caller before
		 * ip6_ins_rt() exposes the entry to the tree */
434 		dst_hold(&rt->u.dst);
435 
436 		err = ip6_ins_rt(rt, NULL, NULL, req);
437 		if (err == 0)
438 			return rt;
439 
440 		rt->u.dst.error = err;
441 
442 		return rt;
443 	}
444 	dst_hold(&ip6_null_entry.u.dst);
445 	return &ip6_null_entry;
446 }
447 
/*
 * BACKTRACK(): expanded only inside ip6_route_input()/ip6_route_output().
 * It deliberately uses the enclosing function's 'fn', 'rt' and 'strict'
 * variables and its 'out'/'restart' labels: when a strict lookup yielded
 * the null entry, climb towards the tree root and retry ('goto restart')
 * from the first ancestor carrying route info; give up ('goto out') once
 * the root is reached.
 */
448 #define BACKTRACK() \
449 if (rt == &ip6_null_entry && strict) { \
450        while ((fn = fn->parent) != NULL) { \
451 		if (fn->fn_flags & RTN_ROOT) { \
452 			dst_hold(&rt->u.dst); \
453 			goto out; \
454 		} \
455 		if (fn->fn_flags & RTN_RTINFO) \
456 			goto restart; \
457 	} \
458 }
459 
460 
/*
 * Resolve the route for an incoming packet and attach it at skb->dst.
 * Cached entries are used directly; a non-cache route lacking a nexthop
 * neighbour is cloned on demand via rt6_cow().  An -EEXIST race with a
 * concurrent inserter (possible because rt6_lock is dropped around the
 * clone) is retried up to three times ('attempts').
 */
461 void ip6_route_input(struct sk_buff *skb)
462 {
463 	struct fib6_node *fn;
464 	struct rt6_info *rt;
465 	int strict;
466 	int attempts = 3;
467 
468 	strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
469 
470 relookup:
471 	read_lock_bh(&rt6_lock);
472 
473 	fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
474 			 &skb->nh.ipv6h->saddr);
475 
476 restart:
477 	rt = fn->leaf;
478 
479 	if ((rt->rt6i_flags & RTF_CACHE)) {
480 		rt = rt6_device_match(rt, skb->dev->ifindex, strict);
481 		BACKTRACK();
482 		dst_hold(&rt->u.dst);
483 		goto out;
484 	}
485 
486 	rt = rt6_device_match(rt, skb->dev->ifindex, strict);
487 	BACKTRACK();
488 
489 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
490 		struct rt6_info *nrt;
491 		dst_hold(&rt->u.dst);
492 		read_unlock_bh(&rt6_lock);
493 
494 		nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
495 			      &skb->nh.ipv6h->saddr,
496 			      &NETLINK_CB(skb));
497 
498 		dst_release(&rt->u.dst);
499 		rt = nrt;
500 
501 		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
502 			goto out2;
503 
504 		/* Race condition! In the gap, when rt6_lock was
505 		   released someone could insert this route.  Relookup.
506 		*/
507 		dst_release(&rt->u.dst);
508 		goto relookup;
509 	}
510 	dst_hold(&rt->u.dst);
511 
512 out:
513 	read_unlock_bh(&rt6_lock);
514 out2:
515 	rt->u.dst.lastuse = jiffies;
516 	rt->u.dst.__use++;
517 	skb->dst = (struct dst_entry *) rt;
518 }
519 
/*
 * Resolve the route for a locally generated flow.  Default routes at
 * addrconf priority or worse go through RFC 2461 default-router
 * selection (rt6_best_dflt()); otherwise the device-match/backtrack and
 * clone-on-demand logic mirrors ip6_route_input().  Returns a held dst;
 * lookup failure shows up as the null entry with dst.error set.
 */
520 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
521 {
522 	struct fib6_node *fn;
523 	struct rt6_info *rt;
524 	int strict;
525 	int attempts = 3;
526 
527 	strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
528 
529 relookup:
530 	read_lock_bh(&rt6_lock);
531 
532 	fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
533 
534 restart:
535 	rt = fn->leaf;
536 
537 	if ((rt->rt6i_flags & RTF_CACHE)) {
538 		rt = rt6_device_match(rt, fl->oif, strict);
539 		BACKTRACK();
540 		dst_hold(&rt->u.dst);
541 		goto out;
542 	}
543 	if (rt->rt6i_flags & RTF_DEFAULT) {
544 		if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
545 			rt = rt6_best_dflt(rt, fl->oif);
546 	} else {
547 		rt = rt6_device_match(rt, fl->oif, strict);
548 		BACKTRACK();
549 	}
550 
551 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
552 		struct rt6_info *nrt;
553 		dst_hold(&rt->u.dst);
554 		read_unlock_bh(&rt6_lock);
555 
556 		nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);
557 
558 		dst_release(&rt->u.dst);
559 		rt = nrt;
560 
561 		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
562 			goto out2;
563 
564 		/* Race condition! In the gap, when rt6_lock was
565 		   released someone could insert this route.  Relookup.
566 		*/
567 		dst_release(&rt->u.dst);
568 		goto relookup;
569 	}
570 	dst_hold(&rt->u.dst);
571 
572 out:
573 	read_unlock_bh(&rt6_lock);
574 out2:
575 	rt->u.dst.lastuse = jiffies;
576 	rt->u.dst.__use++;
577 	return &rt->u.dst;
578 }
579 
580 
581 /*
582  *	Destination cache support functions
583  */
584 
585 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
586 {
587 	struct rt6_info *rt;
588 
589 	rt = (struct rt6_info *) dst;
590 
591 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
592 		return dst;
593 
594 	return NULL;
595 }
596 
597 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
598 {
599 	struct rt6_info *rt = (struct rt6_info *) dst;
600 
601 	if (rt) {
602 		if (rt->rt6i_flags & RTF_CACHE)
603 			ip6_del_rt(rt, NULL, NULL, NULL);
604 		else
605 			dst_release(dst);
606 	}
607 	return NULL;
608 }
609 
/*
 * dst_ops .link_failure hook: the output path could not reach the
 * nexthop.  Send an address-unreachable ICMPv6 error back, then either
 * expire a cached clone immediately or, for a default route, poison the
 * fib node's serial number so ip6_dst_check() stops validating dsts
 * hung off that node.
 */
610 static void ip6_link_failure(struct sk_buff *skb)
611 {
612 	struct rt6_info *rt;
613 
614 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
615 
616 	rt = (struct rt6_info *) skb->dst;
617 	if (rt) {
618 		if (rt->rt6i_flags&RTF_CACHE) {
619 			dst_set_expires(&rt->u.dst, 0);
620 			rt->rt6i_flags |= RTF_EXPIRES;
621 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
622 			rt->rt6i_node->fn_sernum = -1;
623 	}
624 }
625 
/*
 * dst_ops .update_pmtu hook: lower the cached MTU metric of a host
 * (/128) route when a smaller path MTU is learned.  Values below the
 * IPv6 minimum are clamped to IPV6_MIN_MTU with the ALLFRAG feature
 * set, so a fragment header is always emitted for this path.
 */
626 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
627 {
628 	struct rt6_info *rt6 = (struct rt6_info*)dst;
629 
630 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
631 		rt6->rt6i_flags |= RTF_MODIFIED;
632 		if (mtu < IPV6_MIN_MTU) {
633 			mtu = IPV6_MIN_MTU;
634 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
635 		}
636 		dst->metrics[RTAX_MTU-1] = mtu;
637 	}
638 }
639 
640 /* Protected by rt6_lock.  */
/* Singly-linked (via dst.next) list of dsts handed out by
 * ndisc_dst_alloc(), reaped by ndisc_dst_gc() once released. */
641 static struct dst_entry *ndisc_dst_gc_list;
642 static int ipv6_get_mtu(struct net_device *dev);
643 
644 static inline unsigned int ipv6_advmss(unsigned int mtu)
645 {
646 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
647 
648 	if (mtu < ip6_rt_min_advmss)
649 		mtu = ip6_rt_min_advmss;
650 
651 	/*
652 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
653 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
654 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
655 	 * rely only on pmtu discovery"
656 	 */
657 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
658 		mtu = IPV6_MAXPLEN;
659 	return mtu;
660 }
661 
/*
 * Build a standalone dst for sending an ndisc packet out of @dev
 * towards @addr with transmit function @output.  The entry is never
 * inserted into the fib; instead it is chained onto ndisc_dst_gc_list
 * (under rt6_lock) and reclaimed by ndisc_dst_gc() once its refcount
 * drops.  fib6 GC is kicked so that reclaim actually runs.  Returns
 * NULL when @dev has no inet6_dev or the dst cannot be allocated.
 */
662 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
663 				  struct neighbour *neigh,
664 				  struct in6_addr *addr,
665 				  int (*output)(struct sk_buff *))
666 {
667 	struct rt6_info *rt;
668 	struct inet6_dev *idev = in6_dev_get(dev);
669 
670 	if (unlikely(idev == NULL))
671 		return NULL;
672 
673 	rt = ip6_dst_alloc();
674 	if (unlikely(rt == NULL)) {
675 		in6_dev_put(idev);
676 		goto out;
677 	}
678 
679 	dev_hold(dev);
	/* either reuse the caller's neighbour (taking a ref) or resolve one */
680 	if (neigh)
681 		neigh_hold(neigh);
682 	else
683 		neigh = ndisc_get_neigh(dev, addr);
684 
685 	rt->rt6i_dev	  = dev;
686 	rt->rt6i_idev     = idev;
687 	rt->rt6i_nexthop  = neigh;
688 	atomic_set(&rt->u.dst.__refcnt, 1);
689 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
690 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
691 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
692 	rt->u.dst.output  = output;
693 
694 #if 0	/* there's no chance to use these for ndisc */
695 	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
696 				? DST_HOST
697 				: 0;
698 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
699 	rt->rt6i_dst.plen = 128;
700 #endif
701 
702 	write_lock_bh(&rt6_lock);
703 	rt->u.dst.next = ndisc_dst_gc_list;
704 	ndisc_dst_gc_list = &rt->u.dst;
705 	write_unlock_bh(&rt6_lock);
706 
707 	fib6_force_start_gc();
708 
709 out:
710 	return (struct dst_entry *)rt;
711 }
712 
/*
 * Reap entries on ndisc_dst_gc_list whose refcount has dropped to zero,
 * unlinking via the classic pointer-to-pointer walk.  Survivors are
 * counted into *more.  Returns the number of entries freed.  The list
 * is protected by rt6_lock (see above); the caller is expected to hold
 * it — presumably the writer side, TODO confirm at the call site.
 */
713 int ndisc_dst_gc(int *more)
714 {
715 	struct dst_entry *dst, *next, **pprev;
716 	int freed;
717 
718 	next = NULL;
719 	pprev = &ndisc_dst_gc_list;
720 	freed = 0;
721 	while ((dst = *pprev) != NULL) {
722 		if (!atomic_read(&dst->__refcnt)) {
723 			*pprev = dst->next;
724 			dst_free(dst);
725 			freed++;
726 		} else {
727 			pprev = &dst->next;
728 			(*more)++;
729 		}
730 	}
731 
732 	return freed;
733 }
734 
/*
 * dst_ops .gc hook, called when the dst layer wants entries reclaimed.
 * Throttled to one pass per ip6_rt_gc_min_interval unless the table has
 * outgrown ip6_rt_max_size.  The expiry horizon 'expire' adapts: every
 * call decays it by expire >> ip6_rt_gc_elasticity, each real pass
 * nudges it up by one, and it snaps back to ip6_rt_gc_timeout/2 once
 * the table drops below gc_thresh.  Returns nonzero while the table is
 * still above ip6_rt_max_size.
 */
735 static int ip6_dst_gc(void)
736 {
737 	static unsigned expire = 30*HZ;
738 	static unsigned long last_gc;
739 	unsigned long now = jiffies;
740 
741 	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
742 	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
743 		goto out;
744 
745 	expire++;
746 	fib6_run_gc(expire);
747 	last_gc = now;
748 	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
749 		expire = ip6_rt_gc_timeout>>1;
750 
751 out:
752 	expire -= expire>>ip6_rt_gc_elasticity;
753 	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
754 }
755 
756 /* Clean host part of a prefix. Not necessary in radix tree,
757    but results in cleaner routing tables.
758 
759    Remove it only when all the things will work!
760  */
761 
762 static int ipv6_get_mtu(struct net_device *dev)
763 {
764 	int mtu = IPV6_MIN_MTU;
765 	struct inet6_dev *idev;
766 
767 	idev = in6_dev_get(dev);
768 	if (idev) {
769 		mtu = idev->cnf.mtu6;
770 		in6_dev_put(idev);
771 	}
772 	return mtu;
773 }
774 
775 int ipv6_get_hoplimit(struct net_device *dev)
776 {
777 	int hoplimit = ipv6_devconf.hop_limit;
778 	struct inet6_dev *idev;
779 
780 	idev = in6_dev_get(dev);
781 	if (idev) {
782 		hoplimit = idev->cnf.hop_limit;
783 		in6_dev_put(idev);
784 	}
785 	return hoplimit;
786 }
787 
788 /*
789  *
790  */
791 
/*
 * Create a route from an in6_rtmsg (ioctl/netlink path) and insert it
 * into the fib.  Notable behaviour:
 *  - prefix lengths over 128 (or any src length without
 *    CONFIG_IPV6_SUBTREES) are rejected with -EINVAL;
 *  - RTF_REJECT routes, and routes via a loopback device whose
 *    destination is not a loopback address, become reject entries;
 *  - an RTF_GATEWAY nexthop normally must be link-local unicast; a
 *    plain unicast gateway is tolerated only when directly reachable
 *    (see the --ANK comment below);
 *  - all failures funnel through 'out:', releasing the route and the
 *    dev/idev references taken along the way.
 */
792 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
793 		void *_rtattr, struct netlink_skb_parms *req)
794 {
795 	int err;
796 	struct rtmsg *r;
797 	struct rtattr **rta;
798 	struct rt6_info *rt = NULL;
799 	struct net_device *dev = NULL;
800 	struct inet6_dev *idev = NULL;
801 	int addr_type;
802 
803 	rta = (struct rtattr **) _rtattr;
804 
805 	if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
806 		return -EINVAL;
807 #ifndef CONFIG_IPV6_SUBTREES
808 	if (rtmsg->rtmsg_src_len)
809 		return -EINVAL;
810 #endif
811 	if (rtmsg->rtmsg_ifindex) {
812 		err = -ENODEV;
813 		dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
814 		if (!dev)
815 			goto out;
816 		idev = in6_dev_get(dev);
817 		if (!idev)
818 			goto out;
819 	}
820 
821 	if (rtmsg->rtmsg_metric == 0)
822 		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
823 
824 	rt = ip6_dst_alloc();
825 
826 	if (rt == NULL) {
827 		err = -ENOMEM;
828 		goto out;
829 	}
830 
831 	rt->u.dst.obsolete = -1;
832 	rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
833 	if (nlh && (r = NLMSG_DATA(nlh))) {
834 		rt->rt6i_protocol = r->rtm_protocol;
835 	} else {
836 		rt->rt6i_protocol = RTPROT_BOOT;
837 	}
838 
839 	addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
840 
841 	if (addr_type & IPV6_ADDR_MULTICAST)
842 		rt->u.dst.input = ip6_mc_input;
843 	else
844 		rt->u.dst.input = ip6_forward;
845 
846 	rt->u.dst.output = ip6_output;
847 
848 	ipv6_addr_prefix(&rt->rt6i_dst.addr,
849 			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
850 	rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
851 	if (rt->rt6i_dst.plen == 128)
852 	       rt->u.dst.flags = DST_HOST;
853 
854 #ifdef CONFIG_IPV6_SUBTREES
855 	ipv6_addr_prefix(&rt->rt6i_src.addr,
856 			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
857 	rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
858 #endif
859 
860 	rt->rt6i_metric = rtmsg->rtmsg_metric;
861 
862 	/* We cannot add true routes via loopback here,
863 	   they would result in kernel looping; promote them to reject routes
864 	 */
865 	if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
866 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
867 		/* hold loopback dev/idev if we haven't done so. */
868 		if (dev != &loopback_dev) {
869 			if (dev) {
870 				dev_put(dev);
871 				in6_dev_put(idev);
872 			}
873 			dev = &loopback_dev;
874 			dev_hold(dev);
875 			idev = in6_dev_get(dev);
876 			if (!idev) {
877 				err = -ENODEV;
878 				goto out;
879 			}
880 		}
881 		rt->u.dst.output = ip6_pkt_discard_out;
882 		rt->u.dst.input = ip6_pkt_discard;
883 		rt->u.dst.error = -ENETUNREACH;
884 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
885 		goto install_route;
886 	}
887 
888 	if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
889 		struct in6_addr *gw_addr;
890 		int gwa_type;
891 
892 		gw_addr = &rtmsg->rtmsg_gateway;
893 		ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
894 		gwa_type = ipv6_addr_type(gw_addr);
895 
896 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
897 			struct rt6_info *grt;
898 
899 			/* IPv6 strictly inhibits using not link-local
900 			   addresses as nexthop address.
901 			   Otherwise, router will not able to send redirects.
902 			   It is very good, but in some (rare!) circumstances
903 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
904 			   some exceptions. --ANK
905 			 */
906 			err = -EINVAL;
907 			if (!(gwa_type&IPV6_ADDR_UNICAST))
908 				goto out;
909 
			/* the gateway itself must resolve to a non-gateway
			 * route (i.e. be directly reachable) */
910 			grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
911 
912 			err = -EHOSTUNREACH;
913 			if (grt == NULL)
914 				goto out;
915 			if (dev) {
916 				if (dev != grt->rt6i_dev) {
917 					dst_release(&grt->u.dst);
918 					goto out;
919 				}
920 			} else {
921 				dev = grt->rt6i_dev;
922 				idev = grt->rt6i_idev;
923 				dev_hold(dev);
924 				in6_dev_hold(grt->rt6i_idev);
925 			}
926 			if (!(grt->rt6i_flags&RTF_GATEWAY))
927 				err = 0;
928 			dst_release(&grt->u.dst);
929 
930 			if (err)
931 				goto out;
932 		}
933 		err = -EINVAL;
934 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
935 			goto out;
936 	}
937 
938 	err = -ENODEV;
939 	if (dev == NULL)
940 		goto out;
941 
942 	if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
943 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
944 		if (IS_ERR(rt->rt6i_nexthop)) {
945 			err = PTR_ERR(rt->rt6i_nexthop);
946 			rt->rt6i_nexthop = NULL;
947 			goto out;
948 		}
949 	}
950 
951 	rt->rt6i_flags = rtmsg->rtmsg_flags;
952 
953 install_route:
	/* apply caller-supplied RTA_METRICS attributes, if any */
954 	if (rta && rta[RTA_METRICS-1]) {
955 		int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
956 		struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
957 
958 		while (RTA_OK(attr, attrlen)) {
959 			unsigned flavor = attr->rta_type;
960 			if (flavor) {
961 				if (flavor > RTAX_MAX) {
962 					err = -EINVAL;
963 					goto out;
964 				}
965 				rt->u.dst.metrics[flavor-1] =
966 					*(u32 *)RTA_DATA(attr);
967 			}
968 			attr = RTA_NEXT(attr, attrlen);
969 		}
970 	}
971 
972 	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
973 		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
974 	if (!rt->u.dst.metrics[RTAX_MTU-1])
975 		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
976 	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
977 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
978 	rt->u.dst.dev = dev;
979 	rt->rt6i_idev = idev;
980 	return ip6_ins_rt(rt, nlh, _rtattr, req);
981 
982 out:
983 	if (dev)
984 		dev_put(dev);
985 	if (idev)
986 		in6_dev_put(idev);
987 	if (rt)
988 		dst_free((struct dst_entry *) rt);
989 	return err;
990 }
991 
/*
 * Remove @rt from the fib under the writer side of rt6_lock.  The
 * cached default-router pointer is cleared unconditionally first, and
 * the reference the caller passed in is consumed here via dst_release().
 * Returns fib6_del()'s result.
 */
992 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
993 {
994 	int err;
995 
996 	write_lock_bh(&rt6_lock);
997 
998 	rt6_reset_dflt_pointer(NULL);
999 
1000 	err = fib6_del(rt, nlh, _rtattr, req);
1001 	dst_release(&rt->u.dst);
1002 
1003 	write_unlock_bh(&rt6_lock);
1004 
1005 	return err;
1006 }
1007 
/*
 * Delete the first route matching the rtmsg's exact prefix plus any of
 * the optional ifindex / gateway / metric constraints (a zero value
 * means "don't care").  A matching route is referenced and handed to
 * ip6_del_rt(), which consumes the reference.  Returns -ESRCH when
 * nothing matches.
 */
1008 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1009 {
1010 	struct fib6_node *fn;
1011 	struct rt6_info *rt;
1012 	int err = -ESRCH;
1013 
1014 	read_lock_bh(&rt6_lock);
1015 
1016 	fn = fib6_locate(&ip6_routing_table,
1017 			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1018 			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1019 
1020 	if (fn) {
1021 		for (rt = fn->leaf; rt; rt = rt->u.next) {
1022 			if (rtmsg->rtmsg_ifindex &&
1023 			    (rt->rt6i_dev == NULL ||
1024 			     rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1025 				continue;
1026 			if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1027 			    !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1028 				continue;
1029 			if (rtmsg->rtmsg_metric &&
1030 			    rtmsg->rtmsg_metric != rt->rt6i_metric)
1031 				continue;
1032 			dst_hold(&rt->u.dst);
1033 			read_unlock_bh(&rt6_lock);
1034 
1035 			return ip6_del_rt(rt, nlh, _rtattr, req);
1036 		}
1037 	}
1038 	read_unlock_bh(&rt6_lock);
1039 
1040 	return err;
1041 }
1042 
1043 /*
1044  *	Handle redirects
1045  */
/*
 * Process an ICMPv6 redirect for destination @dest received from
 * @saddr via @neigh's device (RFC 2461 redirect handling).  The current
 * route must be a gateway route on that device and @saddr must be the
 * nexthop gateway (for a default route, any of the default gateways).
 * On acceptance the neighbour cache is updated from @lladdr and a host
 * cache route pointing at the new first hop is installed; a previous
 * cached clone for @dest is removed.
 */
1046 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1047 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1048 {
1049 	struct rt6_info *rt, *nrt;
1050 
1051 	/* Locate old route to this destination. */
1052 	rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1053 
1054 	if (rt == NULL)
1055 		return;
1056 
1057 	if (neigh->dev != rt->rt6i_dev)
1058 		goto out;
1059 
1060 	/*
1061 	 * Current route is on-link; redirect is always invalid.
1062 	 *
1063 	 * Seems, previous statement is not true. It could
1064 	 * be node, which looks for us as on-link (f.e. proxy ndisc)
1065 	 * But then router serving it might decide, that we should
1066 	 * know truth 8)8) --ANK (980726).
1067 	 */
1068 	if (!(rt->rt6i_flags&RTF_GATEWAY))
1069 		goto out;
1070 
1071 	/*
1072 	 *	RFC 2461 specifies that redirects should only be
1073 	 *	accepted if they come from the nexthop to the target.
1074 	 *	Due to the way default routers are chosen, this notion
1075 	 *	is a bit fuzzy and one might need to check all default
1076 	 *	routers.
1077 	 */
1078 	if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1079 		if (rt->rt6i_flags & RTF_DEFAULT) {
1080 			struct rt6_info *rt1;
1081 
1082 			read_lock(&rt6_lock);
1083 			for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1084 				if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
					/* switch our reference to the route
					 * whose gateway sent the redirect */
1085 					dst_hold(&rt1->u.dst);
1086 					dst_release(&rt->u.dst);
1087 					read_unlock(&rt6_lock);
1088 					rt = rt1;
1089 					goto source_ok;
1090 				}
1091 			}
1092 			read_unlock(&rt6_lock);
1093 		}
1094 		if (net_ratelimit())
1095 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1096 			       "for redirect target\n");
1097 		goto out;
1098 	}
1099 
1100 source_ok:
1101 
1102 	/*
1103 	 *	We have finally decided to accept it.
1104 	 */
1105 
1106 	neigh_update(neigh, lladdr, NUD_STALE,
1107 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1108 		     NEIGH_UPDATE_F_OVERRIDE|
1109 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1110 				     NEIGH_UPDATE_F_ISROUTER))
1111 		     );
1112 
1113 	/*
1114 	 * Redirect received -> path was valid.
1115 	 * Look, redirects are sent only in response to data packets,
1116 	 * so that this nexthop apparently is reachable. --ANK
1117 	 */
1118 	dst_confirm(&rt->u.dst);
1119 
1120 	/* Duplicate redirect: silently ignore. */
1121 	if (neigh == rt->u.dst.neighbour)
1122 		goto out;
1123 
1124 	nrt = ip6_rt_copy(rt);
1125 	if (nrt == NULL)
1126 		goto out;
1127 
1128 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1129 	if (on_link)
1130 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1131 
1132 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1133 	nrt->rt6i_dst.plen = 128;
1134 	nrt->u.dst.flags |= DST_HOST;
1135 
1136 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1137 	nrt->rt6i_nexthop = neigh_clone(neigh);
1138 	/* Reset pmtu, it may be better */
1139 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1140 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1141 
1142 	if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1143 		goto out;
1144 
	/* ip6_del_rt consumes our reference on the old cached clone */
1145 	if (rt->rt6i_flags&RTF_CACHE) {
1146 		ip6_del_rt(rt, NULL, NULL, NULL);
1147 		return;
1148 	}
1149 
1150 out:
1151         dst_release(&rt->u.dst);
1152 	return;
1153 }
1154 
/*
 *	Handle ICMP "packet too big" messages
 *	i.e. Path MTU discovery
 *
 *	Record a smaller path MTU reported for daddr/saddr on @dev in a
 *	host route, so subsequent lookups see it (RFC 1981).
 */

void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
			struct net_device *dev, u32 pmtu)
{
	struct rt6_info *rt, *nrt;
	int allfrag = 0;	/* set when pmtu had to be clamped to IPV6_MIN_MTU */

	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
	if (rt == NULL)
		return;

	/* Only ever lower the recorded MTU here; increases are rediscovered
	 * via route expiry (see the RFC 1981 note below). */
	if (pmtu >= dst_mtu(&rt->u.dst))
		goto out;

	if (pmtu < IPV6_MIN_MTU) {
		/*
		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
		 * MTU (1280) and a fragment header should always be included
		 * after a node receiving Too Big message reporting PMTU is
		 * less than the IPv6 Minimum Link MTU.
		 */
		pmtu = IPV6_MIN_MTU;
		allfrag = 1;
	}

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->u.dst);

	/* Host route. If it is static, it would be better
	   not to override it, but add new one, so that
	   when cache entry will expire old pmtu
	   would return automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
		if (allfrag)
			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is connected route. Action: COW
	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
	 */
	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
		/* NOTE(review): rt6_cow() is dereferenced without a NULL
		 * check and released below, so it appears to always return
		 * a held entry with dst.error set on failure — confirm. */
		nrt = rt6_cow(rt, daddr, saddr, NULL);
		if (!nrt->u.dst.error) {
			nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
			if (allfrag)
				nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
			/* According to RFC 1981, detecting PMTU increase shouldn't be
			   happened within 5 mins, the recommended timer is 10 mins.
			   Here this route expiration time is set to ip6_rt_mtu_expires
			   which is 10 mins. After 10 mins the decreased pmtu is expired
			   and detecting PMTU increase will be automatically happened.
			 */
			dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
			nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
		}
		dst_release(&nrt->u.dst);
	} else {
		/* Clone into a dynamic, expiring host cache entry that
		 * carries the new MTU. */
		nrt = ip6_rt_copy(rt);
		if (nrt == NULL)
			goto out;
		ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
		nrt->rt6i_dst.plen = 128;
		nrt->u.dst.flags |= DST_HOST;
		nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
		if (allfrag)
			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
		ip6_ins_rt(nrt, NULL, NULL, NULL);
	}

out:
	/* Drop the reference taken by rt6_lookup(). */
	dst_release(&rt->u.dst);
}
1244 
1245 /*
1246  *	Misc support functions
1247  */
1248 
1249 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1250 {
1251 	struct rt6_info *rt = ip6_dst_alloc();
1252 
1253 	if (rt) {
1254 		rt->u.dst.input = ort->u.dst.input;
1255 		rt->u.dst.output = ort->u.dst.output;
1256 
1257 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1258 		rt->u.dst.dev = ort->u.dst.dev;
1259 		if (rt->u.dst.dev)
1260 			dev_hold(rt->u.dst.dev);
1261 		rt->rt6i_idev = ort->rt6i_idev;
1262 		if (rt->rt6i_idev)
1263 			in6_dev_hold(rt->rt6i_idev);
1264 		rt->u.dst.lastuse = jiffies;
1265 		rt->rt6i_expires = 0;
1266 
1267 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1268 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1269 		rt->rt6i_metric = 0;
1270 
1271 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1272 #ifdef CONFIG_IPV6_SUBTREES
1273 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1274 #endif
1275 	}
1276 	return rt;
1277 }
1278 
1279 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1280 {
1281 	struct rt6_info *rt;
1282 	struct fib6_node *fn;
1283 
1284 	fn = &ip6_routing_table;
1285 
1286 	write_lock_bh(&rt6_lock);
1287 	for (rt = fn->leaf; rt; rt=rt->u.next) {
1288 		if (dev == rt->rt6i_dev &&
1289 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1290 			break;
1291 	}
1292 	if (rt)
1293 		dst_hold(&rt->u.dst);
1294 	write_unlock_bh(&rt6_lock);
1295 	return rt;
1296 }
1297 
1298 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1299 				     struct net_device *dev)
1300 {
1301 	struct in6_rtmsg rtmsg;
1302 
1303 	memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1304 	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1305 	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1306 	rtmsg.rtmsg_metric = 1024;
1307 	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1308 
1309 	rtmsg.rtmsg_ifindex = dev->ifindex;
1310 
1311 	ip6_route_add(&rtmsg, NULL, NULL, NULL);
1312 	return rt6_get_dflt_router(gwaddr, dev);
1313 }
1314 
1315 void rt6_purge_dflt_routers(void)
1316 {
1317 	struct rt6_info *rt;
1318 
1319 restart:
1320 	read_lock_bh(&rt6_lock);
1321 	for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1322 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1323 			dst_hold(&rt->u.dst);
1324 
1325 			rt6_reset_dflt_pointer(NULL);
1326 
1327 			read_unlock_bh(&rt6_lock);
1328 
1329 			ip6_del_rt(rt, NULL, NULL, NULL);
1330 
1331 			goto restart;
1332 		}
1333 	}
1334 	read_unlock_bh(&rt6_lock);
1335 }
1336 
1337 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1338 {
1339 	struct in6_rtmsg rtmsg;
1340 	int err;
1341 
1342 	switch(cmd) {
1343 	case SIOCADDRT:		/* Add a route */
1344 	case SIOCDELRT:		/* Delete a route */
1345 		if (!capable(CAP_NET_ADMIN))
1346 			return -EPERM;
1347 		err = copy_from_user(&rtmsg, arg,
1348 				     sizeof(struct in6_rtmsg));
1349 		if (err)
1350 			return -EFAULT;
1351 
1352 		rtnl_lock();
1353 		switch (cmd) {
1354 		case SIOCADDRT:
1355 			err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1356 			break;
1357 		case SIOCDELRT:
1358 			err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1359 			break;
1360 		default:
1361 			err = -EINVAL;
1362 		}
1363 		rtnl_unlock();
1364 
1365 		return err;
1366 	};
1367 
1368 	return -EINVAL;
1369 }
1370 
/*
 *	Drop the packet on the floor
 */

static int ip6_pkt_discard(struct sk_buff *skb)
{
	/* No route: bump the counter, report destination unreachable
	 * (no route) to the sender, and free the packet. */
	IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
	kfree_skb(skb);
	return 0;
}
1382 
/* Output-path discard: set skb->dev from the attached dst first —
 * presumably so the ICMPv6 error above sees the egress device; confirm
 * against icmpv6_send()'s use of its dev argument. */
static int ip6_pkt_discard_out(struct sk_buff *skb)
{
	skb->dev = skb->dst->dev;
	return ip6_pkt_discard(skb);
}
1388 
/*
 *	Allocate a dst for local (unicast / anycast) address.
 */

struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    int anycast)
{
	struct rt6_info *rt = ip6_dst_alloc();

	if (rt == NULL)
		return ERR_PTR(-ENOMEM);

	/* The route is pinned to the loopback device; take the device and
	 * idev references the entry will carry.  NOTE(review): the error
	 * path below relies on dst_free() releasing both — confirm in the
	 * dst destructor. */
	dev_hold(&loopback_dev);
	in6_dev_hold(idev);

	rt->u.dst.flags = DST_HOST;
	rt->u.dst.input = ip6_input;
	rt->u.dst.output = ip6_output;
	rt->rt6i_dev = &loopback_dev;
	rt->rt6i_idev = idev;
	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	/* NOTE(review): -1 presumably selects the per-device default hop
	 * limit at transmit time — confirm where RTAX_HOPLIMIT is read. */
	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
	rt->u.dst.obsolete = -1;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (!anycast)
		rt->rt6i_flags |= RTF_LOCAL;	/* anycast entries omit RTF_LOCAL */
	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
	if (rt->rt6i_nexthop == NULL) {
		dst_free((struct dst_entry *) rt);
		return ERR_PTR(-ENOMEM);
	}

	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;

	/* Hand the caller an already-held dst. */
	atomic_set(&rt->u.dst.__refcnt, 1);

	return rt;
}
1431 
1432 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1433 {
1434 	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1435 	    rt != &ip6_null_entry) {
1436 		RT6_TRACE("deleted by ifdown %p\n", rt);
1437 		return -1;
1438 	}
1439 	return 0;
1440 }
1441 
/* Remove all routes using @dev from the tree; dev == NULL removes every
 * route except ip6_null_entry (see fib6_ifdown()). */
void rt6_ifdown(struct net_device *dev)
{
	write_lock_bh(&rt6_lock);
	fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
	write_unlock_bh(&rt6_lock);
}
1448 
/* Callback argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new link MTU */
};
1454 
1455 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1456 {
1457 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1458 	struct inet6_dev *idev;
1459 
1460 	/* In IPv6 pmtu discovery is not optional,
1461 	   so that RTAX_MTU lock cannot disable it.
1462 	   We still use this lock to block changes
1463 	   caused by addrconf/ndisc.
1464 	*/
1465 
1466 	idev = __in6_dev_get(arg->dev);
1467 	if (idev == NULL)
1468 		return 0;
1469 
1470 	/* For administrative MTU increase, there is no way to discover
1471 	   IPv6 PMTU increase, so PMTU increase should be updated here.
1472 	   Since RFC 1981 doesn't include administrative MTU increase
1473 	   update PMTU increase is a MUST. (i.e. jumbo frame)
1474 	 */
1475 	/*
1476 	   If new MTU is less than route PMTU, this new MTU will be the
1477 	   lowest MTU in the path, update the route PMTU to reflect PMTU
1478 	   decreases; if new MTU is greater than route PMTU, and the
1479 	   old MTU is the lowest MTU in the path, update the route PMTU
1480 	   to reflect the increase. In this case if the other nodes' MTU
1481 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
1482 	   PMTU discouvery.
1483 	 */
1484 	if (rt->rt6i_dev == arg->dev &&
1485 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1486             (dst_mtu(&rt->u.dst) > arg->mtu ||
1487              (dst_mtu(&rt->u.dst) < arg->mtu &&
1488 	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1489 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1490 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1491 	return 0;
1492 }
1493 
1494 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1495 {
1496 	struct rt6_mtu_change_arg arg;
1497 
1498 	arg.dev = dev;
1499 	arg.mtu = mtu;
1500 	read_lock_bh(&rt6_lock);
1501 	fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1502 	read_unlock_bh(&rt6_lock);
1503 }
1504 
1505 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1506 			      struct in6_rtmsg *rtmsg)
1507 {
1508 	memset(rtmsg, 0, sizeof(*rtmsg));
1509 
1510 	rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1511 	rtmsg->rtmsg_src_len = r->rtm_src_len;
1512 	rtmsg->rtmsg_flags = RTF_UP;
1513 	if (r->rtm_type == RTN_UNREACHABLE)
1514 		rtmsg->rtmsg_flags |= RTF_REJECT;
1515 
1516 	if (rta[RTA_GATEWAY-1]) {
1517 		if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1518 			return -EINVAL;
1519 		memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1520 		rtmsg->rtmsg_flags |= RTF_GATEWAY;
1521 	}
1522 	if (rta[RTA_DST-1]) {
1523 		if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1524 			return -EINVAL;
1525 		memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1526 	}
1527 	if (rta[RTA_SRC-1]) {
1528 		if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1529 			return -EINVAL;
1530 		memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1531 	}
1532 	if (rta[RTA_OIF-1]) {
1533 		if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1534 			return -EINVAL;
1535 		memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1536 	}
1537 	if (rta[RTA_PRIORITY-1]) {
1538 		if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1539 			return -EINVAL;
1540 		memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1541 	}
1542 	return 0;
1543 }
1544 
/* RTM_DELROUTE handler: convert the rtnetlink message to a legacy
 * in6_rtmsg and delete the matching route. */
int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
	struct rtmsg *r = NLMSG_DATA(nlh);
	struct in6_rtmsg rtmsg;

	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
		return -EINVAL;
	return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
}
1554 
/* RTM_NEWROUTE handler: convert the rtnetlink message to a legacy
 * in6_rtmsg and add the route. */
int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
	struct rtmsg *r = NLMSG_DATA(nlh);
	struct in6_rtmsg rtmsg;

	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
		return -EINVAL;
	return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
}
1564 
/* State shared between inet6_dump_fib() and the per-route dump callback. */
struct rt6_rtnl_dump_arg
{
	struct sk_buff *skb;		/* skb being filled with route records */
	struct netlink_callback *cb;	/* netlink dump state */
};
1570 
/*
 *	Build one RTM message for @rt into @skb.  Non-NULL @dst/@src come
 *	from a specific RTM_GETROUTE query and force full-length (128-bit)
 *	prefixes.  Returns skb->len on success, 1 when the route was
 *	filtered out by @prefix, and -1 when the skb ran out of room
 *	(NLMSG_NEW/RTA_PUT jump to the failure labels at the bottom).
 */
static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr  *nlh;
	unsigned char	 *b = skb->tail;	/* rollback point on failure */
	struct rta_cacheinfo ci;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
	rtm = NLMSG_DATA(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	rtm->rtm_table = RT_TABLE_MAIN;
	/* Map route flags onto the closest rtnetlink route type. */
	if (rt->rt6i_flags&RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	/* The flag-derived origin overrides the stored protocol. */
	if (rt->rt6i_flags&RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags&RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags&RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		RTA_PUT(skb, RTA_DST, 16, dst);
	        rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		RTA_PUT(skb, RTA_SRC, 16, src);
	        rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	if (iif)
		RTA_PUT(skb, RTA_IIF, 4, &iif);
	else if (dst) {
		/* Queries without an input interface report the source
		 * address that would be selected for this destination. */
		struct in6_addr saddr_buf;
		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}
	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
		goto rtattr_failure;
	if (rt->u.dst.neighbour)
		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
	if (rt->u.dst.dev)
		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
	/* Cache bookkeeping: age, remaining lifetime, use counts, error. */
	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
	if (rt->rt6i_expires)
		ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
	else
		ci.rta_expires = 0;
	ci.rta_used = rt->u.dst.__use;
	ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
	ci.rta_error = rt->u.dst.error;
	ci.rta_id = 0;
	ci.rta_ts = 0;
	ci.rta_tsage = 0;
	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	/* Undo the partially-built message. */
	skb_trim(skb, b - skb->data);
	return -1;
}
1660 
1661 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1662 {
1663 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1664 	int prefix;
1665 
1666 	if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1667 		struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1668 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1669 	} else
1670 		prefix = 0;
1671 
1672 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1673 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1674 		     prefix, NLM_F_MULTI);
1675 }
1676 
1677 static int fib6_dump_node(struct fib6_walker_t *w)
1678 {
1679 	int res;
1680 	struct rt6_info *rt;
1681 
1682 	for (rt = w->leaf; rt; rt = rt->u.next) {
1683 		res = rt6_dump_route(rt, w->args);
1684 		if (res < 0) {
1685 			/* Frame is full, suspend walking */
1686 			w->leaf = rt;
1687 			return 1;
1688 		}
1689 		BUG_TRAP(res!=0);
1690 	}
1691 	w->leaf = NULL;
1692 	return 0;
1693 }
1694 
/* Tear down dump state: free the walker parked in cb->args[0] and restore
 * the original ->done callback saved in cb->args[1]. */
static void fib6_dump_end(struct netlink_callback *cb)
{
	struct fib6_walker_t *w = (void*)cb->args[0];

	if (w) {
		cb->args[0] = 0;
		fib6_walker_unlink(w);
		kfree(w);
	}
	if (cb->args[1]) {
		cb->done = (void*)cb->args[1];
		cb->args[1] = 0;
	}
}
1709 
/* ->done hook installed by inet6_dump_fib(): release our walker state,
 * then chain to the original ->done restored by fib6_dump_end(). */
static int fib6_dump_done(struct netlink_callback *cb)
{
	fib6_dump_end(cb);
	return cb->done(cb);
}
1715 
/*
 *	Netlink dump handler for the IPv6 routing table.  A tree walk may
 *	span several netlink frames, so walker state is parked in
 *	cb->args[0] between calls and released via fib6_dump_done().
 */
int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct rt6_rtnl_dump_arg arg;
	struct fib6_walker_t *w;
	int res;

	arg.skb = skb;
	arg.cb = cb;

	w = (void*)cb->args[0];
	if (w == NULL) {
		/* New dump:
		 *
		 * 1. hook callback destructor.
		 */
		cb->args[1] = (long)cb->done;
		cb->done = fib6_dump_done;

		/*
		 * 2. allocate and initialize walker.
		 */
		w = kmalloc(sizeof(*w), GFP_ATOMIC);
		if (w == NULL)
			return -ENOMEM;
		RT6_TRACE("dump<%p", w);
		memset(w, 0, sizeof(*w));
		w->root = &ip6_routing_table;
		w->func = fib6_dump_node;
		w->args = &arg;
		cb->args[0] = (long)w;
		read_lock_bh(&rt6_lock);
		res = fib6_walk(w);
		read_unlock_bh(&rt6_lock);
	} else {
		/* Resuming a suspended dump: refresh w->args, since the
		 * previous arg lived on an earlier stack frame. */
		w->args = &arg;
		read_lock_bh(&rt6_lock);
		res = fib6_walk_continue(w);
		read_unlock_bh(&rt6_lock);
	}
#if RT6_DEBUG >= 3
	if (res <= 0 && skb->len == 0)
		RT6_TRACE("%p>dump end\n", w);
#endif
	res = res < 0 ? res : skb->len;
	/* res < 0 is an error. (really, impossible)
	   res == 0 means that dump is complete, but skb still can contain data.
	   res > 0 dump is not complete, but frame is full.
	 */
	/* Destroy walker, if dump of this table is complete. */
	if (res <= 0)
		fib6_dump_end(cb);
	return res;
}
1769 
/*
 *	RTM_GETROUTE handler: look up the route for the addresses in the
 *	request and unicast a single RTM_NEWROUTE reply to the requester.
 */
int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
	struct rtattr **rta = arg;
	int iif = 0;
	int err = -ENOBUFS;
	struct sk_buff *skb;
	struct flowi fl;
	struct rt6_info *rt;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL)
		goto out;

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb->mac.raw = skb->data;
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	memset(&fl, 0, sizeof(fl));
	if (rta[RTA_SRC-1])
		ipv6_addr_copy(&fl.fl6_src,
			       (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
	if (rta[RTA_DST-1])
		ipv6_addr_copy(&fl.fl6_dst,
			       (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));

	if (rta[RTA_IIF-1])
		memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));

	if (iif) {
		/* Only validates that the interface exists; the lookup
		 * itself goes through ip6_route_output() below. */
		struct net_device *dev;
		dev = __dev_get_by_index(iif);
		if (!dev) {
			err = -ENODEV;
			goto out_free;
		}
	}

	fl.oif = 0;
	if (rta[RTA_OIF-1])
		memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));

	rt = (struct rt6_info*)ip6_route_output(NULL, &fl);

	/* Attach the dst to the skb: the reference obtained from
	 * ip6_route_output() is then dropped when the skb is freed
	 * (including the out_free error path below). */
	skb->dst = &rt->u.dst;

	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
	err = rt6_fill_node(skb, rt,
			    &fl.fl6_dst, &fl.fl6_src,
			    iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
			    nlh->nlmsg_seq, 0, 0);
	if (err < 0) {
		err = -EMSGSIZE;
		goto out_free;
	}

	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
	if (err > 0)
		err = 0;	/* any positive return counts as success */
out:
	return err;
out_free:
	kfree_skb(skb);
	goto out;
}
1837 
1838 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1839 			struct netlink_skb_parms *req)
1840 {
1841 	struct sk_buff *skb;
1842 	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1843 	u32 pid = current->pid;
1844 	u32 seq = 0;
1845 
1846 	if (req)
1847 		pid = req->pid;
1848 	if (nlh)
1849 		seq = nlh->nlmsg_seq;
1850 
1851 	skb = alloc_skb(size, gfp_any());
1852 	if (!skb) {
1853 		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1854 		return;
1855 	}
1856 	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1857 		kfree_skb(skb);
1858 		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1859 		return;
1860 	}
1861 	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1862 	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1863 }
1864 
1865 /*
1866  *	/proc
1867  */
1868 
1869 #ifdef CONFIG_PROC_FS
1870 
/* Every line emitted by rt6_info_route() is exactly this many bytes;
 * the offset/skip arithmetic in rt6_proc_info() relies on that. */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Bookkeeping for the legacy get_info interface of /proc/net/ipv6_route. */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer supplied by procfs */
	int offset;	/* byte offset the reader asked for */
	int length;	/* capacity of @buffer */
	int skip;	/* routes skipped so far to honour @offset */
	int len;	/* bytes written so far */
};
1881 
1882 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1883 {
1884 	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1885 	int i;
1886 
1887 	if (arg->skip < arg->offset / RT6_INFO_LEN) {
1888 		arg->skip++;
1889 		return 0;
1890 	}
1891 
1892 	if (arg->len >= arg->length)
1893 		return 0;
1894 
1895 	for (i=0; i<16; i++) {
1896 		sprintf(arg->buffer + arg->len, "%02x",
1897 			rt->rt6i_dst.addr.s6_addr[i]);
1898 		arg->len += 2;
1899 	}
1900 	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1901 			    rt->rt6i_dst.plen);
1902 
1903 #ifdef CONFIG_IPV6_SUBTREES
1904 	for (i=0; i<16; i++) {
1905 		sprintf(arg->buffer + arg->len, "%02x",
1906 			rt->rt6i_src.addr.s6_addr[i]);
1907 		arg->len += 2;
1908 	}
1909 	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1910 			    rt->rt6i_src.plen);
1911 #else
1912 	sprintf(arg->buffer + arg->len,
1913 		"00000000000000000000000000000000 00 ");
1914 	arg->len += 36;
1915 #endif
1916 
1917 	if (rt->rt6i_nexthop) {
1918 		for (i=0; i<16; i++) {
1919 			sprintf(arg->buffer + arg->len, "%02x",
1920 				rt->rt6i_nexthop->primary_key[i]);
1921 			arg->len += 2;
1922 		}
1923 	} else {
1924 		sprintf(arg->buffer + arg->len,
1925 			"00000000000000000000000000000000");
1926 		arg->len += 32;
1927 	}
1928 	arg->len += sprintf(arg->buffer + arg->len,
1929 			    " %08x %08x %08x %08x %8s\n",
1930 			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1931 			    rt->u.dst.__use, rt->rt6i_flags,
1932 			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
1933 	return 0;
1934 }
1935 
/*
 *	Legacy get_info handler for /proc/net/ipv6_route.  Formats the
 *	whole table on every call and relies on each line being exactly
 *	RT6_INFO_LEN bytes to honour the requested byte offset.
 */
static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
{
	struct rt6_proc_arg arg;
	arg.buffer = buffer;
	arg.offset = offset;
	arg.length = length;
	arg.skip = 0;
	arg.len = 0;

	read_lock_bh(&rt6_lock);
	fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
	read_unlock_bh(&rt6_lock);

	*start = buffer;
	if (offset)
		*start += offset % RT6_INFO_LEN;	/* skip the partial line */

	arg.len -= offset % RT6_INFO_LEN;

	/* Clamp to the reader's buffer; a negative len (offset beyond the
	 * produced data) means nothing remains. */
	if (arg.len > length)
		arg.len = length;
	if (arg.len < 0)
		arg.len = 0;

	return arg.len;
}
1962 
/* /proc/net/rt6_stats: emit one line of fib/dst-cache counters. */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		      rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
		      rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
		      rt6_stats.fib_rt_cache,
		      atomic_read(&ip6_dst_ops.entries),
		      rt6_stats.fib_discarded_routes);

	return 0;
}
1974 
/* Open handler: single_open(), as the whole output is one show() call. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt6_stats_seq_show, NULL);
}
1979 
/* File operations for the seq_file-backed /proc/net/rt6_stats. */
static struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
1987 #endif	/* CONFIG_PROC_FS */
1988 
1989 #ifdef CONFIG_SYSCTL
1990 
1991 static int flush_delay;
1992 
1993 static
1994 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1995 			      void __user *buffer, size_t *lenp, loff_t *ppos)
1996 {
1997 	if (write) {
1998 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1999 		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2000 		return 0;
2001 	} else
2002 		return -EINVAL;
2003 }
2004 
2005 ctl_table ipv6_route_table[] = {
2006         {
2007 		.ctl_name	=	NET_IPV6_ROUTE_FLUSH,
2008 		.procname	=	"flush",
2009          	.data		=	&flush_delay,
2010 		.maxlen		=	sizeof(int),
2011 		.mode		=	0200,
2012          	.proc_handler	=	&ipv6_sysctl_rtcache_flush
2013 	},
2014 	{
2015 		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
2016 		.procname	=	"gc_thresh",
2017          	.data		=	&ip6_dst_ops.gc_thresh,
2018 		.maxlen		=	sizeof(int),
2019 		.mode		=	0644,
2020          	.proc_handler	=	&proc_dointvec,
2021 	},
2022 	{
2023 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
2024 		.procname	=	"max_size",
2025          	.data		=	&ip6_rt_max_size,
2026 		.maxlen		=	sizeof(int),
2027 		.mode		=	0644,
2028          	.proc_handler	=	&proc_dointvec,
2029 	},
2030 	{
2031 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2032 		.procname	=	"gc_min_interval",
2033          	.data		=	&ip6_rt_gc_min_interval,
2034 		.maxlen		=	sizeof(int),
2035 		.mode		=	0644,
2036          	.proc_handler	=	&proc_dointvec_jiffies,
2037 		.strategy	=	&sysctl_jiffies,
2038 	},
2039 	{
2040 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
2041 		.procname	=	"gc_timeout",
2042          	.data		=	&ip6_rt_gc_timeout,
2043 		.maxlen		=	sizeof(int),
2044 		.mode		=	0644,
2045          	.proc_handler	=	&proc_dointvec_jiffies,
2046 		.strategy	=	&sysctl_jiffies,
2047 	},
2048 	{
2049 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
2050 		.procname	=	"gc_interval",
2051          	.data		=	&ip6_rt_gc_interval,
2052 		.maxlen		=	sizeof(int),
2053 		.mode		=	0644,
2054          	.proc_handler	=	&proc_dointvec_jiffies,
2055 		.strategy	=	&sysctl_jiffies,
2056 	},
2057 	{
2058 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
2059 		.procname	=	"gc_elasticity",
2060          	.data		=	&ip6_rt_gc_elasticity,
2061 		.maxlen		=	sizeof(int),
2062 		.mode		=	0644,
2063          	.proc_handler	=	&proc_dointvec_jiffies,
2064 		.strategy	=	&sysctl_jiffies,
2065 	},
2066 	{
2067 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
2068 		.procname	=	"mtu_expires",
2069          	.data		=	&ip6_rt_mtu_expires,
2070 		.maxlen		=	sizeof(int),
2071 		.mode		=	0644,
2072          	.proc_handler	=	&proc_dointvec_jiffies,
2073 		.strategy	=	&sysctl_jiffies,
2074 	},
2075 	{
2076 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
2077 		.procname	=	"min_adv_mss",
2078          	.data		=	&ip6_rt_min_advmss,
2079 		.maxlen		=	sizeof(int),
2080 		.mode		=	0644,
2081          	.proc_handler	=	&proc_dointvec_jiffies,
2082 		.strategy	=	&sysctl_jiffies,
2083 	},
2084 	{
2085 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2086 		.procname	=	"gc_min_interval_ms",
2087          	.data		=	&ip6_rt_gc_min_interval,
2088 		.maxlen		=	sizeof(int),
2089 		.mode		=	0644,
2090          	.proc_handler	=	&proc_dointvec_ms_jiffies,
2091 		.strategy	=	&sysctl_ms_jiffies,
2092 	},
2093 	{ .ctl_name = 0 }
2094 };
2095 
2096 #endif
2097 
2098 void __init ip6_route_init(void)
2099 {
2100 	struct proc_dir_entry *p;
2101 
2102 	ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2103 						     sizeof(struct rt6_info),
2104 						     0, SLAB_HWCACHE_ALIGN,
2105 						     NULL, NULL);
2106 	if (!ip6_dst_ops.kmem_cachep)
2107 		panic("cannot create ip6_dst_cache");
2108 
2109 	fib6_init();
2110 #ifdef 	CONFIG_PROC_FS
2111 	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2112 	if (p)
2113 		p->owner = THIS_MODULE;
2114 
2115 	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2116 #endif
2117 #ifdef CONFIG_XFRM
2118 	xfrm6_init();
2119 #endif
2120 }
2121 
/* Reverse of ip6_route_init(): remove proc entries, shut down the xfrm
 * glue, flush every route and destroy the dst slab cache. */
void ip6_route_cleanup(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove("ipv6_route");
	proc_net_remove("rt6_stats");
#endif
#ifdef CONFIG_XFRM
	xfrm6_fini();
#endif
	rt6_ifdown(NULL);	/* NULL == drop routes for all devices */
	fib6_gc_cleanup();
	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
}
2135