xref: /openbmc/linux/net/ipv6/route.c (revision c21b37f6)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *	This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15 
16 /*	Changes:
17  *
18  *	YOSHIFUJI Hideaki @USAGI
19  *		reworked default router selection.
20  *		- respect outgoing interface
21  *		- select from (probably) reachable routers (i.e.
22  *		routers in REACHABLE, STALE, DELAY or PROBE states).
23  *		- always select the same router if it is (probably)
24  *		reachable.  otherwise, round-robin the list.
25  *	Ville Nuorvala
26  *		Fixed routing subtrees.
27  */
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 
42 #ifdef 	CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46 
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57 #include <net/netevent.h>
58 #include <net/netlink.h>
59 
60 #include <asm/uaccess.h>
61 
62 #ifdef CONFIG_SYSCTL
63 #include <linux/sysctl.h>
64 #endif
65 
66 /* Set to 3 to get tracing. */
67 #define RT6_DEBUG 2
68 
69 #if RT6_DEBUG >= 3
70 #define RDBG(x) printk x
71 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
72 #else
73 #define RDBG(x)
74 #define RT6_TRACE(x...) do { ; } while (0)
75 #endif
76 
77 #define CLONE_OFFLINK_ROUTE 0
78 
79 static int ip6_rt_max_size = 4096;
80 static int ip6_rt_gc_min_interval = HZ / 2;
81 static int ip6_rt_gc_timeout = 60*HZ;
82 int ip6_rt_gc_interval = 30*HZ;
83 static int ip6_rt_gc_elasticity = 9;
84 static int ip6_rt_mtu_expires = 10*60*HZ;
85 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86 
87 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
89 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90 static void		ip6_dst_destroy(struct dst_entry *);
91 static void		ip6_dst_ifdown(struct dst_entry *,
92 				       struct net_device *dev, int how);
93 static int		 ip6_dst_gc(void);
94 
95 static int		ip6_pkt_discard(struct sk_buff *skb);
96 static int		ip6_pkt_discard_out(struct sk_buff *skb);
97 static void		ip6_link_failure(struct sk_buff *skb);
98 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
99 
100 #ifdef CONFIG_IPV6_ROUTE_INFO
101 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102 					   struct in6_addr *gwaddr, int ifindex,
103 					   unsigned pref);
104 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105 					   struct in6_addr *gwaddr, int ifindex);
106 #endif
107 
108 static struct dst_ops ip6_dst_ops = {
109 	.family			=	AF_INET6,
110 	.protocol		=	__constant_htons(ETH_P_IPV6),
111 	.gc			=	ip6_dst_gc,
112 	.gc_thresh		=	1024,
113 	.check			=	ip6_dst_check,
114 	.destroy		=	ip6_dst_destroy,
115 	.ifdown			=	ip6_dst_ifdown,
116 	.negative_advice	=	ip6_negative_advice,
117 	.link_failure		=	ip6_link_failure,
118 	.update_pmtu		=	ip6_rt_update_pmtu,
119 	.entry_size		=	sizeof(struct rt6_info),
120 };
121 
122 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
123 {
124 }
125 
126 static struct dst_ops ip6_dst_blackhole_ops = {
127 	.family			=	AF_INET6,
128 	.protocol		=	__constant_htons(ETH_P_IPV6),
129 	.destroy		=	ip6_dst_destroy,
130 	.check			=	ip6_dst_check,
131 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
132 	.entry_size		=	sizeof(struct rt6_info),
133 };
134 
135 struct rt6_info ip6_null_entry = {
136 	.u = {
137 		.dst = {
138 			.__refcnt	= ATOMIC_INIT(1),
139 			.__use		= 1,
140 			.dev		= &loopback_dev,
141 			.obsolete	= -1,
142 			.error		= -ENETUNREACH,
143 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
144 			.input		= ip6_pkt_discard,
145 			.output		= ip6_pkt_discard_out,
146 			.ops		= &ip6_dst_ops,
147 			.path		= (struct dst_entry*)&ip6_null_entry,
148 		}
149 	},
150 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
151 	.rt6i_metric	= ~(u32) 0,
152 	.rt6i_ref	= ATOMIC_INIT(1),
153 };
154 
155 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
156 
157 static int ip6_pkt_prohibit(struct sk_buff *skb);
158 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
159 static int ip6_pkt_blk_hole(struct sk_buff *skb);
160 
161 struct rt6_info ip6_prohibit_entry = {
162 	.u = {
163 		.dst = {
164 			.__refcnt	= ATOMIC_INIT(1),
165 			.__use		= 1,
166 			.dev		= &loopback_dev,
167 			.obsolete	= -1,
168 			.error		= -EACCES,
169 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
170 			.input		= ip6_pkt_prohibit,
171 			.output		= ip6_pkt_prohibit_out,
172 			.ops		= &ip6_dst_ops,
173 			.path		= (struct dst_entry*)&ip6_prohibit_entry,
174 		}
175 	},
176 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
177 	.rt6i_metric	= ~(u32) 0,
178 	.rt6i_ref	= ATOMIC_INIT(1),
179 };
180 
181 struct rt6_info ip6_blk_hole_entry = {
182 	.u = {
183 		.dst = {
184 			.__refcnt	= ATOMIC_INIT(1),
185 			.__use		= 1,
186 			.dev		= &loopback_dev,
187 			.obsolete	= -1,
188 			.error		= -EINVAL,
189 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
190 			.input		= ip6_pkt_blk_hole,
191 			.output		= ip6_pkt_blk_hole,
192 			.ops		= &ip6_dst_ops,
193 			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
194 		}
195 	},
196 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
197 	.rt6i_metric	= ~(u32) 0,
198 	.rt6i_ref	= ATOMIC_INIT(1),
199 };
200 
201 #endif
202 
203 /* allocate dst with ip6_dst_ops */
204 static __inline__ struct rt6_info *ip6_dst_alloc(void)
205 {
206 	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
207 }
208 
209 static void ip6_dst_destroy(struct dst_entry *dst)
210 {
211 	struct rt6_info *rt = (struct rt6_info *)dst;
212 	struct inet6_dev *idev = rt->rt6i_idev;
213 
214 	if (idev != NULL) {
215 		rt->rt6i_idev = NULL;
216 		in6_dev_put(idev);
217 	}
218 }
219 
220 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
221 			   int how)
222 {
223 	struct rt6_info *rt = (struct rt6_info *)dst;
224 	struct inet6_dev *idev = rt->rt6i_idev;
225 
226 	if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
227 		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
228 		if (loopback_idev != NULL) {
229 			rt->rt6i_idev = loopback_idev;
230 			in6_dev_put(idev);
231 		}
232 	}
233 }
234 
235 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
236 {
237 	return (rt->rt6i_flags & RTF_EXPIRES &&
238 		time_after(jiffies, rt->rt6i_expires));
239 }
240 
241 static inline int rt6_need_strict(struct in6_addr *daddr)
242 {
243 	return (ipv6_addr_type(daddr) &
244 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
245 }
246 
247 /*
248  *	Route lookup. Any table->tb6_lock is implied.
249  */
250 
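/*
 * Pick the route in this leaf chain that best matches the requested
 * output interface.  An exact device match wins; a loopback route whose
 * idev belongs to the requested interface is remembered as a fallback.
 * With no match, strict lookups fail to ip6_null_entry while non-strict
 * lookups fall back to the head of the chain.
 */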
251 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
252 						    int oif,
253 						    int strict)
254 {
255 	struct rt6_info *local = NULL;
256 	struct rt6_info *sprt;
257 
258 	if (oif) {
259 		for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
260 			struct net_device *dev = sprt->rt6i_dev;
261 			if (dev->ifindex == oif)
262 				return sprt;
263 			if (dev->flags & IFF_LOOPBACK) {
264 				if (sprt->rt6i_idev == NULL ||
265 				    sprt->rt6i_idev->dev->ifindex != oif) {
266 					if (strict && oif)
267 						continue;
268 					if (local && (!oif ||
269 						      local->rt6i_idev->dev->ifindex == oif))
270 						continue;
271 				}
272 				local = sprt;
273 			}
274 		}
275 
276 		if (local)
277 			return local;
278 
279 		if (strict)
280 			return &ip6_null_entry;
281 	}
282 	return rt;
283 }
284 
285 #ifdef CONFIG_IPV6_ROUTER_PREF
286 static void rt6_probe(struct rt6_info *rt)
287 {
288 	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
289 	/*
290 	 * Okay, this does not seem to be appropriate
291 	 * for now, however, we need to check if it
292 	 * is really so; aka Router Reachability Probing.
293 	 *
294 	 * Router Reachability Probe MUST be rate-limited
295 	 * to no more than one per minute.
296 	 */
297 	if (!neigh || (neigh->nud_state & NUD_VALID))
298 		return;
299 	read_lock_bh(&neigh->lock);
300 	if (!(neigh->nud_state & NUD_VALID) &&
301 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
302 		struct in6_addr mcaddr;
303 		struct in6_addr *target;
304 
305 		neigh->updated = jiffies;
306 		read_unlock_bh(&neigh->lock);
307 
308 		target = (struct in6_addr *)&neigh->primary_key;
309 		addrconf_addr_solict_mult(target, &mcaddr);
310 		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
311 	} else
312 		read_unlock_bh(&neigh->lock);
313 }
314 #else
315 static inline void rt6_probe(struct rt6_info *rt)
316 {
317 	return;
318 }
319 #endif
320 
321 /*
322  * Default Router Selection (RFC 2461 6.3.6)
323  */
324 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
325 {
326 	struct net_device *dev = rt->rt6i_dev;
327 	if (!oif || dev->ifindex == oif)
328 		return 2;
329 	if ((dev->flags & IFF_LOOPBACK) &&
330 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
331 		return 1;
332 	return 0;
333 }
334 
335 static inline int rt6_check_neigh(struct rt6_info *rt)
336 {
337 	struct neighbour *neigh = rt->rt6i_nexthop;
338 	int m = 0;
339 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
340 	    !(rt->rt6i_flags & RTF_GATEWAY))
341 		m = 1;
342 	else if (neigh) {
343 		read_lock_bh(&neigh->lock);
344 		if (neigh->nud_state & NUD_VALID)
345 			m = 2;
346 		else if (!(neigh->nud_state & NUD_FAILED))
347 			m = 1;
348 		read_unlock_bh(&neigh->lock);
349 	}
350 	return m;
351 }
352 
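/*
 * Score a candidate default router: rt6_check_dev() contributes 2 for an
 * exact interface match and 1 for a loopback route tied to the requested
 * interface, the RA router-preference bits are folded in above that
 * (CONFIG_IPV6_ROUTER_PREF), and -1 is returned when a strict interface
 * or reachability requirement is not met.
 */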
353 static int rt6_score_route(struct rt6_info *rt, int oif,
354 			   int strict)
355 {
356 	int m, n;
357 
358 	m = rt6_check_dev(rt, oif);
359 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
360 		return -1;
361 #ifdef CONFIG_IPV6_ROUTER_PREF
362 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
363 #endif
364 	n = rt6_check_neigh(rt);
365 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
366 		return -1;
367 	return m;
368 }
369 
370 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
371 				   int *mpri, struct rt6_info *match)
372 {
373 	int m;
374 
375 	if (rt6_check_expired(rt))
376 		goto out;
377 
378 	m = rt6_score_route(rt, oif, strict);
379 	if (m < 0)
380 		goto out;
381 
382 	if (m > *mpri) {
383 		if (strict & RT6_LOOKUP_F_REACHABLE)
384 			rt6_probe(match);
385 		*mpri = m;
386 		match = rt;
387 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
388 		rt6_probe(rt);
389 	}
390 
391 out:
392 	return match;
393 }
394 
395 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
396 				     struct rt6_info *rr_head,
397 				     u32 metric, int oif, int strict)
398 {
399 	struct rt6_info *rt, *match;
400 	int mpri = -1;
401 
402 	match = NULL;
403 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
404 	     rt = rt->u.dst.rt6_next)
405 		match = find_match(rt, oif, strict, &mpri, match);
406 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
407 	     rt = rt->u.dst.rt6_next)
408 		match = find_match(rt, oif, strict, &mpri, match);
409 
410 	return match;
411 }
412 
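/*
 * Select a route among the leaves sharing the same metric, starting the
 * scan at fn->rr_ptr.  If no (probably) reachable router is found under
 * RT6_LOOKUP_F_REACHABLE, advance fn->rr_ptr so that the next lookup
 * round-robins to the following router of equal metric.
 */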
413 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
414 {
415 	struct rt6_info *match, *rt0;
416 
417 	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
418 		  __FUNCTION__, fn->leaf, oif);
419 
420 	rt0 = fn->rr_ptr;
421 	if (!rt0)
422 		fn->rr_ptr = rt0 = fn->leaf;
423 
424 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
425 
426 	if (!match &&
427 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
428 		struct rt6_info *next = rt0->u.dst.rt6_next;
429 
430 		/* no entries matched; do round-robin */
431 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
432 			next = fn->leaf;
433 
434 		if (next != rt0)
435 			fn->rr_ptr = next;
436 	}
437 
438 	RT6_TRACE("%s() => %p\n",
439 		  __FUNCTION__, match);
440 
441 	return (match ? match : &ip6_null_entry);
442 }
443 
444 #ifdef CONFIG_IPV6_ROUTE_INFO
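/*
 * Process a Route Information option received in a Router Advertisement
 * (RFC 4191): validate the option length against the prefix length, then
 * add, refresh or delete the corresponding RTF_ROUTEINFO route according
 * to the advertised lifetime and preference.
 */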
445 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
446 		  struct in6_addr *gwaddr)
447 {
448 	struct route_info *rinfo = (struct route_info *) opt;
449 	struct in6_addr prefix_buf, *prefix;
450 	unsigned int pref;
451 	u32 lifetime;
452 	struct rt6_info *rt;
453 
454 	if (len < sizeof(struct route_info)) {
455 		return -EINVAL;
456 	}
457 
458 	/* Sanity check for prefix_len and length */
459 	if (rinfo->length > 3) {
460 		return -EINVAL;
461 	} else if (rinfo->prefix_len > 128) {
462 		return -EINVAL;
463 	} else if (rinfo->prefix_len > 64) {
464 		if (rinfo->length < 2) {
465 			return -EINVAL;
466 		}
467 	} else if (rinfo->prefix_len > 0) {
468 		if (rinfo->length < 1) {
469 			return -EINVAL;
470 		}
471 	}
472 
473 	pref = rinfo->route_pref;
474 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
475 		pref = ICMPV6_ROUTER_PREF_MEDIUM;
476 
477 	lifetime = ntohl(rinfo->lifetime);
478 	if (lifetime == 0xffffffff) {
479 		/* infinity */
480 	} else if (lifetime > 0x7fffffff/HZ) {
481 		/* Avoid arithmetic overflow */
482 		lifetime = 0x7fffffff/HZ - 1;
483 	}
484 
485 	if (rinfo->length == 3)
486 		prefix = (struct in6_addr *)rinfo->prefix;
487 	else {
488 		/* this function is safe */
489 		ipv6_addr_prefix(&prefix_buf,
490 				 (struct in6_addr *)rinfo->prefix,
491 				 rinfo->prefix_len);
492 		prefix = &prefix_buf;
493 	}
494 
495 	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
496 
497 	if (rt && !lifetime) {
498 		ip6_del_rt(rt);
499 		rt = NULL;
500 	}
501 
502 	if (!rt && lifetime)
503 		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
504 					pref);
505 	else if (rt)
506 		rt->rt6i_flags = RTF_ROUTEINFO |
507 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
508 
509 	if (rt) {
510 		if (lifetime == 0xffffffff) {
511 			rt->rt6i_flags &= ~RTF_EXPIRES;
512 		} else {
513 			rt->rt6i_expires = jiffies + HZ * lifetime;
514 			rt->rt6i_flags |= RTF_EXPIRES;
515 		}
516 		dst_release(&rt->u.dst);
517 	}
518 	return 0;
519 }
520 #endif
521 
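/*
 * BACKTRACK: if the lookup ended up on ip6_null_entry, walk back up
 * towards the tree root, descending into any source-address subtree on
 * the way (CONFIG_IPV6_SUBTREES), until a node carrying route
 * information is found, then restart the selection from there.
 */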
522 #define BACKTRACK(saddr) \
523 do { \
524 	if (rt == &ip6_null_entry) { \
525 		struct fib6_node *pn; \
526 		while (1) { \
527 			if (fn->fn_flags & RTN_TL_ROOT) \
528 				goto out; \
529 			pn = fn->parent; \
530 			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
531 				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
532 			else \
533 				fn = pn; \
534 			if (fn->fn_flags & RTN_RTINFO) \
535 				goto restart; \
536 		} \
537 	} \
538 } while(0)
539 
540 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
541 					     struct flowi *fl, int flags)
542 {
543 	struct fib6_node *fn;
544 	struct rt6_info *rt;
545 
546 	read_lock_bh(&table->tb6_lock);
547 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
548 restart:
549 	rt = fn->leaf;
550 	rt = rt6_device_match(rt, fl->oif, flags);
551 	BACKTRACK(&fl->fl6_src);
552 out:
553 	dst_hold(&rt->u.dst);
554 	read_unlock_bh(&table->tb6_lock);
555 
556 	rt->u.dst.lastuse = jiffies;
557 	rt->u.dst.__use++;
558 
559 	return rt;
560 
561 }
562 
563 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
564 			    int oif, int strict)
565 {
566 	struct flowi fl = {
567 		.oif = oif,
568 		.nl_u = {
569 			.ip6_u = {
570 				.daddr = *daddr,
571 			},
572 		},
573 	};
574 	struct dst_entry *dst;
575 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
576 
577 	if (saddr) {
578 		memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
579 		flags |= RT6_LOOKUP_F_HAS_SADDR;
580 	}
581 
582 	dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
583 	if (dst->error == 0)
584 		return (struct rt6_info *) dst;
585 
586 	dst_release(dst);
587 
588 	return NULL;
589 }
590 
591 EXPORT_SYMBOL(rt6_lookup);
592 
593 /* ip6_ins_rt is called with FREE table->tb6_lock.
594    It takes a new route entry; if the addition fails for any reason,
595    the route is freed. In any case, if the caller does not hold a
596    reference, the route may be destroyed.
597  */
598 
599 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
600 {
601 	int err;
602 	struct fib6_table *table;
603 
604 	table = rt->rt6i_table;
605 	write_lock_bh(&table->tb6_lock);
606 	err = fib6_add(&table->tb6_root, rt, info);
607 	write_unlock_bh(&table->tb6_lock);
608 
609 	return err;
610 }
611 
612 int ip6_ins_rt(struct rt6_info *rt)
613 {
614 	return __ip6_ins_rt(rt, NULL);
615 }
616 
617 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
618 				      struct in6_addr *saddr)
619 {
620 	struct rt6_info *rt;
621 
622 	/*
623 	 *	Clone the route.
624 	 */
625 
626 	rt = ip6_rt_copy(ort);
627 
628 	if (rt) {
629 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
630 			if (rt->rt6i_dst.plen != 128 &&
631 			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
632 				rt->rt6i_flags |= RTF_ANYCAST;
633 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
634 		}
635 
636 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
637 		rt->rt6i_dst.plen = 128;
638 		rt->rt6i_flags |= RTF_CACHE;
639 		rt->u.dst.flags |= DST_HOST;
640 
641 #ifdef CONFIG_IPV6_SUBTREES
642 		if (rt->rt6i_src.plen && saddr) {
643 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
644 			rt->rt6i_src.plen = 128;
645 		}
646 #endif
647 
648 		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
649 
650 	}
651 
652 	return rt;
653 }
654 
655 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
656 {
657 	struct rt6_info *rt = ip6_rt_copy(ort);
658 	if (rt) {
659 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
660 		rt->rt6i_dst.plen = 128;
661 		rt->rt6i_flags |= RTF_CACHE;
662 		rt->u.dst.flags |= DST_HOST;
663 		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
664 	}
665 	return rt;
666 }
667 
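/*
 * Input-path route resolution.  A matching non-RTF_CACHE route that
 * still needs a neighbour is copied on write into a host (/128) cache
 * entry via rt6_alloc_cow(); off-link routes are either cloned or, as
 * compiled here (CLONE_OFFLINK_ROUTE == 0), used directly.  Insertion of
 * the clone is retried a few times to cope with a concurrent insert
 * while tb6_lock was dropped.
 */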
668 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
669 					    struct flowi *fl, int flags)
670 {
671 	struct fib6_node *fn;
672 	struct rt6_info *rt, *nrt;
673 	int strict = 0;
674 	int attempts = 3;
675 	int err;
676 	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
677 
678 	strict |= flags & RT6_LOOKUP_F_IFACE;
679 
680 relookup:
681 	read_lock_bh(&table->tb6_lock);
682 
683 restart_2:
684 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
685 
686 restart:
687 	rt = rt6_select(fn, fl->iif, strict | reachable);
688 	BACKTRACK(&fl->fl6_src);
689 	if (rt == &ip6_null_entry ||
690 	    rt->rt6i_flags & RTF_CACHE)
691 		goto out;
692 
693 	dst_hold(&rt->u.dst);
694 	read_unlock_bh(&table->tb6_lock);
695 
696 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
697 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
698 	else {
699 #if CLONE_OFFLINK_ROUTE
700 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
701 #else
702 		goto out2;
703 #endif
704 	}
705 
706 	dst_release(&rt->u.dst);
707 	rt = nrt ? : &ip6_null_entry;
708 
709 	dst_hold(&rt->u.dst);
710 	if (nrt) {
711 		err = ip6_ins_rt(nrt);
712 		if (!err)
713 			goto out2;
714 	}
715 
716 	if (--attempts <= 0)
717 		goto out2;
718 
719 	/*
720 	 * Race condition! In the gap while table->tb6_lock was
721 	 * released, someone could have inserted this route.  Relookup.
722 	 */
723 	dst_release(&rt->u.dst);
724 	goto relookup;
725 
726 out:
727 	if (reachable) {
728 		reachable = 0;
729 		goto restart_2;
730 	}
731 	dst_hold(&rt->u.dst);
732 	read_unlock_bh(&table->tb6_lock);
733 out2:
734 	rt->u.dst.lastuse = jiffies;
735 	rt->u.dst.__use++;
736 
737 	return rt;
738 }
739 
740 void ip6_route_input(struct sk_buff *skb)
741 {
742 	struct ipv6hdr *iph = ipv6_hdr(skb);
743 	int flags = RT6_LOOKUP_F_HAS_SADDR;
744 	struct flowi fl = {
745 		.iif = skb->dev->ifindex,
746 		.nl_u = {
747 			.ip6_u = {
748 				.daddr = iph->daddr,
749 				.saddr = iph->saddr,
750 				.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
751 			},
752 		},
753 		.mark = skb->mark,
754 		.proto = iph->nexthdr,
755 	};
756 
757 	if (rt6_need_strict(&iph->daddr))
758 		flags |= RT6_LOOKUP_F_IFACE;
759 
760 	skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
761 }
762 
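/*
 * Output-path twin of ip6_pol_route_input(); the only difference is that
 * routes are matched against fl->oif instead of fl->iif.
 */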
763 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
764 					     struct flowi *fl, int flags)
765 {
766 	struct fib6_node *fn;
767 	struct rt6_info *rt, *nrt;
768 	int strict = 0;
769 	int attempts = 3;
770 	int err;
771 	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
772 
773 	strict |= flags & RT6_LOOKUP_F_IFACE;
774 
775 relookup:
776 	read_lock_bh(&table->tb6_lock);
777 
778 restart_2:
779 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
780 
781 restart:
782 	rt = rt6_select(fn, fl->oif, strict | reachable);
783 	BACKTRACK(&fl->fl6_src);
784 	if (rt == &ip6_null_entry ||
785 	    rt->rt6i_flags & RTF_CACHE)
786 		goto out;
787 
788 	dst_hold(&rt->u.dst);
789 	read_unlock_bh(&table->tb6_lock);
790 
791 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
792 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
793 	else {
794 #if CLONE_OFFLINK_ROUTE
795 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
796 #else
797 		goto out2;
798 #endif
799 	}
800 
801 	dst_release(&rt->u.dst);
802 	rt = nrt ? : &ip6_null_entry;
803 
804 	dst_hold(&rt->u.dst);
805 	if (nrt) {
806 		err = ip6_ins_rt(nrt);
807 		if (!err)
808 			goto out2;
809 	}
810 
811 	if (--attempts <= 0)
812 		goto out2;
813 
814 	/*
815 	 * Race condition! In the gap while table->tb6_lock was
816 	 * released, someone could have inserted this route.  Relookup.
817 	 */
818 	dst_release(&rt->u.dst);
819 	goto relookup;
820 
821 out:
822 	if (reachable) {
823 		reachable = 0;
824 		goto restart_2;
825 	}
826 	dst_hold(&rt->u.dst);
827 	read_unlock_bh(&table->tb6_lock);
828 out2:
829 	rt->u.dst.lastuse = jiffies;
830 	rt->u.dst.__use++;
831 	return rt;
832 }
833 
834 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
835 {
836 	int flags = 0;
837 
838 	if (rt6_need_strict(&fl->fl6_dst))
839 		flags |= RT6_LOOKUP_F_IFACE;
840 
841 	if (!ipv6_addr_any(&fl->fl6_src))
842 		flags |= RT6_LOOKUP_F_HAS_SADDR;
843 
844 	return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
845 }
846 
847 EXPORT_SYMBOL(ip6_route_output);
848 
849 static int ip6_blackhole_output(struct sk_buff *skb)
850 {
851 	kfree_skb(skb);
852 	return 0;
853 }
854 
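/*
 * Replace *dstp with a blackhole dst copied from it: the new entry keeps
 * the original metrics, device and addresses, but silently discards
 * every packet and ignores PMTU updates (ip6_dst_blackhole_ops).
 */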
855 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
856 {
857 	struct rt6_info *ort = (struct rt6_info *) *dstp;
858 	struct rt6_info *rt = (struct rt6_info *)
859 		dst_alloc(&ip6_dst_blackhole_ops);
860 	struct dst_entry *new = NULL;
861 
862 	if (rt) {
863 		new = &rt->u.dst;
864 
865 		atomic_set(&new->__refcnt, 1);
866 		new->__use = 1;
867 		new->input = ip6_blackhole_output;
868 		new->output = ip6_blackhole_output;
869 
870 		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
871 		new->dev = ort->u.dst.dev;
872 		if (new->dev)
873 			dev_hold(new->dev);
874 		rt->rt6i_idev = ort->rt6i_idev;
875 		if (rt->rt6i_idev)
876 			in6_dev_hold(rt->rt6i_idev);
877 		rt->rt6i_expires = 0;
878 
879 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
880 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
881 		rt->rt6i_metric = 0;
882 
883 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
884 #ifdef CONFIG_IPV6_SUBTREES
885 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
886 #endif
887 
888 		dst_free(new);
889 	}
890 
891 	dst_release(*dstp);
892 	*dstp = new;
893 	return (new ? 0 : -ENOMEM);
894 }
895 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
896 
897 /*
898  *	Destination cache support functions
899  */
900 
901 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
902 {
903 	struct rt6_info *rt;
904 
905 	rt = (struct rt6_info *) dst;
906 
907 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
908 		return dst;
909 
910 	return NULL;
911 }
912 
913 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
914 {
915 	struct rt6_info *rt = (struct rt6_info *) dst;
916 
917 	if (rt) {
918 		if (rt->rt6i_flags & RTF_CACHE)
919 			ip6_del_rt(rt);
920 		else
921 			dst_release(dst);
922 	}
923 	return NULL;
924 }
925 
926 static void ip6_link_failure(struct sk_buff *skb)
927 {
928 	struct rt6_info *rt;
929 
930 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
931 
932 	rt = (struct rt6_info *) skb->dst;
933 	if (rt) {
934 		if (rt->rt6i_flags&RTF_CACHE) {
935 			dst_set_expires(&rt->u.dst, 0);
936 			rt->rt6i_flags |= RTF_EXPIRES;
937 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
938 			rt->rt6i_node->fn_sernum = -1;
939 	}
940 }
941 
942 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
943 {
944 	struct rt6_info *rt6 = (struct rt6_info*)dst;
945 
946 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
947 		rt6->rt6i_flags |= RTF_MODIFIED;
948 		if (mtu < IPV6_MIN_MTU) {
949 			mtu = IPV6_MIN_MTU;
950 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
951 		}
952 		dst->metrics[RTAX_MTU-1] = mtu;
953 		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
954 	}
955 }
956 
957 static int ipv6_get_mtu(struct net_device *dev);
958 
959 static inline unsigned int ipv6_advmss(unsigned int mtu)
960 {
961 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
962 
963 	if (mtu < ip6_rt_min_advmss)
964 		mtu = ip6_rt_min_advmss;
965 
966 	/*
967 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
968 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
969 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
970 	 * rely only on pmtu discovery"
971 	 */
972 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
973 		mtu = IPV6_MAXPLEN;
974 	return mtu;
975 }
976 
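/*
 * dst entries created by ndisc_dst_alloc() are not inserted into the
 * FIB; they are chained on ndisc_dst_gc_list and reaped by
 * ndisc_dst_gc() once their reference count drops to zero.
 */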
977 static struct dst_entry *ndisc_dst_gc_list;
978 static DEFINE_SPINLOCK(ndisc_lock);
979 
980 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
981 				  struct neighbour *neigh,
982 				  struct in6_addr *addr,
983 				  int (*output)(struct sk_buff *))
984 {
985 	struct rt6_info *rt;
986 	struct inet6_dev *idev = in6_dev_get(dev);
987 
988 	if (unlikely(idev == NULL))
989 		return NULL;
990 
991 	rt = ip6_dst_alloc();
992 	if (unlikely(rt == NULL)) {
993 		in6_dev_put(idev);
994 		goto out;
995 	}
996 
997 	dev_hold(dev);
998 	if (neigh)
999 		neigh_hold(neigh);
1000 	else
1001 		neigh = ndisc_get_neigh(dev, addr);
1002 
1003 	rt->rt6i_dev	  = dev;
1004 	rt->rt6i_idev     = idev;
1005 	rt->rt6i_nexthop  = neigh;
1006 	atomic_set(&rt->u.dst.__refcnt, 1);
1007 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
1008 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1009 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1010 	rt->u.dst.output  = output;
1011 
1012 #if 0	/* there's no chance to use these for ndisc */
1013 	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1014 				? DST_HOST
1015 				: 0;
1016 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1017 	rt->rt6i_dst.plen = 128;
1018 #endif
1019 
1020 	spin_lock_bh(&ndisc_lock);
1021 	rt->u.dst.next = ndisc_dst_gc_list;
1022 	ndisc_dst_gc_list = &rt->u.dst;
1023 	spin_unlock_bh(&ndisc_lock);
1024 
1025 	fib6_force_start_gc();
1026 
1027 out:
1028 	return &rt->u.dst;
1029 }
1030 
1031 int ndisc_dst_gc(int *more)
1032 {
1033 	struct dst_entry *dst, *next, **pprev;
1034 	int freed;
1035 
1036 	next = NULL;
1037 	freed = 0;
1038 
1039 	spin_lock_bh(&ndisc_lock);
1040 	pprev = &ndisc_dst_gc_list;
1041 
1042 	while ((dst = *pprev) != NULL) {
1043 		if (!atomic_read(&dst->__refcnt)) {
1044 			*pprev = dst->next;
1045 			dst_free(dst);
1046 			freed++;
1047 		} else {
1048 			pprev = &dst->next;
1049 			(*more)++;
1050 		}
1051 	}
1052 
1053 	spin_unlock_bh(&ndisc_lock);
1054 
1055 	return freed;
1056 }
1057 
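/*
 * dst garbage collection: skipped while the table is below
 * ip6_rt_max_size and the minimum interval has not elapsed.  Each forced
 * run lengthens the expiry passed to fib6_run_gc() slightly, and the
 * expiry decays again by ip6_rt_gc_elasticity on every call.
 */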
1058 static int ip6_dst_gc(void)
1059 {
1060 	static unsigned expire = 30*HZ;
1061 	static unsigned long last_gc;
1062 	unsigned long now = jiffies;
1063 
1064 	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
1065 	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
1066 		goto out;
1067 
1068 	expire++;
1069 	fib6_run_gc(expire);
1070 	last_gc = now;
1071 	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1072 		expire = ip6_rt_gc_timeout>>1;
1073 
1074 out:
1075 	expire -= expire>>ip6_rt_gc_elasticity;
1076 	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1077 }
1078 
1079 /* Clean the host part of a prefix. Not necessary in a radix tree,
1080    but it results in cleaner routing tables.
1081 
1082    Remove this only when everything is known to work!
1083  */
1084 
1085 static int ipv6_get_mtu(struct net_device *dev)
1086 {
1087 	int mtu = IPV6_MIN_MTU;
1088 	struct inet6_dev *idev;
1089 
1090 	idev = in6_dev_get(dev);
1091 	if (idev) {
1092 		mtu = idev->cnf.mtu6;
1093 		in6_dev_put(idev);
1094 	}
1095 	return mtu;
1096 }
1097 
1098 int ipv6_get_hoplimit(struct net_device *dev)
1099 {
1100 	int hoplimit = ipv6_devconf.hop_limit;
1101 	struct inet6_dev *idev;
1102 
1103 	idev = in6_dev_get(dev);
1104 	if (idev) {
1105 		hoplimit = idev->cnf.hop_limit;
1106 		in6_dev_put(idev);
1107 	}
1108 	return hoplimit;
1109 }
1110 
1111 /*
1112  *	Route addition and removal
1113  */
1114 
1115 int ip6_route_add(struct fib6_config *cfg)
1116 {
1117 	int err;
1118 	struct rt6_info *rt = NULL;
1119 	struct net_device *dev = NULL;
1120 	struct inet6_dev *idev = NULL;
1121 	struct fib6_table *table;
1122 	int addr_type;
1123 
1124 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1125 		return -EINVAL;
1126 #ifndef CONFIG_IPV6_SUBTREES
1127 	if (cfg->fc_src_len)
1128 		return -EINVAL;
1129 #endif
1130 	if (cfg->fc_ifindex) {
1131 		err = -ENODEV;
1132 		dev = dev_get_by_index(cfg->fc_ifindex);
1133 		if (!dev)
1134 			goto out;
1135 		idev = in6_dev_get(dev);
1136 		if (!idev)
1137 			goto out;
1138 	}
1139 
1140 	if (cfg->fc_metric == 0)
1141 		cfg->fc_metric = IP6_RT_PRIO_USER;
1142 
1143 	table = fib6_new_table(cfg->fc_table);
1144 	if (table == NULL) {
1145 		err = -ENOBUFS;
1146 		goto out;
1147 	}
1148 
1149 	rt = ip6_dst_alloc();
1150 
1151 	if (rt == NULL) {
1152 		err = -ENOMEM;
1153 		goto out;
1154 	}
1155 
1156 	rt->u.dst.obsolete = -1;
1157 	rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1158 
1159 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1160 		cfg->fc_protocol = RTPROT_BOOT;
1161 	rt->rt6i_protocol = cfg->fc_protocol;
1162 
1163 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1164 
1165 	if (addr_type & IPV6_ADDR_MULTICAST)
1166 		rt->u.dst.input = ip6_mc_input;
1167 	else
1168 		rt->u.dst.input = ip6_forward;
1169 
1170 	rt->u.dst.output = ip6_output;
1171 
1172 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1173 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1174 	if (rt->rt6i_dst.plen == 128)
1175 	       rt->u.dst.flags = DST_HOST;
1176 		rt->u.dst.flags = DST_HOST;
1177 #ifdef CONFIG_IPV6_SUBTREES
1178 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1179 	rt->rt6i_src.plen = cfg->fc_src_len;
1180 #endif
1181 
1182 	rt->rt6i_metric = cfg->fc_metric;
1183 
1184 	/* We cannot add true routes via loopback here,
1185 	   they would result in kernel looping; promote them to reject routes
1186 	 */
1187 	if ((cfg->fc_flags & RTF_REJECT) ||
1188 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1189 		/* hold loopback dev/idev if we haven't done so. */
1190 		if (dev != &loopback_dev) {
1191 			if (dev) {
1192 				dev_put(dev);
1193 				in6_dev_put(idev);
1194 			}
1195 			dev = &loopback_dev;
1196 			dev_hold(dev);
1197 			idev = in6_dev_get(dev);
1198 			if (!idev) {
1199 				err = -ENODEV;
1200 				goto out;
1201 			}
1202 		}
1203 		rt->u.dst.output = ip6_pkt_discard_out;
1204 		rt->u.dst.input = ip6_pkt_discard;
1205 		rt->u.dst.error = -ENETUNREACH;
1206 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1207 		goto install_route;
1208 	}
1209 
1210 	if (cfg->fc_flags & RTF_GATEWAY) {
1211 		struct in6_addr *gw_addr;
1212 		int gwa_type;
1213 
1214 		gw_addr = &cfg->fc_gateway;
1215 		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1216 		gwa_type = ipv6_addr_type(gw_addr);
1217 
1218 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1219 			struct rt6_info *grt;
1220 
1221 			/* IPv6 strictly inhibits using non-link-local
1222 			   addresses as nexthop addresses.
1223 			   Otherwise, a router will not be able to send redirects.
1224 			   It is very good, but in some (rare!) circumstances
1225 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1226 			   some exceptions. --ANK
1227 			 */
1228 			err = -EINVAL;
1229 			if (!(gwa_type&IPV6_ADDR_UNICAST))
1230 				goto out;
1231 
1232 			grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1233 
1234 			err = -EHOSTUNREACH;
1235 			if (grt == NULL)
1236 				goto out;
1237 			if (dev) {
1238 				if (dev != grt->rt6i_dev) {
1239 					dst_release(&grt->u.dst);
1240 					goto out;
1241 				}
1242 			} else {
1243 				dev = grt->rt6i_dev;
1244 				idev = grt->rt6i_idev;
1245 				dev_hold(dev);
1246 				in6_dev_hold(grt->rt6i_idev);
1247 			}
1248 			if (!(grt->rt6i_flags&RTF_GATEWAY))
1249 				err = 0;
1250 			dst_release(&grt->u.dst);
1251 
1252 			if (err)
1253 				goto out;
1254 		}
1255 		err = -EINVAL;
1256 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1257 			goto out;
1258 	}
1259 
1260 	err = -ENODEV;
1261 	if (dev == NULL)
1262 		goto out;
1263 
1264 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1265 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1266 		if (IS_ERR(rt->rt6i_nexthop)) {
1267 			err = PTR_ERR(rt->rt6i_nexthop);
1268 			rt->rt6i_nexthop = NULL;
1269 			goto out;
1270 		}
1271 	}
1272 
1273 	rt->rt6i_flags = cfg->fc_flags;
1274 
1275 install_route:
1276 	if (cfg->fc_mx) {
1277 		struct nlattr *nla;
1278 		int remaining;
1279 
1280 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1281 			int type = nla->nla_type;
1282 
1283 			if (type) {
1284 				if (type > RTAX_MAX) {
1285 					err = -EINVAL;
1286 					goto out;
1287 				}
1288 
1289 				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1290 			}
1291 		}
1292 	}
1293 
1294 	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1295 		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1296 	if (!rt->u.dst.metrics[RTAX_MTU-1])
1297 		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1298 	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1299 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1300 	rt->u.dst.dev = dev;
1301 	rt->rt6i_idev = idev;
1302 	rt->rt6i_table = table;
1303 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1304 
1305 out:
1306 	if (dev)
1307 		dev_put(dev);
1308 	if (idev)
1309 		in6_dev_put(idev);
1310 	if (rt)
1311 		dst_free(&rt->u.dst);
1312 	return err;
1313 }
1314 
1315 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
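/*
 * Unlink a route from its table under tb6_lock and drop the reference
 * held by the caller.  Deleting ip6_null_entry is refused.
 */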
1316 {
1317 	int err;
1318 	struct fib6_table *table;
1319 
1320 	if (rt == &ip6_null_entry)
1321 		return -ENOENT;
1322 
1323 	table = rt->rt6i_table;
1324 	write_lock_bh(&table->tb6_lock);
1325 
1326 	err = fib6_del(rt, info);
1327 	dst_release(&rt->u.dst);
1328 
1329 	write_unlock_bh(&table->tb6_lock);
1330 
1331 	return err;
1332 }
1333 
1334 int ip6_del_rt(struct rt6_info *rt)
1335 {
1336 	return __ip6_del_rt(rt, NULL);
1337 }
1338 
1339 static int ip6_route_del(struct fib6_config *cfg)
1340 {
1341 	struct fib6_table *table;
1342 	struct fib6_node *fn;
1343 	struct rt6_info *rt;
1344 	int err = -ESRCH;
1345 
1346 	table = fib6_get_table(cfg->fc_table);
1347 	if (table == NULL)
1348 		return err;
1349 
1350 	read_lock_bh(&table->tb6_lock);
1351 
1352 	fn = fib6_locate(&table->tb6_root,
1353 			 &cfg->fc_dst, cfg->fc_dst_len,
1354 			 &cfg->fc_src, cfg->fc_src_len);
1355 
1356 	if (fn) {
1357 		for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1358 			if (cfg->fc_ifindex &&
1359 			    (rt->rt6i_dev == NULL ||
1360 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1361 				continue;
1362 			if (cfg->fc_flags & RTF_GATEWAY &&
1363 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1364 				continue;
1365 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1366 				continue;
1367 			dst_hold(&rt->u.dst);
1368 			read_unlock_bh(&table->tb6_lock);
1369 
1370 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1371 		}
1372 	}
1373 	read_unlock_bh(&table->tb6_lock);
1374 
1375 	return err;
1376 }
1377 
1378 /*
1379  *	Handle redirects
1380  */
1381 struct ip6rd_flowi {
1382 	struct flowi fl;
1383 	struct in6_addr gateway;
1384 };
1385 
1386 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1387 					     struct flowi *fl,
1388 					     int flags)
1389 {
1390 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1391 	struct rt6_info *rt;
1392 	struct fib6_node *fn;
1393 
1394 	/*
1395 	 * Get the "current" route for this destination and
1396 	 * check if the redirect has come from an appropriate router.
1397 	 *
1398 	 * RFC 2461 specifies that redirects should only be
1399 	 * accepted if they come from the nexthop to the target.
1400 	 * Due to the way the routes are chosen, this notion
1401 	 * is a bit fuzzy and one might need to check all possible
1402 	 * routes.
1403 	 */
1404 
1405 	read_lock_bh(&table->tb6_lock);
1406 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1407 restart:
1408 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1409 		/*
1410 		 * Current route is on-link; redirect is always invalid.
1411 		 *
1412 		 * It seems the previous statement is not true. It could
1413 		 * be a node which regards us as on-link (e.g. proxy ndisc).
1414 		 * But then the router serving it might decide that we should
1415 		 * know the truth 8)8) --ANK (980726).
1416 		 */
1417 		if (rt6_check_expired(rt))
1418 			continue;
1419 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1420 			continue;
1421 		if (fl->oif != rt->rt6i_dev->ifindex)
1422 			continue;
1423 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1424 			continue;
1425 		break;
1426 	}
1427 
1428 	if (!rt)
1429 		rt = &ip6_null_entry;
1430 	BACKTRACK(&fl->fl6_src);
1431 out:
1432 	dst_hold(&rt->u.dst);
1433 
1434 	read_unlock_bh(&table->tb6_lock);
1435 
1436 	return rt;
1437 };
1438 
1439 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1440 					   struct in6_addr *src,
1441 					   struct in6_addr *gateway,
1442 					   struct net_device *dev)
1443 {
1444 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1445 	struct ip6rd_flowi rdfl = {
1446 		.fl = {
1447 			.oif = dev->ifindex,
1448 			.nl_u = {
1449 				.ip6_u = {
1450 					.daddr = *dest,
1451 					.saddr = *src,
1452 				},
1453 			},
1454 		},
1455 		.gateway = *gateway,
1456 	};
1457 
1458 	if (rt6_need_strict(dest))
1459 		flags |= RT6_LOOKUP_F_IFACE;
1460 
1461 	return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1462 }
1463 
1464 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1465 		  struct in6_addr *saddr,
1466 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1467 {
1468 	struct rt6_info *rt, *nrt = NULL;
1469 	struct netevent_redirect netevent;
1470 
1471 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1472 
1473 	if (rt == &ip6_null_entry) {
1474 		if (net_ratelimit())
1475 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1476 			       "for redirect target\n");
1477 		goto out;
1478 	}
1479 
1480 	/*
1481 	 *	We have finally decided to accept it.
1482 	 */
1483 
1484 	neigh_update(neigh, lladdr, NUD_STALE,
1485 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1486 		     NEIGH_UPDATE_F_OVERRIDE|
1487 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1488 				     NEIGH_UPDATE_F_ISROUTER))
1489 		     );
1490 
1491 	/*
1492 	 * Redirect received -> path was valid.
1493 	 * Look, redirects are sent only in response to data packets,
1494 	 * so that this nexthop apparently is reachable. --ANK
1495 	 */
1496 	dst_confirm(&rt->u.dst);
1497 
1498 	/* Duplicate redirect: silently ignore. */
1499 	if (neigh == rt->u.dst.neighbour)
1500 		goto out;
1501 
1502 	nrt = ip6_rt_copy(rt);
1503 	if (nrt == NULL)
1504 		goto out;
1505 
1506 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1507 	if (on_link)
1508 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1509 
1510 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1511 	nrt->rt6i_dst.plen = 128;
1512 	nrt->u.dst.flags |= DST_HOST;
1513 
1514 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1515 	nrt->rt6i_nexthop = neigh_clone(neigh);
1516 	/* Reset pmtu, it may be better */
1517 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1518 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1519 
1520 	if (ip6_ins_rt(nrt))
1521 		goto out;
1522 
1523 	netevent.old = &rt->u.dst;
1524 	netevent.new = &nrt->u.dst;
1525 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1526 
1527 	if (rt->rt6i_flags&RTF_CACHE) {
1528 		ip6_del_rt(rt);
1529 		return;
1530 	}
1531 
1532 out:
1533 	dst_release(&rt->u.dst);
1534 	return;
1535 }
1536 
1537 /*
1538  *	Handle ICMP "packet too big" messages
1539  *	i.e. Path MTU discovery
1540  */
1541 
1542 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1543 			struct net_device *dev, u32 pmtu)
1544 {
1545 	struct rt6_info *rt, *nrt;
1546 	int allfrag = 0;
1547 
1548 	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1549 	if (rt == NULL)
1550 		return;
1551 
1552 	if (pmtu >= dst_mtu(&rt->u.dst))
1553 		goto out;
1554 
1555 	if (pmtu < IPV6_MIN_MTU) {
1556 		/*
1557 		 * According to RFC 2460, when a node receives a Packet Too Big
1558 		 * message reporting a PMTU less than the IPv6 Minimum Link
1559 		 * MTU (1280), the PMTU is set to the Minimum Link MTU and a
1560 		 * fragment header should always be included from then on.
1561 		 */
1562 		pmtu = IPV6_MIN_MTU;
1563 		allfrag = 1;
1564 	}
1565 
1566 	/* New mtu received -> path was valid.
1567 	   They are sent only in response to data packets,
1568 	   so that this nexthop apparently is reachable. --ANK
1569 	 */
1570 	dst_confirm(&rt->u.dst);
1571 
1572 	/* Host route. If it is static, it would be better
1573 	   not to override it but to add a new one, so that
1574 	   when the cache entry expires the old pmtu is
1575 	   restored automatically.
1576 	 */
1577 	if (rt->rt6i_flags & RTF_CACHE) {
1578 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1579 		if (allfrag)
1580 			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1581 		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1582 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1583 		goto out;
1584 	}
1585 
1586 	/* Network route.
1587 	   Two cases are possible:
1588 	   1. It is a connected route. Action: COW.
1589 	   2. It is a gatewayed or NONEXTHOP route. Action: clone it.
1590 	 */
1591 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1592 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1593 	else
1594 		nrt = rt6_alloc_clone(rt, daddr);
1595 
1596 	if (nrt) {
1597 		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1598 		if (allfrag)
1599 			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1600 
1601 		/* According to RFC 1981, a PMTU increase should not be
1602 		 * detected within 5 minutes; the recommended timer is 10 minutes.
1603 		 * Here the route expiration time is set to ip6_rt_mtu_expires,
1604 		 * which is 10 minutes. After 10 minutes the decreased pmtu
1605 		 * expires and PMTU increase detection happens automatically.
1606 		 */
1607 		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1608 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1609 
1610 		ip6_ins_rt(nrt);
1611 	}
1612 out:
1613 	dst_release(&rt->u.dst);
1614 }
1615 
1616 /*
1617  *	Misc support functions
1618  */
1619 
1620 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1621 {
1622 	struct rt6_info *rt = ip6_dst_alloc();
1623 
1624 	if (rt) {
1625 		rt->u.dst.input = ort->u.dst.input;
1626 		rt->u.dst.output = ort->u.dst.output;
1627 
1628 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1629 		rt->u.dst.error = ort->u.dst.error;
1630 		rt->u.dst.dev = ort->u.dst.dev;
1631 		if (rt->u.dst.dev)
1632 			dev_hold(rt->u.dst.dev);
1633 		rt->rt6i_idev = ort->rt6i_idev;
1634 		if (rt->rt6i_idev)
1635 			in6_dev_hold(rt->rt6i_idev);
1636 		rt->u.dst.lastuse = jiffies;
1637 		rt->rt6i_expires = 0;
1638 
1639 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1640 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1641 		rt->rt6i_metric = 0;
1642 
1643 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1644 #ifdef CONFIG_IPV6_SUBTREES
1645 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1646 #endif
1647 		rt->rt6i_table = ort->rt6i_table;
1648 	}
1649 	return rt;
1650 }
1651 
1652 #ifdef CONFIG_IPV6_ROUTE_INFO
1653 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1654 					   struct in6_addr *gwaddr, int ifindex)
1655 {
1656 	struct fib6_node *fn;
1657 	struct rt6_info *rt = NULL;
1658 	struct fib6_table *table;
1659 
1660 	table = fib6_get_table(RT6_TABLE_INFO);
1661 	if (table == NULL)
1662 		return NULL;
1663 
1664 	write_lock_bh(&table->tb6_lock);
1665 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1666 	if (!fn)
1667 		goto out;
1668 
1669 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1670 		if (rt->rt6i_dev->ifindex != ifindex)
1671 			continue;
1672 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1673 			continue;
1674 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1675 			continue;
1676 		dst_hold(&rt->u.dst);
1677 		break;
1678 	}
1679 out:
1680 	write_unlock_bh(&table->tb6_lock);
1681 	return rt;
1682 }
1683 
1684 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1685 					   struct in6_addr *gwaddr, int ifindex,
1686 					   unsigned pref)
1687 {
1688 	struct fib6_config cfg = {
1689 		.fc_table	= RT6_TABLE_INFO,
1690 		.fc_metric	= 1024,
1691 		.fc_ifindex	= ifindex,
1692 		.fc_dst_len	= prefixlen,
1693 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1694 				  RTF_UP | RTF_PREF(pref),
1695 	};
1696 
1697 	ipv6_addr_copy(&cfg.fc_dst, prefix);
1698 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1699 
1700 	/* We should treat it as a default route if prefix length is 0. */
1701 	if (!prefixlen)
1702 		cfg.fc_flags |= RTF_DEFAULT;
1703 
1704 	ip6_route_add(&cfg);
1705 
1706 	return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1707 }
1708 #endif
1709 
1710 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1711 {
1712 	struct rt6_info *rt;
1713 	struct fib6_table *table;
1714 
1715 	table = fib6_get_table(RT6_TABLE_DFLT);
1716 	if (table == NULL)
1717 		return NULL;
1718 
1719 	write_lock_bh(&table->tb6_lock);
1720 	for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1721 		if (dev == rt->rt6i_dev &&
1722 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1723 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1724 			break;
1725 	}
1726 	if (rt)
1727 		dst_hold(&rt->u.dst);
1728 	write_unlock_bh(&table->tb6_lock);
1729 	return rt;
1730 }
1731 
1732 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1733 				     struct net_device *dev,
1734 				     unsigned int pref)
1735 {
1736 	struct fib6_config cfg = {
1737 		.fc_table	= RT6_TABLE_DFLT,
1738 		.fc_metric	= 1024,
1739 		.fc_ifindex	= dev->ifindex,
1740 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1741 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1742 	};
1743 
1744 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1745 
1746 	ip6_route_add(&cfg);
1747 
1748 	return rt6_get_dflt_router(gwaddr, dev);
1749 }
1750 
1751 void rt6_purge_dflt_routers(void)
1752 {
1753 	struct rt6_info *rt;
1754 	struct fib6_table *table;
1755 
1756 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1757 	table = fib6_get_table(RT6_TABLE_DFLT);
1758 	if (table == NULL)
1759 		return;
1760 
1761 restart:
1762 	read_lock_bh(&table->tb6_lock);
1763 	for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1764 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1765 			dst_hold(&rt->u.dst);
1766 			read_unlock_bh(&table->tb6_lock);
1767 			ip6_del_rt(rt);
1768 			goto restart;
1769 		}
1770 	}
1771 	read_unlock_bh(&table->tb6_lock);
1772 }
1773 
1774 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1775 				 struct fib6_config *cfg)
1776 {
1777 	memset(cfg, 0, sizeof(*cfg));
1778 
1779 	cfg->fc_table = RT6_TABLE_MAIN;
1780 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1781 	cfg->fc_metric = rtmsg->rtmsg_metric;
1782 	cfg->fc_expires = rtmsg->rtmsg_info;
1783 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1784 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1785 	cfg->fc_flags = rtmsg->rtmsg_flags;
1786 
1787 	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1788 	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1789 	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1790 }
1791 
1792 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1793 {
1794 	struct fib6_config cfg;
1795 	struct in6_rtmsg rtmsg;
1796 	int err;
1797 
1798 	switch(cmd) {
1799 	case SIOCADDRT:		/* Add a route */
1800 	case SIOCDELRT:		/* Delete a route */
1801 		if (!capable(CAP_NET_ADMIN))
1802 			return -EPERM;
1803 		err = copy_from_user(&rtmsg, arg,
1804 				     sizeof(struct in6_rtmsg));
1805 		if (err)
1806 			return -EFAULT;
1807 
1808 		rtmsg_to_fib6_config(&rtmsg, &cfg);
1809 
1810 		rtnl_lock();
1811 		switch (cmd) {
1812 		case SIOCADDRT:
1813 			err = ip6_route_add(&cfg);
1814 			break;
1815 		case SIOCDELRT:
1816 			err = ip6_route_del(&cfg);
1817 			break;
1818 		default:
1819 			err = -EINVAL;
1820 		}
1821 		rtnl_unlock();
1822 
1823 		return err;
1824 	}
1825 
1826 	return -EINVAL;
1827 }
1828 
1829 /*
1830  *	Drop the packet on the floor
1831  */
1832 
1833 static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
1834 			       int ipstats_mib_noroutes)
1835 {
1836 	int type;
1837 	switch (ipstats_mib_noroutes) {
1838 	case IPSTATS_MIB_INNOROUTES:
1839 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1840 		if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1841 			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1842 			break;
1843 		}
1844 		/* FALLTHROUGH */
1845 	case IPSTATS_MIB_OUTNOROUTES:
1846 		IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1847 		break;
1848 	}
1849 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1850 	kfree_skb(skb);
1851 	return 0;
1852 }
1853 
1854 static int ip6_pkt_discard(struct sk_buff *skb)
1855 {
1856 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1857 }
1858 
1859 static int ip6_pkt_discard_out(struct sk_buff *skb)
1860 {
1861 	skb->dev = skb->dst->dev;
1862 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1863 }
1864 
1865 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1866 
1867 static int ip6_pkt_prohibit(struct sk_buff *skb)
1868 {
1869 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1870 }
1871 
1872 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1873 {
1874 	skb->dev = skb->dst->dev;
1875 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1876 }
1877 
1878 static int ip6_pkt_blk_hole(struct sk_buff *skb)
1879 {
1880 	kfree_skb(skb);
1881 	return 0;
1882 }
1883 
1884 #endif
1885 
1886 /*
1887  *	Allocate a dst for local (unicast / anycast) address.
1888  */
1889 
1890 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1891 				    const struct in6_addr *addr,
1892 				    int anycast)
1893 {
1894 	struct rt6_info *rt = ip6_dst_alloc();
1895 
1896 	if (rt == NULL)
1897 		return ERR_PTR(-ENOMEM);
1898 
1899 	dev_hold(&loopback_dev);
1900 	in6_dev_hold(idev);
1901 
1902 	rt->u.dst.flags = DST_HOST;
1903 	rt->u.dst.input = ip6_input;
1904 	rt->u.dst.output = ip6_output;
1905 	rt->rt6i_dev = &loopback_dev;
1906 	rt->rt6i_idev = idev;
1907 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1908 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1909 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1910 	rt->u.dst.obsolete = -1;
1911 
1912 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1913 	if (anycast)
1914 		rt->rt6i_flags |= RTF_ANYCAST;
1915 	else
1916 		rt->rt6i_flags |= RTF_LOCAL;
1917 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1918 	if (rt->rt6i_nexthop == NULL) {
1919 		dst_free(&rt->u.dst);
1920 		return ERR_PTR(-ENOMEM);
1921 	}
1922 
1923 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1924 	rt->rt6i_dst.plen = 128;
1925 	rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1926 
1927 	atomic_set(&rt->u.dst.__refcnt, 1);
1928 
1929 	return rt;
1930 }
1931 
1932 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1933 {
1934 	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1935 	    rt != &ip6_null_entry) {
1936 		RT6_TRACE("deleted by ifdown %p\n", rt);
1937 		return -1;
1938 	}
1939 	return 0;
1940 }
1941 
1942 void rt6_ifdown(struct net_device *dev)
1943 {
1944 	fib6_clean_all(fib6_ifdown, 0, dev);
1945 }
1946 
1947 struct rt6_mtu_change_arg
1948 {
1949 	struct net_device *dev;
1950 	unsigned mtu;
1951 };
1952 
1953 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1954 {
1955 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1956 	struct inet6_dev *idev;
1957 
1958 	/* In IPv6 pmtu discovery is not optional,
1959 	   so that the RTAX_MTU lock cannot disable it.
1960 	   We still use this lock to block changes
1961 	   caused by addrconf/ndisc.
1962 	*/
1963 
1964 	idev = __in6_dev_get(arg->dev);
1965 	if (idev == NULL)
1966 		return 0;
1967 
1968 	/* After an administrative MTU increase there is no way to
1969 	   discover an IPv6 PMTU increase, so the PMTU must be updated
1970 	   here. Since RFC 1981 doesn't cover administrative MTU increases,
1971 	   updating for a PMTU increase is a MUST (e.g. jumbo frames).
1972 	 */
1973 	/*
1974 	   If the new MTU is less than the route PMTU, the new MTU will be
1975 	   the lowest MTU in the path; update the route PMTU to reflect the
1976 	   decrease. If the new MTU is greater than the route PMTU, and the
1977 	   old MTU was the lowest MTU in the path, update the route PMTU to
1978 	   reflect the increase. In that case, if another node's MTU is now
1979 	   the lowest in the path, a Packet Too Big message will lead to
1980 	   PMTU discovery.
1981 	 */
1982 	if (rt->rt6i_dev == arg->dev &&
1983 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1984 	    (dst_mtu(&rt->u.dst) > arg->mtu ||
1985 	     (dst_mtu(&rt->u.dst) < arg->mtu &&
1986 	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1987 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1988 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1989 	return 0;
1990 }
1991 
1992 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1993 {
1994 	struct rt6_mtu_change_arg arg = {
1995 		.dev = dev,
1996 		.mtu = mtu,
1997 	};
1998 
1999 	fib6_clean_all(rt6_mtu_change_route, 0, &arg);
2000 }
2001 
2002 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2003 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2004 	[RTA_OIF]               = { .type = NLA_U32 },
2005 	[RTA_IIF]		= { .type = NLA_U32 },
2006 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2007 	[RTA_METRICS]           = { .type = NLA_NESTED },
2008 };
2009 
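/*
 * Translate an RTM_NEWROUTE/RTM_DELROUTE netlink message into a
 * struct fib6_config for ip6_route_add()/ip6_route_del().
 */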
2010 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2011 			      struct fib6_config *cfg)
2012 {
2013 	struct rtmsg *rtm;
2014 	struct nlattr *tb[RTA_MAX+1];
2015 	int err;
2016 
2017 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2018 	if (err < 0)
2019 		goto errout;
2020 
2021 	err = -EINVAL;
2022 	rtm = nlmsg_data(nlh);
2023 	memset(cfg, 0, sizeof(*cfg));
2024 
2025 	cfg->fc_table = rtm->rtm_table;
2026 	cfg->fc_dst_len = rtm->rtm_dst_len;
2027 	cfg->fc_src_len = rtm->rtm_src_len;
2028 	cfg->fc_flags = RTF_UP;
2029 	cfg->fc_protocol = rtm->rtm_protocol;
2030 
2031 	if (rtm->rtm_type == RTN_UNREACHABLE)
2032 		cfg->fc_flags |= RTF_REJECT;
2033 
2034 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2035 	cfg->fc_nlinfo.nlh = nlh;
2036 
2037 	if (tb[RTA_GATEWAY]) {
2038 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2039 		cfg->fc_flags |= RTF_GATEWAY;
2040 	}
2041 
2042 	if (tb[RTA_DST]) {
2043 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2044 
2045 		if (nla_len(tb[RTA_DST]) < plen)
2046 			goto errout;
2047 
2048 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2049 	}
2050 
2051 	if (tb[RTA_SRC]) {
2052 		int plen = (rtm->rtm_src_len + 7) >> 3;
2053 
2054 		if (nla_len(tb[RTA_SRC]) < plen)
2055 			goto errout;
2056 
2057 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2058 	}
2059 
2060 	if (tb[RTA_OIF])
2061 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2062 
2063 	if (tb[RTA_PRIORITY])
2064 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2065 
2066 	if (tb[RTA_METRICS]) {
2067 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2068 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2069 	}
2070 
2071 	if (tb[RTA_TABLE])
2072 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2073 
2074 	err = 0;
2075 errout:
2076 	return err;
2077 }
2078 
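/* RTM_DELROUTE handler: convert the request and delete the matching route. */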
2079 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2080 {
2081 	struct fib6_config cfg;
2082 	int err;
2083 
2084 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2085 	if (err < 0)
2086 		return err;
2087 
2088 	return ip6_route_del(&cfg);
2089 }
2090 
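/* RTM_NEWROUTE handler: convert the request and add the route. */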
2091 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2092 {
2093 	struct fib6_config cfg;
2094 	int err;
2095 
2096 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2097 	if (err < 0)
2098 		return err;
2099 
2100 	return ip6_route_add(&cfg);
2101 }
2102 
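/*
 *	Worst-case size of a route netlink message; used to size the skb
 *	allocated in inet6_rt_notify().
 */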
2103 static inline size_t rt6_nlmsg_size(void)
2104 {
2105 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2106 	       + nla_total_size(16) /* RTA_SRC */
2107 	       + nla_total_size(16) /* RTA_DST */
2108 	       + nla_total_size(16) /* RTA_GATEWAY */
2109 	       + nla_total_size(16) /* RTA_PREFSRC */
2110 	       + nla_total_size(4) /* RTA_TABLE */
2111 	       + nla_total_size(4) /* RTA_IIF */
2112 	       + nla_total_size(4) /* RTA_OIF */
2113 	       + nla_total_size(4) /* RTA_PRIORITY */
2114 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2115 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2116 }
2117 
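/*
 *	Fill one route message into @skb.  A non-zero @prefix means the
 *	requester only wants RTF_PREFIX_RT routes, so other routes are
 *	silently skipped (return 1).  Returns -EMSGSIZE if the skb ran
 *	out of room.
 */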
2118 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2119 			 struct in6_addr *dst, struct in6_addr *src,
2120 			 int iif, int type, u32 pid, u32 seq,
2121 			 int prefix, unsigned int flags)
2122 {
2123 	struct rtmsg *rtm;
2124 	struct nlmsghdr *nlh;
2125 	long expires;
2126 	u32 table;
2127 
2128 	if (prefix) {	/* user wants prefix routes only */
2129 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2130 			/* success since this is not a prefix route */
2131 			return 1;
2132 		}
2133 	}
2134 
2135 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2136 	if (nlh == NULL)
2137 		return -EMSGSIZE;
2138 
2139 	rtm = nlmsg_data(nlh);
2140 	rtm->rtm_family = AF_INET6;
2141 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2142 	rtm->rtm_src_len = rt->rt6i_src.plen;
2143 	rtm->rtm_tos = 0;
2144 	if (rt->rt6i_table)
2145 		table = rt->rt6i_table->tb6_id;
2146 	else
2147 		table = RT6_TABLE_UNSPEC;
2148 	rtm->rtm_table = table;
2149 	NLA_PUT_U32(skb, RTA_TABLE, table);
2150 	if (rt->rt6i_flags&RTF_REJECT)
2151 		rtm->rtm_type = RTN_UNREACHABLE;
2152 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2153 		rtm->rtm_type = RTN_LOCAL;
2154 	else
2155 		rtm->rtm_type = RTN_UNICAST;
2156 	rtm->rtm_flags = 0;
2157 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2158 	rtm->rtm_protocol = rt->rt6i_protocol;
2159 	if (rt->rt6i_flags&RTF_DYNAMIC)
2160 		rtm->rtm_protocol = RTPROT_REDIRECT;
2161 	else if (rt->rt6i_flags & RTF_ADDRCONF)
2162 		rtm->rtm_protocol = RTPROT_KERNEL;
2163 	else if (rt->rt6i_flags&RTF_DEFAULT)
2164 		rtm->rtm_protocol = RTPROT_RA;
2165 
2166 	if (rt->rt6i_flags&RTF_CACHE)
2167 		rtm->rtm_flags |= RTM_F_CLONED;
2168 
2169 	if (dst) {
2170 		NLA_PUT(skb, RTA_DST, 16, dst);
2171 		rtm->rtm_dst_len = 128;
2172 	} else if (rtm->rtm_dst_len)
2173 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2174 #ifdef CONFIG_IPV6_SUBTREES
2175 	if (src) {
2176 		NLA_PUT(skb, RTA_SRC, 16, src);
2177 		rtm->rtm_src_len = 128;
2178 	} else if (rtm->rtm_src_len)
2179 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2180 #endif
2181 	if (iif)
2182 		NLA_PUT_U32(skb, RTA_IIF, iif);
2183 	else if (dst) {
2184 		struct in6_addr saddr_buf;
2185 		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2186 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2187 	}
2188 
2189 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2190 		goto nla_put_failure;
2191 
2192 	if (rt->u.dst.neighbour)
2193 		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2194 
2195 	if (rt->u.dst.dev)
2196 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2197 
2198 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2199 
2200 	expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2201 	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2202 			       expires, rt->u.dst.error) < 0)
2203 		goto nla_put_failure;
2204 
2205 	return nlmsg_end(skb, nlh);
2206 
2207 nla_put_failure:
2208 	nlmsg_cancel(skb, nlh);
2209 	return -EMSGSIZE;
2210 }
2211 
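/*
 *	Dump callback: check whether the dump request asked for prefix
 *	routes only (RTM_F_PREFIX), then emit this route via rt6_fill_node().
 */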
2212 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2213 {
2214 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2215 	int prefix;
2216 
2217 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2218 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2219 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2220 	} else
2221 		prefix = 0;
2222 
2223 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2224 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2225 		     prefix, NLM_F_MULTI);
2226 }
2227 
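/*
 *	RTM_GETROUTE handler: look up the route for the given source,
 *	destination and interface, and unicast the result back to the
 *	requester.
 */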
2228 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2229 {
2230 	struct nlattr *tb[RTA_MAX+1];
2231 	struct rt6_info *rt;
2232 	struct sk_buff *skb;
2233 	struct rtmsg *rtm;
2234 	struct flowi fl;
2235 	int err, iif = 0;
2236 
2237 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2238 	if (err < 0)
2239 		goto errout;
2240 
2241 	err = -EINVAL;
2242 	memset(&fl, 0, sizeof(fl));
2243 
2244 	if (tb[RTA_SRC]) {
2245 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2246 			goto errout;
2247 
2248 		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2249 	}
2250 
2251 	if (tb[RTA_DST]) {
2252 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2253 			goto errout;
2254 
2255 		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2256 	}
2257 
2258 	if (tb[RTA_IIF])
2259 		iif = nla_get_u32(tb[RTA_IIF]);
2260 
2261 	if (tb[RTA_OIF])
2262 		fl.oif = nla_get_u32(tb[RTA_OIF]);
2263 
2264 	if (iif) {
2265 		struct net_device *dev;
2266 		dev = __dev_get_by_index(iif);
2267 		if (!dev) {
2268 			err = -ENODEV;
2269 			goto errout;
2270 		}
2271 	}
2272 
2273 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2274 	if (skb == NULL) {
2275 		err = -ENOBUFS;
2276 		goto errout;
2277 	}
2278 
2279 	/* Reserve room for dummy headers; this skb can pass
2280 	   through a good chunk of the routing engine.
2281 	 */
2282 	skb_reset_mac_header(skb);
2283 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2284 
2285 	rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2286 	skb->dst = &rt->u.dst;
2287 
2288 	err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2289 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2290 			    nlh->nlmsg_seq, 0, 0);
2291 	if (err < 0) {
2292 		kfree_skb(skb);
2293 		goto errout;
2294 	}
2295 
2296 	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2297 errout:
2298 	return err;
2299 }
2300 
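/*
 *	Broadcast a route change (typically RTM_NEWROUTE or RTM_DELROUTE)
 *	to RTNLGRP_IPV6_ROUTE listeners.
 */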
2301 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2302 {
2303 	struct sk_buff *skb;
2304 	u32 pid = 0, seq = 0;
2305 	struct nlmsghdr *nlh = NULL;
2306 	int err = -ENOBUFS;
2307 
2308 	if (info) {
2309 		pid = info->pid;
2310 		nlh = info->nlh;
2311 		if (nlh)
2312 			seq = nlh->nlmsg_seq;
2313 	}
2314 
2315 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2316 	if (skb == NULL)
2317 		goto errout;
2318 
2319 	err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2320 	if (err < 0) {
2321 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2322 		WARN_ON(err == -EMSGSIZE);
2323 		kfree_skb(skb);
2324 		goto errout;
2325 	}
2326 	err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2327 errout:
2328 	if (err < 0)
2329 		rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2330 }
2331 
2332 /*
2333  *	/proc
2334  */
2335 
2336 #ifdef CONFIG_PROC_FS
2337 
2338 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2339 
2340 struct rt6_proc_arg
2341 {
2342 	char *buffer;
2343 	int offset;
2344 	int length;
2345 	int skip;
2346 	int len;
2347 };
2348 
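/*
 *	/proc/net/ipv6_route: one route per line, formatted as
 *	  dst plen src plen gateway metric refcnt use flags devname
 *	with addresses printed as 32 hex digits.
 */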
2349 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2350 {
2351 	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2352 
2353 	if (arg->skip < arg->offset / RT6_INFO_LEN) {
2354 		arg->skip++;
2355 		return 0;
2356 	}
2357 
2358 	if (arg->len >= arg->length)
2359 		return 0;
2360 
2361 	arg->len += sprintf(arg->buffer + arg->len,
2362 			    NIP6_SEQFMT " %02x ",
2363 			    NIP6(rt->rt6i_dst.addr),
2364 			    rt->rt6i_dst.plen);
2365 
2366 #ifdef CONFIG_IPV6_SUBTREES
2367 	arg->len += sprintf(arg->buffer + arg->len,
2368 			    NIP6_SEQFMT " %02x ",
2369 			    NIP6(rt->rt6i_src.addr),
2370 			    rt->rt6i_src.plen);
2371 #else
2372 	arg->len += sprintf(arg->buffer + arg->len,
2373 			    "00000000000000000000000000000000 00 ");
2374 #endif
2375 
2376 	if (rt->rt6i_nexthop) {
2377 		arg->len += sprintf(arg->buffer + arg->len,
2378 				    NIP6_SEQFMT,
2379 				    NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2380 	} else {
2381 		arg->len += sprintf(arg->buffer + arg->len,
2382 				    "00000000000000000000000000000000");
2383 	}
2384 	arg->len += sprintf(arg->buffer + arg->len,
2385 			    " %08x %08x %08x %08x %8s\n",
2386 			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2387 			    rt->u.dst.__use, rt->rt6i_flags,
2388 			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
2389 	return 0;
2390 }
2391 
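/*
 *	get_info handler for /proc/net/ipv6_route: walk the FIB and honour
 *	the offset/length arguments so the file can be read in chunks, one
 *	fixed-size (RT6_INFO_LEN) record per route.
 */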
2392 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2393 {
2394 	struct rt6_proc_arg arg = {
2395 		.buffer = buffer,
2396 		.offset = offset,
2397 		.length = length,
2398 	};
2399 
2400 	fib6_clean_all(rt6_info_route, 0, &arg);
2401 
2402 	*start = buffer;
2403 	if (offset)
2404 		*start += offset % RT6_INFO_LEN;
2405 
2406 	arg.len -= offset % RT6_INFO_LEN;
2407 
2408 	if (arg.len > length)
2409 		arg.len = length;
2410 	if (arg.len < 0)
2411 		arg.len = 0;
2412 
2413 	return arg.len;
2414 }
2415 
2416 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2417 {
2418 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2419 		      rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2420 		      rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2421 		      rt6_stats.fib_rt_cache,
2422 		      atomic_read(&ip6_dst_ops.entries),
2423 		      rt6_stats.fib_discarded_routes);
2424 
2425 	return 0;
2426 }
2427 
2428 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2429 {
2430 	return single_open(file, rt6_stats_seq_show, NULL);
2431 }
2432 
2433 static const struct file_operations rt6_stats_seq_fops = {
2434 	.owner	 = THIS_MODULE,
2435 	.open	 = rt6_stats_seq_open,
2436 	.read	 = seq_read,
2437 	.llseek	 = seq_lseek,
2438 	.release = single_release,
2439 };
2440 #endif	/* CONFIG_PROC_FS */
2441 
2442 #ifdef CONFIG_SYSCTL
2443 
2444 static int flush_delay;
2445 
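/*
 *	Write-only sysctl: writing to the "flush" entry of the ipv6 route
 *	sysctl directory runs the FIB garbage collector; a value <= 0
 *	triggers an immediate run.
 */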
2446 static
2447 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2448 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2449 {
2450 	if (write) {
2451 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2452 		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2453 		return 0;
2454 	} else
2455 		return -EINVAL;
2456 }
2457 
2458 ctl_table ipv6_route_table[] = {
2459 	{
2460 		.ctl_name	=	NET_IPV6_ROUTE_FLUSH,
2461 		.procname	=	"flush",
2462 		.data		=	&flush_delay,
2463 		.maxlen		=	sizeof(int),
2464 		.mode		=	0200,
2465 		.proc_handler	=	&ipv6_sysctl_rtcache_flush
2466 	},
2467 	{
2468 		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
2469 		.procname	=	"gc_thresh",
2470 		.data		=	&ip6_dst_ops.gc_thresh,
2471 		.maxlen		=	sizeof(int),
2472 		.mode		=	0644,
2473 		.proc_handler	=	&proc_dointvec,
2474 	},
2475 	{
2476 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
2477 		.procname	=	"max_size",
2478 		.data		=	&ip6_rt_max_size,
2479 		.maxlen		=	sizeof(int),
2480 		.mode		=	0644,
2481 		.proc_handler	=	&proc_dointvec,
2482 	},
2483 	{
2484 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2485 		.procname	=	"gc_min_interval",
2486 		.data		=	&ip6_rt_gc_min_interval,
2487 		.maxlen		=	sizeof(int),
2488 		.mode		=	0644,
2489 		.proc_handler	=	&proc_dointvec_jiffies,
2490 		.strategy	=	&sysctl_jiffies,
2491 	},
2492 	{
2493 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
2494 		.procname	=	"gc_timeout",
2495 		.data		=	&ip6_rt_gc_timeout,
2496 		.maxlen		=	sizeof(int),
2497 		.mode		=	0644,
2498 		.proc_handler	=	&proc_dointvec_jiffies,
2499 		.strategy	=	&sysctl_jiffies,
2500 	},
2501 	{
2502 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
2503 		.procname	=	"gc_interval",
2504 		.data		=	&ip6_rt_gc_interval,
2505 		.maxlen		=	sizeof(int),
2506 		.mode		=	0644,
2507 		.proc_handler	=	&proc_dointvec_jiffies,
2508 		.strategy	=	&sysctl_jiffies,
2509 	},
2510 	{
2511 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
2512 		.procname	=	"gc_elasticity",
2513 		.data		=	&ip6_rt_gc_elasticity,
2514 		.maxlen		=	sizeof(int),
2515 		.mode		=	0644,
2516 		.proc_handler	=	&proc_dointvec_jiffies,
2517 		.strategy	=	&sysctl_jiffies,
2518 	},
2519 	{
2520 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
2521 		.procname	=	"mtu_expires",
2522 		.data		=	&ip6_rt_mtu_expires,
2523 		.maxlen		=	sizeof(int),
2524 		.mode		=	0644,
2525 		.proc_handler	=	&proc_dointvec_jiffies,
2526 		.strategy	=	&sysctl_jiffies,
2527 	},
2528 	{
2529 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
2530 		.procname	=	"min_adv_mss",
2531 		.data		=	&ip6_rt_min_advmss,
2532 		.maxlen		=	sizeof(int),
2533 		.mode		=	0644,
2534 		.proc_handler	=	&proc_dointvec_jiffies,
2535 		.strategy	=	&sysctl_jiffies,
2536 	},
2537 	{
2538 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2539 		.procname	=	"gc_min_interval_ms",
2540 		.data		=	&ip6_rt_gc_min_interval,
2541 		.maxlen		=	sizeof(int),
2542 		.mode		=	0644,
2543 		.proc_handler	=	&proc_dointvec_ms_jiffies,
2544 		.strategy	=	&sysctl_ms_jiffies,
2545 	},
2546 	{ .ctl_name = 0 }
2547 };
2548 
2549 #endif
2550 
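/*
 *	Boot-time initialisation: create the rt6_info slab cache, initialise
 *	the FIB, register the /proc entries and the rtnetlink route handlers.
 */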
2551 void __init ip6_route_init(void)
2552 {
2553 #ifdef 	CONFIG_PROC_FS
2554 	struct proc_dir_entry *p;
2555 #endif
2556 	ip6_dst_ops.kmem_cachep =
2557 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2558 				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
2559 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2560 
2561 	fib6_init();
2562 #ifdef 	CONFIG_PROC_FS
2563 	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2564 	if (p)
2565 		p->owner = THIS_MODULE;
2566 
2567 	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2568 #endif
2569 #ifdef CONFIG_XFRM
2570 	xfrm6_init();
2571 #endif
2572 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2573 	fib6_rules_init();
2574 #endif
2575 
2576 	__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
2577 	__rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
2578 	__rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
2579 }
2580 
2581 void ip6_route_cleanup(void)
2582 {
2583 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2584 	fib6_rules_cleanup();
2585 #endif
2586 #ifdef CONFIG_PROC_FS
2587 	proc_net_remove("ipv6_route");
2588 	proc_net_remove("rt6_stats");
2589 #endif
2590 #ifdef CONFIG_XFRM
2591 	xfrm6_fini();
2592 #endif
2593 	rt6_ifdown(NULL);
2594 	fib6_gc_cleanup();
2595 	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2596 }
2597