xref: /openbmc/linux/net/ipv6/route.c (revision e868d61272caa648214046a096e5a6bfc068dc8c)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *	This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15 
16 /*	Changes:
17  *
18  *	YOSHIFUJI Hideaki @USAGI
19  *		reworked default router selection.
20  *		- respect outgoing interface
21  *		- select from (probably) reachable routers (i.e.
22  *		routers in REACHABLE, STALE, DELAY or PROBE states).
23  *		- always select the same router if it is (probably)
24  *		reachable.  otherwise, round-robin the list.
25  *	Ville Nuorvala
26  *		Fixed routing subtrees.
27  */
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 
42 #ifdef 	CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46 
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57 #include <net/netevent.h>
58 #include <net/netlink.h>
59 
60 #include <asm/uaccess.h>
61 
62 #ifdef CONFIG_SYSCTL
63 #include <linux/sysctl.h>
64 #endif
65 
/*
 * Debug verbosity for this file.  Raising RT6_DEBUG to 3 (or more) compiles
 * in the RDBG()/RT6_TRACE() printk tracing; below that both expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * When non-zero, the policy lookups clone routes that already have a nexthop
 * through rt6_alloc_clone(); when zero such routes are returned as they are.
 */
#define CLONE_OFFLINK_ROUTE 0
78 
79 static int ip6_rt_max_size = 4096;
80 static int ip6_rt_gc_min_interval = HZ / 2;
81 static int ip6_rt_gc_timeout = 60*HZ;
82 int ip6_rt_gc_interval = 30*HZ;
83 static int ip6_rt_gc_elasticity = 9;
84 static int ip6_rt_mtu_expires = 10*60*HZ;
85 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86 
87 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
89 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90 static void		ip6_dst_destroy(struct dst_entry *);
91 static void		ip6_dst_ifdown(struct dst_entry *,
92 				       struct net_device *dev, int how);
93 static int		 ip6_dst_gc(void);
94 
95 static int		ip6_pkt_discard(struct sk_buff *skb);
96 static int		ip6_pkt_discard_out(struct sk_buff *skb);
97 static void		ip6_link_failure(struct sk_buff *skb);
98 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
99 
100 #ifdef CONFIG_IPV6_ROUTE_INFO
101 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102 					   struct in6_addr *gwaddr, int ifindex,
103 					   unsigned pref);
104 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105 					   struct in6_addr *gwaddr, int ifindex);
106 #endif
107 
108 static struct dst_ops ip6_dst_ops = {
109 	.family			=	AF_INET6,
110 	.protocol		=	__constant_htons(ETH_P_IPV6),
111 	.gc			=	ip6_dst_gc,
112 	.gc_thresh		=	1024,
113 	.check			=	ip6_dst_check,
114 	.destroy		=	ip6_dst_destroy,
115 	.ifdown			=	ip6_dst_ifdown,
116 	.negative_advice	=	ip6_negative_advice,
117 	.link_failure		=	ip6_link_failure,
118 	.update_pmtu		=	ip6_rt_update_pmtu,
119 	.entry_size		=	sizeof(struct rt6_info),
120 };
121 
122 struct rt6_info ip6_null_entry = {
123 	.u = {
124 		.dst = {
125 			.__refcnt	= ATOMIC_INIT(1),
126 			.__use		= 1,
127 			.dev		= &loopback_dev,
128 			.obsolete	= -1,
129 			.error		= -ENETUNREACH,
130 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
131 			.input		= ip6_pkt_discard,
132 			.output		= ip6_pkt_discard_out,
133 			.ops		= &ip6_dst_ops,
134 			.path		= (struct dst_entry*)&ip6_null_entry,
135 		}
136 	},
137 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
138 	.rt6i_metric	= ~(u32) 0,
139 	.rt6i_ref	= ATOMIC_INIT(1),
140 };
141 
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
static int ip6_pkt_blk_hole(struct sk_buff *skb);

/* Static "prohibit" entry: carries -EACCES and uses the prohibit handlers. */
struct rt6_info ip6_prohibit_entry = {
	.u.dst.__refcnt = ATOMIC_INIT(1),
	.u.dst.__use = 1,
	.u.dst.dev = &loopback_dev,
	.u.dst.obsolete = -1,
	.u.dst.error = -EACCES,
	.u.dst.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
	.u.dst.input = ip6_pkt_prohibit,
	.u.dst.output = ip6_pkt_prohibit_out,
	.u.dst.ops = &ip6_dst_ops,
	.u.dst.path = (struct dst_entry *)&ip6_prohibit_entry,

	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

/*
 * Static "blackhole" entry: carries -EINVAL and routes both input and
 * output through ip6_pkt_blk_hole().
 */
struct rt6_info ip6_blk_hole_entry = {
	.u.dst.__refcnt = ATOMIC_INIT(1),
	.u.dst.__use = 1,
	.u.dst.dev = &loopback_dev,
	.u.dst.obsolete = -1,
	.u.dst.error = -EINVAL,
	.u.dst.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
	.u.dst.input = ip6_pkt_blk_hole,
	.u.dst.output = ip6_pkt_blk_hole,
	.u.dst.ops = &ip6_dst_ops,
	.u.dst.path = (struct dst_entry *)&ip6_blk_hole_entry,

	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

#endif
189 
190 /* allocate dst with ip6_dst_ops */
191 static __inline__ struct rt6_info *ip6_dst_alloc(void)
192 {
193 	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
194 }
195 
196 static void ip6_dst_destroy(struct dst_entry *dst)
197 {
198 	struct rt6_info *rt = (struct rt6_info *)dst;
199 	struct inet6_dev *idev = rt->rt6i_idev;
200 
201 	if (idev != NULL) {
202 		rt->rt6i_idev = NULL;
203 		in6_dev_put(idev);
204 	}
205 }
206 
207 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
208 			   int how)
209 {
210 	struct rt6_info *rt = (struct rt6_info *)dst;
211 	struct inet6_dev *idev = rt->rt6i_idev;
212 
213 	if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
214 		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
215 		if (loopback_idev != NULL) {
216 			rt->rt6i_idev = loopback_idev;
217 			in6_dev_put(idev);
218 		}
219 	}
220 }
221 
222 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
223 {
224 	return (rt->rt6i_flags & RTF_EXPIRES &&
225 		time_after(jiffies, rt->rt6i_expires));
226 }
227 
228 static inline int rt6_need_strict(struct in6_addr *daddr)
229 {
230 	return (ipv6_addr_type(daddr) &
231 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
232 }
233 
234 /*
235  *	Route lookup. Any table->tb6_lock is implied.
236  */
237 
238 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
239 						    int oif,
240 						    int strict)
241 {
242 	struct rt6_info *local = NULL;
243 	struct rt6_info *sprt;
244 
245 	if (oif) {
246 		for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
247 			struct net_device *dev = sprt->rt6i_dev;
248 			if (dev->ifindex == oif)
249 				return sprt;
250 			if (dev->flags & IFF_LOOPBACK) {
251 				if (sprt->rt6i_idev == NULL ||
252 				    sprt->rt6i_idev->dev->ifindex != oif) {
253 					if (strict && oif)
254 						continue;
255 					if (local && (!oif ||
256 						      local->rt6i_idev->dev->ifindex == oif))
257 						continue;
258 				}
259 				local = sprt;
260 			}
261 		}
262 
263 		if (local)
264 			return local;
265 
266 		if (strict)
267 			return &ip6_null_entry;
268 	}
269 	return rt;
270 }
271 
#ifndef CONFIG_IPV6_ROUTER_PREF
/* Router reachability probing is compiled out: nothing to do. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#else
/*
 * Router Reachability Probing: if the nexthop neighbour of @rt is not in a
 * NUD_VALID state and the per-device rtr_probe_interval has elapsed since
 * the neighbour was last updated, send a neighbour solicitation to the
 * solicited-node multicast address of the nexthop.
 *
 * Okay, this does not seem to be appropriate for now, however, we need to
 * check if it is really so.  Router Reachability Probe MUST be rate-limited
 * to no more than one per minute.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	if (!rt)
		return;

	neigh = rt->rt6i_nexthop;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/* re-check the state and stamp the neighbour under its lock */
	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies,
			 neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#endif
307 
308 /*
309  * Default Router Selection (RFC 2461 6.3.6)
310  */
311 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
312 {
313 	struct net_device *dev = rt->rt6i_dev;
314 	if (!oif || dev->ifindex == oif)
315 		return 2;
316 	if ((dev->flags & IFF_LOOPBACK) &&
317 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
318 		return 1;
319 	return 0;
320 }
321 
322 static inline int rt6_check_neigh(struct rt6_info *rt)
323 {
324 	struct neighbour *neigh = rt->rt6i_nexthop;
325 	int m = 0;
326 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
327 	    !(rt->rt6i_flags & RTF_GATEWAY))
328 		m = 1;
329 	else if (neigh) {
330 		read_lock_bh(&neigh->lock);
331 		if (neigh->nud_state & NUD_VALID)
332 			m = 2;
333 		else if (!(neigh->nud_state & NUD_FAILED))
334 			m = 1;
335 		read_unlock_bh(&neigh->lock);
336 	}
337 	return m;
338 }
339 
340 static int rt6_score_route(struct rt6_info *rt, int oif,
341 			   int strict)
342 {
343 	int m, n;
344 
345 	m = rt6_check_dev(rt, oif);
346 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
347 		return -1;
348 #ifdef CONFIG_IPV6_ROUTER_PREF
349 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
350 #endif
351 	n = rt6_check_neigh(rt);
352 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
353 		return -1;
354 	return m;
355 }
356 
357 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
358 				   int *mpri, struct rt6_info *match)
359 {
360 	int m;
361 
362 	if (rt6_check_expired(rt))
363 		goto out;
364 
365 	m = rt6_score_route(rt, oif, strict);
366 	if (m < 0)
367 		goto out;
368 
369 	if (m > *mpri) {
370 		if (strict & RT6_LOOKUP_F_REACHABLE)
371 			rt6_probe(match);
372 		*mpri = m;
373 		match = rt;
374 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
375 		rt6_probe(rt);
376 	}
377 
378 out:
379 	return match;
380 }
381 
382 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
383 				     struct rt6_info *rr_head,
384 				     u32 metric, int oif, int strict)
385 {
386 	struct rt6_info *rt, *match;
387 	int mpri = -1;
388 
389 	match = NULL;
390 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
391 	     rt = rt->u.dst.rt6_next)
392 		match = find_match(rt, oif, strict, &mpri, match);
393 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
394 	     rt = rt->u.dst.rt6_next)
395 		match = find_match(rt, oif, strict, &mpri, match);
396 
397 	return match;
398 }
399 
400 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
401 {
402 	struct rt6_info *match, *rt0;
403 
404 	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
405 		  __FUNCTION__, fn->leaf, oif);
406 
407 	rt0 = fn->rr_ptr;
408 	if (!rt0)
409 		fn->rr_ptr = rt0 = fn->leaf;
410 
411 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
412 
413 	if (!match &&
414 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
415 		struct rt6_info *next = rt0->u.dst.rt6_next;
416 
417 		/* no entries matched; do round-robin */
418 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
419 			next = fn->leaf;
420 
421 		if (next != rt0)
422 			fn->rr_ptr = next;
423 	}
424 
425 	RT6_TRACE("%s() => %p\n",
426 		  __FUNCTION__, match);
427 
428 	return (match ? match : &ip6_null_entry);
429 }
430 
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received from router @gwaddr on @dev.
 *
 * @opt:    start of the option (interpreted as struct route_info)
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * A zero lifetime deletes an existing matching route; otherwise the route
 * is added, or its preference and expiry are refreshed.  A lifetime of
 * 0xffffffff means "never expires".
 *
 * Returns 0 on success, -EINVAL if the option is malformed.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	/* a negative len would pass an unsigned comparison against sizeof */
	if (len < 0 || (size_t)len < sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length (RFC 4191, section 2.3).
	 * Length is in units of 8 octets and includes the 8-octet header:
	 *   prefix_len > 64 needs 16 prefix octets -> length must be 3;
	 *   prefix_len > 0  needs 8 prefix octets  -> length must be >= 2.
	 * A weaker check lets ipv6_addr_prefix() below read prefix octets
	 * that are not part of the option.
	 */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 3)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 2)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* copies only prefix_len bits, which the checks above bound */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* lifetime 0: withdraw the route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/* drop the reference obtained from the get/add helper */
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
508 
/*
 * BACKTRACK(saddr): used inside the lookup functions after a selection on
 * node 'fn' produced 'rt'.  If 'rt' is the null entry, climb towards the
 * root (descending into a parent's source subtree via fib6_lookup() when
 * there is one that we did not come from) until a node carrying route
 * information is found, then jump to the caller's 'restart' label.
 * Reaching the top-level root jumps to the caller's 'out' label.
 *
 * Requires 'rt', 'fn' and the labels 'restart' and 'out' in the caller.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (!FIB6_SUBTREE(pn) || FIB6_SUBTREE(pn) == fn) \
				fn = pn; \
			else \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
526 
527 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
528 					     struct flowi *fl, int flags)
529 {
530 	struct fib6_node *fn;
531 	struct rt6_info *rt;
532 
533 	read_lock_bh(&table->tb6_lock);
534 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
535 restart:
536 	rt = fn->leaf;
537 	rt = rt6_device_match(rt, fl->oif, flags);
538 	BACKTRACK(&fl->fl6_src);
539 out:
540 	dst_hold(&rt->u.dst);
541 	read_unlock_bh(&table->tb6_lock);
542 
543 	rt->u.dst.lastuse = jiffies;
544 	rt->u.dst.__use++;
545 
546 	return rt;
547 
548 }
549 
550 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
551 			    int oif, int strict)
552 {
553 	struct flowi fl = {
554 		.oif = oif,
555 		.nl_u = {
556 			.ip6_u = {
557 				.daddr = *daddr,
558 			},
559 		},
560 	};
561 	struct dst_entry *dst;
562 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
563 
564 	if (saddr) {
565 		memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
566 		flags |= RT6_LOOKUP_F_HAS_SADDR;
567 	}
568 
569 	dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
570 	if (dst->error == 0)
571 		return (struct rt6_info *) dst;
572 
573 	dst_release(dst);
574 
575 	return NULL;
576 }
577 
578 EXPORT_SYMBOL(rt6_lookup);
579 
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */
585 
586 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
587 {
588 	int err;
589 	struct fib6_table *table;
590 
591 	table = rt->rt6i_table;
592 	write_lock_bh(&table->tb6_lock);
593 	err = fib6_add(&table->tb6_root, rt, info);
594 	write_unlock_bh(&table->tb6_lock);
595 
596 	return err;
597 }
598 
599 int ip6_ins_rt(struct rt6_info *rt)
600 {
601 	return __ip6_ins_rt(rt, NULL);
602 }
603 
604 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
605 				      struct in6_addr *saddr)
606 {
607 	struct rt6_info *rt;
608 
609 	/*
610 	 *	Clone the route.
611 	 */
612 
613 	rt = ip6_rt_copy(ort);
614 
615 	if (rt) {
616 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
617 			if (rt->rt6i_dst.plen != 128 &&
618 			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
619 				rt->rt6i_flags |= RTF_ANYCAST;
620 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
621 		}
622 
623 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
624 		rt->rt6i_dst.plen = 128;
625 		rt->rt6i_flags |= RTF_CACHE;
626 		rt->u.dst.flags |= DST_HOST;
627 
628 #ifdef CONFIG_IPV6_SUBTREES
629 		if (rt->rt6i_src.plen && saddr) {
630 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
631 			rt->rt6i_src.plen = 128;
632 		}
633 #endif
634 
635 		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
636 
637 	}
638 
639 	return rt;
640 }
641 
642 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
643 {
644 	struct rt6_info *rt = ip6_rt_copy(ort);
645 	if (rt) {
646 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
647 		rt->rt6i_dst.plen = 128;
648 		rt->rt6i_flags |= RTF_CACHE;
649 		rt->u.dst.flags |= DST_HOST;
650 		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
651 	}
652 	return rt;
653 }
654 
655 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
656 					    struct flowi *fl, int flags)
657 {
658 	struct fib6_node *fn;
659 	struct rt6_info *rt, *nrt;
660 	int strict = 0;
661 	int attempts = 3;
662 	int err;
663 	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
664 
665 	strict |= flags & RT6_LOOKUP_F_IFACE;
666 
667 relookup:
668 	read_lock_bh(&table->tb6_lock);
669 
670 restart_2:
671 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
672 
673 restart:
674 	rt = rt6_select(fn, fl->iif, strict | reachable);
675 	BACKTRACK(&fl->fl6_src);
676 	if (rt == &ip6_null_entry ||
677 	    rt->rt6i_flags & RTF_CACHE)
678 		goto out;
679 
680 	dst_hold(&rt->u.dst);
681 	read_unlock_bh(&table->tb6_lock);
682 
683 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
684 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
685 	else {
686 #if CLONE_OFFLINK_ROUTE
687 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
688 #else
689 		goto out2;
690 #endif
691 	}
692 
693 	dst_release(&rt->u.dst);
694 	rt = nrt ? : &ip6_null_entry;
695 
696 	dst_hold(&rt->u.dst);
697 	if (nrt) {
698 		err = ip6_ins_rt(nrt);
699 		if (!err)
700 			goto out2;
701 	}
702 
703 	if (--attempts <= 0)
704 		goto out2;
705 
706 	/*
707 	 * Race condition! In the gap, when table->tb6_lock was
708 	 * released someone could insert this route.  Relookup.
709 	 */
710 	dst_release(&rt->u.dst);
711 	goto relookup;
712 
713 out:
714 	if (reachable) {
715 		reachable = 0;
716 		goto restart_2;
717 	}
718 	dst_hold(&rt->u.dst);
719 	read_unlock_bh(&table->tb6_lock);
720 out2:
721 	rt->u.dst.lastuse = jiffies;
722 	rt->u.dst.__use++;
723 
724 	return rt;
725 }
726 
727 void ip6_route_input(struct sk_buff *skb)
728 {
729 	struct ipv6hdr *iph = ipv6_hdr(skb);
730 	int flags = RT6_LOOKUP_F_HAS_SADDR;
731 	struct flowi fl = {
732 		.iif = skb->dev->ifindex,
733 		.nl_u = {
734 			.ip6_u = {
735 				.daddr = iph->daddr,
736 				.saddr = iph->saddr,
737 				.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
738 			},
739 		},
740 		.mark = skb->mark,
741 		.proto = iph->nexthdr,
742 	};
743 
744 	if (rt6_need_strict(&iph->daddr))
745 		flags |= RT6_LOOKUP_F_IFACE;
746 
747 	skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
748 }
749 
750 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
751 					     struct flowi *fl, int flags)
752 {
753 	struct fib6_node *fn;
754 	struct rt6_info *rt, *nrt;
755 	int strict = 0;
756 	int attempts = 3;
757 	int err;
758 	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
759 
760 	strict |= flags & RT6_LOOKUP_F_IFACE;
761 
762 relookup:
763 	read_lock_bh(&table->tb6_lock);
764 
765 restart_2:
766 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
767 
768 restart:
769 	rt = rt6_select(fn, fl->oif, strict | reachable);
770 	BACKTRACK(&fl->fl6_src);
771 	if (rt == &ip6_null_entry ||
772 	    rt->rt6i_flags & RTF_CACHE)
773 		goto out;
774 
775 	dst_hold(&rt->u.dst);
776 	read_unlock_bh(&table->tb6_lock);
777 
778 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
779 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
780 	else {
781 #if CLONE_OFFLINK_ROUTE
782 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
783 #else
784 		goto out2;
785 #endif
786 	}
787 
788 	dst_release(&rt->u.dst);
789 	rt = nrt ? : &ip6_null_entry;
790 
791 	dst_hold(&rt->u.dst);
792 	if (nrt) {
793 		err = ip6_ins_rt(nrt);
794 		if (!err)
795 			goto out2;
796 	}
797 
798 	if (--attempts <= 0)
799 		goto out2;
800 
801 	/*
802 	 * Race condition! In the gap, when table->tb6_lock was
803 	 * released someone could insert this route.  Relookup.
804 	 */
805 	dst_release(&rt->u.dst);
806 	goto relookup;
807 
808 out:
809 	if (reachable) {
810 		reachable = 0;
811 		goto restart_2;
812 	}
813 	dst_hold(&rt->u.dst);
814 	read_unlock_bh(&table->tb6_lock);
815 out2:
816 	rt->u.dst.lastuse = jiffies;
817 	rt->u.dst.__use++;
818 	return rt;
819 }
820 
821 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
822 {
823 	int flags = 0;
824 
825 	if (rt6_need_strict(&fl->fl6_dst))
826 		flags |= RT6_LOOKUP_F_IFACE;
827 
828 	if (!ipv6_addr_any(&fl->fl6_src))
829 		flags |= RT6_LOOKUP_F_HAS_SADDR;
830 
831 	return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
832 }
833 
834 EXPORT_SYMBOL(ip6_route_output);
835 
836 /*
837  *	Destination cache support functions
838  */
839 
840 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
841 {
842 	struct rt6_info *rt;
843 
844 	rt = (struct rt6_info *) dst;
845 
846 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
847 		return dst;
848 
849 	return NULL;
850 }
851 
852 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
853 {
854 	struct rt6_info *rt = (struct rt6_info *) dst;
855 
856 	if (rt) {
857 		if (rt->rt6i_flags & RTF_CACHE)
858 			ip6_del_rt(rt);
859 		else
860 			dst_release(dst);
861 	}
862 	return NULL;
863 }
864 
865 static void ip6_link_failure(struct sk_buff *skb)
866 {
867 	struct rt6_info *rt;
868 
869 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
870 
871 	rt = (struct rt6_info *) skb->dst;
872 	if (rt) {
873 		if (rt->rt6i_flags&RTF_CACHE) {
874 			dst_set_expires(&rt->u.dst, 0);
875 			rt->rt6i_flags |= RTF_EXPIRES;
876 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
877 			rt->rt6i_node->fn_sernum = -1;
878 	}
879 }
880 
881 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
882 {
883 	struct rt6_info *rt6 = (struct rt6_info*)dst;
884 
885 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
886 		rt6->rt6i_flags |= RTF_MODIFIED;
887 		if (mtu < IPV6_MIN_MTU) {
888 			mtu = IPV6_MIN_MTU;
889 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
890 		}
891 		dst->metrics[RTAX_MTU-1] = mtu;
892 		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
893 	}
894 }
895 
896 static int ipv6_get_mtu(struct net_device *dev);
897 
898 static inline unsigned int ipv6_advmss(unsigned int mtu)
899 {
900 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
901 
902 	if (mtu < ip6_rt_min_advmss)
903 		mtu = ip6_rt_min_advmss;
904 
905 	/*
906 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
907 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
908 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
909 	 * rely only on pmtu discovery"
910 	 */
911 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
912 		mtu = IPV6_MAXPLEN;
913 	return mtu;
914 }
915 
916 static struct dst_entry *ndisc_dst_gc_list;
917 static DEFINE_SPINLOCK(ndisc_lock);
918 
919 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
920 				  struct neighbour *neigh,
921 				  struct in6_addr *addr,
922 				  int (*output)(struct sk_buff *))
923 {
924 	struct rt6_info *rt;
925 	struct inet6_dev *idev = in6_dev_get(dev);
926 
927 	if (unlikely(idev == NULL))
928 		return NULL;
929 
930 	rt = ip6_dst_alloc();
931 	if (unlikely(rt == NULL)) {
932 		in6_dev_put(idev);
933 		goto out;
934 	}
935 
936 	dev_hold(dev);
937 	if (neigh)
938 		neigh_hold(neigh);
939 	else
940 		neigh = ndisc_get_neigh(dev, addr);
941 
942 	rt->rt6i_dev	  = dev;
943 	rt->rt6i_idev     = idev;
944 	rt->rt6i_nexthop  = neigh;
945 	atomic_set(&rt->u.dst.__refcnt, 1);
946 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
947 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
948 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
949 	rt->u.dst.output  = output;
950 
951 #if 0	/* there's no chance to use these for ndisc */
952 	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
953 				? DST_HOST
954 				: 0;
955 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
956 	rt->rt6i_dst.plen = 128;
957 #endif
958 
959 	spin_lock_bh(&ndisc_lock);
960 	rt->u.dst.next = ndisc_dst_gc_list;
961 	ndisc_dst_gc_list = &rt->u.dst;
962 	spin_unlock_bh(&ndisc_lock);
963 
964 	fib6_force_start_gc();
965 
966 out:
967 	return &rt->u.dst;
968 }
969 
970 int ndisc_dst_gc(int *more)
971 {
972 	struct dst_entry *dst, *next, **pprev;
973 	int freed;
974 
975 	next = NULL;
976 	freed = 0;
977 
978 	spin_lock_bh(&ndisc_lock);
979 	pprev = &ndisc_dst_gc_list;
980 
981 	while ((dst = *pprev) != NULL) {
982 		if (!atomic_read(&dst->__refcnt)) {
983 			*pprev = dst->next;
984 			dst_free(dst);
985 			freed++;
986 		} else {
987 			pprev = &dst->next;
988 			(*more)++;
989 		}
990 	}
991 
992 	spin_unlock_bh(&ndisc_lock);
993 
994 	return freed;
995 }
996 
997 static int ip6_dst_gc(void)
998 {
999 	static unsigned expire = 30*HZ;
1000 	static unsigned long last_gc;
1001 	unsigned long now = jiffies;
1002 
1003 	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
1004 	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
1005 		goto out;
1006 
1007 	expire++;
1008 	fib6_run_gc(expire);
1009 	last_gc = now;
1010 	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1011 		expire = ip6_rt_gc_timeout>>1;
1012 
1013 out:
1014 	expire -= expire>>ip6_rt_gc_elasticity;
1015 	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1016 }
1017 
1018 /* Clean host part of a prefix. Not necessary in radix tree,
1019    but results in cleaner routing tables.
1020 
1021    Remove it only when all the things will work!
1022  */
1023 
1024 static int ipv6_get_mtu(struct net_device *dev)
1025 {
1026 	int mtu = IPV6_MIN_MTU;
1027 	struct inet6_dev *idev;
1028 
1029 	idev = in6_dev_get(dev);
1030 	if (idev) {
1031 		mtu = idev->cnf.mtu6;
1032 		in6_dev_put(idev);
1033 	}
1034 	return mtu;
1035 }
1036 
1037 int ipv6_get_hoplimit(struct net_device *dev)
1038 {
1039 	int hoplimit = ipv6_devconf.hop_limit;
1040 	struct inet6_dev *idev;
1041 
1042 	idev = in6_dev_get(dev);
1043 	if (idev) {
1044 		hoplimit = idev->cnf.hop_limit;
1045 		in6_dev_put(idev);
1046 	}
1047 	return hoplimit;
1048 }
1049 
1050 /*
1051  *
1052  */
1053 
1054 int ip6_route_add(struct fib6_config *cfg)
1055 {
1056 	int err;
1057 	struct rt6_info *rt = NULL;
1058 	struct net_device *dev = NULL;
1059 	struct inet6_dev *idev = NULL;
1060 	struct fib6_table *table;
1061 	int addr_type;
1062 
1063 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1064 		return -EINVAL;
1065 #ifndef CONFIG_IPV6_SUBTREES
1066 	if (cfg->fc_src_len)
1067 		return -EINVAL;
1068 #endif
1069 	if (cfg->fc_ifindex) {
1070 		err = -ENODEV;
1071 		dev = dev_get_by_index(cfg->fc_ifindex);
1072 		if (!dev)
1073 			goto out;
1074 		idev = in6_dev_get(dev);
1075 		if (!idev)
1076 			goto out;
1077 	}
1078 
1079 	if (cfg->fc_metric == 0)
1080 		cfg->fc_metric = IP6_RT_PRIO_USER;
1081 
1082 	table = fib6_new_table(cfg->fc_table);
1083 	if (table == NULL) {
1084 		err = -ENOBUFS;
1085 		goto out;
1086 	}
1087 
1088 	rt = ip6_dst_alloc();
1089 
1090 	if (rt == NULL) {
1091 		err = -ENOMEM;
1092 		goto out;
1093 	}
1094 
1095 	rt->u.dst.obsolete = -1;
1096 	rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1097 
1098 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1099 		cfg->fc_protocol = RTPROT_BOOT;
1100 	rt->rt6i_protocol = cfg->fc_protocol;
1101 
1102 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1103 
1104 	if (addr_type & IPV6_ADDR_MULTICAST)
1105 		rt->u.dst.input = ip6_mc_input;
1106 	else
1107 		rt->u.dst.input = ip6_forward;
1108 
1109 	rt->u.dst.output = ip6_output;
1110 
1111 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1112 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1113 	if (rt->rt6i_dst.plen == 128)
1114 	       rt->u.dst.flags = DST_HOST;
1115 
1116 #ifdef CONFIG_IPV6_SUBTREES
1117 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1118 	rt->rt6i_src.plen = cfg->fc_src_len;
1119 #endif
1120 
1121 	rt->rt6i_metric = cfg->fc_metric;
1122 
1123 	/* We cannot add true routes via loopback here,
1124 	   they would result in kernel looping; promote them to reject routes
1125 	 */
1126 	if ((cfg->fc_flags & RTF_REJECT) ||
1127 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1128 		/* hold loopback dev/idev if we haven't done so. */
1129 		if (dev != &loopback_dev) {
1130 			if (dev) {
1131 				dev_put(dev);
1132 				in6_dev_put(idev);
1133 			}
1134 			dev = &loopback_dev;
1135 			dev_hold(dev);
1136 			idev = in6_dev_get(dev);
1137 			if (!idev) {
1138 				err = -ENODEV;
1139 				goto out;
1140 			}
1141 		}
1142 		rt->u.dst.output = ip6_pkt_discard_out;
1143 		rt->u.dst.input = ip6_pkt_discard;
1144 		rt->u.dst.error = -ENETUNREACH;
1145 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1146 		goto install_route;
1147 	}
1148 
1149 	if (cfg->fc_flags & RTF_GATEWAY) {
1150 		struct in6_addr *gw_addr;
1151 		int gwa_type;
1152 
1153 		gw_addr = &cfg->fc_gateway;
1154 		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1155 		gwa_type = ipv6_addr_type(gw_addr);
1156 
1157 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1158 			struct rt6_info *grt;
1159 
1160 			/* IPv6 strictly inhibits using not link-local
1161 			   addresses as nexthop address.
1162 			   Otherwise, router will not able to send redirects.
1163 			   It is very good, but in some (rare!) circumstances
1164 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1165 			   some exceptions. --ANK
1166 			 */
1167 			err = -EINVAL;
1168 			if (!(gwa_type&IPV6_ADDR_UNICAST))
1169 				goto out;
1170 
1171 			grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1172 
1173 			err = -EHOSTUNREACH;
1174 			if (grt == NULL)
1175 				goto out;
1176 			if (dev) {
1177 				if (dev != grt->rt6i_dev) {
1178 					dst_release(&grt->u.dst);
1179 					goto out;
1180 				}
1181 			} else {
1182 				dev = grt->rt6i_dev;
1183 				idev = grt->rt6i_idev;
1184 				dev_hold(dev);
1185 				in6_dev_hold(grt->rt6i_idev);
1186 			}
1187 			if (!(grt->rt6i_flags&RTF_GATEWAY))
1188 				err = 0;
1189 			dst_release(&grt->u.dst);
1190 
1191 			if (err)
1192 				goto out;
1193 		}
1194 		err = -EINVAL;
1195 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1196 			goto out;
1197 	}
1198 
1199 	err = -ENODEV;
1200 	if (dev == NULL)
1201 		goto out;
1202 
1203 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1204 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1205 		if (IS_ERR(rt->rt6i_nexthop)) {
1206 			err = PTR_ERR(rt->rt6i_nexthop);
1207 			rt->rt6i_nexthop = NULL;
1208 			goto out;
1209 		}
1210 	}
1211 
1212 	rt->rt6i_flags = cfg->fc_flags;
1213 
1214 install_route:
1215 	if (cfg->fc_mx) {
1216 		struct nlattr *nla;
1217 		int remaining;
1218 
1219 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1220 			int type = nla->nla_type;
1221 
1222 			if (type) {
1223 				if (type > RTAX_MAX) {
1224 					err = -EINVAL;
1225 					goto out;
1226 				}
1227 
1228 				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1229 			}
1230 		}
1231 	}
1232 
1233 	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1234 		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1235 	if (!rt->u.dst.metrics[RTAX_MTU-1])
1236 		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1237 	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1238 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1239 	rt->u.dst.dev = dev;
1240 	rt->rt6i_idev = idev;
1241 	rt->rt6i_table = table;
1242 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1243 
1244 out:
1245 	if (dev)
1246 		dev_put(dev);
1247 	if (idev)
1248 		in6_dev_put(idev);
1249 	if (rt)
1250 		dst_free(&rt->u.dst);
1251 	return err;
1252 }
1253 
1254 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1255 {
1256 	int err;
1257 	struct fib6_table *table;
1258 
1259 	if (rt == &ip6_null_entry)
1260 		return -ENOENT;
1261 
1262 	table = rt->rt6i_table;
1263 	write_lock_bh(&table->tb6_lock);
1264 
1265 	err = fib6_del(rt, info);
1266 	dst_release(&rt->u.dst);
1267 
1268 	write_unlock_bh(&table->tb6_lock);
1269 
1270 	return err;
1271 }
1272 
1273 int ip6_del_rt(struct rt6_info *rt)
1274 {
1275 	return __ip6_del_rt(rt, NULL);
1276 }
1277 
1278 static int ip6_route_del(struct fib6_config *cfg)
1279 {
1280 	struct fib6_table *table;
1281 	struct fib6_node *fn;
1282 	struct rt6_info *rt;
1283 	int err = -ESRCH;
1284 
1285 	table = fib6_get_table(cfg->fc_table);
1286 	if (table == NULL)
1287 		return err;
1288 
1289 	read_lock_bh(&table->tb6_lock);
1290 
1291 	fn = fib6_locate(&table->tb6_root,
1292 			 &cfg->fc_dst, cfg->fc_dst_len,
1293 			 &cfg->fc_src, cfg->fc_src_len);
1294 
1295 	if (fn) {
1296 		for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1297 			if (cfg->fc_ifindex &&
1298 			    (rt->rt6i_dev == NULL ||
1299 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1300 				continue;
1301 			if (cfg->fc_flags & RTF_GATEWAY &&
1302 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1303 				continue;
1304 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1305 				continue;
1306 			dst_hold(&rt->u.dst);
1307 			read_unlock_bh(&table->tb6_lock);
1308 
1309 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1310 		}
1311 	}
1312 	read_unlock_bh(&table->tb6_lock);
1313 
1314 	return err;
1315 }
1316 
1317 /*
1318  *	Handle redirects
1319  */
/*
 * Flow key used for redirect lookups: a plain flowi extended with the
 * address of the router that sent the redirect.  The flowi must stay
 * the first member, because the lookup callback receives a
 * struct flowi * and casts it back to struct ip6rd_flowi *.
 */
struct ip6rd_flowi {
	struct flowi fl;		/* ordinary flow description */
	struct in6_addr gateway;	/* router the redirect came from */
};
1324 
1325 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1326 					     struct flowi *fl,
1327 					     int flags)
1328 {
1329 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1330 	struct rt6_info *rt;
1331 	struct fib6_node *fn;
1332 
1333 	/*
1334 	 * Get the "current" route for this destination and
1335 	 * check if the redirect has come from approriate router.
1336 	 *
1337 	 * RFC 2461 specifies that redirects should only be
1338 	 * accepted if they come from the nexthop to the target.
1339 	 * Due to the way the routes are chosen, this notion
1340 	 * is a bit fuzzy and one might need to check all possible
1341 	 * routes.
1342 	 */
1343 
1344 	read_lock_bh(&table->tb6_lock);
1345 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1346 restart:
1347 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1348 		/*
1349 		 * Current route is on-link; redirect is always invalid.
1350 		 *
1351 		 * Seems, previous statement is not true. It could
1352 		 * be node, which looks for us as on-link (f.e. proxy ndisc)
1353 		 * But then router serving it might decide, that we should
1354 		 * know truth 8)8) --ANK (980726).
1355 		 */
1356 		if (rt6_check_expired(rt))
1357 			continue;
1358 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1359 			continue;
1360 		if (fl->oif != rt->rt6i_dev->ifindex)
1361 			continue;
1362 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1363 			continue;
1364 		break;
1365 	}
1366 
1367 	if (!rt)
1368 		rt = &ip6_null_entry;
1369 	BACKTRACK(&fl->fl6_src);
1370 out:
1371 	dst_hold(&rt->u.dst);
1372 
1373 	read_unlock_bh(&table->tb6_lock);
1374 
1375 	return rt;
1376 };
1377 
1378 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1379 					   struct in6_addr *src,
1380 					   struct in6_addr *gateway,
1381 					   struct net_device *dev)
1382 {
1383 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1384 	struct ip6rd_flowi rdfl = {
1385 		.fl = {
1386 			.oif = dev->ifindex,
1387 			.nl_u = {
1388 				.ip6_u = {
1389 					.daddr = *dest,
1390 					.saddr = *src,
1391 				},
1392 			},
1393 		},
1394 		.gateway = *gateway,
1395 	};
1396 
1397 	if (rt6_need_strict(dest))
1398 		flags |= RT6_LOOKUP_F_IFACE;
1399 
1400 	return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1401 }
1402 
1403 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1404 		  struct in6_addr *saddr,
1405 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1406 {
1407 	struct rt6_info *rt, *nrt = NULL;
1408 	struct netevent_redirect netevent;
1409 
1410 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1411 
1412 	if (rt == &ip6_null_entry) {
1413 		if (net_ratelimit())
1414 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1415 			       "for redirect target\n");
1416 		goto out;
1417 	}
1418 
1419 	/*
1420 	 *	We have finally decided to accept it.
1421 	 */
1422 
1423 	neigh_update(neigh, lladdr, NUD_STALE,
1424 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1425 		     NEIGH_UPDATE_F_OVERRIDE|
1426 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1427 				     NEIGH_UPDATE_F_ISROUTER))
1428 		     );
1429 
1430 	/*
1431 	 * Redirect received -> path was valid.
1432 	 * Look, redirects are sent only in response to data packets,
1433 	 * so that this nexthop apparently is reachable. --ANK
1434 	 */
1435 	dst_confirm(&rt->u.dst);
1436 
1437 	/* Duplicate redirect: silently ignore. */
1438 	if (neigh == rt->u.dst.neighbour)
1439 		goto out;
1440 
1441 	nrt = ip6_rt_copy(rt);
1442 	if (nrt == NULL)
1443 		goto out;
1444 
1445 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1446 	if (on_link)
1447 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1448 
1449 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1450 	nrt->rt6i_dst.plen = 128;
1451 	nrt->u.dst.flags |= DST_HOST;
1452 
1453 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1454 	nrt->rt6i_nexthop = neigh_clone(neigh);
1455 	/* Reset pmtu, it may be better */
1456 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1457 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1458 
1459 	if (ip6_ins_rt(nrt))
1460 		goto out;
1461 
1462 	netevent.old = &rt->u.dst;
1463 	netevent.new = &nrt->u.dst;
1464 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1465 
1466 	if (rt->rt6i_flags&RTF_CACHE) {
1467 		ip6_del_rt(rt);
1468 		return;
1469 	}
1470 
1471 out:
1472 	dst_release(&rt->u.dst);
1473 	return;
1474 }
1475 
1476 /*
1477  *	Handle ICMP "packet too big" messages
1478  *	i.e. Path MTU discovery
1479  */
1480 
1481 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1482 			struct net_device *dev, u32 pmtu)
1483 {
1484 	struct rt6_info *rt, *nrt;
1485 	int allfrag = 0;
1486 
1487 	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1488 	if (rt == NULL)
1489 		return;
1490 
1491 	if (pmtu >= dst_mtu(&rt->u.dst))
1492 		goto out;
1493 
1494 	if (pmtu < IPV6_MIN_MTU) {
1495 		/*
1496 		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1497 		 * MTU (1280) and a fragment header should always be included
1498 		 * after a node receiving Too Big message reporting PMTU is
1499 		 * less than the IPv6 Minimum Link MTU.
1500 		 */
1501 		pmtu = IPV6_MIN_MTU;
1502 		allfrag = 1;
1503 	}
1504 
1505 	/* New mtu received -> path was valid.
1506 	   They are sent only in response to data packets,
1507 	   so that this nexthop apparently is reachable. --ANK
1508 	 */
1509 	dst_confirm(&rt->u.dst);
1510 
1511 	/* Host route. If it is static, it would be better
1512 	   not to override it, but add new one, so that
1513 	   when cache entry will expire old pmtu
1514 	   would return automatically.
1515 	 */
1516 	if (rt->rt6i_flags & RTF_CACHE) {
1517 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1518 		if (allfrag)
1519 			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1520 		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1521 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1522 		goto out;
1523 	}
1524 
1525 	/* Network route.
1526 	   Two cases are possible:
1527 	   1. It is connected route. Action: COW
1528 	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1529 	 */
1530 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1531 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1532 	else
1533 		nrt = rt6_alloc_clone(rt, daddr);
1534 
1535 	if (nrt) {
1536 		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1537 		if (allfrag)
1538 			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1539 
1540 		/* According to RFC 1981, detecting PMTU increase shouldn't be
1541 		 * happened within 5 mins, the recommended timer is 10 mins.
1542 		 * Here this route expiration time is set to ip6_rt_mtu_expires
1543 		 * which is 10 mins. After 10 mins the decreased pmtu is expired
1544 		 * and detecting PMTU increase will be automatically happened.
1545 		 */
1546 		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1547 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1548 
1549 		ip6_ins_rt(nrt);
1550 	}
1551 out:
1552 	dst_release(&rt->u.dst);
1553 }
1554 
1555 /*
1556  *	Misc support functions
1557  */
1558 
1559 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1560 {
1561 	struct rt6_info *rt = ip6_dst_alloc();
1562 
1563 	if (rt) {
1564 		rt->u.dst.input = ort->u.dst.input;
1565 		rt->u.dst.output = ort->u.dst.output;
1566 
1567 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1568 		rt->u.dst.error = ort->u.dst.error;
1569 		rt->u.dst.dev = ort->u.dst.dev;
1570 		if (rt->u.dst.dev)
1571 			dev_hold(rt->u.dst.dev);
1572 		rt->rt6i_idev = ort->rt6i_idev;
1573 		if (rt->rt6i_idev)
1574 			in6_dev_hold(rt->rt6i_idev);
1575 		rt->u.dst.lastuse = jiffies;
1576 		rt->rt6i_expires = 0;
1577 
1578 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1579 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1580 		rt->rt6i_metric = 0;
1581 
1582 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1583 #ifdef CONFIG_IPV6_SUBTREES
1584 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1585 #endif
1586 		rt->rt6i_table = ort->rt6i_table;
1587 	}
1588 	return rt;
1589 }
1590 
1591 #ifdef CONFIG_IPV6_ROUTE_INFO
1592 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1593 					   struct in6_addr *gwaddr, int ifindex)
1594 {
1595 	struct fib6_node *fn;
1596 	struct rt6_info *rt = NULL;
1597 	struct fib6_table *table;
1598 
1599 	table = fib6_get_table(RT6_TABLE_INFO);
1600 	if (table == NULL)
1601 		return NULL;
1602 
1603 	write_lock_bh(&table->tb6_lock);
1604 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1605 	if (!fn)
1606 		goto out;
1607 
1608 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1609 		if (rt->rt6i_dev->ifindex != ifindex)
1610 			continue;
1611 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1612 			continue;
1613 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1614 			continue;
1615 		dst_hold(&rt->u.dst);
1616 		break;
1617 	}
1618 out:
1619 	write_unlock_bh(&table->tb6_lock);
1620 	return rt;
1621 }
1622 
1623 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1624 					   struct in6_addr *gwaddr, int ifindex,
1625 					   unsigned pref)
1626 {
1627 	struct fib6_config cfg = {
1628 		.fc_table	= RT6_TABLE_INFO,
1629 		.fc_metric	= 1024,
1630 		.fc_ifindex	= ifindex,
1631 		.fc_dst_len	= prefixlen,
1632 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1633 				  RTF_UP | RTF_PREF(pref),
1634 	};
1635 
1636 	ipv6_addr_copy(&cfg.fc_dst, prefix);
1637 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1638 
1639 	/* We should treat it as a default route if prefix length is 0. */
1640 	if (!prefixlen)
1641 		cfg.fc_flags |= RTF_DEFAULT;
1642 
1643 	ip6_route_add(&cfg);
1644 
1645 	return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1646 }
1647 #endif
1648 
1649 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1650 {
1651 	struct rt6_info *rt;
1652 	struct fib6_table *table;
1653 
1654 	table = fib6_get_table(RT6_TABLE_DFLT);
1655 	if (table == NULL)
1656 		return NULL;
1657 
1658 	write_lock_bh(&table->tb6_lock);
1659 	for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1660 		if (dev == rt->rt6i_dev &&
1661 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1662 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1663 			break;
1664 	}
1665 	if (rt)
1666 		dst_hold(&rt->u.dst);
1667 	write_unlock_bh(&table->tb6_lock);
1668 	return rt;
1669 }
1670 
1671 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1672 				     struct net_device *dev,
1673 				     unsigned int pref)
1674 {
1675 	struct fib6_config cfg = {
1676 		.fc_table	= RT6_TABLE_DFLT,
1677 		.fc_metric	= 1024,
1678 		.fc_ifindex	= dev->ifindex,
1679 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1680 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1681 	};
1682 
1683 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1684 
1685 	ip6_route_add(&cfg);
1686 
1687 	return rt6_get_dflt_router(gwaddr, dev);
1688 }
1689 
1690 void rt6_purge_dflt_routers(void)
1691 {
1692 	struct rt6_info *rt;
1693 	struct fib6_table *table;
1694 
1695 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1696 	table = fib6_get_table(RT6_TABLE_DFLT);
1697 	if (table == NULL)
1698 		return;
1699 
1700 restart:
1701 	read_lock_bh(&table->tb6_lock);
1702 	for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1703 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1704 			dst_hold(&rt->u.dst);
1705 			read_unlock_bh(&table->tb6_lock);
1706 			ip6_del_rt(rt);
1707 			goto restart;
1708 		}
1709 	}
1710 	read_unlock_bh(&table->tb6_lock);
1711 }
1712 
1713 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1714 				 struct fib6_config *cfg)
1715 {
1716 	memset(cfg, 0, sizeof(*cfg));
1717 
1718 	cfg->fc_table = RT6_TABLE_MAIN;
1719 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1720 	cfg->fc_metric = rtmsg->rtmsg_metric;
1721 	cfg->fc_expires = rtmsg->rtmsg_info;
1722 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1723 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1724 	cfg->fc_flags = rtmsg->rtmsg_flags;
1725 
1726 	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1727 	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1728 	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1729 }
1730 
1731 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1732 {
1733 	struct fib6_config cfg;
1734 	struct in6_rtmsg rtmsg;
1735 	int err;
1736 
1737 	switch(cmd) {
1738 	case SIOCADDRT:		/* Add a route */
1739 	case SIOCDELRT:		/* Delete a route */
1740 		if (!capable(CAP_NET_ADMIN))
1741 			return -EPERM;
1742 		err = copy_from_user(&rtmsg, arg,
1743 				     sizeof(struct in6_rtmsg));
1744 		if (err)
1745 			return -EFAULT;
1746 
1747 		rtmsg_to_fib6_config(&rtmsg, &cfg);
1748 
1749 		rtnl_lock();
1750 		switch (cmd) {
1751 		case SIOCADDRT:
1752 			err = ip6_route_add(&cfg);
1753 			break;
1754 		case SIOCDELRT:
1755 			err = ip6_route_del(&cfg);
1756 			break;
1757 		default:
1758 			err = -EINVAL;
1759 		}
1760 		rtnl_unlock();
1761 
1762 		return err;
1763 	}
1764 
1765 	return -EINVAL;
1766 }
1767 
1768 /*
1769  *	Drop the packet on the floor
1770  */
1771 
1772 static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
1773 			       int ipstats_mib_noroutes)
1774 {
1775 	int type;
1776 	switch (ipstats_mib_noroutes) {
1777 	case IPSTATS_MIB_INNOROUTES:
1778 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1779 		if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1780 			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1781 			break;
1782 		}
1783 		/* FALLTHROUGH */
1784 	case IPSTATS_MIB_OUTNOROUTES:
1785 		IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1786 		break;
1787 	}
1788 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1789 	kfree_skb(skb);
1790 	return 0;
1791 }
1792 
1793 static int ip6_pkt_discard(struct sk_buff *skb)
1794 {
1795 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1796 }
1797 
1798 static int ip6_pkt_discard_out(struct sk_buff *skb)
1799 {
1800 	skb->dev = skb->dst->dev;
1801 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1802 }
1803 
1804 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1805 
1806 static int ip6_pkt_prohibit(struct sk_buff *skb)
1807 {
1808 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1809 }
1810 
1811 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1812 {
1813 	skb->dev = skb->dst->dev;
1814 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1815 }
1816 
/*
 * Blackhole handler: free the packet silently -- no ICMP error is sent
 * and no statistics counter is touched here.  Always returns 0.
 */
static int ip6_pkt_blk_hole(struct sk_buff *skb)
{
	kfree_skb(skb);
	return 0;
}
1822 
1823 #endif
1824 
1825 /*
1826  *	Allocate a dst for local (unicast / anycast) address.
1827  */
1828 
1829 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1830 				    const struct in6_addr *addr,
1831 				    int anycast)
1832 {
1833 	struct rt6_info *rt = ip6_dst_alloc();
1834 
1835 	if (rt == NULL)
1836 		return ERR_PTR(-ENOMEM);
1837 
1838 	dev_hold(&loopback_dev);
1839 	in6_dev_hold(idev);
1840 
1841 	rt->u.dst.flags = DST_HOST;
1842 	rt->u.dst.input = ip6_input;
1843 	rt->u.dst.output = ip6_output;
1844 	rt->rt6i_dev = &loopback_dev;
1845 	rt->rt6i_idev = idev;
1846 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1847 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1848 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1849 	rt->u.dst.obsolete = -1;
1850 
1851 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1852 	if (anycast)
1853 		rt->rt6i_flags |= RTF_ANYCAST;
1854 	else
1855 		rt->rt6i_flags |= RTF_LOCAL;
1856 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1857 	if (rt->rt6i_nexthop == NULL) {
1858 		dst_free(&rt->u.dst);
1859 		return ERR_PTR(-ENOMEM);
1860 	}
1861 
1862 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1863 	rt->rt6i_dst.plen = 128;
1864 	rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1865 
1866 	atomic_set(&rt->u.dst.__refcnt, 1);
1867 
1868 	return rt;
1869 }
1870 
1871 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1872 {
1873 	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1874 	    rt != &ip6_null_entry) {
1875 		RT6_TRACE("deleted by ifdown %p\n", rt);
1876 		return -1;
1877 	}
1878 	return 0;
1879 }
1880 
1881 void rt6_ifdown(struct net_device *dev)
1882 {
1883 	fib6_clean_all(fib6_ifdown, 0, dev);
1884 }
1885 
/* Argument bundle passed through fib6_clean_all() on an MTU change. */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* its new MTU */
};
1891 
1892 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1893 {
1894 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1895 	struct inet6_dev *idev;
1896 
1897 	/* In IPv6 pmtu discovery is not optional,
1898 	   so that RTAX_MTU lock cannot disable it.
1899 	   We still use this lock to block changes
1900 	   caused by addrconf/ndisc.
1901 	*/
1902 
1903 	idev = __in6_dev_get(arg->dev);
1904 	if (idev == NULL)
1905 		return 0;
1906 
1907 	/* For administrative MTU increase, there is no way to discover
1908 	   IPv6 PMTU increase, so PMTU increase should be updated here.
1909 	   Since RFC 1981 doesn't include administrative MTU increase
1910 	   update PMTU increase is a MUST. (i.e. jumbo frame)
1911 	 */
1912 	/*
1913 	   If new MTU is less than route PMTU, this new MTU will be the
1914 	   lowest MTU in the path, update the route PMTU to reflect PMTU
1915 	   decreases; if new MTU is greater than route PMTU, and the
1916 	   old MTU is the lowest MTU in the path, update the route PMTU
1917 	   to reflect the increase. In this case if the other nodes' MTU
1918 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
1919 	   PMTU discouvery.
1920 	 */
1921 	if (rt->rt6i_dev == arg->dev &&
1922 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1923 	    (dst_mtu(&rt->u.dst) > arg->mtu ||
1924 	     (dst_mtu(&rt->u.dst) < arg->mtu &&
1925 	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1926 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1927 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1928 	return 0;
1929 }
1930 
1931 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1932 {
1933 	struct rt6_mtu_change_arg arg = {
1934 		.dev = dev,
1935 		.mtu = mtu,
1936 	};
1937 
1938 	fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1939 }
1940 
/*
 * Netlink attribute policy handed to nlmsg_parse() for IPv6 route
 * messages.  Attributes without an entry here (e.g. RTA_DST, RTA_SRC,
 * RTA_TABLE) get a zero-initialised policy slot, and the code in this
 * file checks the RTA_DST/RTA_SRC lengths by hand.
 */
static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
};
1948 
1949 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1950 			      struct fib6_config *cfg)
1951 {
1952 	struct rtmsg *rtm;
1953 	struct nlattr *tb[RTA_MAX+1];
1954 	int err;
1955 
1956 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1957 	if (err < 0)
1958 		goto errout;
1959 
1960 	err = -EINVAL;
1961 	rtm = nlmsg_data(nlh);
1962 	memset(cfg, 0, sizeof(*cfg));
1963 
1964 	cfg->fc_table = rtm->rtm_table;
1965 	cfg->fc_dst_len = rtm->rtm_dst_len;
1966 	cfg->fc_src_len = rtm->rtm_src_len;
1967 	cfg->fc_flags = RTF_UP;
1968 	cfg->fc_protocol = rtm->rtm_protocol;
1969 
1970 	if (rtm->rtm_type == RTN_UNREACHABLE)
1971 		cfg->fc_flags |= RTF_REJECT;
1972 
1973 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1974 	cfg->fc_nlinfo.nlh = nlh;
1975 
1976 	if (tb[RTA_GATEWAY]) {
1977 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1978 		cfg->fc_flags |= RTF_GATEWAY;
1979 	}
1980 
1981 	if (tb[RTA_DST]) {
1982 		int plen = (rtm->rtm_dst_len + 7) >> 3;
1983 
1984 		if (nla_len(tb[RTA_DST]) < plen)
1985 			goto errout;
1986 
1987 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1988 	}
1989 
1990 	if (tb[RTA_SRC]) {
1991 		int plen = (rtm->rtm_src_len + 7) >> 3;
1992 
1993 		if (nla_len(tb[RTA_SRC]) < plen)
1994 			goto errout;
1995 
1996 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1997 	}
1998 
1999 	if (tb[RTA_OIF])
2000 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2001 
2002 	if (tb[RTA_PRIORITY])
2003 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2004 
2005 	if (tb[RTA_METRICS]) {
2006 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2007 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2008 	}
2009 
2010 	if (tb[RTA_TABLE])
2011 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2012 
2013 	err = 0;
2014 errout:
2015 	return err;
2016 }
2017 
2018 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2019 {
2020 	struct fib6_config cfg;
2021 	int err;
2022 
2023 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2024 	if (err < 0)
2025 		return err;
2026 
2027 	return ip6_route_del(&cfg);
2028 }
2029 
2030 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2031 {
2032 	struct fib6_config cfg;
2033 	int err;
2034 
2035 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2036 	if (err < 0)
2037 		return err;
2038 
2039 	return ip6_route_add(&cfg);
2040 }
2041 
2042 static inline size_t rt6_nlmsg_size(void)
2043 {
2044 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2045 	       + nla_total_size(16) /* RTA_SRC */
2046 	       + nla_total_size(16) /* RTA_DST */
2047 	       + nla_total_size(16) /* RTA_GATEWAY */
2048 	       + nla_total_size(16) /* RTA_PREFSRC */
2049 	       + nla_total_size(4) /* RTA_TABLE */
2050 	       + nla_total_size(4) /* RTA_IIF */
2051 	       + nla_total_size(4) /* RTA_OIF */
2052 	       + nla_total_size(4) /* RTA_PRIORITY */
2053 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2054 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2055 }
2056 
2057 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2058 			 struct in6_addr *dst, struct in6_addr *src,
2059 			 int iif, int type, u32 pid, u32 seq,
2060 			 int prefix, unsigned int flags)
2061 {
2062 	struct rtmsg *rtm;
2063 	struct nlmsghdr *nlh;
2064 	long expires;
2065 	u32 table;
2066 
2067 	if (prefix) {	/* user wants prefix routes only */
2068 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2069 			/* success since this is not a prefix route */
2070 			return 1;
2071 		}
2072 	}
2073 
2074 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2075 	if (nlh == NULL)
2076 		return -EMSGSIZE;
2077 
2078 	rtm = nlmsg_data(nlh);
2079 	rtm->rtm_family = AF_INET6;
2080 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2081 	rtm->rtm_src_len = rt->rt6i_src.plen;
2082 	rtm->rtm_tos = 0;
2083 	if (rt->rt6i_table)
2084 		table = rt->rt6i_table->tb6_id;
2085 	else
2086 		table = RT6_TABLE_UNSPEC;
2087 	rtm->rtm_table = table;
2088 	NLA_PUT_U32(skb, RTA_TABLE, table);
2089 	if (rt->rt6i_flags&RTF_REJECT)
2090 		rtm->rtm_type = RTN_UNREACHABLE;
2091 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2092 		rtm->rtm_type = RTN_LOCAL;
2093 	else
2094 		rtm->rtm_type = RTN_UNICAST;
2095 	rtm->rtm_flags = 0;
2096 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2097 	rtm->rtm_protocol = rt->rt6i_protocol;
2098 	if (rt->rt6i_flags&RTF_DYNAMIC)
2099 		rtm->rtm_protocol = RTPROT_REDIRECT;
2100 	else if (rt->rt6i_flags & RTF_ADDRCONF)
2101 		rtm->rtm_protocol = RTPROT_KERNEL;
2102 	else if (rt->rt6i_flags&RTF_DEFAULT)
2103 		rtm->rtm_protocol = RTPROT_RA;
2104 
2105 	if (rt->rt6i_flags&RTF_CACHE)
2106 		rtm->rtm_flags |= RTM_F_CLONED;
2107 
2108 	if (dst) {
2109 		NLA_PUT(skb, RTA_DST, 16, dst);
2110 		rtm->rtm_dst_len = 128;
2111 	} else if (rtm->rtm_dst_len)
2112 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2113 #ifdef CONFIG_IPV6_SUBTREES
2114 	if (src) {
2115 		NLA_PUT(skb, RTA_SRC, 16, src);
2116 		rtm->rtm_src_len = 128;
2117 	} else if (rtm->rtm_src_len)
2118 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2119 #endif
2120 	if (iif)
2121 		NLA_PUT_U32(skb, RTA_IIF, iif);
2122 	else if (dst) {
2123 		struct in6_addr saddr_buf;
2124 		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2125 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2126 	}
2127 
2128 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2129 		goto nla_put_failure;
2130 
2131 	if (rt->u.dst.neighbour)
2132 		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2133 
2134 	if (rt->u.dst.dev)
2135 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2136 
2137 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2138 
2139 	expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2140 	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2141 			       expires, rt->u.dst.error) < 0)
2142 		goto nla_put_failure;
2143 
2144 	return nlmsg_end(skb, nlh);
2145 
2146 nla_put_failure:
2147 	nlmsg_cancel(skb, nlh);
2148 	return -EMSGSIZE;
2149 }
2150 
2151 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2152 {
2153 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2154 	int prefix;
2155 
2156 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2157 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2158 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2159 	} else
2160 		prefix = 0;
2161 
2162 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2163 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2164 		     prefix, NLM_F_MULTI);
2165 }
2166 
2167 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2168 {
2169 	struct nlattr *tb[RTA_MAX+1];
2170 	struct rt6_info *rt;
2171 	struct sk_buff *skb;
2172 	struct rtmsg *rtm;
2173 	struct flowi fl;
2174 	int err, iif = 0;
2175 
2176 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2177 	if (err < 0)
2178 		goto errout;
2179 
2180 	err = -EINVAL;
2181 	memset(&fl, 0, sizeof(fl));
2182 
2183 	if (tb[RTA_SRC]) {
2184 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2185 			goto errout;
2186 
2187 		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2188 	}
2189 
2190 	if (tb[RTA_DST]) {
2191 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2192 			goto errout;
2193 
2194 		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2195 	}
2196 
2197 	if (tb[RTA_IIF])
2198 		iif = nla_get_u32(tb[RTA_IIF]);
2199 
2200 	if (tb[RTA_OIF])
2201 		fl.oif = nla_get_u32(tb[RTA_OIF]);
2202 
2203 	if (iif) {
2204 		struct net_device *dev;
2205 		dev = __dev_get_by_index(iif);
2206 		if (!dev) {
2207 			err = -ENODEV;
2208 			goto errout;
2209 		}
2210 	}
2211 
2212 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2213 	if (skb == NULL) {
2214 		err = -ENOBUFS;
2215 		goto errout;
2216 	}
2217 
2218 	/* Reserve room for dummy headers, this skb can pass
2219 	   through good chunk of routing engine.
2220 	 */
2221 	skb_reset_mac_header(skb);
2222 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2223 
2224 	rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2225 	skb->dst = &rt->u.dst;
2226 
2227 	err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2228 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2229 			    nlh->nlmsg_seq, 0, 0);
2230 	if (err < 0) {
2231 		kfree_skb(skb);
2232 		goto errout;
2233 	}
2234 
2235 	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2236 errout:
2237 	return err;
2238 }
2239 
2240 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2241 {
2242 	struct sk_buff *skb;
2243 	u32 pid = 0, seq = 0;
2244 	struct nlmsghdr *nlh = NULL;
2245 	int err = -ENOBUFS;
2246 
2247 	if (info) {
2248 		pid = info->pid;
2249 		nlh = info->nlh;
2250 		if (nlh)
2251 			seq = nlh->nlmsg_seq;
2252 	}
2253 
2254 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2255 	if (skb == NULL)
2256 		goto errout;
2257 
2258 	err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2259 	if (err < 0) {
2260 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2261 		WARN_ON(err == -EMSGSIZE);
2262 		kfree_skb(skb);
2263 		goto errout;
2264 	}
2265 	err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2266 errout:
2267 	if (err < 0)
2268 		rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2269 }
2270 
2271 /*
2272  *	/proc
2273  */
2274 
2275 #ifdef CONFIG_PROC_FS
2276 
2277 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2278 
/* State carried through fib6_clean_all() while rendering the proc file. */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested */
	int skip;	/* records skipped so far to reach offset */
	int len;	/* bytes written to buffer so far */
};
2287 
2288 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2289 {
2290 	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2291 
2292 	if (arg->skip < arg->offset / RT6_INFO_LEN) {
2293 		arg->skip++;
2294 		return 0;
2295 	}
2296 
2297 	if (arg->len >= arg->length)
2298 		return 0;
2299 
2300 	arg->len += sprintf(arg->buffer + arg->len,
2301 			    NIP6_SEQFMT " %02x ",
2302 			    NIP6(rt->rt6i_dst.addr),
2303 			    rt->rt6i_dst.plen);
2304 
2305 #ifdef CONFIG_IPV6_SUBTREES
2306 	arg->len += sprintf(arg->buffer + arg->len,
2307 			    NIP6_SEQFMT " %02x ",
2308 			    NIP6(rt->rt6i_src.addr),
2309 			    rt->rt6i_src.plen);
2310 #else
2311 	arg->len += sprintf(arg->buffer + arg->len,
2312 			    "00000000000000000000000000000000 00 ");
2313 #endif
2314 
2315 	if (rt->rt6i_nexthop) {
2316 		arg->len += sprintf(arg->buffer + arg->len,
2317 				    NIP6_SEQFMT,
2318 				    NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2319 	} else {
2320 		arg->len += sprintf(arg->buffer + arg->len,
2321 				    "00000000000000000000000000000000");
2322 	}
2323 	arg->len += sprintf(arg->buffer + arg->len,
2324 			    " %08x %08x %08x %08x %8s\n",
2325 			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2326 			    rt->u.dst.__use, rt->rt6i_flags,
2327 			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
2328 	return 0;
2329 }
2330 
2331 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2332 {
2333 	struct rt6_proc_arg arg = {
2334 		.buffer = buffer,
2335 		.offset = offset,
2336 		.length = length,
2337 	};
2338 
2339 	fib6_clean_all(rt6_info_route, 0, &arg);
2340 
2341 	*start = buffer;
2342 	if (offset)
2343 		*start += offset % RT6_INFO_LEN;
2344 
2345 	arg.len -= offset % RT6_INFO_LEN;
2346 
2347 	if (arg.len > length)
2348 		arg.len = length;
2349 	if (arg.len < 0)
2350 		arg.len = 0;
2351 
2352 	return arg.len;
2353 }
2354 
2355 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2356 {
2357 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2358 		      rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2359 		      rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2360 		      rt6_stats.fib_rt_cache,
2361 		      atomic_read(&ip6_dst_ops.entries),
2362 		      rt6_stats.fib_discarded_routes);
2363 
2364 	return 0;
2365 }
2366 
2367 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2368 {
2369 	return single_open(file, rt6_stats_seq_show, NULL);
2370 }
2371 
2372 static const struct file_operations rt6_stats_seq_fops = {
2373 	.owner	 = THIS_MODULE,
2374 	.open	 = rt6_stats_seq_open,
2375 	.read	 = seq_read,
2376 	.llseek	 = seq_lseek,
2377 	.release = single_release,
2378 };
2379 #endif	/* CONFIG_PROC_FS */
2380 
2381 #ifdef CONFIG_SYSCTL
2382 
2383 static int flush_delay;
2384 
2385 static
2386 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2387 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2388 {
2389 	if (write) {
2390 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2391 		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2392 		return 0;
2393 	} else
2394 		return -EINVAL;
2395 }
2396 
2397 ctl_table ipv6_route_table[] = {
2398 	{
2399 		.ctl_name	=	NET_IPV6_ROUTE_FLUSH,
2400 		.procname	=	"flush",
2401 		.data		=	&flush_delay,
2402 		.maxlen		=	sizeof(int),
2403 		.mode		=	0200,
2404 		.proc_handler	=	&ipv6_sysctl_rtcache_flush
2405 	},
2406 	{
2407 		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
2408 		.procname	=	"gc_thresh",
2409 		.data		=	&ip6_dst_ops.gc_thresh,
2410 		.maxlen		=	sizeof(int),
2411 		.mode		=	0644,
2412 		.proc_handler	=	&proc_dointvec,
2413 	},
2414 	{
2415 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
2416 		.procname	=	"max_size",
2417 		.data		=	&ip6_rt_max_size,
2418 		.maxlen		=	sizeof(int),
2419 		.mode		=	0644,
2420 		.proc_handler	=	&proc_dointvec,
2421 	},
2422 	{
2423 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2424 		.procname	=	"gc_min_interval",
2425 		.data		=	&ip6_rt_gc_min_interval,
2426 		.maxlen		=	sizeof(int),
2427 		.mode		=	0644,
2428 		.proc_handler	=	&proc_dointvec_jiffies,
2429 		.strategy	=	&sysctl_jiffies,
2430 	},
2431 	{
2432 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
2433 		.procname	=	"gc_timeout",
2434 		.data		=	&ip6_rt_gc_timeout,
2435 		.maxlen		=	sizeof(int),
2436 		.mode		=	0644,
2437 		.proc_handler	=	&proc_dointvec_jiffies,
2438 		.strategy	=	&sysctl_jiffies,
2439 	},
2440 	{
2441 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
2442 		.procname	=	"gc_interval",
2443 		.data		=	&ip6_rt_gc_interval,
2444 		.maxlen		=	sizeof(int),
2445 		.mode		=	0644,
2446 		.proc_handler	=	&proc_dointvec_jiffies,
2447 		.strategy	=	&sysctl_jiffies,
2448 	},
2449 	{
2450 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
2451 		.procname	=	"gc_elasticity",
2452 		.data		=	&ip6_rt_gc_elasticity,
2453 		.maxlen		=	sizeof(int),
2454 		.mode		=	0644,
2455 		.proc_handler	=	&proc_dointvec_jiffies,
2456 		.strategy	=	&sysctl_jiffies,
2457 	},
2458 	{
2459 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
2460 		.procname	=	"mtu_expires",
2461 		.data		=	&ip6_rt_mtu_expires,
2462 		.maxlen		=	sizeof(int),
2463 		.mode		=	0644,
2464 		.proc_handler	=	&proc_dointvec_jiffies,
2465 		.strategy	=	&sysctl_jiffies,
2466 	},
2467 	{
2468 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
2469 		.procname	=	"min_adv_mss",
2470 		.data		=	&ip6_rt_min_advmss,
2471 		.maxlen		=	sizeof(int),
2472 		.mode		=	0644,
2473 		.proc_handler	=	&proc_dointvec_jiffies,
2474 		.strategy	=	&sysctl_jiffies,
2475 	},
2476 	{
2477 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2478 		.procname	=	"gc_min_interval_ms",
2479 		.data		=	&ip6_rt_gc_min_interval,
2480 		.maxlen		=	sizeof(int),
2481 		.mode		=	0644,
2482 		.proc_handler	=	&proc_dointvec_ms_jiffies,
2483 		.strategy	=	&sysctl_ms_jiffies,
2484 	},
2485 	{ .ctl_name = 0 }
2486 };
2487 
2488 #endif
2489 
2490 void __init ip6_route_init(void)
2491 {
2492 #ifdef 	CONFIG_PROC_FS
2493 	struct proc_dir_entry *p;
2494 #endif
2495 	ip6_dst_ops.kmem_cachep =
2496 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2497 				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
2498 	fib6_init();
2499 #ifdef 	CONFIG_PROC_FS
2500 	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2501 	if (p)
2502 		p->owner = THIS_MODULE;
2503 
2504 	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2505 #endif
2506 #ifdef CONFIG_XFRM
2507 	xfrm6_init();
2508 #endif
2509 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2510 	fib6_rules_init();
2511 #endif
2512 
2513 	__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
2514 	__rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
2515 	__rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
2516 }
2517 
2518 void ip6_route_cleanup(void)
2519 {
2520 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2521 	fib6_rules_cleanup();
2522 #endif
2523 #ifdef CONFIG_PROC_FS
2524 	proc_net_remove("ipv6_route");
2525 	proc_net_remove("rt6_stats");
2526 #endif
2527 #ifdef CONFIG_XFRM
2528 	xfrm6_fini();
2529 #endif
2530 	rt6_ifdown(NULL);
2531 	fib6_gc_cleanup();
2532 	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2533 }
2534