xref: /openbmc/linux/net/ipv6/route.c (revision 6891a346c387bd0a64afa50f4522f5fe8ba879d8)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *	This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15 
16 /*	Changes:
17  *
18  *	YOSHIFUJI Hideaki @USAGI
19  *		reworked default router selection.
20  *		- respect outgoing interface
21  *		- select from (probably) reachable routers (i.e.
22  *		routers in REACHABLE, STALE, DELAY or PROBE states).
23  *		- always select the same router if it is (probably)
24  *		reachable.  otherwise, round-robin the list.
25  *	Ville Nuorvala
26  *		Fixed routing subtrees.
27  */
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <linux/nsproxy.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
57 
58 #include <asm/uaccess.h>
59 
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63 
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
66 
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
74 
75 #define CLONE_OFFLINK_ROUTE 0
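/*
 * CLONE_OFFLINK_ROUTE selects, at compile time, whether ip6_pol_route()
 * creates a per-destination clone (rt6_alloc_clone) for routes that already
 * carry a nexthop or have RTF_NONEXTHOP set.  With the default of 0 such
 * routes are returned as-is and only nexthop-less routes are copied-on-write
 * into /128 RTF_CACHE entries; see the #if CLONE_OFFLINK_ROUTE block in
 * ip6_pol_route().
 */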
76 
77 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void		ip6_dst_destroy(struct dst_entry *);
81 static void		ip6_dst_ifdown(struct dst_entry *,
82 				       struct net_device *dev, int how);
83 static int		 ip6_dst_gc(struct dst_ops *ops);
84 
85 static int		ip6_pkt_discard(struct sk_buff *skb);
86 static int		ip6_pkt_discard_out(struct sk_buff *skb);
87 static void		ip6_link_failure(struct sk_buff *skb);
88 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89 
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info *rt6_add_route_info(struct net *net,
92 					   struct in6_addr *prefix, int prefixlen,
93 					   struct in6_addr *gwaddr, int ifindex,
94 					   unsigned pref);
95 static struct rt6_info *rt6_get_route_info(struct net *net,
96 					   struct in6_addr *prefix, int prefixlen,
97 					   struct in6_addr *gwaddr, int ifindex);
98 #endif
99 
100 static struct dst_ops ip6_dst_ops_template = {
101 	.family			=	AF_INET6,
102 	.protocol		=	__constant_htons(ETH_P_IPV6),
103 	.gc			=	ip6_dst_gc,
104 	.gc_thresh		=	1024,
105 	.check			=	ip6_dst_check,
106 	.destroy		=	ip6_dst_destroy,
107 	.ifdown			=	ip6_dst_ifdown,
108 	.negative_advice	=	ip6_negative_advice,
109 	.link_failure		=	ip6_link_failure,
110 	.update_pmtu		=	ip6_rt_update_pmtu,
111 	.local_out		=	ip6_local_out,
112 	.entry_size		=	sizeof(struct rt6_info),
113 	.entries		=	ATOMIC_INIT(0),
114 };
115 
116 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
117 {
118 }
119 
120 static struct dst_ops ip6_dst_blackhole_ops = {
121 	.family			=	AF_INET6,
122 	.protocol		=	__constant_htons(ETH_P_IPV6),
123 	.destroy		=	ip6_dst_destroy,
124 	.check			=	ip6_dst_check,
125 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
126 	.entry_size		=	sizeof(struct rt6_info),
127 	.entries		=	ATOMIC_INIT(0),
128 };
129 
130 static struct rt6_info ip6_null_entry_template = {
131 	.u = {
132 		.dst = {
133 			.__refcnt	= ATOMIC_INIT(1),
134 			.__use		= 1,
135 			.obsolete	= -1,
136 			.error		= -ENETUNREACH,
137 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
138 			.input		= ip6_pkt_discard,
139 			.output		= ip6_pkt_discard_out,
140 		}
141 	},
142 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
143 	.rt6i_metric	= ~(u32) 0,
144 	.rt6i_ref	= ATOMIC_INIT(1),
145 };
146 
147 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
148 
149 static int ip6_pkt_prohibit(struct sk_buff *skb);
150 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
151 
152 struct rt6_info ip6_prohibit_entry_template = {
153 	.u = {
154 		.dst = {
155 			.__refcnt	= ATOMIC_INIT(1),
156 			.__use		= 1,
157 			.obsolete	= -1,
158 			.error		= -EACCES,
159 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
160 			.input		= ip6_pkt_prohibit,
161 			.output		= ip6_pkt_prohibit_out,
162 		}
163 	},
164 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
165 	.rt6i_metric	= ~(u32) 0,
166 	.rt6i_ref	= ATOMIC_INIT(1),
167 };
168 
169 static struct rt6_info ip6_blk_hole_entry_template = {
170 	.u = {
171 		.dst = {
172 			.__refcnt	= ATOMIC_INIT(1),
173 			.__use		= 1,
174 			.obsolete	= -1,
175 			.error		= -EINVAL,
176 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
177 			.input		= dst_discard,
178 			.output		= dst_discard,
179 		}
180 	},
181 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
182 	.rt6i_metric	= ~(u32) 0,
183 	.rt6i_ref	= ATOMIC_INIT(1),
184 };
185 
186 #endif
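/*
 * The templates above are the patterns for the per-namespace special
 * entries (e.g. net->ipv6.ip6_null_entry): the null entry answers failed
 * lookups with -ENETUNREACH, while the prohibit (-EACCES) and blackhole
 * (-EINVAL) entries back the corresponding policy-routing actions when
 * CONFIG_IPV6_MULTIPLE_TABLES is enabled.
 */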
187 
188 /* allocate dst with ip6_dst_ops */
189 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
190 {
191 	return (struct rt6_info *)dst_alloc(ops);
192 }
193 
194 static void ip6_dst_destroy(struct dst_entry *dst)
195 {
196 	struct rt6_info *rt = (struct rt6_info *)dst;
197 	struct inet6_dev *idev = rt->rt6i_idev;
198 
199 	if (idev != NULL) {
200 		rt->rt6i_idev = NULL;
201 		in6_dev_put(idev);
202 	}
203 }
204 
205 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
206 			   int how)
207 {
208 	struct rt6_info *rt = (struct rt6_info *)dst;
209 	struct inet6_dev *idev = rt->rt6i_idev;
210 	struct net_device *loopback_dev =
211 		dev->nd_net->loopback_dev;
212 
213 	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
214 		struct inet6_dev *loopback_idev =
215 			in6_dev_get(loopback_dev);
216 		if (loopback_idev != NULL) {
217 			rt->rt6i_idev = loopback_idev;
218 			in6_dev_put(idev);
219 		}
220 	}
221 }
222 
223 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
224 {
225 	return (rt->rt6i_flags & RTF_EXPIRES &&
226 		time_after(jiffies, rt->rt6i_expires));
227 }
228 
229 static inline int rt6_need_strict(struct in6_addr *daddr)
230 {
231 	return (ipv6_addr_type(daddr) &
232 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
233 }
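/*
 * Multicast and link-local destinations are only meaningful on a specific
 * link, so lookups for them must honour the requested interface; callers
 * use rt6_need_strict() to decide whether to set RT6_LOOKUP_F_IFACE.
 */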
234 
235 /*
236  *	Route lookup. Any table->tb6_lock is implied.
237  */
238 
239 static inline struct rt6_info *rt6_device_match(struct net *net,
240 						    struct rt6_info *rt,
241 						    int oif,
242 						    int strict)
243 {
244 	struct rt6_info *local = NULL;
245 	struct rt6_info *sprt;
246 
247 	if (oif) {
248 		for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
249 			struct net_device *dev = sprt->rt6i_dev;
250 			if (dev->ifindex == oif)
251 				return sprt;
252 			if (dev->flags & IFF_LOOPBACK) {
253 				if (sprt->rt6i_idev == NULL ||
254 				    sprt->rt6i_idev->dev->ifindex != oif) {
255 					if (strict && oif)
256 						continue;
257 					if (local && (!oif ||
258 						      local->rt6i_idev->dev->ifindex == oif))
259 						continue;
260 				}
261 				local = sprt;
262 			}
263 		}
264 
265 		if (local)
266 			return local;
267 
268 		if (strict)
269 			return net->ipv6.ip6_null_entry;
270 	}
271 	return rt;
272 }
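/*
 * rt6_device_match() walks the routes chained at this fib6 node looking for
 * the entry bound to the requested output interface.  A loopback route whose
 * inet6_dev belongs to @oif is remembered as a fallback; if nothing matches
 * and the lookup is strict, the namespace's null entry (-ENETUNREACH) is
 * returned instead of the head of the list.
 */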
273 
274 #ifdef CONFIG_IPV6_ROUTER_PREF
275 static void rt6_probe(struct rt6_info *rt)
276 {
277 	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
278 	/*
279 	 * Okay, this does not seem to be appropriate
280 	 * for now, however, we need to check if it
281 	 * is really so; aka Router Reachability Probing.
282 	 *
283 	 * Router Reachability Probe MUST be rate-limited
284 	 * to no more than one per minute.
285 	 */
286 	if (!neigh || (neigh->nud_state & NUD_VALID))
287 		return;
288 	read_lock_bh(&neigh->lock);
289 	if (!(neigh->nud_state & NUD_VALID) &&
290 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
291 		struct in6_addr mcaddr;
292 		struct in6_addr *target;
293 
294 		neigh->updated = jiffies;
295 		read_unlock_bh(&neigh->lock);
296 
297 		target = (struct in6_addr *)&neigh->primary_key;
298 		addrconf_addr_solict_mult(target, &mcaddr);
299 		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
300 	} else
301 		read_unlock_bh(&neigh->lock);
302 }
303 #else
304 static inline void rt6_probe(struct rt6_info *rt)
305 {
306 	return;
307 }
308 #endif
309 
310 /*
311  * Default Router Selection (RFC 2461 6.3.6)
312  */
313 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
314 {
315 	struct net_device *dev = rt->rt6i_dev;
316 	if (!oif || dev->ifindex == oif)
317 		return 2;
318 	if ((dev->flags & IFF_LOOPBACK) &&
319 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
320 		return 1;
321 	return 0;
322 }
323 
324 static inline int rt6_check_neigh(struct rt6_info *rt)
325 {
326 	struct neighbour *neigh = rt->rt6i_nexthop;
327 	int m;
328 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
329 	    !(rt->rt6i_flags & RTF_GATEWAY))
330 		m = 1;
331 	else if (neigh) {
332 		read_lock_bh(&neigh->lock);
333 		if (neigh->nud_state & NUD_VALID)
334 			m = 2;
335 #ifdef CONFIG_IPV6_ROUTER_PREF
336 		else if (neigh->nud_state & NUD_FAILED)
337 			m = 0;
338 #endif
339 		else
340 			m = 1;
341 		read_unlock_bh(&neigh->lock);
342 	} else
343 		m = 0;
344 	return m;
345 }
346 
347 static int rt6_score_route(struct rt6_info *rt, int oif,
348 			   int strict)
349 {
350 	int m, n;
351 
352 	m = rt6_check_dev(rt, oif);
353 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
354 		return -1;
355 #ifdef CONFIG_IPV6_ROUTER_PREF
356 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
357 #endif
358 	n = rt6_check_neigh(rt);
359 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
360 		return -1;
361 	return m;
362 }
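/*
 * Route score used by rt6_select()/find_match(): the low bits come from
 * rt6_check_dev() (2 = route on the requested interface, 1 = matching
 * loopback route, 0 = mismatch) and, with CONFIG_IPV6_ROUTER_PREF, the
 * decoded router preference is ORed in above them.  Neighbour state never
 * raises the score; it only disqualifies a route (-1) when the caller set
 * RT6_LOOKUP_F_REACHABLE, just as an interface mismatch does under
 * RT6_LOOKUP_F_IFACE.
 */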
363 
364 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
365 				   int *mpri, struct rt6_info *match)
366 {
367 	int m;
368 
369 	if (rt6_check_expired(rt))
370 		goto out;
371 
372 	m = rt6_score_route(rt, oif, strict);
373 	if (m < 0)
374 		goto out;
375 
376 	if (m > *mpri) {
377 		if (strict & RT6_LOOKUP_F_REACHABLE)
378 			rt6_probe(match);
379 		*mpri = m;
380 		match = rt;
381 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
382 		rt6_probe(rt);
383 	}
384 
385 out:
386 	return match;
387 }
388 
389 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
390 				     struct rt6_info *rr_head,
391 				     u32 metric, int oif, int strict)
392 {
393 	struct rt6_info *rt, *match;
394 	int mpri = -1;
395 
396 	match = NULL;
397 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
398 	     rt = rt->u.dst.rt6_next)
399 		match = find_match(rt, oif, strict, &mpri, match);
400 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
401 	     rt = rt->u.dst.rt6_next)
402 		match = find_match(rt, oif, strict, &mpri, match);
403 
404 	return match;
405 }
406 
407 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
408 {
409 	struct rt6_info *match, *rt0;
410 	struct net *net;
411 
412 	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
413 		  __FUNCTION__, fn->leaf, oif);
414 
415 	rt0 = fn->rr_ptr;
416 	if (!rt0)
417 		fn->rr_ptr = rt0 = fn->leaf;
418 
419 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
420 
421 	if (!match &&
422 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
423 		struct rt6_info *next = rt0->u.dst.rt6_next;
424 
425 		/* no entries matched; do round-robin */
426 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
427 			next = fn->leaf;
428 
429 		if (next != rt0)
430 			fn->rr_ptr = next;
431 	}
432 
433 	RT6_TRACE("%s() => %p\n",
434 		  __FUNCTION__, match);
435 
436 	net = rt0->rt6i_dev->nd_net;
437 	return (match ? match : net->ipv6.ip6_null_entry);
438 }
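/*
 * rt6_select() implements the default router selection sketched in the
 * "Changes" header: among the routes of equal metric at this node the
 * best-scoring one wins, and when nothing qualifies under
 * RT6_LOOKUP_F_REACHABLE the node's rr_ptr is advanced so that successive
 * lookups round-robin over the candidates.  If no route matches at all,
 * the namespace's null entry is returned.
 */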
439 
440 #ifdef CONFIG_IPV6_ROUTE_INFO
441 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
442 		  struct in6_addr *gwaddr)
443 {
444 	struct net *net = dev->nd_net;
445 	struct route_info *rinfo = (struct route_info *) opt;
446 	struct in6_addr prefix_buf, *prefix;
447 	unsigned int pref;
448 	u32 lifetime;
449 	struct rt6_info *rt;
450 
451 	if (len < sizeof(struct route_info)) {
452 		return -EINVAL;
453 	}
454 
455 	/* Sanity check for prefix_len and length */
456 	if (rinfo->length > 3) {
457 		return -EINVAL;
458 	} else if (rinfo->prefix_len > 128) {
459 		return -EINVAL;
460 	} else if (rinfo->prefix_len > 64) {
461 		if (rinfo->length < 2) {
462 			return -EINVAL;
463 		}
464 	} else if (rinfo->prefix_len > 0) {
465 		if (rinfo->length < 1) {
466 			return -EINVAL;
467 		}
468 	}
469 
470 	pref = rinfo->route_pref;
471 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
472 		pref = ICMPV6_ROUTER_PREF_MEDIUM;
473 
474 	lifetime = ntohl(rinfo->lifetime);
475 	if (lifetime == 0xffffffff) {
476 		/* infinity */
477 	} else if (lifetime > 0x7fffffff/HZ) {
478 		/* Avoid arithmetic overflow */
479 		lifetime = 0x7fffffff/HZ - 1;
480 	}
481 
482 	if (rinfo->length == 3)
483 		prefix = (struct in6_addr *)rinfo->prefix;
484 	else {
485 		/* this function is safe */
486 		ipv6_addr_prefix(&prefix_buf,
487 				 (struct in6_addr *)rinfo->prefix,
488 				 rinfo->prefix_len);
489 		prefix = &prefix_buf;
490 	}
491 
492 	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
493 				dev->ifindex);
494 
495 	if (rt && !lifetime) {
496 		ip6_del_rt(rt);
497 		rt = NULL;
498 	}
499 
500 	if (!rt && lifetime)
501 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
502 					pref);
503 	else if (rt)
504 		rt->rt6i_flags = RTF_ROUTEINFO |
505 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
506 
507 	if (rt) {
508 		if (lifetime == 0xffffffff) {
509 			rt->rt6i_flags &= ~RTF_EXPIRES;
510 		} else {
511 			rt->rt6i_expires = jiffies + HZ * lifetime;
512 			rt->rt6i_flags |= RTF_EXPIRES;
513 		}
514 		dst_release(&rt->u.dst);
515 	}
516 	return 0;
517 }
518 #endif
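/*
 * rt6_route_rcv() digests a Route Information option from a Router
 * Advertisement (RFC 4191): a zero lifetime removes any matching
 * RTF_ROUTEINFO route, a finite lifetime adds or refreshes one with
 * RTF_EXPIRES, and 0xffffffff (infinite) clears RTF_EXPIRES so the route
 * never ages out.
 */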
519 
520 #define BACKTRACK(__net, saddr)			\
521 do { \
522 	if (rt == __net->ipv6.ip6_null_entry) {	\
523 		struct fib6_node *pn; \
524 		while (1) { \
525 			if (fn->fn_flags & RTN_TL_ROOT) \
526 				goto out; \
527 			pn = fn->parent; \
528 			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
529 				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
530 			else \
531 				fn = pn; \
532 			if (fn->fn_flags & RTN_RTINFO) \
533 				goto restart; \
534 		} \
535 	} \
536 } while(0)
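/*
 * BACKTRACK() is shared by the lookup routines below.  When a match attempt
 * ends up at the null entry it climbs towards the tree root, descending into
 * a source-address subtree (FIB6_SUBTREE) where one exists, and jumps back
 * to the caller's "restart" label as soon as it reaches a node that carries
 * routes (RTN_RTINFO); hitting the table root falls through to "out".
 */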
537 
538 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
539 					     struct fib6_table *table,
540 					     struct flowi *fl, int flags)
541 {
542 	struct fib6_node *fn;
543 	struct rt6_info *rt;
544 
545 	read_lock_bh(&table->tb6_lock);
546 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
547 restart:
548 	rt = fn->leaf;
549 	rt = rt6_device_match(net, rt, fl->oif, flags);
550 	BACKTRACK(net, &fl->fl6_src);
551 out:
552 	dst_use(&rt->u.dst, jiffies);
553 	read_unlock_bh(&table->tb6_lock);
554 	return rt;
555 
556 }
557 
558 struct rt6_info *rt6_lookup(struct net *net, struct in6_addr *daddr,
559 			    struct in6_addr *saddr, int oif, int strict)
560 {
561 	struct flowi fl = {
562 		.oif = oif,
563 		.nl_u = {
564 			.ip6_u = {
565 				.daddr = *daddr,
566 			},
567 		},
568 	};
569 	struct dst_entry *dst;
570 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
571 
572 	if (saddr) {
573 		memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
574 		flags |= RT6_LOOKUP_F_HAS_SADDR;
575 	}
576 
577 	dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
578 	if (dst->error == 0)
579 		return (struct rt6_info *) dst;
580 
581 	dst_release(dst);
582 
583 	return NULL;
584 }
585 
586 EXPORT_SYMBOL(rt6_lookup);
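/*
 * Usage sketch (hypothetical caller): rt6_lookup() returns a referenced
 * entry or NULL, so the reference must be dropped when the caller is done:
 *
 *	struct rt6_info *rt = rt6_lookup(net, &daddr, NULL, dev->ifindex, 0);
 *	if (rt) {
 *		... inspect rt->rt6i_dev, rt->rt6i_gateway, ...
 *		dst_release(&rt->u.dst);
 *	}
 */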
587 
588 /* ip6_ins_rt is called with table->tb6_lock NOT held.
589    It takes a new route entry; if the addition fails for any reason the
590    route is freed. In any case, if the caller does not hold a reference,
591    it may be destroyed.
592  */
593 
594 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
595 {
596 	int err;
597 	struct fib6_table *table;
598 
599 	table = rt->rt6i_table;
600 	write_lock_bh(&table->tb6_lock);
601 	err = fib6_add(&table->tb6_root, rt, info);
602 	write_unlock_bh(&table->tb6_lock);
603 
604 	return err;
605 }
606 
607 int ip6_ins_rt(struct rt6_info *rt)
608 {
609 	struct nl_info info = {
610 		.nl_net = rt->rt6i_dev->nd_net,
611 	};
612 	return __ip6_ins_rt(rt, &info);
613 }
614 
615 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
616 				      struct in6_addr *saddr)
617 {
618 	struct rt6_info *rt;
619 
620 	/*
621 	 *	Clone the route.
622 	 */
623 
624 	rt = ip6_rt_copy(ort);
625 
626 	if (rt) {
627 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
628 			if (rt->rt6i_dst.plen != 128 &&
629 			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
630 				rt->rt6i_flags |= RTF_ANYCAST;
631 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
632 		}
633 
634 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
635 		rt->rt6i_dst.plen = 128;
636 		rt->rt6i_flags |= RTF_CACHE;
637 		rt->u.dst.flags |= DST_HOST;
638 
639 #ifdef CONFIG_IPV6_SUBTREES
640 		if (rt->rt6i_src.plen && saddr) {
641 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
642 			rt->rt6i_src.plen = 128;
643 		}
644 #endif
645 
646 		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
647 
648 	}
649 
650 	return rt;
651 }
652 
653 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
654 {
655 	struct rt6_info *rt = ip6_rt_copy(ort);
656 	if (rt) {
657 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
658 		rt->rt6i_dst.plen = 128;
659 		rt->rt6i_flags |= RTF_CACHE;
660 		rt->u.dst.flags |= DST_HOST;
661 		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
662 	}
663 	return rt;
664 }
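/*
 * rt6_alloc_cow() and rt6_alloc_clone() both derive a /128 RTF_CACHE host
 * route from a fib entry.  The difference is the nexthop: the COW variant
 * resolves a fresh neighbour entry (used for routes that do not yet have
 * one), while the clone variant simply takes a reference on the parent's
 * already-resolved nexthop.
 */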
665 
666 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
667 				      struct flowi *fl, int flags)
668 {
669 	struct fib6_node *fn;
670 	struct rt6_info *rt, *nrt;
671 	int strict = 0;
672 	int attempts = 3;
673 	int err;
674 	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
675 
676 	strict |= flags & RT6_LOOKUP_F_IFACE;
677 
678 relookup:
679 	read_lock_bh(&table->tb6_lock);
680 
681 restart_2:
682 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
683 
684 restart:
685 	rt = rt6_select(fn, oif, strict | reachable);
686 
687 	BACKTRACK(net, &fl->fl6_src);
688 	if (rt == net->ipv6.ip6_null_entry ||
689 	    rt->rt6i_flags & RTF_CACHE)
690 		goto out;
691 
692 	dst_hold(&rt->u.dst);
693 	read_unlock_bh(&table->tb6_lock);
694 
695 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
696 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
697 	else {
698 #if CLONE_OFFLINK_ROUTE
699 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
700 #else
701 		goto out2;
702 #endif
703 	}
704 
705 	dst_release(&rt->u.dst);
706 	rt = nrt ? : net->ipv6.ip6_null_entry;
707 
708 	dst_hold(&rt->u.dst);
709 	if (nrt) {
710 		err = ip6_ins_rt(nrt);
711 		if (!err)
712 			goto out2;
713 	}
714 
715 	if (--attempts <= 0)
716 		goto out2;
717 
718 	/*
719 	 * Race condition! In the gap, when table->tb6_lock was
720 	 * released someone could insert this route.  Relookup.
721 	 */
722 	dst_release(&rt->u.dst);
723 	goto relookup;
724 
725 out:
726 	if (reachable) {
727 		reachable = 0;
728 		goto restart_2;
729 	}
730 	dst_hold(&rt->u.dst);
731 	read_unlock_bh(&table->tb6_lock);
732 out2:
733 	rt->u.dst.lastuse = jiffies;
734 	rt->u.dst.__use++;
735 
736 	return rt;
737 }
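/*
 * ip6_pol_route() is the slow path shared by input and output lookups.
 * Unless forwarding is enabled it first restricts itself to (probably)
 * reachable routers and falls back to any route via restart_2; the chosen
 * route is then turned into a /128 cache entry where appropriate, inserted
 * into the table, and the whole lookup is retried a few times if another
 * CPU races the insertion while tb6_lock is dropped.
 */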
738 
739 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
740 					    struct flowi *fl, int flags)
741 {
742 	return ip6_pol_route(net, table, fl->iif, fl, flags);
743 }
744 
745 void ip6_route_input(struct sk_buff *skb)
746 {
747 	struct ipv6hdr *iph = ipv6_hdr(skb);
748 	struct net *net = skb->dev->nd_net;
749 	int flags = RT6_LOOKUP_F_HAS_SADDR;
750 	struct flowi fl = {
751 		.iif = skb->dev->ifindex,
752 		.nl_u = {
753 			.ip6_u = {
754 				.daddr = iph->daddr,
755 				.saddr = iph->saddr,
756 				.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
757 			},
758 		},
759 		.mark = skb->mark,
760 		.proto = iph->nexthdr,
761 	};
762 
763 	if (rt6_need_strict(&iph->daddr))
764 		flags |= RT6_LOOKUP_F_IFACE;
765 
766 	skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
767 }
768 
769 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
770 					     struct flowi *fl, int flags)
771 {
772 	return ip6_pol_route(net, table, fl->oif, fl, flags);
773 }
774 
775 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
776 {
777 	int flags = 0;
778 
779 	if (rt6_need_strict(&fl->fl6_dst))
780 		flags |= RT6_LOOKUP_F_IFACE;
781 
782 	if (!ipv6_addr_any(&fl->fl6_src))
783 		flags |= RT6_LOOKUP_F_HAS_SADDR;
784 
785 	return fib6_rule_lookup(&init_net, fl, flags, ip6_pol_route_output);
786 }
787 
788 EXPORT_SYMBOL(ip6_route_output);
789 
790 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
791 {
792 	struct rt6_info *ort = (struct rt6_info *) *dstp;
793 	struct rt6_info *rt = (struct rt6_info *)
794 		dst_alloc(&ip6_dst_blackhole_ops);
795 	struct dst_entry *new = NULL;
796 
797 	if (rt) {
798 		new = &rt->u.dst;
799 
800 		atomic_set(&new->__refcnt, 1);
801 		new->__use = 1;
802 		new->input = dst_discard;
803 		new->output = dst_discard;
804 
805 		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
806 		new->dev = ort->u.dst.dev;
807 		if (new->dev)
808 			dev_hold(new->dev);
809 		rt->rt6i_idev = ort->rt6i_idev;
810 		if (rt->rt6i_idev)
811 			in6_dev_hold(rt->rt6i_idev);
812 		rt->rt6i_expires = 0;
813 
814 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
815 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
816 		rt->rt6i_metric = 0;
817 
818 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
819 #ifdef CONFIG_IPV6_SUBTREES
820 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
821 #endif
822 
823 		dst_free(new);
824 	}
825 
826 	dst_release(*dstp);
827 	*dstp = new;
828 	return (new ? 0 : -ENOMEM);
829 }
830 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
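/*
 * ip6_dst_blackhole() replaces *dstp with a copy whose input and output
 * handlers silently discard packets.  The copy keeps the original metrics,
 * device and addresses but is allocated from ip6_dst_blackhole_ops, which
 * has no gc/ifdown hooks and ignores PMTU updates.
 */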
831 
832 /*
833  *	Destination cache support functions
834  */
835 
836 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
837 {
838 	struct rt6_info *rt;
839 
840 	rt = (struct rt6_info *) dst;
841 
842 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
843 		return dst;
844 
845 	return NULL;
846 }
847 
848 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
849 {
850 	struct rt6_info *rt = (struct rt6_info *) dst;
851 
852 	if (rt) {
853 		if (rt->rt6i_flags & RTF_CACHE)
854 			ip6_del_rt(rt);
855 		else
856 			dst_release(dst);
857 	}
858 	return NULL;
859 }
860 
861 static void ip6_link_failure(struct sk_buff *skb)
862 {
863 	struct rt6_info *rt;
864 
865 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
866 
867 	rt = (struct rt6_info *) skb->dst;
868 	if (rt) {
869 		if (rt->rt6i_flags&RTF_CACHE) {
870 			dst_set_expires(&rt->u.dst, 0);
871 			rt->rt6i_flags |= RTF_EXPIRES;
872 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
873 			rt->rt6i_node->fn_sernum = -1;
874 	}
875 }
876 
877 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
878 {
879 	struct rt6_info *rt6 = (struct rt6_info*)dst;
880 
881 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
882 		rt6->rt6i_flags |= RTF_MODIFIED;
883 		if (mtu < IPV6_MIN_MTU) {
884 			mtu = IPV6_MIN_MTU;
885 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
886 		}
887 		dst->metrics[RTAX_MTU-1] = mtu;
888 		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
889 	}
890 }
891 
892 static int ipv6_get_mtu(struct net_device *dev);
893 
894 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
895 {
896 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
897 
898 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
899 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
900 
901 	/*
902 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
903 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
904 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
905 	 * rely only on pmtu discovery"
906 	 */
907 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
908 		mtu = IPV6_MAXPLEN;
909 	return mtu;
910 }
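/*
 * Example: for a typical 1500-byte link MTU the advertised MSS is
 * 1500 - sizeof(struct ipv6hdr) - sizeof(struct tcphdr) = 1500 - 40 - 20
 * = 1440 bytes, provided that is not below ip6_rt_min_advmss.
 */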
911 
912 static struct dst_entry *icmp6_dst_gc_list;
913 static DEFINE_SPINLOCK(icmp6_dst_lock);
914 
915 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
916 				  struct neighbour *neigh,
917 				  struct in6_addr *addr)
918 {
919 	struct rt6_info *rt;
920 	struct inet6_dev *idev = in6_dev_get(dev);
921 	struct net *net = dev->nd_net;
922 
923 	if (unlikely(idev == NULL))
924 		return NULL;
925 
926 	rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
927 	if (unlikely(rt == NULL)) {
928 		in6_dev_put(idev);
929 		goto out;
930 	}
931 
932 	dev_hold(dev);
933 	if (neigh)
934 		neigh_hold(neigh);
935 	else
936 		neigh = ndisc_get_neigh(dev, addr);
937 
938 	rt->rt6i_dev	  = dev;
939 	rt->rt6i_idev     = idev;
940 	rt->rt6i_nexthop  = neigh;
941 	atomic_set(&rt->u.dst.__refcnt, 1);
942 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
943 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
944 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
945 	rt->u.dst.output  = ip6_output;
946 
947 #if 0	/* there's no chance to use these for ndisc */
948 	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
949 				? DST_HOST
950 				: 0;
951 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
952 	rt->rt6i_dst.plen = 128;
953 #endif
954 
955 	spin_lock_bh(&icmp6_dst_lock);
956 	rt->u.dst.next = icmp6_dst_gc_list;
957 	icmp6_dst_gc_list = &rt->u.dst;
958 	spin_unlock_bh(&icmp6_dst_lock);
959 
960 	fib6_force_start_gc(net);
961 
962 out:
963 	return &rt->u.dst;
964 }
965 
966 int icmp6_dst_gc(int *more)
967 {
968 	struct dst_entry *dst, *next, **pprev;
969 	int freed;
970 
971 	next = NULL;
972 	freed = 0;
973 
974 	spin_lock_bh(&icmp6_dst_lock);
975 	pprev = &icmp6_dst_gc_list;
976 
977 	while ((dst = *pprev) != NULL) {
978 		if (!atomic_read(&dst->__refcnt)) {
979 			*pprev = dst->next;
980 			dst_free(dst);
981 			freed++;
982 		} else {
983 			pprev = &dst->next;
984 			(*more)++;
985 		}
986 	}
987 
988 	spin_unlock_bh(&icmp6_dst_lock);
989 
990 	return freed;
991 }
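/*
 * dst entries created by icmp6_dst_alloc() are never linked into a fib6
 * table, so they are chained on icmp6_dst_gc_list instead and reclaimed
 * here once their refcount drops to zero; *more counts the survivors so
 * the caller can tell whether another gc pass will be needed.
 */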
992 
993 static int ip6_dst_gc(struct dst_ops *ops)
994 {
995 	struct net *net = ops->dst_net;
996 	unsigned long now = jiffies;
997 
998 	if (time_after(net->ipv6.ip6_rt_last_gc + net->ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
999 	    atomic_read(&net->ipv6.ip6_dst_ops->entries) <= net->ipv6.sysctl.ip6_rt_max_size)
1000 		goto out;
1001 
1002 	net->ipv6.ip6_rt_gc_expire++;
1003 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1004 	net->ipv6.ip6_rt_last_gc = now;
1005 	if (atomic_read(&net->ipv6.ip6_dst_ops->entries) < net->ipv6.ip6_dst_ops->gc_thresh)
1006 		net->ipv6.ip6_rt_gc_expire = net->ipv6.sysctl.ip6_rt_gc_timeout>>1;
1007 
1008 out:
1009 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>net->ipv6.sysctl.ip6_rt_gc_elasticity;
1010 	return (atomic_read(&net->ipv6.ip6_dst_ops->entries) > net->ipv6.sysctl.ip6_rt_max_size);
1011 }
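/*
 * GC policy: a run is skipped while the last one was less than
 * ip6_rt_gc_min_interval ago and the entry count is still within
 * ip6_rt_max_size.  ip6_rt_gc_expire is bumped before each fib6_run_gc()
 * pass, reset to half of ip6_rt_gc_timeout once the entry count falls
 * below gc_thresh, and decays by expire >> ip6_rt_gc_elasticity on every
 * call.
 */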
1012 
1013 /* Clean host part of a prefix. Not necessary in radix tree,
1014    but results in cleaner routing tables.
1015 
1016    Remove it only when all the things will work!
1017  */
1018 
1019 static int ipv6_get_mtu(struct net_device *dev)
1020 {
1021 	int mtu = IPV6_MIN_MTU;
1022 	struct inet6_dev *idev;
1023 
1024 	idev = in6_dev_get(dev);
1025 	if (idev) {
1026 		mtu = idev->cnf.mtu6;
1027 		in6_dev_put(idev);
1028 	}
1029 	return mtu;
1030 }
1031 
1032 int ipv6_get_hoplimit(struct net_device *dev)
1033 {
1034 	int hoplimit = ipv6_devconf.hop_limit;
1035 	struct inet6_dev *idev;
1036 
1037 	idev = in6_dev_get(dev);
1038 	if (idev) {
1039 		hoplimit = idev->cnf.hop_limit;
1040 		in6_dev_put(idev);
1041 	}
1042 	return hoplimit;
1043 }
1044 
1045 /*
1046  *
1047  */
1048 
1049 int ip6_route_add(struct fib6_config *cfg)
1050 {
1051 	int err;
1052 	struct net *net = cfg->fc_nlinfo.nl_net;
1053 	struct rt6_info *rt = NULL;
1054 	struct net_device *dev = NULL;
1055 	struct inet6_dev *idev = NULL;
1056 	struct fib6_table *table;
1057 	int addr_type;
1058 
1059 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1060 		return -EINVAL;
1061 #ifndef CONFIG_IPV6_SUBTREES
1062 	if (cfg->fc_src_len)
1063 		return -EINVAL;
1064 #endif
1065 	if (cfg->fc_ifindex) {
1066 		err = -ENODEV;
1067 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1068 		if (!dev)
1069 			goto out;
1070 		idev = in6_dev_get(dev);
1071 		if (!idev)
1072 			goto out;
1073 	}
1074 
1075 	if (cfg->fc_metric == 0)
1076 		cfg->fc_metric = IP6_RT_PRIO_USER;
1077 
1078 	table = fib6_new_table(net, cfg->fc_table);
1079 	if (table == NULL) {
1080 		err = -ENOBUFS;
1081 		goto out;
1082 	}
1083 
1084 	rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
1085 
1086 	if (rt == NULL) {
1087 		err = -ENOMEM;
1088 		goto out;
1089 	}
1090 
1091 	rt->u.dst.obsolete = -1;
1092 	rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1093 
1094 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1095 		cfg->fc_protocol = RTPROT_BOOT;
1096 	rt->rt6i_protocol = cfg->fc_protocol;
1097 
1098 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1099 
1100 	if (addr_type & IPV6_ADDR_MULTICAST)
1101 		rt->u.dst.input = ip6_mc_input;
1102 	else
1103 		rt->u.dst.input = ip6_forward;
1104 
1105 	rt->u.dst.output = ip6_output;
1106 
1107 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1108 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1109 	if (rt->rt6i_dst.plen == 128)
1110 	       rt->u.dst.flags = DST_HOST;
1111 
1112 #ifdef CONFIG_IPV6_SUBTREES
1113 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1114 	rt->rt6i_src.plen = cfg->fc_src_len;
1115 #endif
1116 
1117 	rt->rt6i_metric = cfg->fc_metric;
1118 
1119 	/* We cannot add true routes via loopback here,
1120 	   they would result in kernel looping; promote them to reject routes
1121 	 */
1122 	if ((cfg->fc_flags & RTF_REJECT) ||
1123 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1124 		/* hold loopback dev/idev if we haven't done so. */
1125 		if (dev != net->loopback_dev) {
1126 			if (dev) {
1127 				dev_put(dev);
1128 				in6_dev_put(idev);
1129 			}
1130 			dev = net->loopback_dev;
1131 			dev_hold(dev);
1132 			idev = in6_dev_get(dev);
1133 			if (!idev) {
1134 				err = -ENODEV;
1135 				goto out;
1136 			}
1137 		}
1138 		rt->u.dst.output = ip6_pkt_discard_out;
1139 		rt->u.dst.input = ip6_pkt_discard;
1140 		rt->u.dst.error = -ENETUNREACH;
1141 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1142 		goto install_route;
1143 	}
1144 
1145 	if (cfg->fc_flags & RTF_GATEWAY) {
1146 		struct in6_addr *gw_addr;
1147 		int gwa_type;
1148 
1149 		gw_addr = &cfg->fc_gateway;
1150 		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1151 		gwa_type = ipv6_addr_type(gw_addr);
1152 
1153 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1154 			struct rt6_info *grt;
1155 
1156 			/* IPv6 strictly inhibits using non-link-local
1157 			   addresses as nexthop addresses.
1158 			   Otherwise, the router will not be able to send redirects.
1159 			   That is usually desirable, but in some (rare!) circumstances
1160 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1161 			   some exceptions. --ANK
1162 			 */
1163 			err = -EINVAL;
1164 			if (!(gwa_type&IPV6_ADDR_UNICAST))
1165 				goto out;
1166 
1167 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1168 
1169 			err = -EHOSTUNREACH;
1170 			if (grt == NULL)
1171 				goto out;
1172 			if (dev) {
1173 				if (dev != grt->rt6i_dev) {
1174 					dst_release(&grt->u.dst);
1175 					goto out;
1176 				}
1177 			} else {
1178 				dev = grt->rt6i_dev;
1179 				idev = grt->rt6i_idev;
1180 				dev_hold(dev);
1181 				in6_dev_hold(grt->rt6i_idev);
1182 			}
1183 			if (!(grt->rt6i_flags&RTF_GATEWAY))
1184 				err = 0;
1185 			dst_release(&grt->u.dst);
1186 
1187 			if (err)
1188 				goto out;
1189 		}
1190 		err = -EINVAL;
1191 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1192 			goto out;
1193 	}
1194 
1195 	err = -ENODEV;
1196 	if (dev == NULL)
1197 		goto out;
1198 
1199 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1200 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1201 		if (IS_ERR(rt->rt6i_nexthop)) {
1202 			err = PTR_ERR(rt->rt6i_nexthop);
1203 			rt->rt6i_nexthop = NULL;
1204 			goto out;
1205 		}
1206 	}
1207 
1208 	rt->rt6i_flags = cfg->fc_flags;
1209 
1210 install_route:
1211 	if (cfg->fc_mx) {
1212 		struct nlattr *nla;
1213 		int remaining;
1214 
1215 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1216 			int type = nla_type(nla);
1217 
1218 			if (type) {
1219 				if (type > RTAX_MAX) {
1220 					err = -EINVAL;
1221 					goto out;
1222 				}
1223 
1224 				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1225 			}
1226 		}
1227 	}
1228 
1229 	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1230 		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1231 	if (!rt->u.dst.metrics[RTAX_MTU-1])
1232 		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1233 	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1234 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1235 	rt->u.dst.dev = dev;
1236 	rt->rt6i_idev = idev;
1237 	rt->rt6i_table = table;
1238 
1239 	cfg->fc_nlinfo.nl_net = dev->nd_net;
1240 
1241 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1242 
1243 out:
1244 	if (dev)
1245 		dev_put(dev);
1246 	if (idev)
1247 		in6_dev_put(idev);
1248 	if (rt)
1249 		dst_free(&rt->u.dst);
1250 	return err;
1251 }
1252 
1253 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1254 {
1255 	int err;
1256 	struct fib6_table *table;
1257 	struct net *net = rt->rt6i_dev->nd_net;
1258 
1259 	if (rt == net->ipv6.ip6_null_entry)
1260 		return -ENOENT;
1261 
1262 	table = rt->rt6i_table;
1263 	write_lock_bh(&table->tb6_lock);
1264 
1265 	err = fib6_del(rt, info);
1266 	dst_release(&rt->u.dst);
1267 
1268 	write_unlock_bh(&table->tb6_lock);
1269 
1270 	return err;
1271 }
1272 
1273 int ip6_del_rt(struct rt6_info *rt)
1274 {
1275 	struct nl_info info = {
1276 		.nl_net = rt->rt6i_dev->nd_net,
1277 	};
1278 	return __ip6_del_rt(rt, &info);
1279 }
1280 
1281 static int ip6_route_del(struct fib6_config *cfg)
1282 {
1283 	struct fib6_table *table;
1284 	struct fib6_node *fn;
1285 	struct rt6_info *rt;
1286 	int err = -ESRCH;
1287 
1288 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1289 	if (table == NULL)
1290 		return err;
1291 
1292 	read_lock_bh(&table->tb6_lock);
1293 
1294 	fn = fib6_locate(&table->tb6_root,
1295 			 &cfg->fc_dst, cfg->fc_dst_len,
1296 			 &cfg->fc_src, cfg->fc_src_len);
1297 
1298 	if (fn) {
1299 		for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1300 			if (cfg->fc_ifindex &&
1301 			    (rt->rt6i_dev == NULL ||
1302 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1303 				continue;
1304 			if (cfg->fc_flags & RTF_GATEWAY &&
1305 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1306 				continue;
1307 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1308 				continue;
1309 			dst_hold(&rt->u.dst);
1310 			read_unlock_bh(&table->tb6_lock);
1311 
1312 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1313 		}
1314 	}
1315 	read_unlock_bh(&table->tb6_lock);
1316 
1317 	return err;
1318 }
1319 
1320 /*
1321  *	Handle redirects
1322  */
1323 struct ip6rd_flowi {
1324 	struct flowi fl;
1325 	struct in6_addr gateway;
1326 };
1327 
1328 static struct rt6_info *__ip6_route_redirect(struct net *net,
1329 					     struct fib6_table *table,
1330 					     struct flowi *fl,
1331 					     int flags)
1332 {
1333 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1334 	struct rt6_info *rt;
1335 	struct fib6_node *fn;
1336 
1337 	/*
1338 	 * Get the "current" route for this destination and
1339 	 * check if the redirect has come from an appropriate router.
1340 	 *
1341 	 * RFC 2461 specifies that redirects should only be
1342 	 * accepted if they come from the nexthop to the target.
1343 	 * Due to the way the routes are chosen, this notion
1344 	 * is a bit fuzzy and one might need to check all possible
1345 	 * routes.
1346 	 */
1347 
1348 	read_lock_bh(&table->tb6_lock);
1349 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1350 restart:
1351 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1352 		/*
1353 		 * Current route is on-link; redirect is always invalid.
1354 		 *
1355 		 * It seems the previous statement is not quite true. It could
1356 		 * be a node which regards us as on-link (e.g. proxy ndisc),
1357 		 * but then the router serving it might decide that we should
1358 		 * know the truth 8)8) --ANK (980726).
1359 		 */
1360 		if (rt6_check_expired(rt))
1361 			continue;
1362 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1363 			continue;
1364 		if (fl->oif != rt->rt6i_dev->ifindex)
1365 			continue;
1366 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1367 			continue;
1368 		break;
1369 	}
1370 
1371 	if (!rt)
1372 		rt = net->ipv6.ip6_null_entry;
1373 	BACKTRACK(net, &fl->fl6_src);
1374 out:
1375 	dst_hold(&rt->u.dst);
1376 
1377 	read_unlock_bh(&table->tb6_lock);
1378 
1379 	return rt;
1380 };
1381 
1382 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1383 					   struct in6_addr *src,
1384 					   struct in6_addr *gateway,
1385 					   struct net_device *dev)
1386 {
1387 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1388 	struct net *net = dev->nd_net;
1389 	struct ip6rd_flowi rdfl = {
1390 		.fl = {
1391 			.oif = dev->ifindex,
1392 			.nl_u = {
1393 				.ip6_u = {
1394 					.daddr = *dest,
1395 					.saddr = *src,
1396 				},
1397 			},
1398 		},
1399 		.gateway = *gateway,
1400 	};
1401 
1402 	if (rt6_need_strict(dest))
1403 		flags |= RT6_LOOKUP_F_IFACE;
1404 
1405 	return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1406 						   flags, __ip6_route_redirect);
1407 }
1408 
1409 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1410 		  struct in6_addr *saddr,
1411 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1412 {
1413 	struct rt6_info *rt, *nrt = NULL;
1414 	struct netevent_redirect netevent;
1415 	struct net *net = neigh->dev->nd_net;
1416 
1417 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1418 
1419 	if (rt == net->ipv6.ip6_null_entry) {
1420 		if (net_ratelimit())
1421 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1422 			       "for redirect target\n");
1423 		goto out;
1424 	}
1425 
1426 	/*
1427 	 *	We have finally decided to accept it.
1428 	 */
1429 
1430 	neigh_update(neigh, lladdr, NUD_STALE,
1431 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1432 		     NEIGH_UPDATE_F_OVERRIDE|
1433 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1434 				     NEIGH_UPDATE_F_ISROUTER))
1435 		     );
1436 
1437 	/*
1438 	 * Redirect received -> path was valid.
1439 	 * Look, redirects are sent only in response to data packets,
1440 	 * so this nexthop apparently is reachable. --ANK
1441 	 */
1442 	dst_confirm(&rt->u.dst);
1443 
1444 	/* Duplicate redirect: silently ignore. */
1445 	if (neigh == rt->u.dst.neighbour)
1446 		goto out;
1447 
1448 	nrt = ip6_rt_copy(rt);
1449 	if (nrt == NULL)
1450 		goto out;
1451 
1452 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1453 	if (on_link)
1454 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1455 
1456 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1457 	nrt->rt6i_dst.plen = 128;
1458 	nrt->u.dst.flags |= DST_HOST;
1459 
1460 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1461 	nrt->rt6i_nexthop = neigh_clone(neigh);
1462 	/* Reset pmtu, it may be better */
1463 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1464 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(neigh->dev->nd_net,
1465 							dst_mtu(&nrt->u.dst));
1466 
1467 	if (ip6_ins_rt(nrt))
1468 		goto out;
1469 
1470 	netevent.old = &rt->u.dst;
1471 	netevent.new = &nrt->u.dst;
1472 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1473 
1474 	if (rt->rt6i_flags&RTF_CACHE) {
1475 		ip6_del_rt(rt);
1476 		return;
1477 	}
1478 
1479 out:
1480 	dst_release(&rt->u.dst);
1481 	return;
1482 }
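/*
 * Accepting a redirect: the current route is validated against the
 * advertising router (__ip6_route_redirect), the neighbour entry is
 * updated from the link-layer address option, and the route is cloned
 * into an RTF_DYNAMIC|RTF_CACHE host route whose gateway is taken from
 * neigh->primary_key; any previously cached entry for the destination is
 * then removed so the new path takes effect immediately.
 */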
1483 
1484 /*
1485  *	Handle ICMP "packet too big" messages
1486  *	i.e. Path MTU discovery
1487  */
1488 
1489 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1490 			struct net_device *dev, u32 pmtu)
1491 {
1492 	struct rt6_info *rt, *nrt;
1493 	struct net *net = dev->nd_net;
1494 	int allfrag = 0;
1495 
1496 	rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
1497 	if (rt == NULL)
1498 		return;
1499 
1500 	if (pmtu >= dst_mtu(&rt->u.dst))
1501 		goto out;
1502 
1503 	if (pmtu < IPV6_MIN_MTU) {
1504 		/*
1505 		 * According to RFC 2460, the PMTU is set to the IPv6 Minimum Link
1506 		 * MTU (1280) and a fragment header should always be included
1507 		 * after a node receives a Packet Too Big message reporting a PMTU
1508 		 * less than the IPv6 Minimum Link MTU.
1509 		 */
1510 		pmtu = IPV6_MIN_MTU;
1511 		allfrag = 1;
1512 	}
1513 
1514 	/* New mtu received -> path was valid.
1515 	   Packet Too Big messages are sent only in response to data packets,
1516 	   so this nexthop apparently is reachable. --ANK
1517 	 */
1518 	dst_confirm(&rt->u.dst);
1519 
1520 	/* Host route. If it is static, it would be better
1521 	   not to override it but to add a new one, so that
1522 	   when the cache entry expires the old pmtu
1523 	   is restored automatically.
1524 	 */
1525 	if (rt->rt6i_flags & RTF_CACHE) {
1526 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1527 		if (allfrag)
1528 			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1529 		dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1530 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1531 		goto out;
1532 	}
1533 
1534 	/* Network route.
1535 	   Two cases are possible:
1536 	   1. It is a connected route. Action: COW.
1537 	   2. It is a gatewayed or NONEXTHOP route. Action: clone it.
1538 	 */
1539 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1540 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1541 	else
1542 		nrt = rt6_alloc_clone(rt, daddr);
1543 
1544 	if (nrt) {
1545 		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1546 		if (allfrag)
1547 			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1548 
1549 		/* According to RFC 1981, a PMTU increase should not be detected
1550 		 * within 5 minutes; the recommended timer is 10 minutes.  Here the
1551 		 * route expiration time is set to ip6_rt_mtu_expires, which is
1552 		 * 10 minutes, so after 10 minutes the decreased pmtu expires and
1553 		 * a PMTU increase can be detected automatically.
1554 		 */
1555 		dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1556 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1557 
1558 		ip6_ins_rt(nrt);
1559 	}
1560 out:
1561 	dst_release(&rt->u.dst);
1562 }
1563 
1564 /*
1565  *	Misc support functions
1566  */
1567 
1568 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1569 {
1570 	struct net *net = ort->rt6i_dev->nd_net;
1571 	struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
1572 
1573 	if (rt) {
1574 		rt->u.dst.input = ort->u.dst.input;
1575 		rt->u.dst.output = ort->u.dst.output;
1576 
1577 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1578 		rt->u.dst.error = ort->u.dst.error;
1579 		rt->u.dst.dev = ort->u.dst.dev;
1580 		if (rt->u.dst.dev)
1581 			dev_hold(rt->u.dst.dev);
1582 		rt->rt6i_idev = ort->rt6i_idev;
1583 		if (rt->rt6i_idev)
1584 			in6_dev_hold(rt->rt6i_idev);
1585 		rt->u.dst.lastuse = jiffies;
1586 		rt->rt6i_expires = 0;
1587 
1588 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1589 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1590 		rt->rt6i_metric = 0;
1591 
1592 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1593 #ifdef CONFIG_IPV6_SUBTREES
1594 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1595 #endif
1596 		rt->rt6i_table = ort->rt6i_table;
1597 	}
1598 	return rt;
1599 }
1600 
1601 #ifdef CONFIG_IPV6_ROUTE_INFO
1602 static struct rt6_info *rt6_get_route_info(struct net *net,
1603 					   struct in6_addr *prefix, int prefixlen,
1604 					   struct in6_addr *gwaddr, int ifindex)
1605 {
1606 	struct fib6_node *fn;
1607 	struct rt6_info *rt = NULL;
1608 	struct fib6_table *table;
1609 
1610 	table = fib6_get_table(net, RT6_TABLE_INFO);
1611 	if (table == NULL)
1612 		return NULL;
1613 
1614 	write_lock_bh(&table->tb6_lock);
1615 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1616 	if (!fn)
1617 		goto out;
1618 
1619 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1620 		if (rt->rt6i_dev->ifindex != ifindex)
1621 			continue;
1622 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1623 			continue;
1624 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1625 			continue;
1626 		dst_hold(&rt->u.dst);
1627 		break;
1628 	}
1629 out:
1630 	write_unlock_bh(&table->tb6_lock);
1631 	return rt;
1632 }
1633 
1634 static struct rt6_info *rt6_add_route_info(struct net *net,
1635 					   struct in6_addr *prefix, int prefixlen,
1636 					   struct in6_addr *gwaddr, int ifindex,
1637 					   unsigned pref)
1638 {
1639 	struct fib6_config cfg = {
1640 		.fc_table	= RT6_TABLE_INFO,
1641 		.fc_metric	= IP6_RT_PRIO_USER,
1642 		.fc_ifindex	= ifindex,
1643 		.fc_dst_len	= prefixlen,
1644 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1645 				  RTF_UP | RTF_PREF(pref),
1646 		.fc_nlinfo.pid = 0,
1647 		.fc_nlinfo.nlh = NULL,
1648 		.fc_nlinfo.nl_net = net,
1649 	};
1650 
1651 	ipv6_addr_copy(&cfg.fc_dst, prefix);
1652 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1653 
1654 	/* We should treat it as a default route if prefix length is 0. */
1655 	if (!prefixlen)
1656 		cfg.fc_flags |= RTF_DEFAULT;
1657 
1658 	ip6_route_add(&cfg);
1659 
1660 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1661 }
1662 #endif
1663 
1664 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1665 {
1666 	struct rt6_info *rt;
1667 	struct fib6_table *table;
1668 
1669 	table = fib6_get_table(dev->nd_net, RT6_TABLE_DFLT);
1670 	if (table == NULL)
1671 		return NULL;
1672 
1673 	write_lock_bh(&table->tb6_lock);
1674 	for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1675 		if (dev == rt->rt6i_dev &&
1676 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1677 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1678 			break;
1679 	}
1680 	if (rt)
1681 		dst_hold(&rt->u.dst);
1682 	write_unlock_bh(&table->tb6_lock);
1683 	return rt;
1684 }
1685 
1686 EXPORT_SYMBOL(rt6_get_dflt_router);
1687 
1688 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1689 				     struct net_device *dev,
1690 				     unsigned int pref)
1691 {
1692 	struct fib6_config cfg = {
1693 		.fc_table	= RT6_TABLE_DFLT,
1694 		.fc_metric	= IP6_RT_PRIO_USER,
1695 		.fc_ifindex	= dev->ifindex,
1696 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1697 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1698 		.fc_nlinfo.pid = 0,
1699 		.fc_nlinfo.nlh = NULL,
1700 		.fc_nlinfo.nl_net = dev->nd_net,
1701 	};
1702 
1703 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1704 
1705 	ip6_route_add(&cfg);
1706 
1707 	return rt6_get_dflt_router(gwaddr, dev);
1708 }
1709 
1710 void rt6_purge_dflt_routers(struct net *net)
1711 {
1712 	struct rt6_info *rt;
1713 	struct fib6_table *table;
1714 
1715 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1716 	table = fib6_get_table(net, RT6_TABLE_DFLT);
1717 	if (table == NULL)
1718 		return;
1719 
1720 restart:
1721 	read_lock_bh(&table->tb6_lock);
1722 	for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1723 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1724 			dst_hold(&rt->u.dst);
1725 			read_unlock_bh(&table->tb6_lock);
1726 			ip6_del_rt(rt);
1727 			goto restart;
1728 		}
1729 	}
1730 	read_unlock_bh(&table->tb6_lock);
1731 }
1732 
1733 static void rtmsg_to_fib6_config(struct net *net,
1734 				 struct in6_rtmsg *rtmsg,
1735 				 struct fib6_config *cfg)
1736 {
1737 	memset(cfg, 0, sizeof(*cfg));
1738 
1739 	cfg->fc_table = RT6_TABLE_MAIN;
1740 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1741 	cfg->fc_metric = rtmsg->rtmsg_metric;
1742 	cfg->fc_expires = rtmsg->rtmsg_info;
1743 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1744 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1745 	cfg->fc_flags = rtmsg->rtmsg_flags;
1746 
1747 	cfg->fc_nlinfo.nl_net = net;
1748 
1749 	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1750 	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1751 	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1752 }
1753 
1754 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1755 {
1756 	struct fib6_config cfg;
1757 	struct in6_rtmsg rtmsg;
1758 	int err;
1759 
1760 	switch(cmd) {
1761 	case SIOCADDRT:		/* Add a route */
1762 	case SIOCDELRT:		/* Delete a route */
1763 		if (!capable(CAP_NET_ADMIN))
1764 			return -EPERM;
1765 		err = copy_from_user(&rtmsg, arg,
1766 				     sizeof(struct in6_rtmsg));
1767 		if (err)
1768 			return -EFAULT;
1769 
1770 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1771 
1772 		rtnl_lock();
1773 		switch (cmd) {
1774 		case SIOCADDRT:
1775 			err = ip6_route_add(&cfg);
1776 			break;
1777 		case SIOCDELRT:
1778 			err = ip6_route_del(&cfg);
1779 			break;
1780 		default:
1781 			err = -EINVAL;
1782 		}
1783 		rtnl_unlock();
1784 
1785 		return err;
1786 	}
1787 
1788 	return -EINVAL;
1789 }
1790 
1791 /*
1792  *	Drop the packet on the floor
1793  */
1794 
1795 static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
1796 {
1797 	int type;
1798 	switch (ipstats_mib_noroutes) {
1799 	case IPSTATS_MIB_INNOROUTES:
1800 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1801 		if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1802 			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1803 			break;
1804 		}
1805 		/* FALLTHROUGH */
1806 	case IPSTATS_MIB_OUTNOROUTES:
1807 		IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1808 		break;
1809 	}
1810 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1811 	kfree_skb(skb);
1812 	return 0;
1813 }
1814 
1815 static int ip6_pkt_discard(struct sk_buff *skb)
1816 {
1817 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1818 }
1819 
1820 static int ip6_pkt_discard_out(struct sk_buff *skb)
1821 {
1822 	skb->dev = skb->dst->dev;
1823 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1824 }
1825 
1826 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1827 
1828 static int ip6_pkt_prohibit(struct sk_buff *skb)
1829 {
1830 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1831 }
1832 
1833 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1834 {
1835 	skb->dev = skb->dst->dev;
1836 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1837 }
1838 
1839 #endif
1840 
1841 /*
1842  *	Allocate a dst for local (unicast / anycast) address.
1843  */
1844 
1845 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1846 				    const struct in6_addr *addr,
1847 				    int anycast)
1848 {
1849 	struct net *net = idev->dev->nd_net;
1850 	struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
1851 
1852 	if (rt == NULL)
1853 		return ERR_PTR(-ENOMEM);
1854 
1855 	dev_hold(net->loopback_dev);
1856 	in6_dev_hold(idev);
1857 
1858 	rt->u.dst.flags = DST_HOST;
1859 	rt->u.dst.input = ip6_input;
1860 	rt->u.dst.output = ip6_output;
1861 	rt->rt6i_dev = net->loopback_dev;
1862 	rt->rt6i_idev = idev;
1863 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1864 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1865 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1866 	rt->u.dst.obsolete = -1;
1867 
1868 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1869 	if (anycast)
1870 		rt->rt6i_flags |= RTF_ANYCAST;
1871 	else
1872 		rt->rt6i_flags |= RTF_LOCAL;
1873 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1874 	if (rt->rt6i_nexthop == NULL) {
1875 		dst_free(&rt->u.dst);
1876 		return ERR_PTR(-ENOMEM);
1877 	}
1878 
1879 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1880 	rt->rt6i_dst.plen = 128;
1881 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1882 
1883 	atomic_set(&rt->u.dst.__refcnt, 1);
1884 
1885 	return rt;
1886 }
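/*
 * addrconf_dst_alloc() builds the host route that makes a local unicast or
 * anycast address deliverable: it always points at the loopback device,
 * lives in RT6_TABLE_LOCAL, and is returned with one reference held for
 * the caller.
 */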
1887 
1888 struct arg_dev_net {
1889 	struct net_device *dev;
1890 	struct net *net;
1891 };
1892 
1893 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1894 {
1895 	struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1896 	struct net *net = ((struct arg_dev_net *)arg)->net;
1897 
1898 	if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1899 	    rt != net->ipv6.ip6_null_entry) {
1900 		RT6_TRACE("deleted by ifdown %p\n", rt);
1901 		return -1;
1902 	}
1903 	return 0;
1904 }
1905 
1906 void rt6_ifdown(struct net *net, struct net_device *dev)
1907 {
1908 	struct arg_dev_net adn = {
1909 		.dev = dev,
1910 		.net = net,
1911 	};
1912 
1913 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
1914 }
1915 
1916 struct rt6_mtu_change_arg
1917 {
1918 	struct net_device *dev;
1919 	unsigned mtu;
1920 };
1921 
1922 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1923 {
1924 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1925 	struct inet6_dev *idev;
1926 	struct net *net = arg->dev->nd_net;
1927 
1928 	/* In IPv6 pmtu discovery is not optional,
1929 	   so the RTAX_MTU lock cannot disable it.
1930 	   We still use this lock to block changes
1931 	   caused by addrconf/ndisc.
1932 	*/
1933 
1934 	idev = __in6_dev_get(arg->dev);
1935 	if (idev == NULL)
1936 		return 0;
1937 
1938 	/* For an administrative MTU increase there is no way to discover
1939 	   an IPv6 PMTU increase, so the PMTU increase must be applied here.
1940 	   Since RFC 1981 doesn't cover administrative MTU increases,
1941 	   updating the PMTU on such an increase is a MUST (e.g. jumbo frames).
1942 	 */
1943 	/*
1944 	   If the new MTU is less than the route PMTU, the new MTU will be the
1945 	   lowest MTU in the path; update the route PMTU to reflect the PMTU
1946 	   decrease. If the new MTU is greater than the route PMTU, and the
1947 	   old MTU was the lowest MTU in the path, update the route PMTU
1948 	   to reflect the increase. In that case, if another node's MTU is
1949 	   still the lowest in the path, a Packet Too Big message will trigger
1950 	   PMTU discovery again.
1951 	 */
1952 	if (rt->rt6i_dev == arg->dev &&
1953 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1954 	    (dst_mtu(&rt->u.dst) >= arg->mtu ||
1955 	     (dst_mtu(&rt->u.dst) < arg->mtu &&
1956 	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1957 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1958 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
1959 	}
1960 	return 0;
1961 }
1962 
1963 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1964 {
1965 	struct rt6_mtu_change_arg arg = {
1966 		.dev = dev,
1967 		.mtu = mtu,
1968 	};
1969 
1970 	fib6_clean_all(dev->nd_net, rt6_mtu_change_route, 0, &arg);
1971 }
1972 
1973 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
1974 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
1975 	[RTA_OIF]               = { .type = NLA_U32 },
1976 	[RTA_IIF]		= { .type = NLA_U32 },
1977 	[RTA_PRIORITY]          = { .type = NLA_U32 },
1978 	[RTA_METRICS]           = { .type = NLA_NESTED },
1979 };
1980 
1981 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1982 			      struct fib6_config *cfg)
1983 {
1984 	struct rtmsg *rtm;
1985 	struct nlattr *tb[RTA_MAX+1];
1986 	int err;
1987 
1988 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1989 	if (err < 0)
1990 		goto errout;
1991 
1992 	err = -EINVAL;
1993 	rtm = nlmsg_data(nlh);
1994 	memset(cfg, 0, sizeof(*cfg));
1995 
1996 	cfg->fc_table = rtm->rtm_table;
1997 	cfg->fc_dst_len = rtm->rtm_dst_len;
1998 	cfg->fc_src_len = rtm->rtm_src_len;
1999 	cfg->fc_flags = RTF_UP;
2000 	cfg->fc_protocol = rtm->rtm_protocol;
2001 
2002 	if (rtm->rtm_type == RTN_UNREACHABLE)
2003 		cfg->fc_flags |= RTF_REJECT;
2004 
2005 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2006 	cfg->fc_nlinfo.nlh = nlh;
2007 	cfg->fc_nlinfo.nl_net = skb->sk->sk_net;
2008 
2009 	if (tb[RTA_GATEWAY]) {
2010 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2011 		cfg->fc_flags |= RTF_GATEWAY;
2012 	}
2013 
2014 	if (tb[RTA_DST]) {
2015 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2016 
2017 		if (nla_len(tb[RTA_DST]) < plen)
2018 			goto errout;
2019 
2020 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2021 	}
2022 
2023 	if (tb[RTA_SRC]) {
2024 		int plen = (rtm->rtm_src_len + 7) >> 3;
2025 
2026 		if (nla_len(tb[RTA_SRC]) < plen)
2027 			goto errout;
2028 
2029 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2030 	}
2031 
2032 	if (tb[RTA_OIF])
2033 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2034 
2035 	if (tb[RTA_PRIORITY])
2036 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2037 
2038 	if (tb[RTA_METRICS]) {
2039 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2040 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2041 	}
2042 
2043 	if (tb[RTA_TABLE])
2044 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2045 
2046 	err = 0;
2047 errout:
2048 	return err;
2049 }
2050 
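/* doit handlers for RTM_DELROUTE/RTM_NEWROUTE: translate the netlink
 * request into a fib6_config and hand it to the FIB front-end. */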
2051 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2052 {
2053 	struct fib6_config cfg;
2054 	int err;
2055 
2056 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2057 	if (err < 0)
2058 		return err;
2059 
2060 	return ip6_route_del(&cfg);
2061 }
2062 
2063 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2064 {
2065 	struct fib6_config cfg;
2066 	int err;
2067 
2068 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2069 	if (err < 0)
2070 		return err;
2071 
2072 	return ip6_route_add(&cfg);
2073 }
2074 
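/* Worst-case payload size of a route notification; must stay in sync
 * with the attributes emitted by rt6_fill_node() below. */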
2075 static inline size_t rt6_nlmsg_size(void)
2076 {
2077 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2078 	       + nla_total_size(16) /* RTA_SRC */
2079 	       + nla_total_size(16) /* RTA_DST */
2080 	       + nla_total_size(16) /* RTA_GATEWAY */
2081 	       + nla_total_size(16) /* RTA_PREFSRC */
2082 	       + nla_total_size(4) /* RTA_TABLE */
2083 	       + nla_total_size(4) /* RTA_IIF */
2084 	       + nla_total_size(4) /* RTA_OIF */
2085 	       + nla_total_size(4) /* RTA_PRIORITY */
2086 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2087 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2088 }
2089 
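/* Build one RTM_NEWROUTE/RTM_DELROUTE message for @rt.  When @prefix is
 * set, only prefix routes (RTF_PREFIX_RT) are reported; other routes
 * return 1 so that a dump continues without emitting anything. */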
2090 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2091 			 struct in6_addr *dst, struct in6_addr *src,
2092 			 int iif, int type, u32 pid, u32 seq,
2093 			 int prefix, unsigned int flags)
2094 {
2095 	struct rtmsg *rtm;
2096 	struct nlmsghdr *nlh;
2097 	long expires;
2098 	u32 table;
2099 
2100 	if (prefix) {	/* user wants prefix routes only */
2101 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2102 			/* success since this is not a prefix route */
2103 			return 1;
2104 		}
2105 	}
2106 
2107 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2108 	if (nlh == NULL)
2109 		return -EMSGSIZE;
2110 
2111 	rtm = nlmsg_data(nlh);
2112 	rtm->rtm_family = AF_INET6;
2113 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2114 	rtm->rtm_src_len = rt->rt6i_src.plen;
2115 	rtm->rtm_tos = 0;
2116 	if (rt->rt6i_table)
2117 		table = rt->rt6i_table->tb6_id;
2118 	else
2119 		table = RT6_TABLE_UNSPEC;
2120 	rtm->rtm_table = table;
2121 	NLA_PUT_U32(skb, RTA_TABLE, table);
2122 	if (rt->rt6i_flags&RTF_REJECT)
2123 		rtm->rtm_type = RTN_UNREACHABLE;
2124 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2125 		rtm->rtm_type = RTN_LOCAL;
2126 	else
2127 		rtm->rtm_type = RTN_UNICAST;
2128 	rtm->rtm_flags = 0;
2129 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2130 	rtm->rtm_protocol = rt->rt6i_protocol;
2131 	if (rt->rt6i_flags&RTF_DYNAMIC)
2132 		rtm->rtm_protocol = RTPROT_REDIRECT;
2133 	else if (rt->rt6i_flags & RTF_ADDRCONF)
2134 		rtm->rtm_protocol = RTPROT_KERNEL;
2135 	else if (rt->rt6i_flags&RTF_DEFAULT)
2136 		rtm->rtm_protocol = RTPROT_RA;
2137 
2138 	if (rt->rt6i_flags&RTF_CACHE)
2139 		rtm->rtm_flags |= RTM_F_CLONED;
2140 
2141 	if (dst) {
2142 		NLA_PUT(skb, RTA_DST, 16, dst);
2143 		rtm->rtm_dst_len = 128;
2144 	} else if (rtm->rtm_dst_len)
2145 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2146 #ifdef CONFIG_IPV6_SUBTREES
2147 	if (src) {
2148 		NLA_PUT(skb, RTA_SRC, 16, src);
2149 		rtm->rtm_src_len = 128;
2150 	} else if (rtm->rtm_src_len)
2151 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2152 #endif
2153 	if (iif)
2154 		NLA_PUT_U32(skb, RTA_IIF, iif);
2155 	else if (dst) {
2156 		struct in6_addr saddr_buf;
2157 		if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
2158 				       dst, &saddr_buf) == 0)
2159 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2160 	}
2161 
2162 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2163 		goto nla_put_failure;
2164 
2165 	if (rt->u.dst.neighbour)
2166 		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2167 
2168 	if (rt->u.dst.dev)
2169 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2170 
2171 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2172 
2173 	expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2174 	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2175 			       expires, rt->u.dst.error) < 0)
2176 		goto nla_put_failure;
2177 
2178 	return nlmsg_end(skb, nlh);
2179 
2180 nla_put_failure:
2181 	nlmsg_cancel(skb, nlh);
2182 	return -EMSGSIZE;
2183 }
2184 
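/* Per-route callback for RTM_GETROUTE dumps; honours the RTM_F_PREFIX
 * filter when the request carried a full rtmsg header. */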
2185 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2186 {
2187 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2188 	int prefix;
2189 
2190 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2191 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2192 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2193 	} else
2194 		prefix = 0;
2195 
2196 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2197 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2198 		     prefix, NLM_F_MULTI);
2199 }
2200 
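/* RTM_GETROUTE for a single destination: build a flow from the request
 * attributes, resolve it via ip6_route_output() and unicast the
 * resulting route back to the requester. */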
2201 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2202 {
2203 	struct net *net = in_skb->sk->sk_net;
2204 	struct nlattr *tb[RTA_MAX+1];
2205 	struct rt6_info *rt;
2206 	struct sk_buff *skb;
2207 	struct rtmsg *rtm;
2208 	struct flowi fl;
2209 	int err, iif = 0;
2210 
2211 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2212 	if (err < 0)
2213 		goto errout;
2214 
2215 	err = -EINVAL;
2216 	memset(&fl, 0, sizeof(fl));
2217 
2218 	if (tb[RTA_SRC]) {
2219 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2220 			goto errout;
2221 
2222 		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2223 	}
2224 
2225 	if (tb[RTA_DST]) {
2226 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2227 			goto errout;
2228 
2229 		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2230 	}
2231 
2232 	if (tb[RTA_IIF])
2233 		iif = nla_get_u32(tb[RTA_IIF]);
2234 
2235 	if (tb[RTA_OIF])
2236 		fl.oif = nla_get_u32(tb[RTA_OIF]);
2237 
2238 	if (iif) {
2239 		struct net_device *dev;
2240 		dev = __dev_get_by_index(net, iif);
2241 		if (!dev) {
2242 			err = -ENODEV;
2243 			goto errout;
2244 		}
2245 	}
2246 
2247 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2248 	if (skb == NULL) {
2249 		err = -ENOBUFS;
2250 		goto errout;
2251 	}
2252 
2253 	/* Reserve room for dummy headers; this skb can pass
2254 	   through a good chunk of the routing engine.
2255 	 */
2256 	skb_reset_mac_header(skb);
2257 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2258 
2259 	rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2260 	skb->dst = &rt->u.dst;
2261 
2262 	err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2263 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2264 			    nlh->nlmsg_seq, 0, 0);
2265 	if (err < 0) {
2266 		kfree_skb(skb);
2267 		goto errout;
2268 	}
2269 
2270 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2271 errout:
2272 	return err;
2273 }
2274 
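/* Broadcast a route change to RTNLGRP_IPV6_ROUTE listeners. */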
2275 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2276 {
2277 	struct sk_buff *skb;
2278 	struct net *net = info->nl_net;
2279 	u32 seq;
2280 	int err;
2281 
2282 	err = -ENOBUFS;
2283 	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2284 
2285 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2286 	if (skb == NULL)
2287 		goto errout;
2288 
2289 	err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2290 				event, info->pid, seq, 0, 0);
2291 	if (err < 0) {
2292 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2293 		WARN_ON(err == -EMSGSIZE);
2294 		kfree_skb(skb);
2295 		goto errout;
2296 	}
2297 	err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2298 			  info->nlh, gfp_any());
2299 errout:
2300 	if (err < 0)
2301 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2302 }
2303 
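/* When the loopback device registers in a namespace, attach it to that
 * namespace's null (and, with multiple tables, prohibit/blackhole)
 * entries so they have a valid device and inet6_dev. */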
2304 static int ip6_route_dev_notify(struct notifier_block *this,
2305 				unsigned long event, void *data)
2306 {
2307 	struct net_device *dev = (struct net_device *)data;
2308 	struct net *net = dev->nd_net;
2309 
2310 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2311 		net->ipv6.ip6_null_entry->u.dst.dev = dev;
2312 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2313 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2314 		net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2315 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2316 		net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2317 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2318 #endif
2319 	}
2320 
2321 	return NOTIFY_OK;
2322 }
2323 
2324 /*
2325  *	/proc
2326  */
2327 
2328 #ifdef CONFIG_PROC_FS
2329 
2330 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2331 
2332 struct rt6_proc_arg
2333 {
2334 	char *buffer;
2335 	int offset;
2336 	int length;
2337 	int skip;
2338 	int len;
2339 };
2340 
2341 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2342 {
2343 	struct seq_file *m = p_arg;
2344 
2345 	seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2346 		   rt->rt6i_dst.plen);
2347 
2348 #ifdef CONFIG_IPV6_SUBTREES
2349 	seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2350 		   rt->rt6i_src.plen);
2351 #else
2352 	seq_puts(m, "00000000000000000000000000000000 00 ");
2353 #endif
2354 
2355 	if (rt->rt6i_nexthop) {
2356 		seq_printf(m, NIP6_SEQFMT,
2357 			   NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2358 	} else {
2359 		seq_puts(m, "00000000000000000000000000000000");
2360 	}
2361 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2362 		   rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2363 		   rt->u.dst.__use, rt->rt6i_flags,
2364 		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
2365 	return 0;
2366 }
2367 
2368 static int ipv6_route_show(struct seq_file *m, void *v)
2369 {
2370 	struct net *net = (struct net *)m->private;
2371 	fib6_clean_all(net, rt6_info_route, 0, m);
2372 	return 0;
2373 }
2374 
2375 static int ipv6_route_open(struct inode *inode, struct file *file)
2376 {
2377 	struct net *net = get_proc_net(inode);
2378 	if (!net)
2379 		return -ENXIO;
2380 	return single_open(file, ipv6_route_show, net);
2381 }
2382 
2383 static int ipv6_route_release(struct inode *inode, struct file *file)
2384 {
2385 	struct seq_file *seq = file->private_data;
2386 	struct net *net = seq->private;
2387 	put_net(net);
2388 	return single_release(inode, file);
2389 }
2390 
2391 static const struct file_operations ipv6_route_proc_fops = {
2392 	.owner		= THIS_MODULE,
2393 	.open		= ipv6_route_open,
2394 	.read		= seq_read,
2395 	.llseek		= seq_lseek,
2396 	.release	= ipv6_route_release,
2397 };
2398 
2399 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2400 {
2401 	struct net *net = (struct net *)seq->private;
2402 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2403 		   net->ipv6.rt6_stats->fib_nodes,
2404 		   net->ipv6.rt6_stats->fib_route_nodes,
2405 		   net->ipv6.rt6_stats->fib_rt_alloc,
2406 		   net->ipv6.rt6_stats->fib_rt_entries,
2407 		   net->ipv6.rt6_stats->fib_rt_cache,
2408 		   atomic_read(&net->ipv6.ip6_dst_ops->entries),
2409 		   net->ipv6.rt6_stats->fib_discarded_routes);
2410 
2411 	return 0;
2412 }
2413 
2414 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2415 {
2416 	struct net *net = get_proc_net(inode);
2417 	return single_open(file, rt6_stats_seq_show, net);
2418 }
2419 
2420 static int rt6_stats_seq_release(struct inode *inode, struct file *file)
2421 {
2422 	struct seq_file *seq = file->private_data;
2423 	struct net *net = (struct net *)seq->private;
2424 	put_net(net);
2425 	return single_release(inode, file);
2426 }
2427 
2428 static const struct file_operations rt6_stats_seq_fops = {
2429 	.owner	 = THIS_MODULE,
2430 	.open	 = rt6_stats_seq_open,
2431 	.read	 = seq_read,
2432 	.llseek	 = seq_lseek,
2433 	.release = rt6_stats_seq_release,
2434 };
2435 #endif	/* CONFIG_PROC_FS */
2436 
2437 #ifdef CONFIG_SYSCTL
2438 
2439 static
2440 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2441 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2442 {
2443 	struct net *net = current->nsproxy->net_ns;
2444 	int delay = net->ipv6.sysctl.flush_delay;
2445 	if (write) {
2446 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2447 		fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2448 		return 0;
2449 	} else
2450 		return -EINVAL;
2451 }
2452 
2453 ctl_table ipv6_route_table_template[] = {
2454 	{
2455 		.procname	=	"flush",
2456 		.data		=	&init_net.ipv6.sysctl.flush_delay,
2457 		.maxlen		=	sizeof(int),
2458 		.mode		=	0200,
2459 		.proc_handler	=	&ipv6_sysctl_rtcache_flush
2460 	},
2461 	{
2462 		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
2463 		.procname	=	"gc_thresh",
2464 		.data		=	&ip6_dst_ops_template.gc_thresh,
2465 		.maxlen		=	sizeof(int),
2466 		.mode		=	0644,
2467 		.proc_handler	=	&proc_dointvec,
2468 	},
2469 	{
2470 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
2471 		.procname	=	"max_size",
2472 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2473 		.maxlen		=	sizeof(int),
2474 		.mode		=	0644,
2475 		.proc_handler	=	&proc_dointvec,
2476 	},
2477 	{
2478 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2479 		.procname	=	"gc_min_interval",
2480 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2481 		.maxlen		=	sizeof(int),
2482 		.mode		=	0644,
2483 		.proc_handler	=	&proc_dointvec_jiffies,
2484 		.strategy	=	&sysctl_jiffies,
2485 	},
2486 	{
2487 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
2488 		.procname	=	"gc_timeout",
2489 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2490 		.maxlen		=	sizeof(int),
2491 		.mode		=	0644,
2492 		.proc_handler	=	&proc_dointvec_jiffies,
2493 		.strategy	=	&sysctl_jiffies,
2494 	},
2495 	{
2496 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
2497 		.procname	=	"gc_interval",
2498 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2499 		.maxlen		=	sizeof(int),
2500 		.mode		=	0644,
2501 		.proc_handler	=	&proc_dointvec_jiffies,
2502 		.strategy	=	&sysctl_jiffies,
2503 	},
2504 	{
2505 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
2506 		.procname	=	"gc_elasticity",
2507 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2508 		.maxlen		=	sizeof(int),
2509 		.mode		=	0644,
2510 		.proc_handler	=	&proc_dointvec_jiffies,
2511 		.strategy	=	&sysctl_jiffies,
2512 	},
2513 	{
2514 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
2515 		.procname	=	"mtu_expires",
2516 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2517 		.maxlen		=	sizeof(int),
2518 		.mode		=	0644,
2519 		.proc_handler	=	&proc_dointvec_jiffies,
2520 		.strategy	=	&sysctl_jiffies,
2521 	},
2522 	{
2523 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
2524 		.procname	=	"min_adv_mss",
2525 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2526 		.maxlen		=	sizeof(int),
2527 		.mode		=	0644,
2528 		.proc_handler	=	&proc_dointvec_jiffies,
2529 		.strategy	=	&sysctl_jiffies,
2530 	},
2531 	{
2532 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2533 		.procname	=	"gc_min_interval_ms",
2534 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2535 		.maxlen		=	sizeof(int),
2536 		.mode		=	0644,
2537 		.proc_handler	=	&proc_dointvec_ms_jiffies,
2538 		.strategy	=	&sysctl_ms_jiffies,
2539 	},
2540 	{ .ctl_name = 0 }
2541 };
2542 
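/* Duplicate the sysctl template for a namespace and point each entry at
 * that namespace's data.  The indices below must match the order of
 * ipv6_route_table_template[]. */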
2543 struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2544 {
2545 	struct ctl_table *table;
2546 
2547 	table = kmemdup(ipv6_route_table_template,
2548 			sizeof(ipv6_route_table_template),
2549 			GFP_KERNEL);
2550 
2551 	if (table) {
2552 		table[0].data = &net->ipv6.sysctl.flush_delay;
2553 		table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh;
2554 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2555 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2556 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2557 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2558 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2559 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2560 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2561 	}
2562 
2563 	return table;
2564 }
2565 #endif
2566 
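/* Per-namespace setup: clone the dst_ops and the special route entries,
 * then create the /proc files. */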
2567 static int ip6_route_net_init(struct net *net)
2568 {
2569 	int ret = 0;
2570 
2571 	ret = -ENOMEM;
2572 	net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template,
2573 					sizeof(*net->ipv6.ip6_dst_ops),
2574 					GFP_KERNEL);
2575 	if (!net->ipv6.ip6_dst_ops)
2576 		goto out;
2577 	net->ipv6.ip6_dst_ops->dst_net = net;
2578 
2579 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2580 					   sizeof(*net->ipv6.ip6_null_entry),
2581 					   GFP_KERNEL);
2582 	if (!net->ipv6.ip6_null_entry)
2583 		goto out_ip6_dst_ops;
2584 	net->ipv6.ip6_null_entry->u.dst.path =
2585 		(struct dst_entry *)net->ipv6.ip6_null_entry;
2586 	net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
2587 
2588 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2589 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2590 					       sizeof(*net->ipv6.ip6_prohibit_entry),
2591 					       GFP_KERNEL);
2592 	if (!net->ipv6.ip6_prohibit_entry) {
2593 		kfree(net->ipv6.ip6_null_entry);
2594 		goto out_ip6_dst_ops;
2595 	}
2596 	net->ipv6.ip6_prohibit_entry->u.dst.path =
2597 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2598 	net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
2599 
2600 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2601 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2602 					       GFP_KERNEL);
2603 	if (!net->ipv6.ip6_blk_hole_entry) {
2604 		kfree(net->ipv6.ip6_null_entry);
2605 		kfree(net->ipv6.ip6_prohibit_entry);
2606 		goto out_ip6_dst_ops;
2607 	}
2608 	net->ipv6.ip6_blk_hole_entry->u.dst.path =
2609 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2610 	net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
2611 #endif
2612 
2613 #ifdef CONFIG_PROC_FS
2614 	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2615 	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2616 #endif
2617 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2618 
2619 	ret = 0;
2620 out:
2621 	return ret;
2622 
2623 out_ip6_dst_ops:
2624 	kfree(net->ipv6.ip6_dst_ops);
2625 	goto out;
2626 }
2627 
2628 static void ip6_route_net_exit(struct net *net)
2629 {
2630 #ifdef CONFIG_PROC_FS
2631 	proc_net_remove(net, "ipv6_route");
2632 	proc_net_remove(net, "rt6_stats");
2633 #endif
2634 	kfree(net->ipv6.ip6_null_entry);
2635 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2636 	kfree(net->ipv6.ip6_prohibit_entry);
2637 	kfree(net->ipv6.ip6_blk_hole_entry);
2638 #endif
2639 	kfree(net->ipv6.ip6_dst_ops);
2640 }
2641 
2642 static struct pernet_operations ip6_route_net_ops = {
2643 	.init = ip6_route_net_init,
2644 	.exit = ip6_route_net_exit,
2645 };
2646 
2647 static struct notifier_block ip6_route_dev_notifier = {
2648 	.notifier_call = ip6_route_dev_notify,
2649 	.priority = 0,
2650 };
2651 
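/* Subsystem init: the dst cache, per-net state, FIB, xfrm and policy
 * rules must be up before the rtnetlink handlers are registered; the
 * error path unwinds in reverse order. */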
2652 int __init ip6_route_init(void)
2653 {
2654 	int ret;
2655 
2656 	ret = -ENOMEM;
2657 	ip6_dst_ops_template.kmem_cachep =
2658 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2659 				  SLAB_HWCACHE_ALIGN, NULL);
2660 	if (!ip6_dst_ops_template.kmem_cachep)
2661 		goto out;
2662 
2663 	ret = register_pernet_subsys(&ip6_route_net_ops);
2664 	if (ret)
2665 		goto out_kmem_cache;
2666 
2667 	/* The loopback device is registered before this portion of code
2668 	 * runs, so the loopback reference in rt6_info has not been taken;
2669 	 * take it manually for init_net. */
2670 	init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2671 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2672 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2673 	init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2674 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2675 	init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2676 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2677 #endif
2678 	ret = fib6_init();
2679 	if (ret)
2680 		goto out_register_subsys;
2681 
2682 	ret = xfrm6_init();
2683 	if (ret)
2684 		goto out_fib6_init;
2685 
2686 	ret = fib6_rules_init();
2687 	if (ret)
2688 		goto xfrm6_init;
2689 
2690 	ret = -ENOBUFS;
2691 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2692 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2693 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2694 		goto fib6_rules_init;
2695 
2696 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2697 	if (ret)
2698 		goto fib6_rules_init;
2699 
2700 out:
2701 	return ret;
2702 
2703 fib6_rules_init:
2704 	fib6_rules_cleanup();
2705 xfrm6_init:
2706 	xfrm6_fini();
2707 out_fib6_init:
2708 	fib6_gc_cleanup();
2709 out_register_subsys:
2710 	unregister_pernet_subsys(&ip6_route_net_ops);
2711 out_kmem_cache:
2712 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2713 	goto out;
2714 }
2715 
2716 void ip6_route_cleanup(void)
2717 {
2718 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
2719 	fib6_rules_cleanup();
2720 	xfrm6_fini();
2721 	fib6_gc_cleanup();
2722 	unregister_pernet_subsys(&ip6_route_net_ops);
2723 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2724 }
2725