xref: /openbmc/linux/net/ipv6/route.c (revision 545e4006)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <net/net_namespace.h>
44 #include <net/snmp.h>
45 #include <net/ipv6.h>
46 #include <net/ip6_fib.h>
47 #include <net/ip6_route.h>
48 #include <net/ndisc.h>
49 #include <net/addrconf.h>
50 #include <net/tcp.h>
51 #include <linux/rtnetlink.h>
52 #include <net/dst.h>
53 #include <net/xfrm.h>
54 #include <net/netevent.h>
55 #include <net/netlink.h>
56 
57 #include <asm/uaccess.h>
58 
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62 
63 /* Set to 3 to get tracing. */
64 #define RT6_DEBUG 2
65 
66 #if RT6_DEBUG >= 3
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
69 #else
70 #define RDBG(x)
71 #define RT6_TRACE(x...) do { ; } while (0)
72 #endif
73 
74 #define CLONE_OFFLINK_ROUTE 0
75 
76 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
77 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
79 static void		ip6_dst_destroy(struct dst_entry *);
80 static void		ip6_dst_ifdown(struct dst_entry *,
81 				       struct net_device *dev, int how);
82 static int		 ip6_dst_gc(struct dst_ops *ops);
83 
84 static int		ip6_pkt_discard(struct sk_buff *skb);
85 static int		ip6_pkt_discard_out(struct sk_buff *skb);
86 static void		ip6_link_failure(struct sk_buff *skb);
87 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
88 
89 #ifdef CONFIG_IPV6_ROUTE_INFO
90 static struct rt6_info *rt6_add_route_info(struct net *net,
91 					   struct in6_addr *prefix, int prefixlen,
92 					   struct in6_addr *gwaddr, int ifindex,
93 					   unsigned pref);
94 static struct rt6_info *rt6_get_route_info(struct net *net,
95 					   struct in6_addr *prefix, int prefixlen,
96 					   struct in6_addr *gwaddr, int ifindex);
97 #endif
98 
99 static struct dst_ops ip6_dst_ops_template = {
100 	.family			=	AF_INET6,
101 	.protocol		=	__constant_htons(ETH_P_IPV6),
102 	.gc			=	ip6_dst_gc,
103 	.gc_thresh		=	1024,
104 	.check			=	ip6_dst_check,
105 	.destroy		=	ip6_dst_destroy,
106 	.ifdown			=	ip6_dst_ifdown,
107 	.negative_advice	=	ip6_negative_advice,
108 	.link_failure		=	ip6_link_failure,
109 	.update_pmtu		=	ip6_rt_update_pmtu,
110 	.local_out		=	__ip6_local_out,
111 	.entry_size		=	sizeof(struct rt6_info),
112 	.entries		=	ATOMIC_INIT(0),
113 };
114 
115 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
116 {
117 }
118 
119 static struct dst_ops ip6_dst_blackhole_ops = {
120 	.family			=	AF_INET6,
121 	.protocol		=	__constant_htons(ETH_P_IPV6),
122 	.destroy		=	ip6_dst_destroy,
123 	.check			=	ip6_dst_check,
124 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
125 	.entry_size		=	sizeof(struct rt6_info),
126 	.entries		=	ATOMIC_INIT(0),
127 };
128 
129 static struct rt6_info ip6_null_entry_template = {
130 	.u = {
131 		.dst = {
132 			.__refcnt	= ATOMIC_INIT(1),
133 			.__use		= 1,
134 			.obsolete	= -1,
135 			.error		= -ENETUNREACH,
136 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
137 			.input		= ip6_pkt_discard,
138 			.output		= ip6_pkt_discard_out,
139 		}
140 	},
141 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
142 	.rt6i_metric	= ~(u32) 0,
143 	.rt6i_ref	= ATOMIC_INIT(1),
144 };
145 
146 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
147 
148 static int ip6_pkt_prohibit(struct sk_buff *skb);
149 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
150 
151 static struct rt6_info ip6_prohibit_entry_template = {
152 	.u = {
153 		.dst = {
154 			.__refcnt	= ATOMIC_INIT(1),
155 			.__use		= 1,
156 			.obsolete	= -1,
157 			.error		= -EACCES,
158 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
159 			.input		= ip6_pkt_prohibit,
160 			.output		= ip6_pkt_prohibit_out,
161 		}
162 	},
163 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
164 	.rt6i_metric	= ~(u32) 0,
165 	.rt6i_ref	= ATOMIC_INIT(1),
166 };
167 
168 static struct rt6_info ip6_blk_hole_entry_template = {
169 	.u = {
170 		.dst = {
171 			.__refcnt	= ATOMIC_INIT(1),
172 			.__use		= 1,
173 			.obsolete	= -1,
174 			.error		= -EINVAL,
175 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
176 			.input		= dst_discard,
177 			.output		= dst_discard,
178 		}
179 	},
180 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
181 	.rt6i_metric	= ~(u32) 0,
182 	.rt6i_ref	= ATOMIC_INIT(1),
183 };
184 
185 #endif
186 
187 /* allocate dst with ip6_dst_ops */
188 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
189 {
190 	return (struct rt6_info *)dst_alloc(ops);
191 }
192 
193 static void ip6_dst_destroy(struct dst_entry *dst)
194 {
195 	struct rt6_info *rt = (struct rt6_info *)dst;
196 	struct inet6_dev *idev = rt->rt6i_idev;
197 
198 	if (idev != NULL) {
199 		rt->rt6i_idev = NULL;
200 		in6_dev_put(idev);
201 	}
202 }
203 
204 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
205 			   int how)
206 {
207 	struct rt6_info *rt = (struct rt6_info *)dst;
208 	struct inet6_dev *idev = rt->rt6i_idev;
209 	struct net_device *loopback_dev =
210 		dev_net(dev)->loopback_dev;
211 
212 	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
213 		struct inet6_dev *loopback_idev =
214 			in6_dev_get(loopback_dev);
215 		if (loopback_idev != NULL) {
216 			rt->rt6i_idev = loopback_idev;
217 			in6_dev_put(idev);
218 		}
219 	}
220 }
221 
222 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
223 {
224 	return (rt->rt6i_flags & RTF_EXPIRES &&
225 		time_after(jiffies, rt->rt6i_expires));
226 }
227 
228 static inline int rt6_need_strict(struct in6_addr *daddr)
229 {
230 	return (ipv6_addr_type(daddr) &
231 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
232 }
233 
234 /*
235  *	Route lookup. Any table->tb6_lock is implied.
236  */
237 
238 static inline struct rt6_info *rt6_device_match(struct net *net,
239 						    struct rt6_info *rt,
240 						    struct in6_addr *saddr,
241 						    int oif,
242 						    int flags)
243 {
244 	struct rt6_info *local = NULL;
245 	struct rt6_info *sprt;
246 
247 	if (!oif && ipv6_addr_any(saddr))
248 		goto out;
249 
250 	for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
251 		struct net_device *dev = sprt->rt6i_dev;
252 
253 		if (oif) {
254 			if (dev->ifindex == oif)
255 				return sprt;
256 			if (dev->flags & IFF_LOOPBACK) {
257 				if (sprt->rt6i_idev == NULL ||
258 				    sprt->rt6i_idev->dev->ifindex != oif) {
259 					if (flags & RT6_LOOKUP_F_IFACE && oif)
260 						continue;
261 					if (local && (!oif ||
262 						      local->rt6i_idev->dev->ifindex == oif))
263 						continue;
264 				}
265 				local = sprt;
266 			}
267 		} else {
268 			if (ipv6_chk_addr(net, saddr, dev,
269 					  flags & RT6_LOOKUP_F_IFACE))
270 				return sprt;
271 		}
272 	}
273 
274 	if (oif) {
275 		if (local)
276 			return local;
277 
278 		if (flags & RT6_LOOKUP_F_IFACE)
279 			return net->ipv6.ip6_null_entry;
280 	}
281 out:
282 	return rt;
283 }
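
/*
 * The matcher above scans the routes sharing one fib6 node.  When an
 * output interface is given it prefers a route bound to that device,
 * falling back to a loopback route serving the interface; when only a
 * source address is given it picks a route whose device has that
 * address configured.  If strict interface matching (RT6_LOOKUP_F_IFACE)
 * finds nothing, the per-namespace null entry (-ENETUNREACH) is returned.
 */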
284 
285 #ifdef CONFIG_IPV6_ROUTER_PREF
286 static void rt6_probe(struct rt6_info *rt)
287 {
288 	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
289 	/*
290 	 * Okay, this does not seem to be appropriate
291 	 * for now, however, we need to check if it
292 	 * is really so; aka Router Reachability Probing.
293 	 *
294 	 * Router Reachability Probe MUST be rate-limited
295 	 * to no more than one per minute.
296 	 */
297 	if (!neigh || (neigh->nud_state & NUD_VALID))
298 		return;
299 	read_lock_bh(&neigh->lock);
300 	if (!(neigh->nud_state & NUD_VALID) &&
301 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
302 		struct in6_addr mcaddr;
303 		struct in6_addr *target;
304 
305 		neigh->updated = jiffies;
306 		read_unlock_bh(&neigh->lock);
307 
308 		target = (struct in6_addr *)&neigh->primary_key;
309 		addrconf_addr_solict_mult(target, &mcaddr);
310 		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
311 	} else
312 		read_unlock_bh(&neigh->lock);
313 }
314 #else
315 static inline void rt6_probe(struct rt6_info *rt)
316 {
317 	return;
318 }
319 #endif
320 
321 /*
322  * Default Router Selection (RFC 2461 6.3.6)
323  */
324 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
325 {
326 	struct net_device *dev = rt->rt6i_dev;
327 	if (!oif || dev->ifindex == oif)
328 		return 2;
329 	if ((dev->flags & IFF_LOOPBACK) &&
330 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
331 		return 1;
332 	return 0;
333 }
334 
335 static inline int rt6_check_neigh(struct rt6_info *rt)
336 {
337 	struct neighbour *neigh = rt->rt6i_nexthop;
338 	int m;
339 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
340 	    !(rt->rt6i_flags & RTF_GATEWAY))
341 		m = 1;
342 	else if (neigh) {
343 		read_lock_bh(&neigh->lock);
344 		if (neigh->nud_state & NUD_VALID)
345 			m = 2;
346 #ifdef CONFIG_IPV6_ROUTER_PREF
347 		else if (neigh->nud_state & NUD_FAILED)
348 			m = 0;
349 #endif
350 		else
351 			m = 1;
352 		read_unlock_bh(&neigh->lock);
353 	} else
354 		m = 0;
355 	return m;
356 }
357 
358 static int rt6_score_route(struct rt6_info *rt, int oif,
359 			   int strict)
360 {
361 	int m, n;
362 
363 	m = rt6_check_dev(rt, oif);
364 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
365 		return -1;
366 #ifdef CONFIG_IPV6_ROUTER_PREF
367 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
368 #endif
369 	n = rt6_check_neigh(rt);
370 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
371 		return -1;
372 	return m;
373 }
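
/*
 * Illustrative scoring example (values assumed for illustration): the
 * interface match above contributes the low two bits (0, 1 or 2) and,
 * with CONFIG_IPV6_ROUTER_PREF, the decoded 2-bit router preference is
 * OR-ed in above them.  Assuming the usual decoding low=1, medium=2,
 * high=3, a route on the requested interface advertised with high
 * preference scores 2 | (3 << 2) = 14, while a medium-preference route
 * reached via loopback scores 1 | (2 << 2) = 9, so router preference
 * dominates the interface tie-break.  Neighbour reachability does not
 * add to the score; with RT6_LOOKUP_F_REACHABLE it can only disqualify
 * a route.
 */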
374 
375 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
376 				   int *mpri, struct rt6_info *match)
377 {
378 	int m;
379 
380 	if (rt6_check_expired(rt))
381 		goto out;
382 
383 	m = rt6_score_route(rt, oif, strict);
384 	if (m < 0)
385 		goto out;
386 
387 	if (m > *mpri) {
388 		if (strict & RT6_LOOKUP_F_REACHABLE)
389 			rt6_probe(match);
390 		*mpri = m;
391 		match = rt;
392 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
393 		rt6_probe(rt);
394 	}
395 
396 out:
397 	return match;
398 }
399 
400 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
401 				     struct rt6_info *rr_head,
402 				     u32 metric, int oif, int strict)
403 {
404 	struct rt6_info *rt, *match;
405 	int mpri = -1;
406 
407 	match = NULL;
408 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
409 	     rt = rt->u.dst.rt6_next)
410 		match = find_match(rt, oif, strict, &mpri, match);
411 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
412 	     rt = rt->u.dst.rt6_next)
413 		match = find_match(rt, oif, strict, &mpri, match);
414 
415 	return match;
416 }
417 
418 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
419 {
420 	struct rt6_info *match, *rt0;
421 	struct net *net;
422 
423 	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
424 		  __func__, fn->leaf, oif);
425 
426 	rt0 = fn->rr_ptr;
427 	if (!rt0)
428 		fn->rr_ptr = rt0 = fn->leaf;
429 
430 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
431 
432 	if (!match &&
433 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
434 		struct rt6_info *next = rt0->u.dst.rt6_next;
435 
436 		/* no entries matched; do round-robin */
437 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
438 			next = fn->leaf;
439 
440 		if (next != rt0)
441 			fn->rr_ptr = next;
442 	}
443 
444 	RT6_TRACE("%s() => %p\n",
445 		  __func__, match);
446 
447 	net = dev_net(rt0->rt6i_dev);
448 	return (match ? match : net->ipv6.ip6_null_entry);
449 }
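
/*
 * rt6_select() implements the round-robin selection described in the
 * changelog at the top of this file: fn->rr_ptr remembers the starting
 * point among the routes sharing the best metric, find_rr_leaf() scans
 * from there and wraps around from fn->leaf, and when the
 * reachability-constrained pass matches nothing, rr_ptr is advanced so
 * the next lookup starts probing from a different router.
 */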
450 
451 #ifdef CONFIG_IPV6_ROUTE_INFO
452 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
453 		  struct in6_addr *gwaddr)
454 {
455 	struct net *net = dev_net(dev);
456 	struct route_info *rinfo = (struct route_info *) opt;
457 	struct in6_addr prefix_buf, *prefix;
458 	unsigned int pref;
459 	unsigned long lifetime;
460 	struct rt6_info *rt;
461 
462 	if (len < sizeof(struct route_info)) {
463 		return -EINVAL;
464 	}
465 
466 	/* Sanity check for prefix_len and length */
467 	if (rinfo->length > 3) {
468 		return -EINVAL;
469 	} else if (rinfo->prefix_len > 128) {
470 		return -EINVAL;
471 	} else if (rinfo->prefix_len > 64) {
472 		if (rinfo->length < 2) {
473 			return -EINVAL;
474 		}
475 	} else if (rinfo->prefix_len > 0) {
476 		if (rinfo->length < 1) {
477 			return -EINVAL;
478 		}
479 	}
480 
481 	pref = rinfo->route_pref;
482 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
483 		pref = ICMPV6_ROUTER_PREF_MEDIUM;
484 
485 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
486 
487 	if (rinfo->length == 3)
488 		prefix = (struct in6_addr *)rinfo->prefix;
489 	else {
490 		/* this function is safe */
491 		ipv6_addr_prefix(&prefix_buf,
492 				 (struct in6_addr *)rinfo->prefix,
493 				 rinfo->prefix_len);
494 		prefix = &prefix_buf;
495 	}
496 
497 	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
498 				dev->ifindex);
499 
500 	if (rt && !lifetime) {
501 		ip6_del_rt(rt);
502 		rt = NULL;
503 	}
504 
505 	if (!rt && lifetime)
506 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
507 					pref);
508 	else if (rt)
509 		rt->rt6i_flags = RTF_ROUTEINFO |
510 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
511 
512 	if (rt) {
513 		if (!addrconf_finite_timeout(lifetime)) {
514 			rt->rt6i_flags &= ~RTF_EXPIRES;
515 		} else {
516 			rt->rt6i_expires = jiffies + HZ * lifetime;
517 			rt->rt6i_flags |= RTF_EXPIRES;
518 		}
519 		dst_release(&rt->u.dst);
520 	}
521 	return 0;
522 }
523 #endif
524 
525 #define BACKTRACK(__net, saddr)			\
526 do { \
527 	if (rt == __net->ipv6.ip6_null_entry) {	\
528 		struct fib6_node *pn; \
529 		while (1) { \
530 			if (fn->fn_flags & RTN_TL_ROOT) \
531 				goto out; \
532 			pn = fn->parent; \
533 			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
534 				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
535 			else \
536 				fn = pn; \
537 			if (fn->fn_flags & RTN_RTINFO) \
538 				goto restart; \
539 		} \
540 	} \
541 } while(0)
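
/*
 * BACKTRACK() handles the case where the node found by fib6_lookup()
 * yielded no usable route (the null entry).  It walks towards the tree
 * root; at each step it descends into the parent's source-address
 * subtree (CONFIG_IPV6_SUBTREES) if that is not where we came from,
 * otherwise it moves up to the parent itself.  The first node carrying
 * route information (RTN_RTINFO) re-enters the caller's "restart" label;
 * reaching the table root (RTN_TL_ROOT) gives up through "out".
 */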
542 
543 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
544 					     struct fib6_table *table,
545 					     struct flowi *fl, int flags)
546 {
547 	struct fib6_node *fn;
548 	struct rt6_info *rt;
549 
550 	read_lock_bh(&table->tb6_lock);
551 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
552 restart:
553 	rt = fn->leaf;
554 	rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
555 	BACKTRACK(net, &fl->fl6_src);
556 out:
557 	dst_use(&rt->u.dst, jiffies);
558 	read_unlock_bh(&table->tb6_lock);
559 	return rt;
560 
561 }
562 
563 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
564 			    const struct in6_addr *saddr, int oif, int strict)
565 {
566 	struct flowi fl = {
567 		.oif = oif,
568 		.nl_u = {
569 			.ip6_u = {
570 				.daddr = *daddr,
571 			},
572 		},
573 	};
574 	struct dst_entry *dst;
575 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
576 
577 	if (saddr) {
578 		memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
579 		flags |= RT6_LOOKUP_F_HAS_SADDR;
580 	}
581 
582 	dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
583 	if (dst->error == 0)
584 		return (struct rt6_info *) dst;
585 
586 	dst_release(dst);
587 
588 	return NULL;
589 }
590 
591 EXPORT_SYMBOL(rt6_lookup);
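
/*
 * A minimal usage sketch for rt6_lookup() (the destination address and
 * network namespace are assumed to come from the caller): passing
 * oif = 0 and strict = 0 asks for any suitable route, and the reference
 * taken by the lookup must be dropped with dst_release().
 *
 *	struct rt6_info *rt = rt6_lookup(net, &daddr, NULL, 0, 0);
 *	if (rt) {
 *		... use rt->rt6i_dev, rt->rt6i_gateway, dst_mtu(&rt->u.dst);
 *		dst_release(&rt->u.dst);
 *	}
 */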
592 
593 /* ip6_ins_rt is called with table->tb6_lock free (not held).
594    It takes a new route entry; if the addition fails for any reason,
595    the route is freed. In any case, if the caller does not hold a
596    reference, the route may be destroyed.
597  */
598 
599 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
600 {
601 	int err;
602 	struct fib6_table *table;
603 
604 	table = rt->rt6i_table;
605 	write_lock_bh(&table->tb6_lock);
606 	err = fib6_add(&table->tb6_root, rt, info);
607 	write_unlock_bh(&table->tb6_lock);
608 
609 	return err;
610 }
611 
612 int ip6_ins_rt(struct rt6_info *rt)
613 {
614 	struct nl_info info = {
615 		.nl_net = dev_net(rt->rt6i_dev),
616 	};
617 	return __ip6_ins_rt(rt, &info);
618 }
619 
620 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
621 				      struct in6_addr *saddr)
622 {
623 	struct rt6_info *rt;
624 
625 	/*
626 	 *	Clone the route.
627 	 */
628 
629 	rt = ip6_rt_copy(ort);
630 
631 	if (rt) {
632 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
633 			if (rt->rt6i_dst.plen != 128 &&
634 			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
635 				rt->rt6i_flags |= RTF_ANYCAST;
636 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
637 		}
638 
639 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
640 		rt->rt6i_dst.plen = 128;
641 		rt->rt6i_flags |= RTF_CACHE;
642 		rt->u.dst.flags |= DST_HOST;
643 
644 #ifdef CONFIG_IPV6_SUBTREES
645 		if (rt->rt6i_src.plen && saddr) {
646 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
647 			rt->rt6i_src.plen = 128;
648 		}
649 #endif
650 
651 		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
652 
653 	}
654 
655 	return rt;
656 }
657 
658 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
659 {
660 	struct rt6_info *rt = ip6_rt_copy(ort);
661 	if (rt) {
662 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
663 		rt->rt6i_dst.plen = 128;
664 		rt->rt6i_flags |= RTF_CACHE;
665 		rt->u.dst.flags |= DST_HOST;
666 		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
667 	}
668 	return rt;
669 }
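
/*
 * Both helpers above turn a routing-table entry into a host (/128)
 * RTF_CACHE clone for a single destination.  rt6_alloc_cow() serves
 * connected (non-gateway) routes: the destination itself becomes the
 * gateway and a fresh neighbour entry is resolved for it.
 * rt6_alloc_clone() serves gatewayed or NONEXTHOP routes, where the
 * original route's nexthop neighbour can simply be shared via
 * neigh_clone().
 */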
670 
671 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
672 				      struct flowi *fl, int flags)
673 {
674 	struct fib6_node *fn;
675 	struct rt6_info *rt, *nrt;
676 	int strict = 0;
677 	int attempts = 3;
678 	int err;
679 	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
680 
681 	strict |= flags & RT6_LOOKUP_F_IFACE;
682 
683 relookup:
684 	read_lock_bh(&table->tb6_lock);
685 
686 restart_2:
687 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
688 
689 restart:
690 	rt = rt6_select(fn, oif, strict | reachable);
691 
692 	BACKTRACK(net, &fl->fl6_src);
693 	if (rt == net->ipv6.ip6_null_entry ||
694 	    rt->rt6i_flags & RTF_CACHE)
695 		goto out;
696 
697 	dst_hold(&rt->u.dst);
698 	read_unlock_bh(&table->tb6_lock);
699 
700 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
701 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
702 	else {
703 #if CLONE_OFFLINK_ROUTE
704 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
705 #else
706 		goto out2;
707 #endif
708 	}
709 
710 	dst_release(&rt->u.dst);
711 	rt = nrt ? : net->ipv6.ip6_null_entry;
712 
713 	dst_hold(&rt->u.dst);
714 	if (nrt) {
715 		err = ip6_ins_rt(nrt);
716 		if (!err)
717 			goto out2;
718 	}
719 
720 	if (--attempts <= 0)
721 		goto out2;
722 
723 	/*
724 	 * Race condition! In the gap, when table->tb6_lock was
725 	 * released someone could insert this route.  Relookup.
726 	 */
727 	dst_release(&rt->u.dst);
728 	goto relookup;
729 
730 out:
731 	if (reachable) {
732 		reachable = 0;
733 		goto restart_2;
734 	}
735 	dst_hold(&rt->u.dst);
736 	read_unlock_bh(&table->tb6_lock);
737 out2:
738 	rt->u.dst.lastuse = jiffies;
739 	rt->u.dst.__use++;
740 
741 	return rt;
742 }
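
/*
 * ip6_pol_route() above is the common lookup path for input and output.
 * With forwarding disabled, the first pass insists on a (probably)
 * reachable router (RT6_LOOKUP_F_REACHABLE) and only then retries
 * without that constraint.  A matching non-cached route is copied into
 * a per-destination RTF_CACHE clone and inserted into the table; since
 * tb6_lock is dropped around the insert, a racing insert is handled by
 * retrying the whole lookup, at most three times.
 */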
743 
744 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
745 					    struct flowi *fl, int flags)
746 {
747 	return ip6_pol_route(net, table, fl->iif, fl, flags);
748 }
749 
750 void ip6_route_input(struct sk_buff *skb)
751 {
752 	struct ipv6hdr *iph = ipv6_hdr(skb);
753 	struct net *net = dev_net(skb->dev);
754 	int flags = RT6_LOOKUP_F_HAS_SADDR;
755 	struct flowi fl = {
756 		.iif = skb->dev->ifindex,
757 		.nl_u = {
758 			.ip6_u = {
759 				.daddr = iph->daddr,
760 				.saddr = iph->saddr,
761 				.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
762 			},
763 		},
764 		.mark = skb->mark,
765 		.proto = iph->nexthdr,
766 	};
767 
768 	if (rt6_need_strict(&iph->daddr))
769 		flags |= RT6_LOOKUP_F_IFACE;
770 
771 	skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
772 }
773 
774 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
775 					     struct flowi *fl, int flags)
776 {
777 	return ip6_pol_route(net, table, fl->oif, fl, flags);
778 }
779 
780 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
781 				    struct flowi *fl)
782 {
783 	int flags = 0;
784 
785 	if (rt6_need_strict(&fl->fl6_dst))
786 		flags |= RT6_LOOKUP_F_IFACE;
787 
788 	if (!ipv6_addr_any(&fl->fl6_src))
789 		flags |= RT6_LOOKUP_F_HAS_SADDR;
790 	else if (sk) {
791 		unsigned int prefs = inet6_sk(sk)->srcprefs;
792 		if (prefs & IPV6_PREFER_SRC_TMP)
793 			flags |= RT6_LOOKUP_F_SRCPREF_TMP;
794 		if (prefs & IPV6_PREFER_SRC_PUBLIC)
795 			flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
796 		if (prefs & IPV6_PREFER_SRC_COA)
797 			flags |= RT6_LOOKUP_F_SRCPREF_COA;
798 	}
799 
800 	return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
801 }
802 
803 EXPORT_SYMBOL(ip6_route_output);
804 
805 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
806 {
807 	struct rt6_info *ort = (struct rt6_info *) *dstp;
808 	struct rt6_info *rt = (struct rt6_info *)
809 		dst_alloc(&ip6_dst_blackhole_ops);
810 	struct dst_entry *new = NULL;
811 
812 	if (rt) {
813 		new = &rt->u.dst;
814 
815 		atomic_set(&new->__refcnt, 1);
816 		new->__use = 1;
817 		new->input = dst_discard;
818 		new->output = dst_discard;
819 
820 		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
821 		new->dev = ort->u.dst.dev;
822 		if (new->dev)
823 			dev_hold(new->dev);
824 		rt->rt6i_idev = ort->rt6i_idev;
825 		if (rt->rt6i_idev)
826 			in6_dev_hold(rt->rt6i_idev);
827 		rt->rt6i_expires = 0;
828 
829 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
830 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
831 		rt->rt6i_metric = 0;
832 
833 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
834 #ifdef CONFIG_IPV6_SUBTREES
835 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
836 #endif
837 
838 		dst_free(new);
839 	}
840 
841 	dst_release(*dstp);
842 	*dstp = new;
843 	return (new ? 0 : -ENOMEM);
844 }
845 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
846 
847 /*
848  *	Destination cache support functions
849  */
850 
851 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
852 {
853 	struct rt6_info *rt;
854 
855 	rt = (struct rt6_info *) dst;
856 
857 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
858 		return dst;
859 
860 	return NULL;
861 }
862 
863 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
864 {
865 	struct rt6_info *rt = (struct rt6_info *) dst;
866 
867 	if (rt) {
868 		if (rt->rt6i_flags & RTF_CACHE)
869 			ip6_del_rt(rt);
870 		else
871 			dst_release(dst);
872 	}
873 	return NULL;
874 }
875 
876 static void ip6_link_failure(struct sk_buff *skb)
877 {
878 	struct rt6_info *rt;
879 
880 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
881 
882 	rt = (struct rt6_info *) skb->dst;
883 	if (rt) {
884 		if (rt->rt6i_flags&RTF_CACHE) {
885 			dst_set_expires(&rt->u.dst, 0);
886 			rt->rt6i_flags |= RTF_EXPIRES;
887 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
888 			rt->rt6i_node->fn_sernum = -1;
889 	}
890 }
891 
892 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
893 {
894 	struct rt6_info *rt6 = (struct rt6_info*)dst;
895 
896 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
897 		rt6->rt6i_flags |= RTF_MODIFIED;
898 		if (mtu < IPV6_MIN_MTU) {
899 			mtu = IPV6_MIN_MTU;
900 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
901 		}
902 		dst->metrics[RTAX_MTU-1] = mtu;
903 		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
904 	}
905 }
906 
907 static int ipv6_get_mtu(struct net_device *dev);
908 
909 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
910 {
911 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
912 
913 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
914 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
915 
916 	/*
917 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
918 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
919 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
920 	 * rely only on pmtu discovery"
921 	 */
922 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
923 		mtu = IPV6_MAXPLEN;
924 	return mtu;
925 }
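
/*
 * Illustrative calculation: for a typical Ethernet path MTU of 1500 the
 * advertised MSS is 1500 - 40 (IPv6 header) - 20 (TCP header) = 1440.
 * The result is never allowed below the ip6_rt_min_advmss sysctl, and a
 * value that would exceed the maximal non-jumbo payload is reported as
 * IPV6_MAXPLEN, i.e. "rely on PMTU discovery alone".
 */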
926 
927 static struct dst_entry *icmp6_dst_gc_list;
928 static DEFINE_SPINLOCK(icmp6_dst_lock);
929 
930 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
931 				  struct neighbour *neigh,
932 				  const struct in6_addr *addr)
933 {
934 	struct rt6_info *rt;
935 	struct inet6_dev *idev = in6_dev_get(dev);
936 	struct net *net = dev_net(dev);
937 
938 	if (unlikely(idev == NULL))
939 		return NULL;
940 
941 	rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
942 	if (unlikely(rt == NULL)) {
943 		in6_dev_put(idev);
944 		goto out;
945 	}
946 
947 	dev_hold(dev);
948 	if (neigh)
949 		neigh_hold(neigh);
950 	else
951 		neigh = ndisc_get_neigh(dev, addr);
952 
953 	rt->rt6i_dev	  = dev;
954 	rt->rt6i_idev     = idev;
955 	rt->rt6i_nexthop  = neigh;
956 	atomic_set(&rt->u.dst.__refcnt, 1);
957 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
958 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
959 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
960 	rt->u.dst.output  = ip6_output;
961 
962 #if 0	/* there's no chance to use these for ndisc */
963 	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
964 				? DST_HOST
965 				: 0;
966 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
967 	rt->rt6i_dst.plen = 128;
968 #endif
969 
970 	spin_lock_bh(&icmp6_dst_lock);
971 	rt->u.dst.next = icmp6_dst_gc_list;
972 	icmp6_dst_gc_list = &rt->u.dst;
973 	spin_unlock_bh(&icmp6_dst_lock);
974 
975 	fib6_force_start_gc(net);
976 
977 out:
978 	return &rt->u.dst;
979 }
980 
981 int icmp6_dst_gc(void)
982 {
983 	struct dst_entry *dst, *next, **pprev;
984 	int more = 0;
985 
986 	next = NULL;
987 
988 	spin_lock_bh(&icmp6_dst_lock);
989 	pprev = &icmp6_dst_gc_list;
990 
991 	while ((dst = *pprev) != NULL) {
992 		if (!atomic_read(&dst->__refcnt)) {
993 			*pprev = dst->next;
994 			dst_free(dst);
995 		} else {
996 			pprev = &dst->next;
997 			++more;
998 		}
999 	}
1000 
1001 	spin_unlock_bh(&icmp6_dst_lock);
1002 
1003 	return more;
1004 }
1005 
1006 static int ip6_dst_gc(struct dst_ops *ops)
1007 {
1008 	unsigned long now = jiffies;
1009 	struct net *net = ops->dst_net;
1010 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1011 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1012 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1013 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1014 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1015 
1016 	if (time_after(rt_last_gc + rt_min_interval, now) &&
1017 	    atomic_read(&ops->entries) <= rt_max_size)
1018 		goto out;
1019 
1020 	net->ipv6.ip6_rt_gc_expire++;
1021 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1022 	net->ipv6.ip6_rt_last_gc = now;
1023 	if (atomic_read(&ops->entries) < ops->gc_thresh)
1024 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1025 out:
1026 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1027 	return (atomic_read(&ops->entries) > rt_max_size);
1028 }
1029 
1030 /* Clean host part of a prefix. Not necessary in radix tree,
1031    but results in cleaner routing tables.
1032 
1033    Remove it only when everything is known to work!
1034  */
1035 
1036 static int ipv6_get_mtu(struct net_device *dev)
1037 {
1038 	int mtu = IPV6_MIN_MTU;
1039 	struct inet6_dev *idev;
1040 
1041 	idev = in6_dev_get(dev);
1042 	if (idev) {
1043 		mtu = idev->cnf.mtu6;
1044 		in6_dev_put(idev);
1045 	}
1046 	return mtu;
1047 }
1048 
1049 int ip6_dst_hoplimit(struct dst_entry *dst)
1050 {
1051 	int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1052 	if (hoplimit < 0) {
1053 		struct net_device *dev = dst->dev;
1054 		struct inet6_dev *idev = in6_dev_get(dev);
1055 		if (idev) {
1056 			hoplimit = idev->cnf.hop_limit;
1057 			in6_dev_put(idev);
1058 		} else
1059 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1060 	}
1061 	return hoplimit;
1062 }
1063 
1064 /*
1065  *	Route addition and removal
1066  */
1067 
1068 int ip6_route_add(struct fib6_config *cfg)
1069 {
1070 	int err;
1071 	struct net *net = cfg->fc_nlinfo.nl_net;
1072 	struct rt6_info *rt = NULL;
1073 	struct net_device *dev = NULL;
1074 	struct inet6_dev *idev = NULL;
1075 	struct fib6_table *table;
1076 	int addr_type;
1077 
1078 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1079 		return -EINVAL;
1080 #ifndef CONFIG_IPV6_SUBTREES
1081 	if (cfg->fc_src_len)
1082 		return -EINVAL;
1083 #endif
1084 	if (cfg->fc_ifindex) {
1085 		err = -ENODEV;
1086 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1087 		if (!dev)
1088 			goto out;
1089 		idev = in6_dev_get(dev);
1090 		if (!idev)
1091 			goto out;
1092 	}
1093 
1094 	if (cfg->fc_metric == 0)
1095 		cfg->fc_metric = IP6_RT_PRIO_USER;
1096 
1097 	table = fib6_new_table(net, cfg->fc_table);
1098 	if (table == NULL) {
1099 		err = -ENOBUFS;
1100 		goto out;
1101 	}
1102 
1103 	rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
1104 
1105 	if (rt == NULL) {
1106 		err = -ENOMEM;
1107 		goto out;
1108 	}
1109 
1110 	rt->u.dst.obsolete = -1;
1111 	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1112 				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1113 				0;
1114 
1115 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1116 		cfg->fc_protocol = RTPROT_BOOT;
1117 	rt->rt6i_protocol = cfg->fc_protocol;
1118 
1119 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1120 
1121 	if (addr_type & IPV6_ADDR_MULTICAST)
1122 		rt->u.dst.input = ip6_mc_input;
1123 	else
1124 		rt->u.dst.input = ip6_forward;
1125 
1126 	rt->u.dst.output = ip6_output;
1127 
1128 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1129 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1130 	if (rt->rt6i_dst.plen == 128)
1131 	       rt->u.dst.flags = DST_HOST;
1132 
1133 #ifdef CONFIG_IPV6_SUBTREES
1134 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1135 	rt->rt6i_src.plen = cfg->fc_src_len;
1136 #endif
1137 
1138 	rt->rt6i_metric = cfg->fc_metric;
1139 
1140 	/* We cannot add true routes via loopback here,
1141 	   they would result in kernel looping; promote them to reject routes
1142 	 */
1143 	if ((cfg->fc_flags & RTF_REJECT) ||
1144 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1145 		/* hold loopback dev/idev if we haven't done so. */
1146 		if (dev != net->loopback_dev) {
1147 			if (dev) {
1148 				dev_put(dev);
1149 				in6_dev_put(idev);
1150 			}
1151 			dev = net->loopback_dev;
1152 			dev_hold(dev);
1153 			idev = in6_dev_get(dev);
1154 			if (!idev) {
1155 				err = -ENODEV;
1156 				goto out;
1157 			}
1158 		}
1159 		rt->u.dst.output = ip6_pkt_discard_out;
1160 		rt->u.dst.input = ip6_pkt_discard;
1161 		rt->u.dst.error = -ENETUNREACH;
1162 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1163 		goto install_route;
1164 	}
1165 
1166 	if (cfg->fc_flags & RTF_GATEWAY) {
1167 		struct in6_addr *gw_addr;
1168 		int gwa_type;
1169 
1170 		gw_addr = &cfg->fc_gateway;
1171 		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1172 		gwa_type = ipv6_addr_type(gw_addr);
1173 
1174 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1175 			struct rt6_info *grt;
1176 
1177 			/* IPv6 strictly prohibits using non-link-local
1178 			   addresses as a nexthop address.
1179 			   Otherwise, the router will not be able to send redirects.
1180 			   That is very good, but in some (rare!) circumstances
1181 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1182 			   some exceptions. --ANK
1183 			 */
1184 			err = -EINVAL;
1185 			if (!(gwa_type&IPV6_ADDR_UNICAST))
1186 				goto out;
1187 
1188 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1189 
1190 			err = -EHOSTUNREACH;
1191 			if (grt == NULL)
1192 				goto out;
1193 			if (dev) {
1194 				if (dev != grt->rt6i_dev) {
1195 					dst_release(&grt->u.dst);
1196 					goto out;
1197 				}
1198 			} else {
1199 				dev = grt->rt6i_dev;
1200 				idev = grt->rt6i_idev;
1201 				dev_hold(dev);
1202 				in6_dev_hold(grt->rt6i_idev);
1203 			}
1204 			if (!(grt->rt6i_flags&RTF_GATEWAY))
1205 				err = 0;
1206 			dst_release(&grt->u.dst);
1207 
1208 			if (err)
1209 				goto out;
1210 		}
1211 		err = -EINVAL;
1212 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1213 			goto out;
1214 	}
1215 
1216 	err = -ENODEV;
1217 	if (dev == NULL)
1218 		goto out;
1219 
1220 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1221 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1222 		if (IS_ERR(rt->rt6i_nexthop)) {
1223 			err = PTR_ERR(rt->rt6i_nexthop);
1224 			rt->rt6i_nexthop = NULL;
1225 			goto out;
1226 		}
1227 	}
1228 
1229 	rt->rt6i_flags = cfg->fc_flags;
1230 
1231 install_route:
1232 	if (cfg->fc_mx) {
1233 		struct nlattr *nla;
1234 		int remaining;
1235 
1236 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1237 			int type = nla_type(nla);
1238 
1239 			if (type) {
1240 				if (type > RTAX_MAX) {
1241 					err = -EINVAL;
1242 					goto out;
1243 				}
1244 
1245 				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1246 			}
1247 		}
1248 	}
1249 
1250 	if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
1251 		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1252 	if (!dst_metric(&rt->u.dst, RTAX_MTU))
1253 		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1254 	if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
1255 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1256 	rt->u.dst.dev = dev;
1257 	rt->rt6i_idev = idev;
1258 	rt->rt6i_table = table;
1259 
1260 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1261 
1262 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1263 
1264 out:
1265 	if (dev)
1266 		dev_put(dev);
1267 	if (idev)
1268 		in6_dev_put(idev);
1269 	if (rt)
1270 		dst_free(&rt->u.dst);
1271 	return err;
1272 }
1273 
1274 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1275 {
1276 	int err;
1277 	struct fib6_table *table;
1278 	struct net *net = dev_net(rt->rt6i_dev);
1279 
1280 	if (rt == net->ipv6.ip6_null_entry)
1281 		return -ENOENT;
1282 
1283 	table = rt->rt6i_table;
1284 	write_lock_bh(&table->tb6_lock);
1285 
1286 	err = fib6_del(rt, info);
1287 	dst_release(&rt->u.dst);
1288 
1289 	write_unlock_bh(&table->tb6_lock);
1290 
1291 	return err;
1292 }
1293 
1294 int ip6_del_rt(struct rt6_info *rt)
1295 {
1296 	struct nl_info info = {
1297 		.nl_net = dev_net(rt->rt6i_dev),
1298 	};
1299 	return __ip6_del_rt(rt, &info);
1300 }
1301 
1302 static int ip6_route_del(struct fib6_config *cfg)
1303 {
1304 	struct fib6_table *table;
1305 	struct fib6_node *fn;
1306 	struct rt6_info *rt;
1307 	int err = -ESRCH;
1308 
1309 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1310 	if (table == NULL)
1311 		return err;
1312 
1313 	read_lock_bh(&table->tb6_lock);
1314 
1315 	fn = fib6_locate(&table->tb6_root,
1316 			 &cfg->fc_dst, cfg->fc_dst_len,
1317 			 &cfg->fc_src, cfg->fc_src_len);
1318 
1319 	if (fn) {
1320 		for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1321 			if (cfg->fc_ifindex &&
1322 			    (rt->rt6i_dev == NULL ||
1323 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1324 				continue;
1325 			if (cfg->fc_flags & RTF_GATEWAY &&
1326 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1327 				continue;
1328 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1329 				continue;
1330 			dst_hold(&rt->u.dst);
1331 			read_unlock_bh(&table->tb6_lock);
1332 
1333 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1334 		}
1335 	}
1336 	read_unlock_bh(&table->tb6_lock);
1337 
1338 	return err;
1339 }
1340 
1341 /*
1342  *	Handle redirects
1343  */
1344 struct ip6rd_flowi {
1345 	struct flowi fl;
1346 	struct in6_addr gateway;
1347 };
1348 
1349 static struct rt6_info *__ip6_route_redirect(struct net *net,
1350 					     struct fib6_table *table,
1351 					     struct flowi *fl,
1352 					     int flags)
1353 {
1354 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1355 	struct rt6_info *rt;
1356 	struct fib6_node *fn;
1357 
1358 	/*
1359 	 * Get the "current" route for this destination and
1360 	 * check if the redirect has come from the appropriate router.
1361 	 *
1362 	 * RFC 2461 specifies that redirects should only be
1363 	 * accepted if they come from the nexthop to the target.
1364 	 * Due to the way the routes are chosen, this notion
1365 	 * is a bit fuzzy and one might need to check all possible
1366 	 * routes.
1367 	 */
1368 
1369 	read_lock_bh(&table->tb6_lock);
1370 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1371 restart:
1372 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1373 		/*
1374 		 * Current route is on-link; redirect is always invalid.
1375 		 *
1376 		 * It seems the previous statement is not true. It could
1377 		 * be a node which regards us as on-link (e.g. proxy ndisc).
1378 		 * But then the router serving it might decide that we should
1379 		 * know the truth 8)8) --ANK (980726).
1380 		 */
1381 		if (rt6_check_expired(rt))
1382 			continue;
1383 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1384 			continue;
1385 		if (fl->oif != rt->rt6i_dev->ifindex)
1386 			continue;
1387 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1388 			continue;
1389 		break;
1390 	}
1391 
1392 	if (!rt)
1393 		rt = net->ipv6.ip6_null_entry;
1394 	BACKTRACK(net, &fl->fl6_src);
1395 out:
1396 	dst_hold(&rt->u.dst);
1397 
1398 	read_unlock_bh(&table->tb6_lock);
1399 
1400 	return rt;
1401 };
1402 
1403 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1404 					   struct in6_addr *src,
1405 					   struct in6_addr *gateway,
1406 					   struct net_device *dev)
1407 {
1408 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1409 	struct net *net = dev_net(dev);
1410 	struct ip6rd_flowi rdfl = {
1411 		.fl = {
1412 			.oif = dev->ifindex,
1413 			.nl_u = {
1414 				.ip6_u = {
1415 					.daddr = *dest,
1416 					.saddr = *src,
1417 				},
1418 			},
1419 		},
1420 		.gateway = *gateway,
1421 	};
1422 
1423 	if (rt6_need_strict(dest))
1424 		flags |= RT6_LOOKUP_F_IFACE;
1425 
1426 	return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1427 						   flags, __ip6_route_redirect);
1428 }
1429 
1430 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1431 		  struct in6_addr *saddr,
1432 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1433 {
1434 	struct rt6_info *rt, *nrt = NULL;
1435 	struct netevent_redirect netevent;
1436 	struct net *net = dev_net(neigh->dev);
1437 
1438 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1439 
1440 	if (rt == net->ipv6.ip6_null_entry) {
1441 		if (net_ratelimit())
1442 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1443 			       "for redirect target\n");
1444 		goto out;
1445 	}
1446 
1447 	/*
1448 	 *	We have finally decided to accept it.
1449 	 */
1450 
1451 	neigh_update(neigh, lladdr, NUD_STALE,
1452 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1453 		     NEIGH_UPDATE_F_OVERRIDE|
1454 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1455 				     NEIGH_UPDATE_F_ISROUTER))
1456 		     );
1457 
1458 	/*
1459 	 * Redirect received -> path was valid.
1460 	 * Look, redirects are sent only in response to data packets,
1461 	 * so that this nexthop apparently is reachable. --ANK
1462 	 */
1463 	dst_confirm(&rt->u.dst);
1464 
1465 	/* Duplicate redirect: silently ignore. */
1466 	if (neigh == rt->u.dst.neighbour)
1467 		goto out;
1468 
1469 	nrt = ip6_rt_copy(rt);
1470 	if (nrt == NULL)
1471 		goto out;
1472 
1473 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1474 	if (on_link)
1475 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1476 
1477 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1478 	nrt->rt6i_dst.plen = 128;
1479 	nrt->u.dst.flags |= DST_HOST;
1480 
1481 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1482 	nrt->rt6i_nexthop = neigh_clone(neigh);
1483 	/* Reset pmtu, it may be better */
1484 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1485 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
1486 							dst_mtu(&nrt->u.dst));
1487 
1488 	if (ip6_ins_rt(nrt))
1489 		goto out;
1490 
1491 	netevent.old = &rt->u.dst;
1492 	netevent.new = &nrt->u.dst;
1493 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1494 
1495 	if (rt->rt6i_flags&RTF_CACHE) {
1496 		ip6_del_rt(rt);
1497 		return;
1498 	}
1499 
1500 out:
1501 	dst_release(&rt->u.dst);
1502 	return;
1503 }
1504 
1505 /*
1506  *	Handle ICMP "packet too big" messages
1507  *	i.e. Path MTU discovery
1508  */
1509 
1510 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1511 			struct net_device *dev, u32 pmtu)
1512 {
1513 	struct rt6_info *rt, *nrt;
1514 	struct net *net = dev_net(dev);
1515 	int allfrag = 0;
1516 
1517 	rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
1518 	if (rt == NULL)
1519 		return;
1520 
1521 	if (pmtu >= dst_mtu(&rt->u.dst))
1522 		goto out;
1523 
1524 	if (pmtu < IPV6_MIN_MTU) {
1525 		/*
1526 		 * According to RFC 2460, when a node receives a Packet Too Big
1527 		 * message reporting a PMTU less than the IPv6 Minimum Link MTU
1528 		 * (1280), the PMTU is set to the Minimum Link MTU and a
1529 		 * fragment header should always be included from then on.
1530 		 */
1531 		pmtu = IPV6_MIN_MTU;
1532 		allfrag = 1;
1533 	}
1534 
1535 	/* New mtu received -> path was valid.
1536 	   They are sent only in response to data packets,
1537 	   so that this nexthop apparently is reachable. --ANK
1538 	 */
1539 	dst_confirm(&rt->u.dst);
1540 
1541 	/* Host route. If it is static, it would be better
1542 	   not to override it, but to add a new one, so that
1543 	   when the cache entry expires the old pmtu
1544 	   is restored automatically.
1545 	 */
1546 	if (rt->rt6i_flags & RTF_CACHE) {
1547 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1548 		if (allfrag)
1549 			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1550 		dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1551 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1552 		goto out;
1553 	}
1554 
1555 	/* Network route.
1556 	   Two cases are possible:
1557 	   1. It is a connected route. Action: COW it.
1558 	   2. It is a gatewayed or NONEXTHOP route. Action: clone it.
1559 	 */
1560 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1561 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1562 	else
1563 		nrt = rt6_alloc_clone(rt, daddr);
1564 
1565 	if (nrt) {
1566 		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1567 		if (allfrag)
1568 			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1569 
1570 		/* According to RFC 1981, probing for a PMTU increase should not
1571 		 * happen within 5 minutes; the recommended timer is 10 minutes.
1572 		 * Here the route expiration time is set to ip6_rt_mtu_expires,
1573 		 * which is 10 minutes. After 10 minutes the decreased pmtu
1574 		 * expires and detection of a PMTU increase happens automatically.
1575 		 */
1576 		dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1577 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1578 
1579 		ip6_ins_rt(nrt);
1580 	}
1581 out:
1582 	dst_release(&rt->u.dst);
1583 }
1584 
1585 /*
1586  *	Misc support functions
1587  */
1588 
1589 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1590 {
1591 	struct net *net = dev_net(ort->rt6i_dev);
1592 	struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
1593 
1594 	if (rt) {
1595 		rt->u.dst.input = ort->u.dst.input;
1596 		rt->u.dst.output = ort->u.dst.output;
1597 
1598 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1599 		rt->u.dst.error = ort->u.dst.error;
1600 		rt->u.dst.dev = ort->u.dst.dev;
1601 		if (rt->u.dst.dev)
1602 			dev_hold(rt->u.dst.dev);
1603 		rt->rt6i_idev = ort->rt6i_idev;
1604 		if (rt->rt6i_idev)
1605 			in6_dev_hold(rt->rt6i_idev);
1606 		rt->u.dst.lastuse = jiffies;
1607 		rt->rt6i_expires = 0;
1608 
1609 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1610 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1611 		rt->rt6i_metric = 0;
1612 
1613 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1614 #ifdef CONFIG_IPV6_SUBTREES
1615 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1616 #endif
1617 		rt->rt6i_table = ort->rt6i_table;
1618 	}
1619 	return rt;
1620 }
1621 
1622 #ifdef CONFIG_IPV6_ROUTE_INFO
1623 static struct rt6_info *rt6_get_route_info(struct net *net,
1624 					   struct in6_addr *prefix, int prefixlen,
1625 					   struct in6_addr *gwaddr, int ifindex)
1626 {
1627 	struct fib6_node *fn;
1628 	struct rt6_info *rt = NULL;
1629 	struct fib6_table *table;
1630 
1631 	table = fib6_get_table(net, RT6_TABLE_INFO);
1632 	if (table == NULL)
1633 		return NULL;
1634 
1635 	write_lock_bh(&table->tb6_lock);
1636 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1637 	if (!fn)
1638 		goto out;
1639 
1640 	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1641 		if (rt->rt6i_dev->ifindex != ifindex)
1642 			continue;
1643 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1644 			continue;
1645 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1646 			continue;
1647 		dst_hold(&rt->u.dst);
1648 		break;
1649 	}
1650 out:
1651 	write_unlock_bh(&table->tb6_lock);
1652 	return rt;
1653 }
1654 
1655 static struct rt6_info *rt6_add_route_info(struct net *net,
1656 					   struct in6_addr *prefix, int prefixlen,
1657 					   struct in6_addr *gwaddr, int ifindex,
1658 					   unsigned pref)
1659 {
1660 	struct fib6_config cfg = {
1661 		.fc_table	= RT6_TABLE_INFO,
1662 		.fc_metric	= IP6_RT_PRIO_USER,
1663 		.fc_ifindex	= ifindex,
1664 		.fc_dst_len	= prefixlen,
1665 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1666 				  RTF_UP | RTF_PREF(pref),
1667 		.fc_nlinfo.pid = 0,
1668 		.fc_nlinfo.nlh = NULL,
1669 		.fc_nlinfo.nl_net = net,
1670 	};
1671 
1672 	ipv6_addr_copy(&cfg.fc_dst, prefix);
1673 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1674 
1675 	/* We should treat it as a default route if prefix length is 0. */
1676 	if (!prefixlen)
1677 		cfg.fc_flags |= RTF_DEFAULT;
1678 
1679 	ip6_route_add(&cfg);
1680 
1681 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1682 }
1683 #endif
1684 
1685 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1686 {
1687 	struct rt6_info *rt;
1688 	struct fib6_table *table;
1689 
1690 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1691 	if (table == NULL)
1692 		return NULL;
1693 
1694 	write_lock_bh(&table->tb6_lock);
1695 	for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1696 		if (dev == rt->rt6i_dev &&
1697 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1698 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1699 			break;
1700 	}
1701 	if (rt)
1702 		dst_hold(&rt->u.dst);
1703 	write_unlock_bh(&table->tb6_lock);
1704 	return rt;
1705 }
1706 
1707 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1708 				     struct net_device *dev,
1709 				     unsigned int pref)
1710 {
1711 	struct fib6_config cfg = {
1712 		.fc_table	= RT6_TABLE_DFLT,
1713 		.fc_metric	= IP6_RT_PRIO_USER,
1714 		.fc_ifindex	= dev->ifindex,
1715 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1716 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1717 		.fc_nlinfo.pid = 0,
1718 		.fc_nlinfo.nlh = NULL,
1719 		.fc_nlinfo.nl_net = dev_net(dev),
1720 	};
1721 
1722 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1723 
1724 	ip6_route_add(&cfg);
1725 
1726 	return rt6_get_dflt_router(gwaddr, dev);
1727 }
1728 
1729 void rt6_purge_dflt_routers(struct net *net)
1730 {
1731 	struct rt6_info *rt;
1732 	struct fib6_table *table;
1733 
1734 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1735 	table = fib6_get_table(net, RT6_TABLE_DFLT);
1736 	if (table == NULL)
1737 		return;
1738 
1739 restart:
1740 	read_lock_bh(&table->tb6_lock);
1741 	for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1742 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1743 			dst_hold(&rt->u.dst);
1744 			read_unlock_bh(&table->tb6_lock);
1745 			ip6_del_rt(rt);
1746 			goto restart;
1747 		}
1748 	}
1749 	read_unlock_bh(&table->tb6_lock);
1750 }
1751 
1752 static void rtmsg_to_fib6_config(struct net *net,
1753 				 struct in6_rtmsg *rtmsg,
1754 				 struct fib6_config *cfg)
1755 {
1756 	memset(cfg, 0, sizeof(*cfg));
1757 
1758 	cfg->fc_table = RT6_TABLE_MAIN;
1759 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1760 	cfg->fc_metric = rtmsg->rtmsg_metric;
1761 	cfg->fc_expires = rtmsg->rtmsg_info;
1762 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1763 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1764 	cfg->fc_flags = rtmsg->rtmsg_flags;
1765 
1766 	cfg->fc_nlinfo.nl_net = net;
1767 
1768 	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1769 	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1770 	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1771 }
1772 
1773 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1774 {
1775 	struct fib6_config cfg;
1776 	struct in6_rtmsg rtmsg;
1777 	int err;
1778 
1779 	switch(cmd) {
1780 	case SIOCADDRT:		/* Add a route */
1781 	case SIOCDELRT:		/* Delete a route */
1782 		if (!capable(CAP_NET_ADMIN))
1783 			return -EPERM;
1784 		err = copy_from_user(&rtmsg, arg,
1785 				     sizeof(struct in6_rtmsg));
1786 		if (err)
1787 			return -EFAULT;
1788 
1789 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1790 
1791 		rtnl_lock();
1792 		switch (cmd) {
1793 		case SIOCADDRT:
1794 			err = ip6_route_add(&cfg);
1795 			break;
1796 		case SIOCDELRT:
1797 			err = ip6_route_del(&cfg);
1798 			break;
1799 		default:
1800 			err = -EINVAL;
1801 		}
1802 		rtnl_unlock();
1803 
1804 		return err;
1805 	}
1806 
1807 	return -EINVAL;
1808 }
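
/*
 * A hedged sketch of the user-space side of this ioctl; the prefix,
 * prefix length and interface name below are example values and error
 * handling is omitted.  The rtmsg_* field names match the ones consumed
 * by rtmsg_to_fib6_config() above.
 *
 *	struct in6_rtmsg rtmsg;
 *	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *
 *	memset(&rtmsg, 0, sizeof(rtmsg));
 *	inet_pton(AF_INET6, "2001:db8::", &rtmsg.rtmsg_dst);
 *	rtmsg.rtmsg_dst_len = 32;
 *	rtmsg.rtmsg_ifindex = if_nametoindex("eth0");
 *	rtmsg.rtmsg_metric  = 1;
 *	rtmsg.rtmsg_flags   = RTF_UP;
 *	ioctl(fd, SIOCADDRT, &rtmsg);	(requires CAP_NET_ADMIN)
 */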
1809 
1810 /*
1811  *	Drop the packet on the floor
1812  */
1813 
1814 static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
1815 {
1816 	int type;
1817 	switch (ipstats_mib_noroutes) {
1818 	case IPSTATS_MIB_INNOROUTES:
1819 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1820 		if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1821 			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1822 			break;
1823 		}
1824 		/* FALLTHROUGH */
1825 	case IPSTATS_MIB_OUTNOROUTES:
1826 		IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1827 		break;
1828 	}
1829 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1830 	kfree_skb(skb);
1831 	return 0;
1832 }
1833 
1834 static int ip6_pkt_discard(struct sk_buff *skb)
1835 {
1836 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1837 }
1838 
1839 static int ip6_pkt_discard_out(struct sk_buff *skb)
1840 {
1841 	skb->dev = skb->dst->dev;
1842 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1843 }
1844 
1845 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1846 
1847 static int ip6_pkt_prohibit(struct sk_buff *skb)
1848 {
1849 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1850 }
1851 
1852 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1853 {
1854 	skb->dev = skb->dst->dev;
1855 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1856 }
1857 
1858 #endif
1859 
1860 /*
1861  *	Allocate a dst for local (unicast / anycast) address.
1862  */
1863 
1864 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1865 				    const struct in6_addr *addr,
1866 				    int anycast)
1867 {
1868 	struct net *net = dev_net(idev->dev);
1869 	struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
1870 
1871 	if (rt == NULL)
1872 		return ERR_PTR(-ENOMEM);
1873 
1874 	dev_hold(net->loopback_dev);
1875 	in6_dev_hold(idev);
1876 
1877 	rt->u.dst.flags = DST_HOST;
1878 	rt->u.dst.input = ip6_input;
1879 	rt->u.dst.output = ip6_output;
1880 	rt->rt6i_dev = net->loopback_dev;
1881 	rt->rt6i_idev = idev;
1882 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1883 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1884 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1885 	rt->u.dst.obsolete = -1;
1886 
1887 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1888 	if (anycast)
1889 		rt->rt6i_flags |= RTF_ANYCAST;
1890 	else
1891 		rt->rt6i_flags |= RTF_LOCAL;
1892 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1893 	if (rt->rt6i_nexthop == NULL) {
1894 		dst_free(&rt->u.dst);
1895 		return ERR_PTR(-ENOMEM);
1896 	}
1897 
1898 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1899 	rt->rt6i_dst.plen = 128;
1900 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1901 
1902 	atomic_set(&rt->u.dst.__refcnt, 1);
1903 
1904 	return rt;
1905 }
1906 
1907 struct arg_dev_net {
1908 	struct net_device *dev;
1909 	struct net *net;
1910 };
1911 
1912 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1913 {
1914 	struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1915 	struct net *net = ((struct arg_dev_net *)arg)->net;
1916 
1917 	if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1918 	    rt != net->ipv6.ip6_null_entry) {
1919 		RT6_TRACE("deleted by ifdown %p\n", rt);
1920 		return -1;
1921 	}
1922 	return 0;
1923 }
1924 
1925 void rt6_ifdown(struct net *net, struct net_device *dev)
1926 {
1927 	struct arg_dev_net adn = {
1928 		.dev = dev,
1929 		.net = net,
1930 	};
1931 
1932 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
1933 }
1934 
1935 struct rt6_mtu_change_arg
1936 {
1937 	struct net_device *dev;
1938 	unsigned mtu;
1939 };
1940 
1941 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1942 {
1943 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1944 	struct inet6_dev *idev;
1945 	struct net *net = dev_net(arg->dev);
1946 
1947 	/* In IPv6 pmtu discovery is not optional,
1948 	   so the RTAX_MTU lock cannot disable it.
1949 	   We still use this lock to block changes
1950 	   caused by addrconf/ndisc.
1951 	*/
1952 
1953 	idev = __in6_dev_get(arg->dev);
1954 	if (idev == NULL)
1955 		return 0;
1956 
1957 	/* For an administrative MTU increase, there is no way to discover
1958 	   an IPv6 PMTU increase, so the PMTU increase should be applied here.
1959 	   Since RFC 1981 doesn't cover administrative MTU increases,
1960 	   updating the PMTU on an increase is a MUST (e.g. jumbo frames).
1961 	 */
1962 	/*
1963 	   If the new MTU is less than the route PMTU, the new MTU will be
1964 	   the lowest MTU in the path; update the route PMTU to reflect the
1965 	   decrease. If the new MTU is greater than the route PMTU, and the
1966 	   old MTU was the lowest MTU in the path, update the route PMTU to
1967 	   reflect the increase. In that case, if another node's MTU is now
1968 	   the lowest in the path, a Packet Too Big message will trigger
1969 	   PMTU discovery.
1970 	 */
1971 	if (rt->rt6i_dev == arg->dev &&
1972 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1973 	    (dst_mtu(&rt->u.dst) >= arg->mtu ||
1974 	     (dst_mtu(&rt->u.dst) < arg->mtu &&
1975 	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1976 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1977 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
1978 	}
1979 	return 0;
1980 }
1981 
1982 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1983 {
1984 	struct rt6_mtu_change_arg arg = {
1985 		.dev = dev,
1986 		.mtu = mtu,
1987 	};
1988 
1989 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1990 }
1991 
1992 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
1993 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
1994 	[RTA_OIF]               = { .type = NLA_U32 },
1995 	[RTA_IIF]		= { .type = NLA_U32 },
1996 	[RTA_PRIORITY]          = { .type = NLA_U32 },
1997 	[RTA_METRICS]           = { .type = NLA_NESTED },
1998 };
1999 
2000 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2001 			      struct fib6_config *cfg)
2002 {
2003 	struct rtmsg *rtm;
2004 	struct nlattr *tb[RTA_MAX+1];
2005 	int err;
2006 
2007 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2008 	if (err < 0)
2009 		goto errout;
2010 
2011 	err = -EINVAL;
2012 	rtm = nlmsg_data(nlh);
2013 	memset(cfg, 0, sizeof(*cfg));
2014 
2015 	cfg->fc_table = rtm->rtm_table;
2016 	cfg->fc_dst_len = rtm->rtm_dst_len;
2017 	cfg->fc_src_len = rtm->rtm_src_len;
2018 	cfg->fc_flags = RTF_UP;
2019 	cfg->fc_protocol = rtm->rtm_protocol;
2020 
2021 	if (rtm->rtm_type == RTN_UNREACHABLE)
2022 		cfg->fc_flags |= RTF_REJECT;
2023 
2024 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2025 	cfg->fc_nlinfo.nlh = nlh;
2026 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2027 
2028 	if (tb[RTA_GATEWAY]) {
2029 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2030 		cfg->fc_flags |= RTF_GATEWAY;
2031 	}
2032 
2033 	if (tb[RTA_DST]) {
2034 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2035 
2036 		if (nla_len(tb[RTA_DST]) < plen)
2037 			goto errout;
2038 
2039 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2040 	}
2041 
2042 	if (tb[RTA_SRC]) {
2043 		int plen = (rtm->rtm_src_len + 7) >> 3;
2044 
2045 		if (nla_len(tb[RTA_SRC]) < plen)
2046 			goto errout;
2047 
2048 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2049 	}
2050 
2051 	if (tb[RTA_OIF])
2052 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2053 
2054 	if (tb[RTA_PRIORITY])
2055 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2056 
2057 	if (tb[RTA_METRICS]) {
2058 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2059 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2060 	}
2061 
2062 	if (tb[RTA_TABLE])
2063 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2064 
2065 	err = 0;
2066 errout:
2067 	return err;
2068 }
2069 
2070 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2071 {
2072 	struct fib6_config cfg;
2073 	int err;
2074 
2075 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2076 	if (err < 0)
2077 		return err;
2078 
2079 	return ip6_route_del(&cfg);
2080 }
2081 
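/*
 * RTM_NEWROUTE handler.  For illustration only (example addresses), a
 * request such as "ip -6 route add 2001:db8::/64 via fe80::1 dev eth0"
 * arrives here carrying RTA_DST, RTA_GATEWAY and RTA_OIF attributes,
 * which rtm_to_fib6_config() folds into the fib6_config passed on to
 * ip6_route_add().
 */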
2082 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2083 {
2084 	struct fib6_config cfg;
2085 	int err;
2086 
2087 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2088 	if (err < 0)
2089 		return err;
2090 
2091 	return ip6_route_add(&cfg);
2092 }
2093 
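/*
 * Worst-case size of a single route message; used to size the skb
 * allocated in inet6_rt_notify(), so a -EMSGSIZE from rt6_fill_node()
 * there indicates a bug in this estimate.
 */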
2094 static inline size_t rt6_nlmsg_size(void)
2095 {
2096 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2097 	       + nla_total_size(16) /* RTA_SRC */
2098 	       + nla_total_size(16) /* RTA_DST */
2099 	       + nla_total_size(16) /* RTA_GATEWAY */
2100 	       + nla_total_size(16) /* RTA_PREFSRC */
2101 	       + nla_total_size(4) /* RTA_TABLE */
2102 	       + nla_total_size(4) /* RTA_IIF */
2103 	       + nla_total_size(4) /* RTA_OIF */
2104 	       + nla_total_size(4) /* RTA_PRIORITY */
2105 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2106 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2107 }
2108 
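/*
 * Serialize one route into an RTM message.  When @dst/@src are supplied
 * they describe the specific flow being answered (RTM_GETROUTE) and
 * override the route's own prefixes; returns -EMSGSIZE if the message
 * does not fit into @skb.
 */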
2109 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2110 			 struct in6_addr *dst, struct in6_addr *src,
2111 			 int iif, int type, u32 pid, u32 seq,
2112 			 int prefix, int nowait, unsigned int flags)
2113 {
2114 	struct rtmsg *rtm;
2115 	struct nlmsghdr *nlh;
2116 	long expires;
2117 	u32 table;
2118 
2119 	if (prefix) {	/* user wants prefix routes only */
2120 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2121 			/* success since this is not a prefix route */
2122 			return 1;
2123 		}
2124 	}
2125 
2126 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2127 	if (nlh == NULL)
2128 		return -EMSGSIZE;
2129 
2130 	rtm = nlmsg_data(nlh);
2131 	rtm->rtm_family = AF_INET6;
2132 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2133 	rtm->rtm_src_len = rt->rt6i_src.plen;
2134 	rtm->rtm_tos = 0;
2135 	if (rt->rt6i_table)
2136 		table = rt->rt6i_table->tb6_id;
2137 	else
2138 		table = RT6_TABLE_UNSPEC;
2139 	rtm->rtm_table = table;
2140 	NLA_PUT_U32(skb, RTA_TABLE, table);
2141 	if (rt->rt6i_flags&RTF_REJECT)
2142 		rtm->rtm_type = RTN_UNREACHABLE;
2143 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2144 		rtm->rtm_type = RTN_LOCAL;
2145 	else
2146 		rtm->rtm_type = RTN_UNICAST;
2147 	rtm->rtm_flags = 0;
2148 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2149 	rtm->rtm_protocol = rt->rt6i_protocol;
2150 	if (rt->rt6i_flags&RTF_DYNAMIC)
2151 		rtm->rtm_protocol = RTPROT_REDIRECT;
2152 	else if (rt->rt6i_flags & RTF_ADDRCONF)
2153 		rtm->rtm_protocol = RTPROT_KERNEL;
2154 	else if (rt->rt6i_flags&RTF_DEFAULT)
2155 		rtm->rtm_protocol = RTPROT_RA;
2156 
2157 	if (rt->rt6i_flags&RTF_CACHE)
2158 		rtm->rtm_flags |= RTM_F_CLONED;
2159 
2160 	if (dst) {
2161 		NLA_PUT(skb, RTA_DST, 16, dst);
2162 		rtm->rtm_dst_len = 128;
2163 	} else if (rtm->rtm_dst_len)
2164 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2165 #ifdef CONFIG_IPV6_SUBTREES
2166 	if (src) {
2167 		NLA_PUT(skb, RTA_SRC, 16, src);
2168 		rtm->rtm_src_len = 128;
2169 	} else if (rtm->rtm_src_len)
2170 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2171 #endif
2172 	if (iif) {
2173 #ifdef CONFIG_IPV6_MROUTE
2174 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2175 			int err = ip6mr_get_route(skb, rtm, nowait);
2176 			if (err <= 0) {
2177 				if (!nowait) {
2178 					if (err == 0)
2179 						return 0;
2180 					goto nla_put_failure;
2181 				} else {
2182 					if (err == -EMSGSIZE)
2183 						goto nla_put_failure;
2184 				}
2185 			}
2186 		} else
2187 #endif
2188 			NLA_PUT_U32(skb, RTA_IIF, iif);
2189 	} else if (dst) {
2190 		struct in6_addr saddr_buf;
2191 		if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
2192 				       dst, 0, &saddr_buf) == 0)
2193 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2194 	}
2195 
2196 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2197 		goto nla_put_failure;
2198 
2199 	if (rt->u.dst.neighbour)
2200 		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2201 
2202 	if (rt->u.dst.dev)
2203 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2204 
2205 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2206 
2207 	if (!(rt->rt6i_flags & RTF_EXPIRES))
2208 		expires = 0;
2209 	else if (rt->rt6i_expires - jiffies < INT_MAX)
2210 		expires = rt->rt6i_expires - jiffies;
2211 	else
2212 		expires = INT_MAX;
2213 
2214 	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2215 			       expires, rt->u.dst.error) < 0)
2216 		goto nla_put_failure;
2217 
2218 	return nlmsg_end(skb, nlh);
2219 
2220 nla_put_failure:
2221 	nlmsg_cancel(skb, nlh);
2222 	return -EMSGSIZE;
2223 }
2224 
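/*
 * Per-route callback for RTM_GETROUTE dumps: emit one RTM_NEWROUTE entry,
 * honouring the RTM_F_PREFIX filter when the request carried a full rtmsg.
 */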
2225 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2226 {
2227 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2228 	int prefix;
2229 
2230 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2231 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2232 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2233 	} else
2234 		prefix = 0;
2235 
2236 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2237 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2238 		     prefix, 0, NLM_F_MULTI);
2239 }
2240 
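/*
 * RTM_GETROUTE for a single destination: build a flow from the request
 * attributes, resolve it with ip6_route_output() and unicast the answer
 * back to the requesting socket.
 */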
2241 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2242 {
2243 	struct net *net = sock_net(in_skb->sk);
2244 	struct nlattr *tb[RTA_MAX+1];
2245 	struct rt6_info *rt;
2246 	struct sk_buff *skb;
2247 	struct rtmsg *rtm;
2248 	struct flowi fl;
2249 	int err, iif = 0;
2250 
2251 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2252 	if (err < 0)
2253 		goto errout;
2254 
2255 	err = -EINVAL;
2256 	memset(&fl, 0, sizeof(fl));
2257 
2258 	if (tb[RTA_SRC]) {
2259 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2260 			goto errout;
2261 
2262 		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2263 	}
2264 
2265 	if (tb[RTA_DST]) {
2266 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2267 			goto errout;
2268 
2269 		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2270 	}
2271 
2272 	if (tb[RTA_IIF])
2273 		iif = nla_get_u32(tb[RTA_IIF]);
2274 
2275 	if (tb[RTA_OIF])
2276 		fl.oif = nla_get_u32(tb[RTA_OIF]);
2277 
2278 	if (iif) {
2279 		struct net_device *dev;
2280 		dev = __dev_get_by_index(net, iif);
2281 		if (!dev) {
2282 			err = -ENODEV;
2283 			goto errout;
2284 		}
2285 	}
2286 
2287 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2288 	if (skb == NULL) {
2289 		err = -ENOBUFS;
2290 		goto errout;
2291 	}
2292 
2293 	/* Reserve room for dummy headers; this skb can pass
2294 	   through a good chunk of the routing engine.
2295 	 */
2296 	skb_reset_mac_header(skb);
2297 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2298 
2299 	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
2300 	skb->dst = &rt->u.dst;
2301 
2302 	err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2303 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2304 			    nlh->nlmsg_seq, 0, 0, 0);
2305 	if (err < 0) {
2306 		kfree_skb(skb);
2307 		goto errout;
2308 	}
2309 
2310 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2311 errout:
2312 	return err;
2313 }
2314 
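/*
 * Notify listeners of the RTNLGRP_IPV6_ROUTE multicast group about a
 * route addition or removal.
 */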
2315 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2316 {
2317 	struct sk_buff *skb;
2318 	struct net *net = info->nl_net;
2319 	u32 seq;
2320 	int err;
2321 
2322 	err = -ENOBUFS;
2323 	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2324 
2325 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2326 	if (skb == NULL)
2327 		goto errout;
2328 
2329 	err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2330 				event, info->pid, seq, 0, 0, 0);
2331 	if (err < 0) {
2332 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2333 		WARN_ON(err == -EMSGSIZE);
2334 		kfree_skb(skb);
2335 		goto errout;
2336 	}
2337 	err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2338 			  info->nlh, gfp_any());
2339 errout:
2340 	if (err < 0)
2341 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2342 }
2343 
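/*
 * When the loopback device registers in a namespace, attach it to the
 * special null (and, with policy routing, prohibit/blackhole) entries so
 * they always carry a valid device and inet6_dev reference.
 */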
2344 static int ip6_route_dev_notify(struct notifier_block *this,
2345 				unsigned long event, void *data)
2346 {
2347 	struct net_device *dev = (struct net_device *)data;
2348 	struct net *net = dev_net(dev);
2349 
2350 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2351 		net->ipv6.ip6_null_entry->u.dst.dev = dev;
2352 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2353 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2354 		net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2355 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2356 		net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2357 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2358 #endif
2359 	}
2360 
2361 	return NOTIFY_OK;
2362 }
2363 
2364 /*
2365  *	/proc
2366  */
2367 
2368 #ifdef CONFIG_PROC_FS
2369 
2370 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2371 
2372 struct rt6_proc_arg
2373 {
2374 	char *buffer;
2375 	int offset;
2376 	int length;
2377 	int skip;
2378 	int len;
2379 };
2380 
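/*
 * One line of /proc/net/ipv6_route per route: destination prefix and
 * length, source prefix and length, next hop, metric, reference count,
 * use count and flags, printed in hex and followed by the device name.
 */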
2381 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2382 {
2383 	struct seq_file *m = p_arg;
2384 
2385 	seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2386 		   rt->rt6i_dst.plen);
2387 
2388 #ifdef CONFIG_IPV6_SUBTREES
2389 	seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2390 		   rt->rt6i_src.plen);
2391 #else
2392 	seq_puts(m, "00000000000000000000000000000000 00 ");
2393 #endif
2394 
2395 	if (rt->rt6i_nexthop) {
2396 		seq_printf(m, NIP6_SEQFMT,
2397 			   NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2398 	} else {
2399 		seq_puts(m, "00000000000000000000000000000000");
2400 	}
2401 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2402 		   rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2403 		   rt->u.dst.__use, rt->rt6i_flags,
2404 		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
2405 	return 0;
2406 }
2407 
2408 static int ipv6_route_show(struct seq_file *m, void *v)
2409 {
2410 	struct net *net = (struct net *)m->private;
2411 	fib6_clean_all(net, rt6_info_route, 0, m);
2412 	return 0;
2413 }
2414 
2415 static int ipv6_route_open(struct inode *inode, struct file *file)
2416 {
2417 	return single_open_net(inode, file, ipv6_route_show);
2418 }
2419 
2420 static const struct file_operations ipv6_route_proc_fops = {
2421 	.owner		= THIS_MODULE,
2422 	.open		= ipv6_route_open,
2423 	.read		= seq_read,
2424 	.llseek		= seq_lseek,
2425 	.release	= single_release_net,
2426 };
2427 
2428 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2429 {
2430 	struct net *net = (struct net *)seq->private;
2431 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2432 		   net->ipv6.rt6_stats->fib_nodes,
2433 		   net->ipv6.rt6_stats->fib_route_nodes,
2434 		   net->ipv6.rt6_stats->fib_rt_alloc,
2435 		   net->ipv6.rt6_stats->fib_rt_entries,
2436 		   net->ipv6.rt6_stats->fib_rt_cache,
2437 		   atomic_read(&net->ipv6.ip6_dst_ops->entries),
2438 		   net->ipv6.rt6_stats->fib_discarded_routes);
2439 
2440 	return 0;
2441 }
2442 
2443 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2444 {
2445 	return single_open_net(inode, file, rt6_stats_seq_show);
2446 }
2447 
2448 static const struct file_operations rt6_stats_seq_fops = {
2449 	.owner	 = THIS_MODULE,
2450 	.open	 = rt6_stats_seq_open,
2451 	.read	 = seq_read,
2452 	.llseek	 = seq_lseek,
2453 	.release = single_release_net,
2454 };
2455 #endif	/* CONFIG_PROC_FS */
2456 
2457 #ifdef CONFIG_SYSCTL
2458 
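/*
 * sysctl interface; these entries appear under /proc/sys/net/ipv6/route/.
 * Writing to "flush" triggers a fib6_run_gc() pass over the routing cache.
 */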
2459 static
2460 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2461 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2462 {
2463 	struct net *net = current->nsproxy->net_ns;
2464 	int delay;
2465 	if (!write)
2466 		return -EINVAL;
2467 	proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2468 	delay = net->ipv6.sysctl.flush_delay;
2469 	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2470 	return 0;
2471 }
2472 
2473 ctl_table ipv6_route_table_template[] = {
2474 	{
2475 		.procname	=	"flush",
2476 		.data		=	&init_net.ipv6.sysctl.flush_delay,
2477 		.maxlen		=	sizeof(int),
2478 		.mode		=	0200,
2479 		.proc_handler	=	&ipv6_sysctl_rtcache_flush
2480 	},
2481 	{
2482 		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
2483 		.procname	=	"gc_thresh",
2484 		.data		=	&ip6_dst_ops_template.gc_thresh,
2485 		.maxlen		=	sizeof(int),
2486 		.mode		=	0644,
2487 		.proc_handler	=	&proc_dointvec,
2488 	},
2489 	{
2490 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
2491 		.procname	=	"max_size",
2492 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2493 		.maxlen		=	sizeof(int),
2494 		.mode		=	0644,
2495 		.proc_handler	=	&proc_dointvec,
2496 	},
2497 	{
2498 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2499 		.procname	=	"gc_min_interval",
2500 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2501 		.maxlen		=	sizeof(int),
2502 		.mode		=	0644,
2503 		.proc_handler	=	&proc_dointvec_jiffies,
2504 		.strategy	=	&sysctl_jiffies,
2505 	},
2506 	{
2507 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
2508 		.procname	=	"gc_timeout",
2509 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2510 		.maxlen		=	sizeof(int),
2511 		.mode		=	0644,
2512 		.proc_handler	=	&proc_dointvec_jiffies,
2513 		.strategy	=	&sysctl_jiffies,
2514 	},
2515 	{
2516 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
2517 		.procname	=	"gc_interval",
2518 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2519 		.maxlen		=	sizeof(int),
2520 		.mode		=	0644,
2521 		.proc_handler	=	&proc_dointvec_jiffies,
2522 		.strategy	=	&sysctl_jiffies,
2523 	},
2524 	{
2525 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
2526 		.procname	=	"gc_elasticity",
2527 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2528 		.maxlen		=	sizeof(int),
2529 		.mode		=	0644,
2530 		.proc_handler	=	&proc_dointvec_jiffies,
2531 		.strategy	=	&sysctl_jiffies,
2532 	},
2533 	{
2534 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
2535 		.procname	=	"mtu_expires",
2536 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2537 		.maxlen		=	sizeof(int),
2538 		.mode		=	0644,
2539 		.proc_handler	=	&proc_dointvec_jiffies,
2540 		.strategy	=	&sysctl_jiffies,
2541 	},
2542 	{
2543 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
2544 		.procname	=	"min_adv_mss",
2545 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2546 		.maxlen		=	sizeof(int),
2547 		.mode		=	0644,
2548 		.proc_handler	=	&proc_dointvec_jiffies,
2549 		.strategy	=	&sysctl_jiffies,
2550 	},
2551 	{
2552 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2553 		.procname	=	"gc_min_interval_ms",
2554 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2555 		.maxlen		=	sizeof(int),
2556 		.mode		=	0644,
2557 		.proc_handler	=	&proc_dointvec_ms_jiffies,
2558 		.strategy	=	&sysctl_ms_jiffies,
2559 	},
2560 	{ .ctl_name = 0 }
2561 };
2562 
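/*
 * Duplicate the template for one namespace and repoint every .data field
 * at that namespace's own fields; the indices below must stay in sync
 * with ipv6_route_table_template[].
 */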
2563 struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2564 {
2565 	struct ctl_table *table;
2566 
2567 	table = kmemdup(ipv6_route_table_template,
2568 			sizeof(ipv6_route_table_template),
2569 			GFP_KERNEL);
2570 
2571 	if (table) {
2572 		table[0].data = &net->ipv6.sysctl.flush_delay;
2573 		table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh;
2574 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2575 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2576 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2577 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2578 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2579 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2580 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2581 	}
2582 
2583 	return table;
2584 }
2585 #endif
2586 
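/*
 * Per-namespace init: clone the dst_ops template and the special
 * null/prohibit/blackhole route entries for this namespace and create
 * its /proc files; ip6_route_net_exit() undoes all of it.
 */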
2587 static int ip6_route_net_init(struct net *net)
2588 {
2589 	int ret = -ENOMEM;
2590 
2591 	net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template,
2592 					sizeof(*net->ipv6.ip6_dst_ops),
2593 					GFP_KERNEL);
2594 	if (!net->ipv6.ip6_dst_ops)
2595 		goto out;
2596 	net->ipv6.ip6_dst_ops->dst_net = hold_net(net);
2597 
2598 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2599 					   sizeof(*net->ipv6.ip6_null_entry),
2600 					   GFP_KERNEL);
2601 	if (!net->ipv6.ip6_null_entry)
2602 		goto out_ip6_dst_ops;
2603 	net->ipv6.ip6_null_entry->u.dst.path =
2604 		(struct dst_entry *)net->ipv6.ip6_null_entry;
2605 	net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
2606 
2607 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2608 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2609 					       sizeof(*net->ipv6.ip6_prohibit_entry),
2610 					       GFP_KERNEL);
2611 	if (!net->ipv6.ip6_prohibit_entry) {
2612 		kfree(net->ipv6.ip6_null_entry);
2613 		goto out_ip6_dst_ops;
2614 	}
2615 	net->ipv6.ip6_prohibit_entry->u.dst.path =
2616 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2617 	net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
2618 
2619 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2620 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2621 					       GFP_KERNEL);
2622 	if (!net->ipv6.ip6_blk_hole_entry) {
2623 		kfree(net->ipv6.ip6_null_entry);
2624 		kfree(net->ipv6.ip6_prohibit_entry);
2625 		goto out_ip6_dst_ops;
2626 	}
2627 	net->ipv6.ip6_blk_hole_entry->u.dst.path =
2628 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2629 	net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
2630 #endif
2631 
2632 #ifdef CONFIG_PROC_FS
2633 	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2634 	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2635 #endif
2636 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2637 
2638 	ret = 0;
2639 out:
2640 	return ret;
2641 
2642 out_ip6_dst_ops:
2643 	release_net(net->ipv6.ip6_dst_ops->dst_net);
2644 	kfree(net->ipv6.ip6_dst_ops);
2645 	goto out;
2646 }
2647 
2648 static void ip6_route_net_exit(struct net *net)
2649 {
2650 #ifdef CONFIG_PROC_FS
2651 	proc_net_remove(net, "ipv6_route");
2652 	proc_net_remove(net, "rt6_stats");
2653 #endif
2654 	kfree(net->ipv6.ip6_null_entry);
2655 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2656 	kfree(net->ipv6.ip6_prohibit_entry);
2657 	kfree(net->ipv6.ip6_blk_hole_entry);
2658 #endif
2659 	release_net(net->ipv6.ip6_dst_ops->dst_net);
2660 	kfree(net->ipv6.ip6_dst_ops);
2661 }
2662 
2663 static struct pernet_operations ip6_route_net_ops = {
2664 	.init = ip6_route_net_init,
2665 	.exit = ip6_route_net_exit,
2666 };
2667 
2668 static struct notifier_block ip6_route_dev_notifier = {
2669 	.notifier_call = ip6_route_dev_notify,
2670 	.priority = 0,
2671 };
2672 
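/*
 * Subsystem init: create the rt6_info slab cache, register per-namespace
 * state, the FIB core, xfrm and policy rules, the rtnetlink handlers and
 * the netdevice notifier, unwinding in reverse order on any failure.
 */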
2673 int __init ip6_route_init(void)
2674 {
2675 	int ret;
2676 
2677 	ret = -ENOMEM;
2678 	ip6_dst_ops_template.kmem_cachep =
2679 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2680 				  SLAB_HWCACHE_ALIGN, NULL);
2681 	if (!ip6_dst_ops_template.kmem_cachep)
2682 		goto out;
2683 
2684 	ret = register_pernet_subsys(&ip6_route_net_ops);
2685 	if (ret)
2686 		goto out_kmem_cache;
2687 
2688 	/* The loopback device is registered before this code runs, so the
2689 	 * device notifier has not taken the loopback reference for these
2690 	 * rt6_info entries; take it manually for init_net. */
2691 	init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2692 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2693 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2694 	init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2695 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2696 	init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2697 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2698 #endif
2699 	ret = fib6_init();
2700 	if (ret)
2701 		goto out_register_subsys;
2702 
2703 	ret = xfrm6_init();
2704 	if (ret)
2705 		goto out_fib6_init;
2706 
2707 	ret = fib6_rules_init();
2708 	if (ret)
2709 		goto xfrm6_init;
2710 
2711 	ret = -ENOBUFS;
2712 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2713 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2714 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2715 		goto fib6_rules_init;
2716 
2717 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2718 	if (ret)
2719 		goto fib6_rules_init;
2720 
2721 out:
2722 	return ret;
2723 
2724 fib6_rules_init:
2725 	fib6_rules_cleanup();
2726 xfrm6_init:
2727 	xfrm6_fini();
2728 out_fib6_init:
2729 	fib6_gc_cleanup();
2730 out_register_subsys:
2731 	unregister_pernet_subsys(&ip6_route_net_ops);
2732 out_kmem_cache:
2733 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2734 	goto out;
2735 }
2736 
2737 void ip6_route_cleanup(void)
2738 {
2739 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
2740 	fib6_rules_cleanup();
2741 	xfrm6_fini();
2742 	fib6_gc_cleanup();
2743 	unregister_pernet_subsys(&ip6_route_net_ops);
2744 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2745 }
2746