xref: /openbmc/linux/net/ipv6/route.c (revision 9c1f8594)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
57 
58 #include <asm/uaccess.h>
59 
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63 
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
66 
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
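
/*
 * Editorial example (not part of the original source): raising RT6_DEBUG to 3
 * turns RT6_TRACE() into a printk(KERN_DEBUG ...), so call sites such as
 * rt6_select() below start emitting lines like
 *
 *	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n", __func__, fn->leaf, oif);
 *
 * At the default level of 2 both RDBG() and RT6_TRACE() compile to nothing.
 */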
74 
75 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
76 				    const struct in6_addr *dest);
77 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
79 static unsigned int	 ip6_default_mtu(const struct dst_entry *dst);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void		ip6_dst_destroy(struct dst_entry *);
82 static void		ip6_dst_ifdown(struct dst_entry *,
83 				       struct net_device *dev, int how);
84 static int		 ip6_dst_gc(struct dst_ops *ops);
85 
86 static int		ip6_pkt_discard(struct sk_buff *skb);
87 static int		ip6_pkt_discard_out(struct sk_buff *skb);
88 static void		ip6_link_failure(struct sk_buff *skb);
89 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90 
91 #ifdef CONFIG_IPV6_ROUTE_INFO
92 static struct rt6_info *rt6_add_route_info(struct net *net,
93 					   const struct in6_addr *prefix, int prefixlen,
94 					   const struct in6_addr *gwaddr, int ifindex,
95 					   unsigned pref);
96 static struct rt6_info *rt6_get_route_info(struct net *net,
97 					   const struct in6_addr *prefix, int prefixlen,
98 					   const struct in6_addr *gwaddr, int ifindex);
99 #endif
100 
101 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
102 {
103 	struct rt6_info *rt = (struct rt6_info *) dst;
104 	struct inet_peer *peer;
105 	u32 *p = NULL;
106 
107 	if (!(rt->dst.flags & DST_HOST))
108 		return NULL;
109 
110 	if (!rt->rt6i_peer)
111 		rt6_bind_peer(rt, 1);
112 
113 	peer = rt->rt6i_peer;
114 	if (peer) {
115 		u32 *old_p = __DST_METRICS_PTR(old);
116 		unsigned long prev, new;
117 
118 		p = peer->metrics;
119 		if (inet_metrics_new(peer))
120 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
121 
122 		new = (unsigned long) p;
123 		prev = cmpxchg(&dst->_metrics, old, new);
124 
125 		if (prev != old) {
126 			p = __DST_METRICS_PTR(prev);
127 			if (prev & DST_METRICS_READ_ONLY)
128 				p = NULL;
129 		}
130 	}
131 	return p;
132 }
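
/*
 * Editorial note: ipv6_cow_metrics() above is the copy-on-write hook for
 * host-route metrics.  The writable copy lives in the inet_peer bound to the
 * route; the read-only template is copied into it once, and the dst's
 * _metrics pointer is switched over with cmpxchg() so that concurrent
 * callers converge on a single writable array.  A NULL return means no
 * writable metrics could be provided.
 */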
133 
134 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
135 {
136 	return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
137 }
138 
139 static struct dst_ops ip6_dst_ops_template = {
140 	.family			=	AF_INET6,
141 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
142 	.gc			=	ip6_dst_gc,
143 	.gc_thresh		=	1024,
144 	.check			=	ip6_dst_check,
145 	.default_advmss		=	ip6_default_advmss,
146 	.default_mtu		=	ip6_default_mtu,
147 	.cow_metrics		=	ipv6_cow_metrics,
148 	.destroy		=	ip6_dst_destroy,
149 	.ifdown			=	ip6_dst_ifdown,
150 	.negative_advice	=	ip6_negative_advice,
151 	.link_failure		=	ip6_link_failure,
152 	.update_pmtu		=	ip6_rt_update_pmtu,
153 	.local_out		=	__ip6_local_out,
154 	.neigh_lookup		=	ip6_neigh_lookup,
155 };
156 
157 static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
158 {
159 	return 0;
160 }
161 
162 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
163 {
164 }
165 
166 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
167 					 unsigned long old)
168 {
169 	return NULL;
170 }
171 
172 static struct dst_ops ip6_dst_blackhole_ops = {
173 	.family			=	AF_INET6,
174 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
175 	.destroy		=	ip6_dst_destroy,
176 	.check			=	ip6_dst_check,
177 	.default_mtu		=	ip6_blackhole_default_mtu,
178 	.default_advmss		=	ip6_default_advmss,
179 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
180 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
181 	.neigh_lookup		=	ip6_neigh_lookup,
182 };
183 
184 static const u32 ip6_template_metrics[RTAX_MAX] = {
185 	[RTAX_HOPLIMIT - 1] = 255,
186 };
187 
188 static struct rt6_info ip6_null_entry_template = {
189 	.dst = {
190 		.__refcnt	= ATOMIC_INIT(1),
191 		.__use		= 1,
192 		.obsolete	= -1,
193 		.error		= -ENETUNREACH,
194 		.input		= ip6_pkt_discard,
195 		.output		= ip6_pkt_discard_out,
196 	},
197 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
198 	.rt6i_protocol  = RTPROT_KERNEL,
199 	.rt6i_metric	= ~(u32) 0,
200 	.rt6i_ref	= ATOMIC_INIT(1),
201 };
202 
203 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
204 
205 static int ip6_pkt_prohibit(struct sk_buff *skb);
206 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
207 
208 static struct rt6_info ip6_prohibit_entry_template = {
209 	.dst = {
210 		.__refcnt	= ATOMIC_INIT(1),
211 		.__use		= 1,
212 		.obsolete	= -1,
213 		.error		= -EACCES,
214 		.input		= ip6_pkt_prohibit,
215 		.output		= ip6_pkt_prohibit_out,
216 	},
217 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
218 	.rt6i_protocol  = RTPROT_KERNEL,
219 	.rt6i_metric	= ~(u32) 0,
220 	.rt6i_ref	= ATOMIC_INIT(1),
221 };
222 
223 static struct rt6_info ip6_blk_hole_entry_template = {
224 	.dst = {
225 		.__refcnt	= ATOMIC_INIT(1),
226 		.__use		= 1,
227 		.obsolete	= -1,
228 		.error		= -EINVAL,
229 		.input		= dst_discard,
230 		.output		= dst_discard,
231 	},
232 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
233 	.rt6i_protocol  = RTPROT_KERNEL,
234 	.rt6i_metric	= ~(u32) 0,
235 	.rt6i_ref	= ATOMIC_INIT(1),
236 };
237 
238 #endif
239 
240 /* allocate dst with ip6_dst_ops */
241 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
242 					     struct net_device *dev,
243 					     int flags)
244 {
245 	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
246 
247 	memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
248 
249 	return rt;
250 }
251 
252 static void ip6_dst_destroy(struct dst_entry *dst)
253 {
254 	struct rt6_info *rt = (struct rt6_info *)dst;
255 	struct inet6_dev *idev = rt->rt6i_idev;
256 	struct inet_peer *peer = rt->rt6i_peer;
257 
258 	if (!(rt->dst.flags & DST_HOST))
259 		dst_destroy_metrics_generic(dst);
260 
261 	if (idev != NULL) {
262 		rt->rt6i_idev = NULL;
263 		in6_dev_put(idev);
264 	}
265 	if (peer) {
266 		rt->rt6i_peer = NULL;
267 		inet_putpeer(peer);
268 	}
269 }
270 
271 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
272 
273 static u32 rt6_peer_genid(void)
274 {
275 	return atomic_read(&__rt6_peer_genid);
276 }
277 
278 void rt6_bind_peer(struct rt6_info *rt, int create)
279 {
280 	struct inet_peer *peer;
281 
282 	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
283 	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
284 		inet_putpeer(peer);
285 	else
286 		rt->rt6i_peer_genid = rt6_peer_genid();
287 }
288 
289 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
290 			   int how)
291 {
292 	struct rt6_info *rt = (struct rt6_info *)dst;
293 	struct inet6_dev *idev = rt->rt6i_idev;
294 	struct net_device *loopback_dev =
295 		dev_net(dev)->loopback_dev;
296 
297 	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
298 		struct inet6_dev *loopback_idev =
299 			in6_dev_get(loopback_dev);
300 		if (loopback_idev != NULL) {
301 			rt->rt6i_idev = loopback_idev;
302 			in6_dev_put(idev);
303 		}
304 	}
305 }
306 
307 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
308 {
309 	return (rt->rt6i_flags & RTF_EXPIRES) &&
310 		time_after(jiffies, rt->rt6i_expires);
311 }
312 
313 static inline int rt6_need_strict(const struct in6_addr *daddr)
314 {
315 	return ipv6_addr_type(daddr) &
316 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
317 }
318 
319 /*
320  *	Route lookup. Holding the relevant table->tb6_lock is implied.
321  */
322 
323 static inline struct rt6_info *rt6_device_match(struct net *net,
324 						    struct rt6_info *rt,
325 						    const struct in6_addr *saddr,
326 						    int oif,
327 						    int flags)
328 {
329 	struct rt6_info *local = NULL;
330 	struct rt6_info *sprt;
331 
332 	if (!oif && ipv6_addr_any(saddr))
333 		goto out;
334 
335 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
336 		struct net_device *dev = sprt->rt6i_dev;
337 
338 		if (oif) {
339 			if (dev->ifindex == oif)
340 				return sprt;
341 			if (dev->flags & IFF_LOOPBACK) {
342 				if (sprt->rt6i_idev == NULL ||
343 				    sprt->rt6i_idev->dev->ifindex != oif) {
344 					if (flags & RT6_LOOKUP_F_IFACE && oif)
345 						continue;
346 					if (local && (!oif ||
347 						      local->rt6i_idev->dev->ifindex == oif))
348 						continue;
349 				}
350 				local = sprt;
351 			}
352 		} else {
353 			if (ipv6_chk_addr(net, saddr, dev,
354 					  flags & RT6_LOOKUP_F_IFACE))
355 				return sprt;
356 		}
357 	}
358 
359 	if (oif) {
360 		if (local)
361 			return local;
362 
363 		if (flags & RT6_LOOKUP_F_IFACE)
364 			return net->ipv6.ip6_null_entry;
365 	}
366 out:
367 	return rt;
368 }
369 
370 #ifdef CONFIG_IPV6_ROUTER_PREF
371 static void rt6_probe(struct rt6_info *rt)
372 {
373 	struct neighbour *neigh;
374 	/*
375 	 * Okay, this does not seem to be appropriate
376 	 * for now, however, we need to check if it
377 	 * is really so; aka Router Reachability Probing.
378 	 *
379 	 * Router Reachability Probe MUST be rate-limited
380 	 * to no more than one per minute.
381 	 */
382 	rcu_read_lock();
383 	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
384 	if (!neigh || (neigh->nud_state & NUD_VALID))
385 		goto out;
386 	read_lock_bh(&neigh->lock);
387 	if (!(neigh->nud_state & NUD_VALID) &&
388 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
389 		struct in6_addr mcaddr;
390 		struct in6_addr *target;
391 
392 		neigh->updated = jiffies;
393 		read_unlock_bh(&neigh->lock);
394 
395 		target = (struct in6_addr *)&neigh->primary_key;
396 		addrconf_addr_solict_mult(target, &mcaddr);
397 		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
398 	} else {
399 		read_unlock_bh(&neigh->lock);
400 	}
401 out:
402 	rcu_read_unlock();
403 }
404 #else
405 static inline void rt6_probe(struct rt6_info *rt)
406 {
407 }
408 #endif
409 
410 /*
411  * Default Router Selection (RFC 2461 6.3.6)
412  */
413 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
414 {
415 	struct net_device *dev = rt->rt6i_dev;
416 	if (!oif || dev->ifindex == oif)
417 		return 2;
418 	if ((dev->flags & IFF_LOOPBACK) &&
419 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
420 		return 1;
421 	return 0;
422 }
423 
424 static inline int rt6_check_neigh(struct rt6_info *rt)
425 {
426 	struct neighbour *neigh;
427 	int m;
428 
429 	rcu_read_lock();
430 	neigh = dst_get_neighbour(&rt->dst);
431 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
432 	    !(rt->rt6i_flags & RTF_GATEWAY))
433 		m = 1;
434 	else if (neigh) {
435 		read_lock_bh(&neigh->lock);
436 		if (neigh->nud_state & NUD_VALID)
437 			m = 2;
438 #ifdef CONFIG_IPV6_ROUTER_PREF
439 		else if (neigh->nud_state & NUD_FAILED)
440 			m = 0;
441 #endif
442 		else
443 			m = 1;
444 		read_unlock_bh(&neigh->lock);
445 	} else
446 		m = 0;
447 	rcu_read_unlock();
448 	return m;
449 }
450 
451 static int rt6_score_route(struct rt6_info *rt, int oif,
452 			   int strict)
453 {
454 	int m, n;
455 
456 	m = rt6_check_dev(rt, oif);
457 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
458 		return -1;
459 #ifdef CONFIG_IPV6_ROUTER_PREF
460 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
461 #endif
462 	n = rt6_check_neigh(rt);
463 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
464 		return -1;
465 	return m;
466 }
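
/*
 * Editorial note: the score returned by rt6_score_route() is a small
 * bitfield.  rt6_check_dev() supplies the low two bits (2 for an exact
 * interface match, 1 for a loopback route whose idev matches, 0 otherwise)
 * and, with CONFIG_IPV6_ROUTER_PREF, the decoded router preference is
 * shifted into bits 2 and up, so router preference outweighs a bare
 * interface match.  A negative return tells the caller to skip the route
 * under the given strictness flags.
 */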
467 
468 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
469 				   int *mpri, struct rt6_info *match)
470 {
471 	int m;
472 
473 	if (rt6_check_expired(rt))
474 		goto out;
475 
476 	m = rt6_score_route(rt, oif, strict);
477 	if (m < 0)
478 		goto out;
479 
480 	if (m > *mpri) {
481 		if (strict & RT6_LOOKUP_F_REACHABLE)
482 			rt6_probe(match);
483 		*mpri = m;
484 		match = rt;
485 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
486 		rt6_probe(rt);
487 	}
488 
489 out:
490 	return match;
491 }
492 
493 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
494 				     struct rt6_info *rr_head,
495 				     u32 metric, int oif, int strict)
496 {
497 	struct rt6_info *rt, *match;
498 	int mpri = -1;
499 
500 	match = NULL;
501 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
502 	     rt = rt->dst.rt6_next)
503 		match = find_match(rt, oif, strict, &mpri, match);
504 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
505 	     rt = rt->dst.rt6_next)
506 		match = find_match(rt, oif, strict, &mpri, match);
507 
508 	return match;
509 }
510 
511 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
512 {
513 	struct rt6_info *match, *rt0;
514 	struct net *net;
515 
516 	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
517 		  __func__, fn->leaf, oif);
518 
519 	rt0 = fn->rr_ptr;
520 	if (!rt0)
521 		fn->rr_ptr = rt0 = fn->leaf;
522 
523 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
524 
525 	if (!match &&
526 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
527 		struct rt6_info *next = rt0->dst.rt6_next;
528 
529 		/* no entries matched; do round-robin */
530 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
531 			next = fn->leaf;
532 
533 		if (next != rt0)
534 			fn->rr_ptr = next;
535 	}
536 
537 	RT6_TRACE("%s() => %p\n",
538 		  __func__, match);
539 
540 	net = dev_net(rt0->rt6i_dev);
541 	return match ? match : net->ipv6.ip6_null_entry;
542 }
543 
544 #ifdef CONFIG_IPV6_ROUTE_INFO
545 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
546 		  const struct in6_addr *gwaddr)
547 {
548 	struct net *net = dev_net(dev);
549 	struct route_info *rinfo = (struct route_info *) opt;
550 	struct in6_addr prefix_buf, *prefix;
551 	unsigned int pref;
552 	unsigned long lifetime;
553 	struct rt6_info *rt;
554 
555 	if (len < sizeof(struct route_info)) {
556 		return -EINVAL;
557 	}
558 
559 	/* Sanity check for prefix_len and length */
560 	if (rinfo->length > 3) {
561 		return -EINVAL;
562 	} else if (rinfo->prefix_len > 128) {
563 		return -EINVAL;
564 	} else if (rinfo->prefix_len > 64) {
565 		if (rinfo->length < 2) {
566 			return -EINVAL;
567 		}
568 	} else if (rinfo->prefix_len > 0) {
569 		if (rinfo->length < 1) {
570 			return -EINVAL;
571 		}
572 	}
573 
574 	pref = rinfo->route_pref;
575 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
576 		return -EINVAL;
577 
578 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
579 
580 	if (rinfo->length == 3)
581 		prefix = (struct in6_addr *)rinfo->prefix;
582 	else {
583 		/* this function is safe */
584 		ipv6_addr_prefix(&prefix_buf,
585 				 (struct in6_addr *)rinfo->prefix,
586 				 rinfo->prefix_len);
587 		prefix = &prefix_buf;
588 	}
589 
590 	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
591 				dev->ifindex);
592 
593 	if (rt && !lifetime) {
594 		ip6_del_rt(rt);
595 		rt = NULL;
596 	}
597 
598 	if (!rt && lifetime)
599 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
600 					pref);
601 	else if (rt)
602 		rt->rt6i_flags = RTF_ROUTEINFO |
603 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
604 
605 	if (rt) {
606 		if (!addrconf_finite_timeout(lifetime)) {
607 			rt->rt6i_flags &= ~RTF_EXPIRES;
608 		} else {
609 			rt->rt6i_expires = jiffies + HZ * lifetime;
610 			rt->rt6i_flags |= RTF_EXPIRES;
611 		}
612 		dst_release(&rt->dst);
613 	}
614 	return 0;
615 }
616 #endif
617 
618 #define BACKTRACK(__net, saddr)			\
619 do { \
620 	if (rt == __net->ipv6.ip6_null_entry) {	\
621 		struct fib6_node *pn; \
622 		while (1) { \
623 			if (fn->fn_flags & RTN_TL_ROOT) \
624 				goto out; \
625 			pn = fn->parent; \
626 			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
627 				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
628 			else \
629 				fn = pn; \
630 			if (fn->fn_flags & RTN_RTINFO) \
631 				goto restart; \
632 		} \
633 	} \
634 } while(0)
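
/*
 * Editorial note: BACKTRACK() is only usable inside the lookup functions
 * below.  It assumes the enclosing scope provides the locals 'rt' and 'fn',
 * a 'restart' label that re-runs the leaf scan, an 'out' label that exits,
 * and a source-address expression for the optional subtree lookup.  When
 * the lookup fell through to ip6_null_entry it walks back up the fib tree
 * (descending into subtrees where present) until it reaches a node that
 * carries routing information.
 */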
635 
636 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
637 					     struct fib6_table *table,
638 					     struct flowi6 *fl6, int flags)
639 {
640 	struct fib6_node *fn;
641 	struct rt6_info *rt;
642 
643 	read_lock_bh(&table->tb6_lock);
644 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
645 restart:
646 	rt = fn->leaf;
647 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
648 	BACKTRACK(net, &fl6->saddr);
649 out:
650 	dst_use(&rt->dst, jiffies);
651 	read_unlock_bh(&table->tb6_lock);
652 	return rt;
653 
654 }
655 
656 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
657 			    const struct in6_addr *saddr, int oif, int strict)
658 {
659 	struct flowi6 fl6 = {
660 		.flowi6_oif = oif,
661 		.daddr = *daddr,
662 	};
663 	struct dst_entry *dst;
664 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
665 
666 	if (saddr) {
667 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
668 		flags |= RT6_LOOKUP_F_HAS_SADDR;
669 	}
670 
671 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
672 	if (dst->error == 0)
673 		return (struct rt6_info *) dst;
674 
675 	dst_release(dst);
676 
677 	return NULL;
678 }
679 
680 EXPORT_SYMBOL(rt6_lookup);
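
/*
 * Illustrative sketch (editorial): a typical rt6_lookup() caller, modelled
 * on the gateway check in ip6_route_add() below.  'net', 'daddr' and 'oif'
 * stand in for the caller's own context; the reference must be dropped
 * with dst_release() when the route is no longer needed.
 *
 *	struct rt6_info *rt = rt6_lookup(net, &daddr, NULL, oif, 0);
 *
 *	if (rt) {
 *		// examine rt->rt6i_dev, rt->rt6i_flags, rt->rt6i_gateway ...
 *		dst_release(&rt->dst);
 *	}
 */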
681 
682 /* ip6_ins_rt is called with table->tb6_lock NOT held (i.e. free).
683    It takes a new route entry; if the addition fails for any reason,
684    the route is freed. In any case, if the caller does not hold a
685    reference, it may be destroyed.
686  */
687 
688 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
689 {
690 	int err;
691 	struct fib6_table *table;
692 
693 	table = rt->rt6i_table;
694 	write_lock_bh(&table->tb6_lock);
695 	err = fib6_add(&table->tb6_root, rt, info);
696 	write_unlock_bh(&table->tb6_lock);
697 
698 	return err;
699 }
700 
701 int ip6_ins_rt(struct rt6_info *rt)
702 {
703 	struct nl_info info = {
704 		.nl_net = dev_net(rt->rt6i_dev),
705 	};
706 	return __ip6_ins_rt(rt, &info);
707 }
708 
709 static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
710 				      const struct in6_addr *daddr,
711 				      const struct in6_addr *saddr)
712 {
713 	struct rt6_info *rt;
714 
715 	/*
716 	 *	Clone the route.
717 	 */
718 
719 	rt = ip6_rt_copy(ort, daddr);
720 
721 	if (rt) {
722 		struct neighbour *neigh;
723 		int attempts = !in_softirq();
724 
725 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
726 			if (rt->rt6i_dst.plen != 128 &&
727 			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
728 				rt->rt6i_flags |= RTF_ANYCAST;
729 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
730 		}
731 
732 		rt->rt6i_flags |= RTF_CACHE;
733 
734 #ifdef CONFIG_IPV6_SUBTREES
735 		if (rt->rt6i_src.plen && saddr) {
736 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
737 			rt->rt6i_src.plen = 128;
738 		}
739 #endif
740 
741 	retry:
742 		neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
743 		if (IS_ERR(neigh)) {
744 			struct net *net = dev_net(rt->rt6i_dev);
745 			int saved_rt_min_interval =
746 				net->ipv6.sysctl.ip6_rt_gc_min_interval;
747 			int saved_rt_elasticity =
748 				net->ipv6.sysctl.ip6_rt_gc_elasticity;
749 
750 			if (attempts-- > 0) {
751 				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
752 				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
753 
754 				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
755 
756 				net->ipv6.sysctl.ip6_rt_gc_elasticity =
757 					saved_rt_elasticity;
758 				net->ipv6.sysctl.ip6_rt_gc_min_interval =
759 					saved_rt_min_interval;
760 				goto retry;
761 			}
762 
763 			if (net_ratelimit())
764 				printk(KERN_WARNING
765 				       "ipv6: Neighbour table overflow.\n");
766 			dst_free(&rt->dst);
767 			return NULL;
768 		}
769 		dst_set_neighbour(&rt->dst, neigh);
770 
771 	}
772 
773 	return rt;
774 }
775 
776 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
777 					const struct in6_addr *daddr)
778 {
779 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
780 
781 	if (rt) {
782 		rt->rt6i_flags |= RTF_CACHE;
783 		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
784 	}
785 	return rt;
786 }
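
/*
 * Editorial note: rt6_alloc_cow() and rt6_alloc_clone() both produce
 * RTF_CACHE copies of a routing entry.  The COW variant is used when no
 * neighbour is bound yet (connected routes): it narrows the copy to the
 * destination host, may mark it RTF_ANYCAST and resolves a fresh neighbour,
 * possibly retrying once after an emergency garbage collection if the
 * neighbour table overflows.  The clone variant is used for routes that
 * already carry a neighbour (gatewayed or NONEXTHOP ones) and simply takes
 * another reference on the original route's neighbour via neigh_clone().
 */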
787 
788 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
789 				      struct flowi6 *fl6, int flags)
790 {
791 	struct fib6_node *fn;
792 	struct rt6_info *rt, *nrt;
793 	int strict = 0;
794 	int attempts = 3;
795 	int err;
796 	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
797 
798 	strict |= flags & RT6_LOOKUP_F_IFACE;
799 
800 relookup:
801 	read_lock_bh(&table->tb6_lock);
802 
803 restart_2:
804 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
805 
806 restart:
807 	rt = rt6_select(fn, oif, strict | reachable);
808 
809 	BACKTRACK(net, &fl6->saddr);
810 	if (rt == net->ipv6.ip6_null_entry ||
811 	    rt->rt6i_flags & RTF_CACHE)
812 		goto out;
813 
814 	dst_hold(&rt->dst);
815 	read_unlock_bh(&table->tb6_lock);
816 
817 	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
818 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
819 	else if (!(rt->dst.flags & DST_HOST))
820 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
821 	else
822 		goto out2;
823 
824 	dst_release(&rt->dst);
825 	rt = nrt ? : net->ipv6.ip6_null_entry;
826 
827 	dst_hold(&rt->dst);
828 	if (nrt) {
829 		err = ip6_ins_rt(nrt);
830 		if (!err)
831 			goto out2;
832 	}
833 
834 	if (--attempts <= 0)
835 		goto out2;
836 
837 	/*
838 	 * Race condition! In the gap while table->tb6_lock was
839 	 * released, someone could have inserted this route.  Relookup.
840 	 */
841 	dst_release(&rt->dst);
842 	goto relookup;
843 
844 out:
845 	if (reachable) {
846 		reachable = 0;
847 		goto restart_2;
848 	}
849 	dst_hold(&rt->dst);
850 	read_unlock_bh(&table->tb6_lock);
851 out2:
852 	rt->dst.lastuse = jiffies;
853 	rt->dst.__use++;
854 
855 	return rt;
856 }
857 
858 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
859 					    struct flowi6 *fl6, int flags)
860 {
861 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
862 }
863 
864 void ip6_route_input(struct sk_buff *skb)
865 {
866 	const struct ipv6hdr *iph = ipv6_hdr(skb);
867 	struct net *net = dev_net(skb->dev);
868 	int flags = RT6_LOOKUP_F_HAS_SADDR;
869 	struct flowi6 fl6 = {
870 		.flowi6_iif = skb->dev->ifindex,
871 		.daddr = iph->daddr,
872 		.saddr = iph->saddr,
873 		.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
874 		.flowi6_mark = skb->mark,
875 		.flowi6_proto = iph->nexthdr,
876 	};
877 
878 	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
879 		flags |= RT6_LOOKUP_F_IFACE;
880 
881 	skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
882 }
883 
884 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
885 					     struct flowi6 *fl6, int flags)
886 {
887 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
888 }
889 
890 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
891 				    struct flowi6 *fl6)
892 {
893 	int flags = 0;
894 
895 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
896 		flags |= RT6_LOOKUP_F_IFACE;
897 
898 	if (!ipv6_addr_any(&fl6->saddr))
899 		flags |= RT6_LOOKUP_F_HAS_SADDR;
900 	else if (sk)
901 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
902 
903 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
904 }
905 
906 EXPORT_SYMBOL(ip6_route_output);
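
/*
 * Illustrative sketch (editorial): an output-path caller fills a flowi6 key,
 * asks for a route and checks dst->error before using it.  'net', 'sk',
 * 'destination' and 'source' are placeholders for the caller's own state.
 *
 *	struct flowi6 fl6 = {
 *		.flowi6_oif = sk ? sk->sk_bound_dev_if : 0,
 *		.daddr = destination,
 *		.saddr = source,	// may be left as the unspecified address
 *	};
 *	struct dst_entry *dst = ip6_route_output(net, sk, &fl6);
 *	int err = dst->error;
 *
 *	if (err) {
 *		dst_release(dst);	// no usable route
 *		return err;
 *	}
 *	// ... hand dst to the skb/socket, which consumes the reference
 */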
907 
908 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
909 {
910 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
911 	struct dst_entry *new = NULL;
912 
913 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
914 	if (rt) {
915 		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
916 
917 		new = &rt->dst;
918 
919 		new->__use = 1;
920 		new->input = dst_discard;
921 		new->output = dst_discard;
922 
923 		if (dst_metrics_read_only(&ort->dst))
924 			new->_metrics = ort->dst._metrics;
925 		else
926 			dst_copy_metrics(new, &ort->dst);
927 		rt->rt6i_idev = ort->rt6i_idev;
928 		if (rt->rt6i_idev)
929 			in6_dev_hold(rt->rt6i_idev);
930 		rt->rt6i_expires = 0;
931 
932 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
933 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
934 		rt->rt6i_metric = 0;
935 
936 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
937 #ifdef CONFIG_IPV6_SUBTREES
938 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
939 #endif
940 
941 		dst_free(new);
942 	}
943 
944 	dst_release(dst_orig);
945 	return new ? new : ERR_PTR(-ENOMEM);
946 }
947 
948 /*
949  *	Destination cache support functions
950  */
951 
952 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
953 {
954 	struct rt6_info *rt;
955 
956 	rt = (struct rt6_info *) dst;
957 
958 	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
959 		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
960 			if (!rt->rt6i_peer)
961 				rt6_bind_peer(rt, 0);
962 			rt->rt6i_peer_genid = rt6_peer_genid();
963 		}
964 		return dst;
965 	}
966 	return NULL;
967 }
968 
969 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
970 {
971 	struct rt6_info *rt = (struct rt6_info *) dst;
972 
973 	if (rt) {
974 		if (rt->rt6i_flags & RTF_CACHE) {
975 			if (rt6_check_expired(rt)) {
976 				ip6_del_rt(rt);
977 				dst = NULL;
978 			}
979 		} else {
980 			dst_release(dst);
981 			dst = NULL;
982 		}
983 	}
984 	return dst;
985 }
986 
987 static void ip6_link_failure(struct sk_buff *skb)
988 {
989 	struct rt6_info *rt;
990 
991 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
992 
993 	rt = (struct rt6_info *) skb_dst(skb);
994 	if (rt) {
995 		if (rt->rt6i_flags&RTF_CACHE) {
996 			dst_set_expires(&rt->dst, 0);
997 			rt->rt6i_flags |= RTF_EXPIRES;
998 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
999 			rt->rt6i_node->fn_sernum = -1;
1000 	}
1001 }
1002 
1003 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1004 {
1005 	struct rt6_info *rt6 = (struct rt6_info*)dst;
1006 
1007 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1008 		rt6->rt6i_flags |= RTF_MODIFIED;
1009 		if (mtu < IPV6_MIN_MTU) {
1010 			u32 features = dst_metric(dst, RTAX_FEATURES);
1011 			mtu = IPV6_MIN_MTU;
1012 			features |= RTAX_FEATURE_ALLFRAG;
1013 			dst_metric_set(dst, RTAX_FEATURES, features);
1014 		}
1015 		dst_metric_set(dst, RTAX_MTU, mtu);
1016 	}
1017 }
1018 
1019 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1020 {
1021 	struct net_device *dev = dst->dev;
1022 	unsigned int mtu = dst_mtu(dst);
1023 	struct net *net = dev_net(dev);
1024 
1025 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1026 
1027 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1028 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1029 
1030 	/*
1031 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1032 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1033 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1034 	 * rely only on pmtu discovery"
1035 	 */
1036 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1037 		mtu = IPV6_MAXPLEN;
1038 	return mtu;
1039 }
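
/*
 * Editorial example: on a standard Ethernet link (MTU 1500) the default
 * advertised MSS works out to 1500 - 40 (IPv6 header) - 20 (TCP header)
 * = 1440 bytes; it is raised to the ip6_rt_min_advmss sysctl if smaller
 * and capped at IPV6_MAXPLEN for jumbogram-sized MTUs.
 */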
1040 
1041 static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1042 {
1043 	unsigned int mtu = IPV6_MIN_MTU;
1044 	struct inet6_dev *idev;
1045 
1046 	rcu_read_lock();
1047 	idev = __in6_dev_get(dst->dev);
1048 	if (idev)
1049 		mtu = idev->cnf.mtu6;
1050 	rcu_read_unlock();
1051 
1052 	return mtu;
1053 }
1054 
1055 static struct dst_entry *icmp6_dst_gc_list;
1056 static DEFINE_SPINLOCK(icmp6_dst_lock);
1057 
1058 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1059 				  struct neighbour *neigh,
1060 				  const struct in6_addr *addr)
1061 {
1062 	struct rt6_info *rt;
1063 	struct inet6_dev *idev = in6_dev_get(dev);
1064 	struct net *net = dev_net(dev);
1065 
1066 	if (unlikely(idev == NULL))
1067 		return NULL;
1068 
1069 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1070 	if (unlikely(rt == NULL)) {
1071 		in6_dev_put(idev);
1072 		goto out;
1073 	}
1074 
1075 	if (neigh)
1076 		neigh_hold(neigh);
1077 	else {
1078 		neigh = ndisc_get_neigh(dev, addr);
1079 		if (IS_ERR(neigh))
1080 			neigh = NULL;
1081 	}
1082 
1083 	rt->dst.flags |= DST_HOST;
1084 	rt->dst.output  = ip6_output;
1085 	dst_set_neighbour(&rt->dst, neigh);
1086 	atomic_set(&rt->dst.__refcnt, 1);
1087 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1088 
1089 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1090 	rt->rt6i_dst.plen = 128;
1091 	rt->rt6i_idev     = idev;
1092 
1093 	spin_lock_bh(&icmp6_dst_lock);
1094 	rt->dst.next = icmp6_dst_gc_list;
1095 	icmp6_dst_gc_list = &rt->dst;
1096 	spin_unlock_bh(&icmp6_dst_lock);
1097 
1098 	fib6_force_start_gc(net);
1099 
1100 out:
1101 	return &rt->dst;
1102 }
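
/*
 * Editorial note: dst entries handed out by icmp6_dst_alloc() are never
 * inserted into the FIB.  They are chained on icmp6_dst_gc_list and reaped
 * by icmp6_dst_gc() once their reference count drops to zero, or freed by
 * icmp6_clean_all() when the supplied predicate matches (typically on
 * device removal).
 */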
1103 
1104 int icmp6_dst_gc(void)
1105 {
1106 	struct dst_entry *dst, **pprev;
1107 	int more = 0;
1108 
1109 	spin_lock_bh(&icmp6_dst_lock);
1110 	pprev = &icmp6_dst_gc_list;
1111 
1112 	while ((dst = *pprev) != NULL) {
1113 		if (!atomic_read(&dst->__refcnt)) {
1114 			*pprev = dst->next;
1115 			dst_free(dst);
1116 		} else {
1117 			pprev = &dst->next;
1118 			++more;
1119 		}
1120 	}
1121 
1122 	spin_unlock_bh(&icmp6_dst_lock);
1123 
1124 	return more;
1125 }
1126 
1127 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1128 			    void *arg)
1129 {
1130 	struct dst_entry *dst, **pprev;
1131 
1132 	spin_lock_bh(&icmp6_dst_lock);
1133 	pprev = &icmp6_dst_gc_list;
1134 	while ((dst = *pprev) != NULL) {
1135 		struct rt6_info *rt = (struct rt6_info *) dst;
1136 		if (func(rt, arg)) {
1137 			*pprev = dst->next;
1138 			dst_free(dst);
1139 		} else {
1140 			pprev = &dst->next;
1141 		}
1142 	}
1143 	spin_unlock_bh(&icmp6_dst_lock);
1144 }
1145 
1146 static int ip6_dst_gc(struct dst_ops *ops)
1147 {
1148 	unsigned long now = jiffies;
1149 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1150 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1151 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1152 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1153 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1154 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1155 	int entries;
1156 
1157 	entries = dst_entries_get_fast(ops);
1158 	if (time_after(rt_last_gc + rt_min_interval, now) &&
1159 	    entries <= rt_max_size)
1160 		goto out;
1161 
1162 	net->ipv6.ip6_rt_gc_expire++;
1163 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1164 	net->ipv6.ip6_rt_last_gc = now;
1165 	entries = dst_entries_get_slow(ops);
1166 	if (entries < ops->gc_thresh)
1167 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1168 out:
1169 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1170 	return entries > rt_max_size;
1171 }
1172 
1173 /* Clean the host part of a prefix. Not necessary in a radix tree,
1174    but results in cleaner routing tables.
1175 
1176    Remove it only when everything is known to work!
1177  */
1178 
1179 int ip6_dst_hoplimit(struct dst_entry *dst)
1180 {
1181 	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1182 	if (hoplimit == 0) {
1183 		struct net_device *dev = dst->dev;
1184 		struct inet6_dev *idev;
1185 
1186 		rcu_read_lock();
1187 		idev = __in6_dev_get(dev);
1188 		if (idev)
1189 			hoplimit = idev->cnf.hop_limit;
1190 		else
1191 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1192 		rcu_read_unlock();
1193 	}
1194 	return hoplimit;
1195 }
1196 EXPORT_SYMBOL(ip6_dst_hoplimit);
1197 
1198 /*
1199  *
1200  */
1201 
1202 int ip6_route_add(struct fib6_config *cfg)
1203 {
1204 	int err;
1205 	struct net *net = cfg->fc_nlinfo.nl_net;
1206 	struct rt6_info *rt = NULL;
1207 	struct net_device *dev = NULL;
1208 	struct inet6_dev *idev = NULL;
1209 	struct fib6_table *table;
1210 	int addr_type;
1211 
1212 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1213 		return -EINVAL;
1214 #ifndef CONFIG_IPV6_SUBTREES
1215 	if (cfg->fc_src_len)
1216 		return -EINVAL;
1217 #endif
1218 	if (cfg->fc_ifindex) {
1219 		err = -ENODEV;
1220 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1221 		if (!dev)
1222 			goto out;
1223 		idev = in6_dev_get(dev);
1224 		if (!idev)
1225 			goto out;
1226 	}
1227 
1228 	if (cfg->fc_metric == 0)
1229 		cfg->fc_metric = IP6_RT_PRIO_USER;
1230 
1231 	table = fib6_new_table(net, cfg->fc_table);
1232 	if (table == NULL) {
1233 		err = -ENOBUFS;
1234 		goto out;
1235 	}
1236 
1237 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1238 
1239 	if (rt == NULL) {
1240 		err = -ENOMEM;
1241 		goto out;
1242 	}
1243 
1244 	rt->dst.obsolete = -1;
1245 	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1246 				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1247 				0;
1248 
1249 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1250 		cfg->fc_protocol = RTPROT_BOOT;
1251 	rt->rt6i_protocol = cfg->fc_protocol;
1252 
1253 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1254 
1255 	if (addr_type & IPV6_ADDR_MULTICAST)
1256 		rt->dst.input = ip6_mc_input;
1257 	else if (cfg->fc_flags & RTF_LOCAL)
1258 		rt->dst.input = ip6_input;
1259 	else
1260 		rt->dst.input = ip6_forward;
1261 
1262 	rt->dst.output = ip6_output;
1263 
1264 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1265 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1266 	if (rt->rt6i_dst.plen == 128)
1267 	       rt->dst.flags |= DST_HOST;
1268 
1269 	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1270 		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1271 		if (!metrics) {
1272 			err = -ENOMEM;
1273 			goto out;
1274 		}
1275 		dst_init_metrics(&rt->dst, metrics, 0);
1276 	}
1277 #ifdef CONFIG_IPV6_SUBTREES
1278 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1279 	rt->rt6i_src.plen = cfg->fc_src_len;
1280 #endif
1281 
1282 	rt->rt6i_metric = cfg->fc_metric;
1283 
1284 	/* We cannot add true routes via loopback here;
1285 	   they would result in kernel looping, so promote them to reject routes.
1286 	 */
1287 	if ((cfg->fc_flags & RTF_REJECT) ||
1288 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1289 					      && !(cfg->fc_flags&RTF_LOCAL))) {
1290 		/* hold loopback dev/idev if we haven't done so. */
1291 		if (dev != net->loopback_dev) {
1292 			if (dev) {
1293 				dev_put(dev);
1294 				in6_dev_put(idev);
1295 			}
1296 			dev = net->loopback_dev;
1297 			dev_hold(dev);
1298 			idev = in6_dev_get(dev);
1299 			if (!idev) {
1300 				err = -ENODEV;
1301 				goto out;
1302 			}
1303 		}
1304 		rt->dst.output = ip6_pkt_discard_out;
1305 		rt->dst.input = ip6_pkt_discard;
1306 		rt->dst.error = -ENETUNREACH;
1307 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1308 		goto install_route;
1309 	}
1310 
1311 	if (cfg->fc_flags & RTF_GATEWAY) {
1312 		const struct in6_addr *gw_addr;
1313 		int gwa_type;
1314 
1315 		gw_addr = &cfg->fc_gateway;
1316 		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1317 		gwa_type = ipv6_addr_type(gw_addr);
1318 
1319 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1320 			struct rt6_info *grt;
1321 
1322 			/* IPv6 strictly inhibits using non-link-local
1323 			   addresses as nexthop addresses.
1324 			   Otherwise, the router will not be able to send redirects.
1325 			   That is very good, but in some (rare!) circumstances
1326 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1327 			   some exceptions. --ANK
1328 			 */
1329 			err = -EINVAL;
1330 			if (!(gwa_type&IPV6_ADDR_UNICAST))
1331 				goto out;
1332 
1333 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1334 
1335 			err = -EHOSTUNREACH;
1336 			if (grt == NULL)
1337 				goto out;
1338 			if (dev) {
1339 				if (dev != grt->rt6i_dev) {
1340 					dst_release(&grt->dst);
1341 					goto out;
1342 				}
1343 			} else {
1344 				dev = grt->rt6i_dev;
1345 				idev = grt->rt6i_idev;
1346 				dev_hold(dev);
1347 				in6_dev_hold(grt->rt6i_idev);
1348 			}
1349 			if (!(grt->rt6i_flags&RTF_GATEWAY))
1350 				err = 0;
1351 			dst_release(&grt->dst);
1352 
1353 			if (err)
1354 				goto out;
1355 		}
1356 		err = -EINVAL;
1357 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1358 			goto out;
1359 	}
1360 
1361 	err = -ENODEV;
1362 	if (dev == NULL)
1363 		goto out;
1364 
1365 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1366 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1367 			err = -EINVAL;
1368 			goto out;
1369 		}
1370 		ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1371 		rt->rt6i_prefsrc.plen = 128;
1372 	} else
1373 		rt->rt6i_prefsrc.plen = 0;
1374 
1375 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1376 		struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1377 		if (IS_ERR(n)) {
1378 			err = PTR_ERR(n);
1379 			goto out;
1380 		}
1381 		dst_set_neighbour(&rt->dst, n);
1382 	}
1383 
1384 	rt->rt6i_flags = cfg->fc_flags;
1385 
1386 install_route:
1387 	if (cfg->fc_mx) {
1388 		struct nlattr *nla;
1389 		int remaining;
1390 
1391 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1392 			int type = nla_type(nla);
1393 
1394 			if (type) {
1395 				if (type > RTAX_MAX) {
1396 					err = -EINVAL;
1397 					goto out;
1398 				}
1399 
1400 				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1401 			}
1402 		}
1403 	}
1404 
1405 	rt->dst.dev = dev;
1406 	rt->rt6i_idev = idev;
1407 	rt->rt6i_table = table;
1408 
1409 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1410 
1411 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1412 
1413 out:
1414 	if (dev)
1415 		dev_put(dev);
1416 	if (idev)
1417 		in6_dev_put(idev);
1418 	if (rt)
1419 		dst_free(&rt->dst);
1420 	return err;
1421 }
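
/*
 * Illustrative sketch (editorial): a minimal fib6_config for installing an
 * on-link /64 prefix route, in the same style as rt6_add_route_info() and
 * rt6_add_dflt_router() further down.  'net', 'dev' and 'prefix' are the
 * caller's own; error handling is elided.
 *
 *	struct fib6_config cfg = {
 *		.fc_table	= RT6_TABLE_MAIN,
 *		.fc_metric	= IP6_RT_PRIO_USER,
 *		.fc_ifindex	= dev->ifindex,
 *		.fc_dst_len	= 64,
 *		.fc_flags	= RTF_UP,
 *		.fc_nlinfo.nl_net = net,
 *	};
 *
 *	ipv6_addr_copy(&cfg.fc_dst, &prefix);
 *	err = ip6_route_add(&cfg);
 */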
1422 
1423 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1424 {
1425 	int err;
1426 	struct fib6_table *table;
1427 	struct net *net = dev_net(rt->rt6i_dev);
1428 
1429 	if (rt == net->ipv6.ip6_null_entry)
1430 		return -ENOENT;
1431 
1432 	table = rt->rt6i_table;
1433 	write_lock_bh(&table->tb6_lock);
1434 
1435 	err = fib6_del(rt, info);
1436 	dst_release(&rt->dst);
1437 
1438 	write_unlock_bh(&table->tb6_lock);
1439 
1440 	return err;
1441 }
1442 
1443 int ip6_del_rt(struct rt6_info *rt)
1444 {
1445 	struct nl_info info = {
1446 		.nl_net = dev_net(rt->rt6i_dev),
1447 	};
1448 	return __ip6_del_rt(rt, &info);
1449 }
1450 
1451 static int ip6_route_del(struct fib6_config *cfg)
1452 {
1453 	struct fib6_table *table;
1454 	struct fib6_node *fn;
1455 	struct rt6_info *rt;
1456 	int err = -ESRCH;
1457 
1458 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1459 	if (table == NULL)
1460 		return err;
1461 
1462 	read_lock_bh(&table->tb6_lock);
1463 
1464 	fn = fib6_locate(&table->tb6_root,
1465 			 &cfg->fc_dst, cfg->fc_dst_len,
1466 			 &cfg->fc_src, cfg->fc_src_len);
1467 
1468 	if (fn) {
1469 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1470 			if (cfg->fc_ifindex &&
1471 			    (rt->rt6i_dev == NULL ||
1472 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1473 				continue;
1474 			if (cfg->fc_flags & RTF_GATEWAY &&
1475 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1476 				continue;
1477 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1478 				continue;
1479 			dst_hold(&rt->dst);
1480 			read_unlock_bh(&table->tb6_lock);
1481 
1482 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1483 		}
1484 	}
1485 	read_unlock_bh(&table->tb6_lock);
1486 
1487 	return err;
1488 }
1489 
1490 /*
1491  *	Handle redirects
1492  */
1493 struct ip6rd_flowi {
1494 	struct flowi6 fl6;
1495 	struct in6_addr gateway;
1496 };
1497 
1498 static struct rt6_info *__ip6_route_redirect(struct net *net,
1499 					     struct fib6_table *table,
1500 					     struct flowi6 *fl6,
1501 					     int flags)
1502 {
1503 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1504 	struct rt6_info *rt;
1505 	struct fib6_node *fn;
1506 
1507 	/*
1508 	 * Get the "current" route for this destination and
1509 	 * check if the redirect has come from an appropriate router.
1510 	 *
1511 	 * RFC 2461 specifies that redirects should only be
1512 	 * accepted if they come from the nexthop to the target.
1513 	 * Due to the way the routes are chosen, this notion
1514 	 * is a bit fuzzy and one might need to check all possible
1515 	 * routes.
1516 	 */
1517 
1518 	read_lock_bh(&table->tb6_lock);
1519 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1520 restart:
1521 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1522 		/*
1523 		 * Current route is on-link; redirect is always invalid.
1524 		 *
1525 		 * Seems the previous statement is not true. It could
1526 		 * be a node which regards us as on-link (e.g. proxy ndisc),
1527 		 * but then the router serving it might decide that we should
1528 		 * know the truth 8)8) --ANK (980726).
1529 		 */
1530 		if (rt6_check_expired(rt))
1531 			continue;
1532 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1533 			continue;
1534 		if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1535 			continue;
1536 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1537 			continue;
1538 		break;
1539 	}
1540 
1541 	if (!rt)
1542 		rt = net->ipv6.ip6_null_entry;
1543 	BACKTRACK(net, &fl6->saddr);
1544 out:
1545 	dst_hold(&rt->dst);
1546 
1547 	read_unlock_bh(&table->tb6_lock);
1548 
1549 	return rt;
1550 };
1551 
1552 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1553 					   const struct in6_addr *src,
1554 					   const struct in6_addr *gateway,
1555 					   struct net_device *dev)
1556 {
1557 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1558 	struct net *net = dev_net(dev);
1559 	struct ip6rd_flowi rdfl = {
1560 		.fl6 = {
1561 			.flowi6_oif = dev->ifindex,
1562 			.daddr = *dest,
1563 			.saddr = *src,
1564 		},
1565 	};
1566 
1567 	ipv6_addr_copy(&rdfl.gateway, gateway);
1568 
1569 	if (rt6_need_strict(dest))
1570 		flags |= RT6_LOOKUP_F_IFACE;
1571 
1572 	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1573 						   flags, __ip6_route_redirect);
1574 }
1575 
1576 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1577 		  const struct in6_addr *saddr,
1578 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1579 {
1580 	struct rt6_info *rt, *nrt = NULL;
1581 	struct netevent_redirect netevent;
1582 	struct net *net = dev_net(neigh->dev);
1583 
1584 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1585 
1586 	if (rt == net->ipv6.ip6_null_entry) {
1587 		if (net_ratelimit())
1588 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1589 			       "for redirect target\n");
1590 		goto out;
1591 	}
1592 
1593 	/*
1594 	 *	We have finally decided to accept it.
1595 	 */
1596 
1597 	neigh_update(neigh, lladdr, NUD_STALE,
1598 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1599 		     NEIGH_UPDATE_F_OVERRIDE|
1600 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1601 				     NEIGH_UPDATE_F_ISROUTER))
1602 		     );
1603 
1604 	/*
1605 	 * Redirect received -> path was valid.
1606 	 * Look, redirects are sent only in response to data packets,
1607 	 * so that this nexthop apparently is reachable. --ANK
1608 	 */
1609 	dst_confirm(&rt->dst);
1610 
1611 	/* Duplicate redirect: silently ignore. */
1612 	if (neigh == dst_get_neighbour_raw(&rt->dst))
1613 		goto out;
1614 
1615 	nrt = ip6_rt_copy(rt, dest);
1616 	if (nrt == NULL)
1617 		goto out;
1618 
1619 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1620 	if (on_link)
1621 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1622 
1623 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1624 	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1625 
1626 	if (ip6_ins_rt(nrt))
1627 		goto out;
1628 
1629 	netevent.old = &rt->dst;
1630 	netevent.new = &nrt->dst;
1631 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1632 
1633 	if (rt->rt6i_flags&RTF_CACHE) {
1634 		ip6_del_rt(rt);
1635 		return;
1636 	}
1637 
1638 out:
1639 	dst_release(&rt->dst);
1640 }
1641 
1642 /*
1643  *	Handle ICMP "packet too big" messages
1644  *	i.e. Path MTU discovery
1645  */
1646 
1647 static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1648 			     struct net *net, u32 pmtu, int ifindex)
1649 {
1650 	struct rt6_info *rt, *nrt;
1651 	int allfrag = 0;
1652 again:
1653 	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1654 	if (rt == NULL)
1655 		return;
1656 
1657 	if (rt6_check_expired(rt)) {
1658 		ip6_del_rt(rt);
1659 		goto again;
1660 	}
1661 
1662 	if (pmtu >= dst_mtu(&rt->dst))
1663 		goto out;
1664 
1665 	if (pmtu < IPV6_MIN_MTU) {
1666 		/*
1667 		 * According to RFC 2460, when a node receives a Packet Too Big
1668 		 * message reporting a PMTU less than the IPv6 Minimum Link MTU
1669 		 * (1280), the PMTU is set to 1280 and a Fragment header should
1670 		 * always be included from then on.
1671 		 */
1672 		pmtu = IPV6_MIN_MTU;
1673 		allfrag = 1;
1674 	}
1675 
1676 	/* New mtu received -> path was valid.
1677 	   They are sent only in response to data packets,
1678 	   so that this nexthop apparently is reachable. --ANK
1679 	 */
1680 	dst_confirm(&rt->dst);
1681 
1682 	/* Host route. If it is static, it would be better
1683 	   not to override it but to add a new one, so that
1684 	   when the cache entry expires the old PMTU
1685 	   is restored automatically.
1686 	 */
1687 	if (rt->rt6i_flags & RTF_CACHE) {
1688 		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1689 		if (allfrag) {
1690 			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1691 			features |= RTAX_FEATURE_ALLFRAG;
1692 			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1693 		}
1694 		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1695 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1696 		goto out;
1697 	}
1698 
1699 	/* Network route.
1700 	   Two cases are possible:
1701 	   1. It is a connected route. Action: COW it.
1702 	   2. It is a gatewayed or NONEXTHOP route. Action: clone it.
1703 	 */
1704 	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1705 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1706 	else
1707 		nrt = rt6_alloc_clone(rt, daddr);
1708 
1709 	if (nrt) {
1710 		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1711 		if (allfrag) {
1712 			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1713 			features |= RTAX_FEATURE_ALLFRAG;
1714 			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1715 		}
1716 
1717 		/* According to RFC 1981, a PMTU increase should not be probed
1718 		 * for within 5 minutes; the recommended timer is 10 minutes.
1719 		 * Here the route expiration time is set to ip6_rt_mtu_expires,
1720 		 * which is 10 minutes by default. After that the decreased PMTU
1721 		 * entry expires and PMTU increase detection happens automatically.
1722 		 */
1723 		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1724 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1725 
1726 		ip6_ins_rt(nrt);
1727 	}
1728 out:
1729 	dst_release(&rt->dst);
1730 }
1731 
1732 void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1733 			struct net_device *dev, u32 pmtu)
1734 {
1735 	struct net *net = dev_net(dev);
1736 
1737 	/*
1738 	 * RFC 1981 states that a node "MUST reduce the size of the packets it
1739 	 * is sending along the path" that caused the Packet Too Big message.
1740 	 * Since it's not possible in the general case to determine which
1741 	 * interface was used to send the original packet, we update the MTU
1742 	 * on the interface that will be used to send future packets. We also
1743 	 * update the MTU on the interface that received the Packet Too Big in
1744 	 * case the original packet was forced out that interface with
1745 	 * SO_BINDTODEVICE or similar. This is the next best thing to the
1746 	 * correct behaviour, which would be to update the MTU on all
1747 	 * interfaces.
1748 	 */
1749 	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1750 	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1751 }
1752 
1753 /*
1754  *	Misc support functions
1755  */
1756 
1757 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1758 				    const struct in6_addr *dest)
1759 {
1760 	struct net *net = dev_net(ort->rt6i_dev);
1761 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1762 					    ort->dst.dev, 0);
1763 
1764 	if (rt) {
1765 		rt->dst.input = ort->dst.input;
1766 		rt->dst.output = ort->dst.output;
1767 		rt->dst.flags |= DST_HOST;
1768 
1769 		ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
1770 		rt->rt6i_dst.plen = 128;
1771 		dst_copy_metrics(&rt->dst, &ort->dst);
1772 		rt->dst.error = ort->dst.error;
1773 		rt->rt6i_idev = ort->rt6i_idev;
1774 		if (rt->rt6i_idev)
1775 			in6_dev_hold(rt->rt6i_idev);
1776 		rt->dst.lastuse = jiffies;
1777 		rt->rt6i_expires = 0;
1778 
1779 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1780 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1781 		rt->rt6i_metric = 0;
1782 
1783 #ifdef CONFIG_IPV6_SUBTREES
1784 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1785 #endif
1786 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1787 		rt->rt6i_table = ort->rt6i_table;
1788 	}
1789 	return rt;
1790 }
1791 
1792 #ifdef CONFIG_IPV6_ROUTE_INFO
1793 static struct rt6_info *rt6_get_route_info(struct net *net,
1794 					   const struct in6_addr *prefix, int prefixlen,
1795 					   const struct in6_addr *gwaddr, int ifindex)
1796 {
1797 	struct fib6_node *fn;
1798 	struct rt6_info *rt = NULL;
1799 	struct fib6_table *table;
1800 
1801 	table = fib6_get_table(net, RT6_TABLE_INFO);
1802 	if (table == NULL)
1803 		return NULL;
1804 
1805 	write_lock_bh(&table->tb6_lock);
1806 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1807 	if (!fn)
1808 		goto out;
1809 
1810 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1811 		if (rt->rt6i_dev->ifindex != ifindex)
1812 			continue;
1813 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1814 			continue;
1815 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1816 			continue;
1817 		dst_hold(&rt->dst);
1818 		break;
1819 	}
1820 out:
1821 	write_unlock_bh(&table->tb6_lock);
1822 	return rt;
1823 }
1824 
1825 static struct rt6_info *rt6_add_route_info(struct net *net,
1826 					   const struct in6_addr *prefix, int prefixlen,
1827 					   const struct in6_addr *gwaddr, int ifindex,
1828 					   unsigned pref)
1829 {
1830 	struct fib6_config cfg = {
1831 		.fc_table	= RT6_TABLE_INFO,
1832 		.fc_metric	= IP6_RT_PRIO_USER,
1833 		.fc_ifindex	= ifindex,
1834 		.fc_dst_len	= prefixlen,
1835 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1836 				  RTF_UP | RTF_PREF(pref),
1837 		.fc_nlinfo.pid = 0,
1838 		.fc_nlinfo.nlh = NULL,
1839 		.fc_nlinfo.nl_net = net,
1840 	};
1841 
1842 	ipv6_addr_copy(&cfg.fc_dst, prefix);
1843 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1844 
1845 	/* We should treat it as a default route if prefix length is 0. */
1846 	if (!prefixlen)
1847 		cfg.fc_flags |= RTF_DEFAULT;
1848 
1849 	ip6_route_add(&cfg);
1850 
1851 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1852 }
1853 #endif
1854 
1855 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1856 {
1857 	struct rt6_info *rt;
1858 	struct fib6_table *table;
1859 
1860 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1861 	if (table == NULL)
1862 		return NULL;
1863 
1864 	write_lock_bh(&table->tb6_lock);
1865 	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1866 		if (dev == rt->rt6i_dev &&
1867 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1868 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1869 			break;
1870 	}
1871 	if (rt)
1872 		dst_hold(&rt->dst);
1873 	write_unlock_bh(&table->tb6_lock);
1874 	return rt;
1875 }
1876 
1877 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1878 				     struct net_device *dev,
1879 				     unsigned int pref)
1880 {
1881 	struct fib6_config cfg = {
1882 		.fc_table	= RT6_TABLE_DFLT,
1883 		.fc_metric	= IP6_RT_PRIO_USER,
1884 		.fc_ifindex	= dev->ifindex,
1885 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1886 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1887 		.fc_nlinfo.pid = 0,
1888 		.fc_nlinfo.nlh = NULL,
1889 		.fc_nlinfo.nl_net = dev_net(dev),
1890 	};
1891 
1892 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1893 
1894 	ip6_route_add(&cfg);
1895 
1896 	return rt6_get_dflt_router(gwaddr, dev);
1897 }
1898 
1899 void rt6_purge_dflt_routers(struct net *net)
1900 {
1901 	struct rt6_info *rt;
1902 	struct fib6_table *table;
1903 
1904 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1905 	table = fib6_get_table(net, RT6_TABLE_DFLT);
1906 	if (table == NULL)
1907 		return;
1908 
1909 restart:
1910 	read_lock_bh(&table->tb6_lock);
1911 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1912 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1913 			dst_hold(&rt->dst);
1914 			read_unlock_bh(&table->tb6_lock);
1915 			ip6_del_rt(rt);
1916 			goto restart;
1917 		}
1918 	}
1919 	read_unlock_bh(&table->tb6_lock);
1920 }
1921 
1922 static void rtmsg_to_fib6_config(struct net *net,
1923 				 struct in6_rtmsg *rtmsg,
1924 				 struct fib6_config *cfg)
1925 {
1926 	memset(cfg, 0, sizeof(*cfg));
1927 
1928 	cfg->fc_table = RT6_TABLE_MAIN;
1929 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1930 	cfg->fc_metric = rtmsg->rtmsg_metric;
1931 	cfg->fc_expires = rtmsg->rtmsg_info;
1932 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1933 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1934 	cfg->fc_flags = rtmsg->rtmsg_flags;
1935 
1936 	cfg->fc_nlinfo.nl_net = net;
1937 
1938 	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1939 	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1940 	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1941 }
1942 
1943 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1944 {
1945 	struct fib6_config cfg;
1946 	struct in6_rtmsg rtmsg;
1947 	int err;
1948 
1949 	switch(cmd) {
1950 	case SIOCADDRT:		/* Add a route */
1951 	case SIOCDELRT:		/* Delete a route */
1952 		if (!capable(CAP_NET_ADMIN))
1953 			return -EPERM;
1954 		err = copy_from_user(&rtmsg, arg,
1955 				     sizeof(struct in6_rtmsg));
1956 		if (err)
1957 			return -EFAULT;
1958 
1959 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1960 
1961 		rtnl_lock();
1962 		switch (cmd) {
1963 		case SIOCADDRT:
1964 			err = ip6_route_add(&cfg);
1965 			break;
1966 		case SIOCDELRT:
1967 			err = ip6_route_del(&cfg);
1968 			break;
1969 		default:
1970 			err = -EINVAL;
1971 		}
1972 		rtnl_unlock();
1973 
1974 		return err;
1975 	}
1976 
1977 	return -EINVAL;
1978 }
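
/*
 * Illustrative sketch (editorial, userspace side): the legacy ioctls handled
 * above are issued with a struct in6_rtmsg on any IPv6 socket, which is how
 * pre-netlink tools such as route(8) added IPv6 routes.  The prefix, length
 * and interface name are placeholders.
 *
 *	struct in6_rtmsg rtm;
 *	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *
 *	memset(&rtm, 0, sizeof(rtm));
 *	rtm.rtmsg_dst = prefix;			// struct in6_addr
 *	rtm.rtmsg_dst_len = 64;
 *	rtm.rtmsg_ifindex = if_nametoindex("eth0");
 *	rtm.rtmsg_metric = 1;
 *	rtm.rtmsg_flags = RTF_UP;
 *	if (ioctl(fd, SIOCADDRT, &rtm) < 0)
 *		perror("SIOCADDRT");
 */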
1979 
1980 /*
1981  *	Drop the packet on the floor
1982  */
1983 
1984 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1985 {
1986 	int type;
1987 	struct dst_entry *dst = skb_dst(skb);
1988 	switch (ipstats_mib_noroutes) {
1989 	case IPSTATS_MIB_INNOROUTES:
1990 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1991 		if (type == IPV6_ADDR_ANY) {
1992 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1993 				      IPSTATS_MIB_INADDRERRORS);
1994 			break;
1995 		}
1996 		/* FALLTHROUGH */
1997 	case IPSTATS_MIB_OUTNOROUTES:
1998 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1999 			      ipstats_mib_noroutes);
2000 		break;
2001 	}
2002 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2003 	kfree_skb(skb);
2004 	return 0;
2005 }
2006 
2007 static int ip6_pkt_discard(struct sk_buff *skb)
2008 {
2009 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2010 }
2011 
2012 static int ip6_pkt_discard_out(struct sk_buff *skb)
2013 {
2014 	skb->dev = skb_dst(skb)->dev;
2015 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2016 }
2017 
2018 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2019 
2020 static int ip6_pkt_prohibit(struct sk_buff *skb)
2021 {
2022 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2023 }
2024 
2025 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2026 {
2027 	skb->dev = skb_dst(skb)->dev;
2028 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2029 }
2030 
2031 #endif
2032 
2033 /*
2034  *	Allocate a dst for local (unicast / anycast) address.
2035  */
2036 
2037 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2038 				    const struct in6_addr *addr,
2039 				    int anycast)
2040 {
2041 	struct net *net = dev_net(idev->dev);
2042 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2043 					    net->loopback_dev, 0);
2044 	struct neighbour *neigh;
2045 
2046 	if (rt == NULL) {
2047 		if (net_ratelimit())
2048 			pr_warning("IPv6: Maximum number of routes reached,"
2049 				   " consider increasing route/max_size.\n");
2050 		return ERR_PTR(-ENOMEM);
2051 	}
2052 
2053 	in6_dev_hold(idev);
2054 
2055 	rt->dst.flags |= DST_HOST;
2056 	rt->dst.input = ip6_input;
2057 	rt->dst.output = ip6_output;
2058 	rt->rt6i_idev = idev;
2059 	rt->dst.obsolete = -1;
2060 
2061 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2062 	if (anycast)
2063 		rt->rt6i_flags |= RTF_ANYCAST;
2064 	else
2065 		rt->rt6i_flags |= RTF_LOCAL;
2066 	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2067 	if (IS_ERR(neigh)) {
2068 		dst_free(&rt->dst);
2069 
2070 		return ERR_CAST(neigh);
2071 	}
2072 	dst_set_neighbour(&rt->dst, neigh);
2073 
2074 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2075 	rt->rt6i_dst.plen = 128;
2076 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2077 
2078 	atomic_set(&rt->dst.__refcnt, 1);
2079 
2080 	return rt;
2081 }
2082 
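/*
 *	Choose a source address for traffic routed via @rt to @daddr: use the
 *	route's preferred source address if one is set, otherwise fall back
 *	to normal source address selection on the route's device.
 */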
2083 int ip6_route_get_saddr(struct net *net,
2084 			struct rt6_info *rt,
2085 			const struct in6_addr *daddr,
2086 			unsigned int prefs,
2087 			struct in6_addr *saddr)
2088 {
2089 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2090 	int err = 0;
2091 	if (rt->rt6i_prefsrc.plen)
2092 		ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2093 	else
2094 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2095 					 daddr, prefs, saddr);
2096 	return err;
2097 }
2098 
2099 /* Remove a deleted address from routes' prefsrc entries */
2100 struct arg_dev_net_ip {
2101 	struct net_device *dev;
2102 	struct net *net;
2103 	struct in6_addr *addr;
2104 };
2105 
2106 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2107 {
2108 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2109 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2110 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2111 
2112 	if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2113 	    rt != net->ipv6.ip6_null_entry &&
2114 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2115 		/* remove prefsrc entry */
2116 		rt->rt6i_prefsrc.plen = 0;
2117 	}
2118 	return 0;
2119 }
2120 
2121 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2122 {
2123 	struct net *net = dev_net(ifp->idev->dev);
2124 	struct arg_dev_net_ip adni = {
2125 		.dev = ifp->idev->dev,
2126 		.net = net,
2127 		.addr = &ifp->addr,
2128 	};
2129 	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2130 }
2131 
2132 struct arg_dev_net {
2133 	struct net_device *dev;
2134 	struct net *net;
2135 };
2136 
2137 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2138 {
2139 	const struct arg_dev_net *adn = arg;
2140 	const struct net_device *dev = adn->dev;
2141 
2142 	if ((rt->rt6i_dev == dev || dev == NULL) &&
2143 	    rt != adn->net->ipv6.ip6_null_entry) {
2144 		RT6_TRACE("deleted by ifdown %p\n", rt);
2145 		return -1;
2146 	}
2147 	return 0;
2148 }
2149 
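/*
 *	Called when a device goes down: remove every route (and cached ICMPv6
 *	dst) that uses @dev; a NULL @dev flushes all routes except the null
 *	entry.
 */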
2150 void rt6_ifdown(struct net *net, struct net_device *dev)
2151 {
2152 	struct arg_dev_net adn = {
2153 		.dev = dev,
2154 		.net = net,
2155 	};
2156 
2157 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2158 	icmp6_clean_all(fib6_ifdown, &adn);
2159 }
2160 
2161 struct rt6_mtu_change_arg
2162 {
2163 	struct net_device *dev;
2164 	unsigned mtu;
2165 };
2166 
2167 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2168 {
2169 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2170 	struct inet6_dev *idev;
2171 
2172 	/* In IPv6, PMTU discovery is not optional, so an RTAX_MTU
2173 	   lock cannot disable it.
2174 	   We still use the lock to block changes
2175 	   caused by addrconf/ndisc.
2176 	*/
2177 
2178 	idev = __in6_dev_get(arg->dev);
2179 	if (idev == NULL)
2180 		return 0;
2181 
2182 	/* An administrative MTU increase cannot be discovered by IPv6
2183 	   PMTU discovery, so the PMTU increase must be applied here.
2184 	   Since RFC 1981 does not cover administrative MTU increases
2185 	   (e.g. jumbo frames), updating for the increase is a MUST.
2186 	 */
2187 	/*
2188 	   If the new MTU is less than the route PMTU, the new MTU is the
2189 	   lowest MTU on the path; update the route PMTU to reflect the
2190 	   decrease.  If the new MTU is greater than the route PMTU and the
2191 	   old MTU was the lowest MTU on the path, update the route PMTU to
2192 	   reflect the increase.  In that case, if another node on the path
2193 	   still has the lowest MTU, its Packet Too Big messages will trigger
2194 	   PMTU discovery again.
2195 	 */
2196 	if (rt->rt6i_dev == arg->dev &&
2197 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2198 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2199 	     (dst_mtu(&rt->dst) < arg->mtu &&
2200 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2201 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2202 	}
2203 	return 0;
2204 }
2205 
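/*
 *	Propagate a device MTU change to the routes using that device, as
 *	described in rt6_mtu_change_route() above.
 */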
2206 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2207 {
2208 	struct rt6_mtu_change_arg arg = {
2209 		.dev = dev,
2210 		.mtu = mtu,
2211 	};
2212 
2213 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2214 }
2215 
2216 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2217 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2218 	[RTA_OIF]               = { .type = NLA_U32 },
2219 	[RTA_IIF]		= { .type = NLA_U32 },
2220 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2221 	[RTA_METRICS]           = { .type = NLA_NESTED },
2222 };
2223 
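/*
 *	Parse an RTM_NEWROUTE/RTM_DELROUTE netlink request into a fib6_config.
 */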
2224 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2225 			      struct fib6_config *cfg)
2226 {
2227 	struct rtmsg *rtm;
2228 	struct nlattr *tb[RTA_MAX+1];
2229 	int err;
2230 
2231 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2232 	if (err < 0)
2233 		goto errout;
2234 
2235 	err = -EINVAL;
2236 	rtm = nlmsg_data(nlh);
2237 	memset(cfg, 0, sizeof(*cfg));
2238 
2239 	cfg->fc_table = rtm->rtm_table;
2240 	cfg->fc_dst_len = rtm->rtm_dst_len;
2241 	cfg->fc_src_len = rtm->rtm_src_len;
2242 	cfg->fc_flags = RTF_UP;
2243 	cfg->fc_protocol = rtm->rtm_protocol;
2244 
2245 	if (rtm->rtm_type == RTN_UNREACHABLE)
2246 		cfg->fc_flags |= RTF_REJECT;
2247 
2248 	if (rtm->rtm_type == RTN_LOCAL)
2249 		cfg->fc_flags |= RTF_LOCAL;
2250 
2251 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2252 	cfg->fc_nlinfo.nlh = nlh;
2253 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2254 
2255 	if (tb[RTA_GATEWAY]) {
2256 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2257 		cfg->fc_flags |= RTF_GATEWAY;
2258 	}
2259 
2260 	if (tb[RTA_DST]) {
2261 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2262 
2263 		if (nla_len(tb[RTA_DST]) < plen)
2264 			goto errout;
2265 
2266 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2267 	}
2268 
2269 	if (tb[RTA_SRC]) {
2270 		int plen = (rtm->rtm_src_len + 7) >> 3;
2271 
2272 		if (nla_len(tb[RTA_SRC]) < plen)
2273 			goto errout;
2274 
2275 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2276 	}
2277 
2278 	if (tb[RTA_PREFSRC])
2279 		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2280 
2281 	if (tb[RTA_OIF])
2282 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2283 
2284 	if (tb[RTA_PRIORITY])
2285 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2286 
2287 	if (tb[RTA_METRICS]) {
2288 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2289 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2290 	}
2291 
2292 	if (tb[RTA_TABLE])
2293 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2294 
2295 	err = 0;
2296 errout:
2297 	return err;
2298 }
2299 
2300 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2301 {
2302 	struct fib6_config cfg;
2303 	int err;
2304 
2305 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2306 	if (err < 0)
2307 		return err;
2308 
2309 	return ip6_route_del(&cfg);
2310 }
2311 
2312 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2313 {
2314 	struct fib6_config cfg;
2315 	int err;
2316 
2317 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2318 	if (err < 0)
2319 		return err;
2320 
2321 	return ip6_route_add(&cfg);
2322 }
2323 
2324 static inline size_t rt6_nlmsg_size(void)
2325 {
2326 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2327 	       + nla_total_size(16) /* RTA_SRC */
2328 	       + nla_total_size(16) /* RTA_DST */
2329 	       + nla_total_size(16) /* RTA_GATEWAY */
2330 	       + nla_total_size(16) /* RTA_PREFSRC */
2331 	       + nla_total_size(4) /* RTA_TABLE */
2332 	       + nla_total_size(4) /* RTA_IIF */
2333 	       + nla_total_size(4) /* RTA_OIF */
2334 	       + nla_total_size(4) /* RTA_PRIORITY */
2335 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2336 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2337 }
2338 
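/*
 *	Fill one route message for @rt into @skb.  Returns the result of
 *	nlmsg_end() on success, 1 when a prefix-only dump skips a non-prefix
 *	route, and -EMSGSIZE if the skb has no room left.
 */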
2339 static int rt6_fill_node(struct net *net,
2340 			 struct sk_buff *skb, struct rt6_info *rt,
2341 			 struct in6_addr *dst, struct in6_addr *src,
2342 			 int iif, int type, u32 pid, u32 seq,
2343 			 int prefix, int nowait, unsigned int flags)
2344 {
2345 	struct rtmsg *rtm;
2346 	struct nlmsghdr *nlh;
2347 	long expires;
2348 	u32 table;
2349 	struct neighbour *n;
2350 
2351 	if (prefix) {	/* user wants prefix routes only */
2352 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2353 			/* success since this is not a prefix route */
2354 			return 1;
2355 		}
2356 	}
2357 
2358 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2359 	if (nlh == NULL)
2360 		return -EMSGSIZE;
2361 
2362 	rtm = nlmsg_data(nlh);
2363 	rtm->rtm_family = AF_INET6;
2364 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2365 	rtm->rtm_src_len = rt->rt6i_src.plen;
2366 	rtm->rtm_tos = 0;
2367 	if (rt->rt6i_table)
2368 		table = rt->rt6i_table->tb6_id;
2369 	else
2370 		table = RT6_TABLE_UNSPEC;
2371 	rtm->rtm_table = table;
2372 	NLA_PUT_U32(skb, RTA_TABLE, table);
2373 	if (rt->rt6i_flags&RTF_REJECT)
2374 		rtm->rtm_type = RTN_UNREACHABLE;
2375 	else if (rt->rt6i_flags&RTF_LOCAL)
2376 		rtm->rtm_type = RTN_LOCAL;
2377 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2378 		rtm->rtm_type = RTN_LOCAL;
2379 	else
2380 		rtm->rtm_type = RTN_UNICAST;
2381 	rtm->rtm_flags = 0;
2382 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2383 	rtm->rtm_protocol = rt->rt6i_protocol;
2384 	if (rt->rt6i_flags&RTF_DYNAMIC)
2385 		rtm->rtm_protocol = RTPROT_REDIRECT;
2386 	else if (rt->rt6i_flags & RTF_ADDRCONF)
2387 		rtm->rtm_protocol = RTPROT_KERNEL;
2388 	else if (rt->rt6i_flags&RTF_DEFAULT)
2389 		rtm->rtm_protocol = RTPROT_RA;
2390 
2391 	if (rt->rt6i_flags&RTF_CACHE)
2392 		rtm->rtm_flags |= RTM_F_CLONED;
2393 
2394 	if (dst) {
2395 		NLA_PUT(skb, RTA_DST, 16, dst);
2396 		rtm->rtm_dst_len = 128;
2397 	} else if (rtm->rtm_dst_len)
2398 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2399 #ifdef CONFIG_IPV6_SUBTREES
2400 	if (src) {
2401 		NLA_PUT(skb, RTA_SRC, 16, src);
2402 		rtm->rtm_src_len = 128;
2403 	} else if (rtm->rtm_src_len)
2404 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2405 #endif
2406 	if (iif) {
2407 #ifdef CONFIG_IPV6_MROUTE
2408 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2409 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2410 			if (err <= 0) {
2411 				if (!nowait) {
2412 					if (err == 0)
2413 						return 0;
2414 					goto nla_put_failure;
2415 				} else {
2416 					if (err == -EMSGSIZE)
2417 						goto nla_put_failure;
2418 				}
2419 			}
2420 		} else
2421 #endif
2422 			NLA_PUT_U32(skb, RTA_IIF, iif);
2423 	} else if (dst) {
2424 		struct in6_addr saddr_buf;
2425 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2426 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2427 	}
2428 
2429 	if (rt->rt6i_prefsrc.plen) {
2430 		struct in6_addr saddr_buf;
2431 		ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2432 		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2433 	}
2434 
2435 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2436 		goto nla_put_failure;
2437 
2438 	rcu_read_lock();
2439 	n = dst_get_neighbour(&rt->dst);
2440 	if (n)
2441 		NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2442 	rcu_read_unlock();
2443 
2444 	if (rt->dst.dev)
2445 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2446 
2447 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2448 
2449 	if (!(rt->rt6i_flags & RTF_EXPIRES))
2450 		expires = 0;
2451 	else if (rt->rt6i_expires - jiffies < INT_MAX)
2452 		expires = rt->rt6i_expires - jiffies;
2453 	else
2454 		expires = INT_MAX;
2455 
2456 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2457 			       expires, rt->dst.error) < 0)
2458 		goto nla_put_failure;
2459 
2460 	return nlmsg_end(skb, nlh);
2461 
2462 nla_put_failure:
2463 	nlmsg_cancel(skb, nlh);
2464 	return -EMSGSIZE;
2465 }
2466 
2467 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2468 {
2469 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2470 	int prefix;
2471 
2472 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2473 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2474 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2475 	} else
2476 		prefix = 0;
2477 
2478 	return rt6_fill_node(arg->net,
2479 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2480 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2481 		     prefix, 0, NLM_F_MULTI);
2482 }
2483 
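/*
 *	Non-dump RTM_GETROUTE handler: build a flow from the request, look up
 *	the route and unicast the answer back to the requester.
 */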
2484 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2485 {
2486 	struct net *net = sock_net(in_skb->sk);
2487 	struct nlattr *tb[RTA_MAX+1];
2488 	struct rt6_info *rt;
2489 	struct sk_buff *skb;
2490 	struct rtmsg *rtm;
2491 	struct flowi6 fl6;
2492 	int err, iif = 0;
2493 
2494 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2495 	if (err < 0)
2496 		goto errout;
2497 
2498 	err = -EINVAL;
2499 	memset(&fl6, 0, sizeof(fl6));
2500 
2501 	if (tb[RTA_SRC]) {
2502 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2503 			goto errout;
2504 
2505 		ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
2506 	}
2507 
2508 	if (tb[RTA_DST]) {
2509 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2510 			goto errout;
2511 
2512 		ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
2513 	}
2514 
2515 	if (tb[RTA_IIF])
2516 		iif = nla_get_u32(tb[RTA_IIF]);
2517 
2518 	if (tb[RTA_OIF])
2519 		fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2520 
2521 	if (iif) {
2522 		struct net_device *dev;
2523 		dev = __dev_get_by_index(net, iif);
2524 		if (!dev) {
2525 			err = -ENODEV;
2526 			goto errout;
2527 		}
2528 	}
2529 
2530 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2531 	if (skb == NULL) {
2532 		err = -ENOBUFS;
2533 		goto errout;
2534 	}
2535 
2536 	/* Reserve room for dummy headers; this skb can pass
2537 	   through a good chunk of the routing engine.
2538 	 */
2539 	skb_reset_mac_header(skb);
2540 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2541 
2542 	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2543 	skb_dst_set(skb, &rt->dst);
2544 
2545 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2546 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2547 			    nlh->nlmsg_seq, 0, 0, 0);
2548 	if (err < 0) {
2549 		kfree_skb(skb);
2550 		goto errout;
2551 	}
2552 
2553 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2554 errout:
2555 	return err;
2556 }
2557 
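/*
 *	Notify RTNLGRP_IPV6_ROUTE listeners about a route change (@event is
 *	RTM_NEWROUTE or RTM_DELROUTE).
 */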
2558 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2559 {
2560 	struct sk_buff *skb;
2561 	struct net *net = info->nl_net;
2562 	u32 seq;
2563 	int err;
2564 
2565 	err = -ENOBUFS;
2566 	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2567 
2568 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2569 	if (skb == NULL)
2570 		goto errout;
2571 
2572 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2573 				event, info->pid, seq, 0, 0, 0);
2574 	if (err < 0) {
2575 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2576 		WARN_ON(err == -EMSGSIZE);
2577 		kfree_skb(skb);
2578 		goto errout;
2579 	}
2580 	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2581 		    info->nlh, gfp_any());
2582 	return;
2583 errout:
2584 	if (err < 0)
2585 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2586 }
2587 
2588 static int ip6_route_dev_notify(struct notifier_block *this,
2589 				unsigned long event, void *data)
2590 {
2591 	struct net_device *dev = (struct net_device *)data;
2592 	struct net *net = dev_net(dev);
2593 
2594 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2595 		net->ipv6.ip6_null_entry->dst.dev = dev;
2596 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2597 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2598 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2599 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2600 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2601 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2602 #endif
2603 	}
2604 
2605 	return NOTIFY_OK;
2606 }
2607 
2608 /*
2609  *	/proc
2610  */
2611 
2612 #ifdef CONFIG_PROC_FS
2613 
2614 struct rt6_proc_arg
2615 {
2616 	char *buffer;
2617 	int offset;
2618 	int length;
2619 	int skip;
2620 	int len;
2621 };
2622 
2623 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2624 {
2625 	struct seq_file *m = p_arg;
2626 	struct neighbour *n;
2627 
2628 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2629 
2630 #ifdef CONFIG_IPV6_SUBTREES
2631 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2632 #else
2633 	seq_puts(m, "00000000000000000000000000000000 00 ");
2634 #endif
2635 	rcu_read_lock();
2636 	n = dst_get_neighbour(&rt->dst);
2637 	if (n) {
2638 		seq_printf(m, "%pi6", n->primary_key);
2639 	} else {
2640 		seq_puts(m, "00000000000000000000000000000000");
2641 	}
2642 	rcu_read_unlock();
2643 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2644 		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2645 		   rt->dst.__use, rt->rt6i_flags,
2646 		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
2647 	return 0;
2648 }
2649 
2650 static int ipv6_route_show(struct seq_file *m, void *v)
2651 {
2652 	struct net *net = (struct net *)m->private;
2653 	fib6_clean_all(net, rt6_info_route, 0, m);
2654 	return 0;
2655 }
2656 
2657 static int ipv6_route_open(struct inode *inode, struct file *file)
2658 {
2659 	return single_open_net(inode, file, ipv6_route_show);
2660 }
2661 
2662 static const struct file_operations ipv6_route_proc_fops = {
2663 	.owner		= THIS_MODULE,
2664 	.open		= ipv6_route_open,
2665 	.read		= seq_read,
2666 	.llseek		= seq_lseek,
2667 	.release	= single_release_net,
2668 };
2669 
2670 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2671 {
2672 	struct net *net = (struct net *)seq->private;
2673 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2674 		   net->ipv6.rt6_stats->fib_nodes,
2675 		   net->ipv6.rt6_stats->fib_route_nodes,
2676 		   net->ipv6.rt6_stats->fib_rt_alloc,
2677 		   net->ipv6.rt6_stats->fib_rt_entries,
2678 		   net->ipv6.rt6_stats->fib_rt_cache,
2679 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2680 		   net->ipv6.rt6_stats->fib_discarded_routes);
2681 
2682 	return 0;
2683 }
2684 
2685 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2686 {
2687 	return single_open_net(inode, file, rt6_stats_seq_show);
2688 }
2689 
2690 static const struct file_operations rt6_stats_seq_fops = {
2691 	.owner	 = THIS_MODULE,
2692 	.open	 = rt6_stats_seq_open,
2693 	.read	 = seq_read,
2694 	.llseek	 = seq_lseek,
2695 	.release = single_release_net,
2696 };
2697 #endif	/* CONFIG_PROC_FS */
2698 
2699 #ifdef CONFIG_SYSCTL
2700 
2701 static
2702 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2703 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2704 {
2705 	struct net *net;
2706 	int delay;
2707 	if (!write)
2708 		return -EINVAL;
2709 
2710 	net = (struct net *)ctl->extra1;
2711 	delay = net->ipv6.sysctl.flush_delay;
2712 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2713 	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2714 	return 0;
2715 }
2716 
2717 ctl_table ipv6_route_table_template[] = {
2718 	{
2719 		.procname	=	"flush",
2720 		.data		=	&init_net.ipv6.sysctl.flush_delay,
2721 		.maxlen		=	sizeof(int),
2722 		.mode		=	0200,
2723 		.proc_handler	=	ipv6_sysctl_rtcache_flush
2724 	},
2725 	{
2726 		.procname	=	"gc_thresh",
2727 		.data		=	&ip6_dst_ops_template.gc_thresh,
2728 		.maxlen		=	sizeof(int),
2729 		.mode		=	0644,
2730 		.proc_handler	=	proc_dointvec,
2731 	},
2732 	{
2733 		.procname	=	"max_size",
2734 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2735 		.maxlen		=	sizeof(int),
2736 		.mode		=	0644,
2737 		.proc_handler	=	proc_dointvec,
2738 	},
2739 	{
2740 		.procname	=	"gc_min_interval",
2741 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2742 		.maxlen		=	sizeof(int),
2743 		.mode		=	0644,
2744 		.proc_handler	=	proc_dointvec_jiffies,
2745 	},
2746 	{
2747 		.procname	=	"gc_timeout",
2748 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2749 		.maxlen		=	sizeof(int),
2750 		.mode		=	0644,
2751 		.proc_handler	=	proc_dointvec_jiffies,
2752 	},
2753 	{
2754 		.procname	=	"gc_interval",
2755 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2756 		.maxlen		=	sizeof(int),
2757 		.mode		=	0644,
2758 		.proc_handler	=	proc_dointvec_jiffies,
2759 	},
2760 	{
2761 		.procname	=	"gc_elasticity",
2762 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2763 		.maxlen		=	sizeof(int),
2764 		.mode		=	0644,
2765 		.proc_handler	=	proc_dointvec,
2766 	},
2767 	{
2768 		.procname	=	"mtu_expires",
2769 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2770 		.maxlen		=	sizeof(int),
2771 		.mode		=	0644,
2772 		.proc_handler	=	proc_dointvec_jiffies,
2773 	},
2774 	{
2775 		.procname	=	"min_adv_mss",
2776 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2777 		.maxlen		=	sizeof(int),
2778 		.mode		=	0644,
2779 		.proc_handler	=	proc_dointvec,
2780 	},
2781 	{
2782 		.procname	=	"gc_min_interval_ms",
2783 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2784 		.maxlen		=	sizeof(int),
2785 		.mode		=	0644,
2786 		.proc_handler	=	proc_dointvec_ms_jiffies,
2787 	},
2788 	{ }
2789 };
2790 
2791 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2792 {
2793 	struct ctl_table *table;
2794 
2795 	table = kmemdup(ipv6_route_table_template,
2796 			sizeof(ipv6_route_table_template),
2797 			GFP_KERNEL);
2798 
2799 	if (table) {
2800 		table[0].data = &net->ipv6.sysctl.flush_delay;
2801 		table[0].extra1 = net;
2802 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2803 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2804 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2805 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2806 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2807 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2808 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2809 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2810 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2811 	}
2812 
2813 	return table;
2814 }
2815 #endif
2816 
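/*
 *	Per-namespace setup: clone the dst_ops and the special route templates
 *	(null, plus prohibit/blackhole when multiple tables are enabled), set
 *	the sysctl defaults and create the /proc entries.
 */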
2817 static int __net_init ip6_route_net_init(struct net *net)
2818 {
2819 	int ret = -ENOMEM;
2820 
2821 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2822 	       sizeof(net->ipv6.ip6_dst_ops));
2823 
2824 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2825 		goto out_ip6_dst_ops;
2826 
2827 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2828 					   sizeof(*net->ipv6.ip6_null_entry),
2829 					   GFP_KERNEL);
2830 	if (!net->ipv6.ip6_null_entry)
2831 		goto out_ip6_dst_entries;
2832 	net->ipv6.ip6_null_entry->dst.path =
2833 		(struct dst_entry *)net->ipv6.ip6_null_entry;
2834 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2835 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2836 			 ip6_template_metrics, true);
2837 
2838 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2839 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2840 					       sizeof(*net->ipv6.ip6_prohibit_entry),
2841 					       GFP_KERNEL);
2842 	if (!net->ipv6.ip6_prohibit_entry)
2843 		goto out_ip6_null_entry;
2844 	net->ipv6.ip6_prohibit_entry->dst.path =
2845 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2846 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2847 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2848 			 ip6_template_metrics, true);
2849 
2850 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2851 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2852 					       GFP_KERNEL);
2853 	if (!net->ipv6.ip6_blk_hole_entry)
2854 		goto out_ip6_prohibit_entry;
2855 	net->ipv6.ip6_blk_hole_entry->dst.path =
2856 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2857 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2858 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2859 			 ip6_template_metrics, true);
2860 #endif
2861 
2862 	net->ipv6.sysctl.flush_delay = 0;
2863 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2864 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2865 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2866 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2867 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2868 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2869 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2870 
2871 #ifdef CONFIG_PROC_FS
2872 	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2873 	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2874 #endif
2875 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2876 
2877 	ret = 0;
2878 out:
2879 	return ret;
2880 
2881 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2882 out_ip6_prohibit_entry:
2883 	kfree(net->ipv6.ip6_prohibit_entry);
2884 out_ip6_null_entry:
2885 	kfree(net->ipv6.ip6_null_entry);
2886 #endif
2887 out_ip6_dst_entries:
2888 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2889 out_ip6_dst_ops:
2890 	goto out;
2891 }
2892 
2893 static void __net_exit ip6_route_net_exit(struct net *net)
2894 {
2895 #ifdef CONFIG_PROC_FS
2896 	proc_net_remove(net, "ipv6_route");
2897 	proc_net_remove(net, "rt6_stats");
2898 #endif
2899 	kfree(net->ipv6.ip6_null_entry);
2900 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2901 	kfree(net->ipv6.ip6_prohibit_entry);
2902 	kfree(net->ipv6.ip6_blk_hole_entry);
2903 #endif
2904 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2905 }
2906 
2907 static struct pernet_operations ip6_route_net_ops = {
2908 	.init = ip6_route_net_init,
2909 	.exit = ip6_route_net_exit,
2910 };
2911 
2912 static struct notifier_block ip6_route_dev_notifier = {
2913 	.notifier_call = ip6_route_dev_notify,
2914 	.priority = 0,
2915 };
2916 
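/*
 *	Subsystem init: create the dst cache, register the pernet operations,
 *	the FIB/xfrm/rule code, the rtnetlink handlers and the netdevice
 *	notifier.  Each failure path unwinds the previous steps in reverse.
 */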
2917 int __init ip6_route_init(void)
2918 {
2919 	int ret;
2920 
2921 	ret = -ENOMEM;
2922 	ip6_dst_ops_template.kmem_cachep =
2923 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2924 				  SLAB_HWCACHE_ALIGN, NULL);
2925 	if (!ip6_dst_ops_template.kmem_cachep)
2926 		goto out;
2927 
2928 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
2929 	if (ret)
2930 		goto out_kmem_cache;
2931 
2932 	ret = register_pernet_subsys(&ip6_route_net_ops);
2933 	if (ret)
2934 		goto out_dst_entries;
2935 
2936 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2937 
2938 	/* The loopback device is registered before this code runs, so the
2939 	 * loopback reference in rt6_info is not taken automatically; take
2940 	 * it manually for init_net. */
2941 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2942 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2943 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2944 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2945 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2946 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2947 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2948 #endif
2949 	ret = fib6_init();
2950 	if (ret)
2951 		goto out_register_subsys;
2952 
2953 	ret = xfrm6_init();
2954 	if (ret)
2955 		goto out_fib6_init;
2956 
2957 	ret = fib6_rules_init();
2958 	if (ret)
2959 		goto xfrm6_init;
2960 
2961 	ret = -ENOBUFS;
2962 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2963 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2964 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
2965 		goto fib6_rules_init;
2966 
2967 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2968 	if (ret)
2969 		goto fib6_rules_init;
2970 
2971 out:
2972 	return ret;
2973 
2974 fib6_rules_init:
2975 	fib6_rules_cleanup();
2976 xfrm6_init:
2977 	xfrm6_fini();
2978 out_fib6_init:
2979 	fib6_gc_cleanup();
2980 out_register_subsys:
2981 	unregister_pernet_subsys(&ip6_route_net_ops);
2982 out_dst_entries:
2983 	dst_entries_destroy(&ip6_dst_blackhole_ops);
2984 out_kmem_cache:
2985 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2986 	goto out;
2987 }
2988 
2989 void ip6_route_cleanup(void)
2990 {
2991 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
2992 	fib6_rules_cleanup();
2993 	xfrm6_fini();
2994 	fib6_gc_cleanup();
2995 	unregister_pernet_subsys(&ip6_route_net_ops);
2996 	dst_entries_destroy(&ip6_dst_blackhole_ops);
2997 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2998 }
2999