xref: /openbmc/linux/net/ipv6/route.c (revision d78c317f)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/export.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/mroute6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <linux/nsproxy.h>
44 #include <linux/slab.h>
45 #include <net/net_namespace.h>
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56 #include <net/netevent.h>
57 #include <net/netlink.h>
58 
59 #include <asm/uaccess.h>
60 
61 #ifdef CONFIG_SYSCTL
62 #include <linux/sysctl.h>
63 #endif
64 
65 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
66 				    const struct in6_addr *dest);
67 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
68 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
69 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
70 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
71 static void		ip6_dst_destroy(struct dst_entry *);
72 static void		ip6_dst_ifdown(struct dst_entry *,
73 				       struct net_device *dev, int how);
74 static int		 ip6_dst_gc(struct dst_ops *ops);
75 
76 static int		ip6_pkt_discard(struct sk_buff *skb);
77 static int		ip6_pkt_discard_out(struct sk_buff *skb);
78 static void		ip6_link_failure(struct sk_buff *skb);
79 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
80 
81 #ifdef CONFIG_IPV6_ROUTE_INFO
82 static struct rt6_info *rt6_add_route_info(struct net *net,
83 					   const struct in6_addr *prefix, int prefixlen,
84 					   const struct in6_addr *gwaddr, int ifindex,
85 					   unsigned pref);
86 static struct rt6_info *rt6_get_route_info(struct net *net,
87 					   const struct in6_addr *prefix, int prefixlen,
88 					   const struct in6_addr *gwaddr, int ifindex);
89 #endif
90 
91 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
92 {
93 	struct rt6_info *rt = (struct rt6_info *) dst;
94 	struct inet_peer *peer;
95 	u32 *p = NULL;
96 
97 	if (!(rt->dst.flags & DST_HOST))
98 		return NULL;
99 
100 	if (!rt->rt6i_peer)
101 		rt6_bind_peer(rt, 1);
102 
103 	peer = rt->rt6i_peer;
104 	if (peer) {
105 		u32 *old_p = __DST_METRICS_PTR(old);
106 		unsigned long prev, new;
107 
108 		p = peer->metrics;
109 		if (inet_metrics_new(peer))
110 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111 
112 		new = (unsigned long) p;
113 		prev = cmpxchg(&dst->_metrics, old, new);
114 
115 		if (prev != old) {
116 			p = __DST_METRICS_PTR(prev);
117 			if (prev & DST_METRICS_READ_ONLY)
118 				p = NULL;
119 		}
120 	}
121 	return p;
122 }
123 
124 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
125 {
126 	struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
127 	if (n)
128 		return n;
129 	return neigh_create(&nd_tbl, daddr, dst->dev);
130 }
131 
132 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
133 {
134 	struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
135 	if (!n) {
136 		n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
137 		if (IS_ERR(n))
138 			return PTR_ERR(n);
139 	}
140 	dst_set_neighbour(&rt->dst, n);
141 
142 	return 0;
143 }
144 
145 static struct dst_ops ip6_dst_ops_template = {
146 	.family			=	AF_INET6,
147 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
148 	.gc			=	ip6_dst_gc,
149 	.gc_thresh		=	1024,
150 	.check			=	ip6_dst_check,
151 	.default_advmss		=	ip6_default_advmss,
152 	.mtu			=	ip6_mtu,
153 	.cow_metrics		=	ipv6_cow_metrics,
154 	.destroy		=	ip6_dst_destroy,
155 	.ifdown			=	ip6_dst_ifdown,
156 	.negative_advice	=	ip6_negative_advice,
157 	.link_failure		=	ip6_link_failure,
158 	.update_pmtu		=	ip6_rt_update_pmtu,
159 	.local_out		=	__ip6_local_out,
160 	.neigh_lookup		=	ip6_neigh_lookup,
161 };
162 
163 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
164 {
165 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
166 
167 	return mtu ? : dst->dev->mtu;
168 }
169 
170 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
171 {
172 }
173 
174 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
175 					 unsigned long old)
176 {
177 	return NULL;
178 }
179 
180 static struct dst_ops ip6_dst_blackhole_ops = {
181 	.family			=	AF_INET6,
182 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
183 	.destroy		=	ip6_dst_destroy,
184 	.check			=	ip6_dst_check,
185 	.mtu			=	ip6_blackhole_mtu,
186 	.default_advmss		=	ip6_default_advmss,
187 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
188 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
189 	.neigh_lookup		=	ip6_neigh_lookup,
190 };
191 
192 static const u32 ip6_template_metrics[RTAX_MAX] = {
193 	[RTAX_HOPLIMIT - 1] = 255,
194 };
195 
196 static struct rt6_info ip6_null_entry_template = {
197 	.dst = {
198 		.__refcnt	= ATOMIC_INIT(1),
199 		.__use		= 1,
200 		.obsolete	= -1,
201 		.error		= -ENETUNREACH,
202 		.input		= ip6_pkt_discard,
203 		.output		= ip6_pkt_discard_out,
204 	},
205 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
206 	.rt6i_protocol  = RTPROT_KERNEL,
207 	.rt6i_metric	= ~(u32) 0,
208 	.rt6i_ref	= ATOMIC_INIT(1),
209 };
210 
211 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
212 
213 static int ip6_pkt_prohibit(struct sk_buff *skb);
214 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
215 
216 static struct rt6_info ip6_prohibit_entry_template = {
217 	.dst = {
218 		.__refcnt	= ATOMIC_INIT(1),
219 		.__use		= 1,
220 		.obsolete	= -1,
221 		.error		= -EACCES,
222 		.input		= ip6_pkt_prohibit,
223 		.output		= ip6_pkt_prohibit_out,
224 	},
225 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
226 	.rt6i_protocol  = RTPROT_KERNEL,
227 	.rt6i_metric	= ~(u32) 0,
228 	.rt6i_ref	= ATOMIC_INIT(1),
229 };
230 
231 static struct rt6_info ip6_blk_hole_entry_template = {
232 	.dst = {
233 		.__refcnt	= ATOMIC_INIT(1),
234 		.__use		= 1,
235 		.obsolete	= -1,
236 		.error		= -EINVAL,
237 		.input		= dst_discard,
238 		.output		= dst_discard,
239 	},
240 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
241 	.rt6i_protocol  = RTPROT_KERNEL,
242 	.rt6i_metric	= ~(u32) 0,
243 	.rt6i_ref	= ATOMIC_INIT(1),
244 };
245 
246 #endif
247 
248 /* allocate dst with ip6_dst_ops */
249 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
250 					     struct net_device *dev,
251 					     int flags)
252 {
253 	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
254 
255 	if (rt)
256 		memset(&rt->rt6i_table, 0,
257 		       sizeof(*rt) - sizeof(struct dst_entry));
258 
259 	return rt;
260 }
261 
262 static void ip6_dst_destroy(struct dst_entry *dst)
263 {
264 	struct rt6_info *rt = (struct rt6_info *)dst;
265 	struct inet6_dev *idev = rt->rt6i_idev;
266 	struct inet_peer *peer = rt->rt6i_peer;
267 
268 	if (!(rt->dst.flags & DST_HOST))
269 		dst_destroy_metrics_generic(dst);
270 
271 	if (idev) {
272 		rt->rt6i_idev = NULL;
273 		in6_dev_put(idev);
274 	}
275 	if (peer) {
276 		rt->rt6i_peer = NULL;
277 		inet_putpeer(peer);
278 	}
279 }
280 
281 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
282 
283 static u32 rt6_peer_genid(void)
284 {
285 	return atomic_read(&__rt6_peer_genid);
286 }
287 
288 void rt6_bind_peer(struct rt6_info *rt, int create)
289 {
290 	struct inet_peer *peer;
291 
292 	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
293 	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
294 		inet_putpeer(peer);
295 	else
296 		rt->rt6i_peer_genid = rt6_peer_genid();
297 }
298 
299 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
300 			   int how)
301 {
302 	struct rt6_info *rt = (struct rt6_info *)dst;
303 	struct inet6_dev *idev = rt->rt6i_idev;
304 	struct net_device *loopback_dev =
305 		dev_net(dev)->loopback_dev;
306 
307 	if (dev != loopback_dev && idev && idev->dev == dev) {
308 		struct inet6_dev *loopback_idev =
309 			in6_dev_get(loopback_dev);
310 		if (loopback_idev) {
311 			rt->rt6i_idev = loopback_idev;
312 			in6_dev_put(idev);
313 		}
314 	}
315 }
316 
317 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
318 {
319 	return (rt->rt6i_flags & RTF_EXPIRES) &&
320 		time_after(jiffies, rt->dst.expires);
321 }
322 
323 static inline int rt6_need_strict(const struct in6_addr *daddr)
324 {
325 	return ipv6_addr_type(daddr) &
326 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
327 }
328 
329 /*
330  *	Route lookup. Any table->tb6_lock is implied.
331  */
332 
/*
 * Pick, from the route list starting at @rt (linked via dst.rt6_next), the
 * entry that matches the requested interface or source address.
 *
 * - No @oif and an unspecified @saddr: @rt is returned unchanged.
 * - @oif set: the first route whose device ifindex equals @oif wins.
 *   Routes on a loopback device are remembered in 'local' as a fallback.
 *   If nothing matched, 'local' is returned when set; otherwise
 *   ip6_null_entry is returned under RT6_LOOKUP_F_IFACE.
 * - @oif unset: the first route for which ipv6_chk_addr() accepts @saddr
 *   on the route's device wins.
 * - In every remaining case @rt itself is returned.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* Nothing to match against: keep the head of the list. */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			/* Exact device match. */
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				/*
				 * Loopback route whose inet6_dev is missing or
				 * does not belong to oif: skip it in strict
				 * mode, or when an already remembered 'local'
				 * candidate belongs to oif.
				 */
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE && oif)
						continue;
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		} else {
			/* No oif: match on the source address instead. */
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		/* Strict interface match requested and nothing found. */
		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
379 
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Send a neighbour solicitation towards the neighbour of @rt (which may be
 * NULL) when that neighbour is not in a NUD_VALID state and more than
 * rtr_probe_interval jiffies have passed since neigh->updated.
 * Compiled to an empty stub without CONFIG_IPV6_ROUTER_PREF.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	/* Unlocked fast path: no neighbour, or it is already valid. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;
	read_lock_bh(&neigh->lock);
	/* Re-check the state under the lock and apply the rate limit. */
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/* Stamp the probe time, then drop the lock before sending. */
		neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		/* Solicit the neighbour via its solicited-node multicast address. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		read_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Router reachability probing is compiled out: nothing to do. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
419 
420 /*
421  * Default Router Selection (RFC 2461 6.3.6)
422  */
423 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
424 {
425 	struct net_device *dev = rt->dst.dev;
426 	if (!oif || dev->ifindex == oif)
427 		return 2;
428 	if ((dev->flags & IFF_LOOPBACK) &&
429 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
430 		return 1;
431 	return 0;
432 }
433 
434 static inline int rt6_check_neigh(struct rt6_info *rt)
435 {
436 	struct neighbour *neigh;
437 	int m;
438 
439 	rcu_read_lock();
440 	neigh = dst_get_neighbour_noref(&rt->dst);
441 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
442 	    !(rt->rt6i_flags & RTF_GATEWAY))
443 		m = 1;
444 	else if (neigh) {
445 		read_lock_bh(&neigh->lock);
446 		if (neigh->nud_state & NUD_VALID)
447 			m = 2;
448 #ifdef CONFIG_IPV6_ROUTER_PREF
449 		else if (neigh->nud_state & NUD_FAILED)
450 			m = 0;
451 #endif
452 		else
453 			m = 1;
454 		read_unlock_bh(&neigh->lock);
455 	} else
456 		m = 0;
457 	rcu_read_unlock();
458 	return m;
459 }
460 
461 static int rt6_score_route(struct rt6_info *rt, int oif,
462 			   int strict)
463 {
464 	int m, n;
465 
466 	m = rt6_check_dev(rt, oif);
467 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
468 		return -1;
469 #ifdef CONFIG_IPV6_ROUTER_PREF
470 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
471 #endif
472 	n = rt6_check_neigh(rt);
473 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
474 		return -1;
475 	return m;
476 }
477 
478 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
479 				   int *mpri, struct rt6_info *match)
480 {
481 	int m;
482 
483 	if (rt6_check_expired(rt))
484 		goto out;
485 
486 	m = rt6_score_route(rt, oif, strict);
487 	if (m < 0)
488 		goto out;
489 
490 	if (m > *mpri) {
491 		if (strict & RT6_LOOKUP_F_REACHABLE)
492 			rt6_probe(match);
493 		*mpri = m;
494 		match = rt;
495 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
496 		rt6_probe(rt);
497 	}
498 
499 out:
500 	return match;
501 }
502 
503 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
504 				     struct rt6_info *rr_head,
505 				     u32 metric, int oif, int strict)
506 {
507 	struct rt6_info *rt, *match;
508 	int mpri = -1;
509 
510 	match = NULL;
511 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
512 	     rt = rt->dst.rt6_next)
513 		match = find_match(rt, oif, strict, &mpri, match);
514 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
515 	     rt = rt->dst.rt6_next)
516 		match = find_match(rt, oif, strict, &mpri, match);
517 
518 	return match;
519 }
520 
521 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
522 {
523 	struct rt6_info *match, *rt0;
524 	struct net *net;
525 
526 	rt0 = fn->rr_ptr;
527 	if (!rt0)
528 		fn->rr_ptr = rt0 = fn->leaf;
529 
530 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
531 
532 	if (!match &&
533 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
534 		struct rt6_info *next = rt0->dst.rt6_next;
535 
536 		/* no entries matched; do round-robin */
537 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
538 			next = fn->leaf;
539 
540 		if (next != rt0)
541 			fn->rr_ptr = next;
542 	}
543 
544 	net = dev_net(rt0->dst.dev);
545 	return match ? match : net->ipv6.ip6_null_entry;
546 }
547 
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option (@opt, @len bytes) received on @dev
 * from gateway @gwaddr.
 *
 * The option is validated, then the matching route-info route is deleted
 * (zero lifetime), created (none exists and lifetime is non-zero) or has
 * its preference refreshed.  A surviving route gets its expiry updated
 * from the lifetime and is released.
 *
 * Returns 0 on success, -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->dst.expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
621 
/*
 * BACKTRACK(__net, saddr) - climb the fib6 tree after a failed lookup.
 *
 * Must be expanded inside a function that has local variables 'rt'
 * (struct rt6_info *) and 'fn' (struct fib6_node *) and the labels
 * 'restart' and 'out'.
 *
 * If rt is __net's ip6_null_entry, the macro walks from fn towards the
 * root: it jumps to 'out' when fn is flagged RTN_TL_ROOT; otherwise it
 * moves to the parent, or, when the parent has a subtree that is not fn
 * itself, to the result of fib6_lookup() of saddr in that subtree.  As
 * soon as the new fn has RTN_RTINFO set it jumps to 'restart'.
 * If rt is anything else the macro does nothing.
 */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == __net->ipv6.ip6_null_entry) {	\
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
639 
640 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
641 					     struct fib6_table *table,
642 					     struct flowi6 *fl6, int flags)
643 {
644 	struct fib6_node *fn;
645 	struct rt6_info *rt;
646 
647 	read_lock_bh(&table->tb6_lock);
648 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
649 restart:
650 	rt = fn->leaf;
651 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
652 	BACKTRACK(net, &fl6->saddr);
653 out:
654 	dst_use(&rt->dst, jiffies);
655 	read_unlock_bh(&table->tb6_lock);
656 	return rt;
657 
658 }
659 
660 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
661 				    int flags)
662 {
663 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
664 }
665 EXPORT_SYMBOL_GPL(ip6_route_lookup);
666 
667 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
668 			    const struct in6_addr *saddr, int oif, int strict)
669 {
670 	struct flowi6 fl6 = {
671 		.flowi6_oif = oif,
672 		.daddr = *daddr,
673 	};
674 	struct dst_entry *dst;
675 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
676 
677 	if (saddr) {
678 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
679 		flags |= RT6_LOOKUP_F_HAS_SADDR;
680 	}
681 
682 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
683 	if (dst->error == 0)
684 		return (struct rt6_info *) dst;
685 
686 	dst_release(dst);
687 
688 	return NULL;
689 }
690 
691 EXPORT_SYMBOL(rt6_lookup);
692 
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold it,
   it may be destroyed.
 */
698 
699 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
700 {
701 	int err;
702 	struct fib6_table *table;
703 
704 	table = rt->rt6i_table;
705 	write_lock_bh(&table->tb6_lock);
706 	err = fib6_add(&table->tb6_root, rt, info);
707 	write_unlock_bh(&table->tb6_lock);
708 
709 	return err;
710 }
711 
712 int ip6_ins_rt(struct rt6_info *rt)
713 {
714 	struct nl_info info = {
715 		.nl_net = dev_net(rt->dst.dev),
716 	};
717 	return __ip6_ins_rt(rt, &info);
718 }
719 
/*
 * Create an RTF_CACHE copy of @ort for destination @daddr and bind a
 * neighbour entry to it.
 *
 * For non-gateway routes the destination itself becomes the gateway, and
 * the copy is flagged RTF_ANYCAST when @daddr equals the prefix address of
 * a non-/128 route.  With CONFIG_IPV6_SUBTREES, a source-specific route
 * gets @saddr as a /128 source key.
 *
 * If binding the neighbour fails, one forced garbage-collection pass is
 * attempted (only when not in softirq context) before retrying; on final
 * failure the copy is freed.
 *
 * Returns the new route, or NULL on allocation or neighbour failure.
 */
static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
				      const struct in6_addr *daddr,
				      const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort, daddr);

	if (rt) {
		/* One GC-and-retry is allowed, but only outside softirq. */
		int attempts = !in_softirq();

		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
			if (ort->rt6i_dst.plen != 128 &&
			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
				rt->rt6i_flags |= RTF_ANYCAST;
			rt->rt6i_gateway = *daddr;
		}

		rt->rt6i_flags |= RTF_CACHE;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif

	retry:
		if (rt6_bind_neighbour(rt, rt->dst.dev)) {
			struct net *net = dev_net(rt->dst.dev);
			int saved_rt_min_interval =
				net->ipv6.sysctl.ip6_rt_gc_min_interval;
			int saved_rt_elasticity =
				net->ipv6.sysctl.ip6_rt_gc_elasticity;

			if (attempts-- > 0) {
				/*
				 * Temporarily override the GC sysctls for this
				 * pass, then restore the saved values.
				 */
				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;

				ip6_dst_gc(&net->ipv6.ip6_dst_ops);

				net->ipv6.sysctl.ip6_rt_gc_elasticity =
					saved_rt_elasticity;
				net->ipv6.sysctl.ip6_rt_gc_min_interval =
					saved_rt_min_interval;
				goto retry;
			}

			if (net_ratelimit())
				printk(KERN_WARNING
				       "ipv6: Neighbour table overflow.\n");
			dst_free(&rt->dst);
			return NULL;
		}
	}

	return rt;
}
782 
783 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
784 					const struct in6_addr *daddr)
785 {
786 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
787 
788 	if (rt) {
789 		rt->rt6i_flags |= RTF_CACHE;
790 		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
791 	}
792 	return rt;
793 }
794 
/*
 * Core policy-route lookup shared by the input and output paths.
 *
 * Selects a route for @fl6 in @table (preferring reachable routers unless
 * forwarding is enabled for the namespace) and, when the selected route is
 * not already a cache entry, creates a per-destination cache route
 * (copy-on-write or clone) and inserts it into the table.  If insertion
 * fails the whole lookup is retried, up to three attempts in total.
 *
 * Returns a route with a reference held and its lastuse/__use updated;
 * this may be the namespace's ip6_null_entry.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;
	int err;
	/* Reachability is only required when not acting as a forwarder. */
	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

	strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
	read_lock_bh(&table->tb6_lock);

restart_2:
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);

restart:
	rt = rt6_select(fn, oif, strict | reachable);

	/* May jump to 'restart' (parent node) or 'out' (reached the root). */
	BACKTRACK(net, &fl6->saddr);
	if (rt == net->ipv6.ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	/* Pin the route, then drop the lock before allocating a copy. */
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);

	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
	else if (!(rt->dst.flags & DST_HOST))
		nrt = rt6_alloc_clone(rt, &fl6->daddr);
	else
		goto out2;

	/* Switch to the copy, or to the null entry if allocation failed. */
	dst_release(&rt->dst);
	rt = nrt ? : net->ipv6.ip6_null_entry;

	dst_hold(&rt->dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->dst);
	goto relookup;

out:
	/* First pass required reachability; retry once without it. */
	if (reachable) {
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
out2:
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;

	return rt;
}
864 
865 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
866 					    struct flowi6 *fl6, int flags)
867 {
868 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
869 }
870 
871 void ip6_route_input(struct sk_buff *skb)
872 {
873 	const struct ipv6hdr *iph = ipv6_hdr(skb);
874 	struct net *net = dev_net(skb->dev);
875 	int flags = RT6_LOOKUP_F_HAS_SADDR;
876 	struct flowi6 fl6 = {
877 		.flowi6_iif = skb->dev->ifindex,
878 		.daddr = iph->daddr,
879 		.saddr = iph->saddr,
880 		.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
881 		.flowi6_mark = skb->mark,
882 		.flowi6_proto = iph->nexthdr,
883 	};
884 
885 	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
886 		flags |= RT6_LOOKUP_F_IFACE;
887 
888 	skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
889 }
890 
891 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
892 					     struct flowi6 *fl6, int flags)
893 {
894 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
895 }
896 
897 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
898 				    struct flowi6 *fl6)
899 {
900 	int flags = 0;
901 
902 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
903 		flags |= RT6_LOOKUP_F_IFACE;
904 
905 	if (!ipv6_addr_any(&fl6->saddr))
906 		flags |= RT6_LOOKUP_F_HAS_SADDR;
907 	else if (sk)
908 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
909 
910 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
911 }
912 
913 EXPORT_SYMBOL(ip6_route_output);
914 
915 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
916 {
917 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
918 	struct dst_entry *new = NULL;
919 
920 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
921 	if (rt) {
922 		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
923 
924 		new = &rt->dst;
925 
926 		new->__use = 1;
927 		new->input = dst_discard;
928 		new->output = dst_discard;
929 
930 		if (dst_metrics_read_only(&ort->dst))
931 			new->_metrics = ort->dst._metrics;
932 		else
933 			dst_copy_metrics(new, &ort->dst);
934 		rt->rt6i_idev = ort->rt6i_idev;
935 		if (rt->rt6i_idev)
936 			in6_dev_hold(rt->rt6i_idev);
937 		rt->dst.expires = 0;
938 
939 		rt->rt6i_gateway = ort->rt6i_gateway;
940 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
941 		rt->rt6i_metric = 0;
942 
943 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
944 #ifdef CONFIG_IPV6_SUBTREES
945 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
946 #endif
947 
948 		dst_free(new);
949 	}
950 
951 	dst_release(dst_orig);
952 	return new ? new : ERR_PTR(-ENOMEM);
953 }
954 
955 /*
956  *	Destination cache support functions
957  */
958 
959 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
960 {
961 	struct rt6_info *rt;
962 
963 	rt = (struct rt6_info *) dst;
964 
965 	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
966 		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
967 			if (!rt->rt6i_peer)
968 				rt6_bind_peer(rt, 0);
969 			rt->rt6i_peer_genid = rt6_peer_genid();
970 		}
971 		return dst;
972 	}
973 	return NULL;
974 }
975 
976 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
977 {
978 	struct rt6_info *rt = (struct rt6_info *) dst;
979 
980 	if (rt) {
981 		if (rt->rt6i_flags & RTF_CACHE) {
982 			if (rt6_check_expired(rt)) {
983 				ip6_del_rt(rt);
984 				dst = NULL;
985 			}
986 		} else {
987 			dst_release(dst);
988 			dst = NULL;
989 		}
990 	}
991 	return dst;
992 }
993 
994 static void ip6_link_failure(struct sk_buff *skb)
995 {
996 	struct rt6_info *rt;
997 
998 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
999 
1000 	rt = (struct rt6_info *) skb_dst(skb);
1001 	if (rt) {
1002 		if (rt->rt6i_flags & RTF_CACHE) {
1003 			dst_set_expires(&rt->dst, 0);
1004 			rt->rt6i_flags |= RTF_EXPIRES;
1005 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1006 			rt->rt6i_node->fn_sernum = -1;
1007 	}
1008 }
1009 
1010 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1011 {
1012 	struct rt6_info *rt6 = (struct rt6_info*)dst;
1013 
1014 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1015 		rt6->rt6i_flags |= RTF_MODIFIED;
1016 		if (mtu < IPV6_MIN_MTU) {
1017 			u32 features = dst_metric(dst, RTAX_FEATURES);
1018 			mtu = IPV6_MIN_MTU;
1019 			features |= RTAX_FEATURE_ALLFRAG;
1020 			dst_metric_set(dst, RTAX_FEATURES, features);
1021 		}
1022 		dst_metric_set(dst, RTAX_MTU, mtu);
1023 	}
1024 }
1025 
1026 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1027 {
1028 	struct net_device *dev = dst->dev;
1029 	unsigned int mtu = dst_mtu(dst);
1030 	struct net *net = dev_net(dev);
1031 
1032 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1033 
1034 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1035 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1036 
1037 	/*
1038 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1039 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1040 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1041 	 * rely only on pmtu discovery"
1042 	 */
1043 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1044 		mtu = IPV6_MAXPLEN;
1045 	return mtu;
1046 }
1047 
1048 static unsigned int ip6_mtu(const struct dst_entry *dst)
1049 {
1050 	struct inet6_dev *idev;
1051 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1052 
1053 	if (mtu)
1054 		return mtu;
1055 
1056 	mtu = IPV6_MIN_MTU;
1057 
1058 	rcu_read_lock();
1059 	idev = __in6_dev_get(dst->dev);
1060 	if (idev)
1061 		mtu = idev->cnf.mtu6;
1062 	rcu_read_unlock();
1063 
1064 	return mtu;
1065 }
1066 
1067 static struct dst_entry *icmp6_dst_gc_list;
1068 static DEFINE_SPINLOCK(icmp6_dst_lock);
1069 
1070 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1071 				  struct neighbour *neigh,
1072 				  struct flowi6 *fl6)
1073 {
1074 	struct dst_entry *dst;
1075 	struct rt6_info *rt;
1076 	struct inet6_dev *idev = in6_dev_get(dev);
1077 	struct net *net = dev_net(dev);
1078 
1079 	if (unlikely(!idev))
1080 		return NULL;
1081 
1082 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1083 	if (unlikely(!rt)) {
1084 		in6_dev_put(idev);
1085 		dst = ERR_PTR(-ENOMEM);
1086 		goto out;
1087 	}
1088 
1089 	if (neigh)
1090 		neigh_hold(neigh);
1091 	else {
1092 		neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
1093 		if (IS_ERR(neigh)) {
1094 			in6_dev_put(idev);
1095 			dst_free(&rt->dst);
1096 			return ERR_CAST(neigh);
1097 		}
1098 	}
1099 
1100 	rt->dst.flags |= DST_HOST;
1101 	rt->dst.output  = ip6_output;
1102 	dst_set_neighbour(&rt->dst, neigh);
1103 	atomic_set(&rt->dst.__refcnt, 1);
1104 	rt->rt6i_dst.addr = fl6->daddr;
1105 	rt->rt6i_dst.plen = 128;
1106 	rt->rt6i_idev     = idev;
1107 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1108 
1109 	spin_lock_bh(&icmp6_dst_lock);
1110 	rt->dst.next = icmp6_dst_gc_list;
1111 	icmp6_dst_gc_list = &rt->dst;
1112 	spin_unlock_bh(&icmp6_dst_lock);
1113 
1114 	fib6_force_start_gc(net);
1115 
1116 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1117 
1118 out:
1119 	return dst;
1120 }
1121 
1122 int icmp6_dst_gc(void)
1123 {
1124 	struct dst_entry *dst, **pprev;
1125 	int more = 0;
1126 
1127 	spin_lock_bh(&icmp6_dst_lock);
1128 	pprev = &icmp6_dst_gc_list;
1129 
1130 	while ((dst = *pprev) != NULL) {
1131 		if (!atomic_read(&dst->__refcnt)) {
1132 			*pprev = dst->next;
1133 			dst_free(dst);
1134 		} else {
1135 			pprev = &dst->next;
1136 			++more;
1137 		}
1138 	}
1139 
1140 	spin_unlock_bh(&icmp6_dst_lock);
1141 
1142 	return more;
1143 }
1144 
1145 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1146 			    void *arg)
1147 {
1148 	struct dst_entry *dst, **pprev;
1149 
1150 	spin_lock_bh(&icmp6_dst_lock);
1151 	pprev = &icmp6_dst_gc_list;
1152 	while ((dst = *pprev) != NULL) {
1153 		struct rt6_info *rt = (struct rt6_info *) dst;
1154 		if (func(rt, arg)) {
1155 			*pprev = dst->next;
1156 			dst_free(dst);
1157 		} else {
1158 			pprev = &dst->next;
1159 		}
1160 	}
1161 	spin_unlock_bh(&icmp6_dst_lock);
1162 }
1163 
1164 static int ip6_dst_gc(struct dst_ops *ops)
1165 {
1166 	unsigned long now = jiffies;
1167 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1168 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1169 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1170 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1171 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1172 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1173 	int entries;
1174 
1175 	entries = dst_entries_get_fast(ops);
1176 	if (time_after(rt_last_gc + rt_min_interval, now) &&
1177 	    entries <= rt_max_size)
1178 		goto out;
1179 
1180 	net->ipv6.ip6_rt_gc_expire++;
1181 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1182 	net->ipv6.ip6_rt_last_gc = now;
1183 	entries = dst_entries_get_slow(ops);
1184 	if (entries < ops->gc_thresh)
1185 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1186 out:
1187 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1188 	return entries > rt_max_size;
1189 }
1190 
/* Clean host part of a prefix. This is not necessary in a radix tree,
   but it results in cleaner routing tables.

   Remove it only once everything else works!
 */
1196 
1197 int ip6_dst_hoplimit(struct dst_entry *dst)
1198 {
1199 	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1200 	if (hoplimit == 0) {
1201 		struct net_device *dev = dst->dev;
1202 		struct inet6_dev *idev;
1203 
1204 		rcu_read_lock();
1205 		idev = __in6_dev_get(dev);
1206 		if (idev)
1207 			hoplimit = idev->cnf.hop_limit;
1208 		else
1209 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1210 		rcu_read_unlock();
1211 	}
1212 	return hoplimit;
1213 }
1214 EXPORT_SYMBOL(ip6_dst_hoplimit);
1215 
1216 /*
1217  *
1218  */
1219 
1220 int ip6_route_add(struct fib6_config *cfg)
1221 {
1222 	int err;
1223 	struct net *net = cfg->fc_nlinfo.nl_net;
1224 	struct rt6_info *rt = NULL;
1225 	struct net_device *dev = NULL;
1226 	struct inet6_dev *idev = NULL;
1227 	struct fib6_table *table;
1228 	int addr_type;
1229 
1230 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1231 		return -EINVAL;
1232 #ifndef CONFIG_IPV6_SUBTREES
1233 	if (cfg->fc_src_len)
1234 		return -EINVAL;
1235 #endif
1236 	if (cfg->fc_ifindex) {
1237 		err = -ENODEV;
1238 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1239 		if (!dev)
1240 			goto out;
1241 		idev = in6_dev_get(dev);
1242 		if (!idev)
1243 			goto out;
1244 	}
1245 
1246 	if (cfg->fc_metric == 0)
1247 		cfg->fc_metric = IP6_RT_PRIO_USER;
1248 
1249 	err = -ENOBUFS;
1250 	if (cfg->fc_nlinfo.nlh &&
1251 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1252 		table = fib6_get_table(net, cfg->fc_table);
1253 		if (!table) {
1254 			printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1255 			table = fib6_new_table(net, cfg->fc_table);
1256 		}
1257 	} else {
1258 		table = fib6_new_table(net, cfg->fc_table);
1259 	}
1260 
1261 	if (!table)
1262 		goto out;
1263 
1264 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1265 
1266 	if (!rt) {
1267 		err = -ENOMEM;
1268 		goto out;
1269 	}
1270 
1271 	rt->dst.obsolete = -1;
1272 	rt->dst.expires = (cfg->fc_flags & RTF_EXPIRES) ?
1273 				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1274 				0;
1275 
1276 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1277 		cfg->fc_protocol = RTPROT_BOOT;
1278 	rt->rt6i_protocol = cfg->fc_protocol;
1279 
1280 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1281 
1282 	if (addr_type & IPV6_ADDR_MULTICAST)
1283 		rt->dst.input = ip6_mc_input;
1284 	else if (cfg->fc_flags & RTF_LOCAL)
1285 		rt->dst.input = ip6_input;
1286 	else
1287 		rt->dst.input = ip6_forward;
1288 
1289 	rt->dst.output = ip6_output;
1290 
1291 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1292 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1293 	if (rt->rt6i_dst.plen == 128)
1294 	       rt->dst.flags |= DST_HOST;
1295 
1296 	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1297 		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1298 		if (!metrics) {
1299 			err = -ENOMEM;
1300 			goto out;
1301 		}
1302 		dst_init_metrics(&rt->dst, metrics, 0);
1303 	}
1304 #ifdef CONFIG_IPV6_SUBTREES
1305 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1306 	rt->rt6i_src.plen = cfg->fc_src_len;
1307 #endif
1308 
1309 	rt->rt6i_metric = cfg->fc_metric;
1310 
1311 	/* We cannot add true routes via loopback here,
1312 	   they would result in kernel looping; promote them to reject routes
1313 	 */
1314 	if ((cfg->fc_flags & RTF_REJECT) ||
1315 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1316 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1317 	     !(cfg->fc_flags & RTF_LOCAL))) {
1318 		/* hold loopback dev/idev if we haven't done so. */
1319 		if (dev != net->loopback_dev) {
1320 			if (dev) {
1321 				dev_put(dev);
1322 				in6_dev_put(idev);
1323 			}
1324 			dev = net->loopback_dev;
1325 			dev_hold(dev);
1326 			idev = in6_dev_get(dev);
1327 			if (!idev) {
1328 				err = -ENODEV;
1329 				goto out;
1330 			}
1331 		}
1332 		rt->dst.output = ip6_pkt_discard_out;
1333 		rt->dst.input = ip6_pkt_discard;
1334 		rt->dst.error = -ENETUNREACH;
1335 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1336 		goto install_route;
1337 	}
1338 
1339 	if (cfg->fc_flags & RTF_GATEWAY) {
1340 		const struct in6_addr *gw_addr;
1341 		int gwa_type;
1342 
1343 		gw_addr = &cfg->fc_gateway;
1344 		rt->rt6i_gateway = *gw_addr;
1345 		gwa_type = ipv6_addr_type(gw_addr);
1346 
1347 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1348 			struct rt6_info *grt;
1349 
1350 			/* IPv6 strictly inhibits using not link-local
1351 			   addresses as nexthop address.
1352 			   Otherwise, router will not able to send redirects.
1353 			   It is very good, but in some (rare!) circumstances
1354 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1355 			   some exceptions. --ANK
1356 			 */
1357 			err = -EINVAL;
1358 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1359 				goto out;
1360 
1361 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1362 
1363 			err = -EHOSTUNREACH;
1364 			if (!grt)
1365 				goto out;
1366 			if (dev) {
1367 				if (dev != grt->dst.dev) {
1368 					dst_release(&grt->dst);
1369 					goto out;
1370 				}
1371 			} else {
1372 				dev = grt->dst.dev;
1373 				idev = grt->rt6i_idev;
1374 				dev_hold(dev);
1375 				in6_dev_hold(grt->rt6i_idev);
1376 			}
1377 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1378 				err = 0;
1379 			dst_release(&grt->dst);
1380 
1381 			if (err)
1382 				goto out;
1383 		}
1384 		err = -EINVAL;
1385 		if (!dev || (dev->flags & IFF_LOOPBACK))
1386 			goto out;
1387 	}
1388 
1389 	err = -ENODEV;
1390 	if (!dev)
1391 		goto out;
1392 
1393 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1394 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1395 			err = -EINVAL;
1396 			goto out;
1397 		}
1398 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1399 		rt->rt6i_prefsrc.plen = 128;
1400 	} else
1401 		rt->rt6i_prefsrc.plen = 0;
1402 
1403 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1404 		err = rt6_bind_neighbour(rt, dev);
1405 		if (err)
1406 			goto out;
1407 	}
1408 
1409 	rt->rt6i_flags = cfg->fc_flags;
1410 
1411 install_route:
1412 	if (cfg->fc_mx) {
1413 		struct nlattr *nla;
1414 		int remaining;
1415 
1416 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1417 			int type = nla_type(nla);
1418 
1419 			if (type) {
1420 				if (type > RTAX_MAX) {
1421 					err = -EINVAL;
1422 					goto out;
1423 				}
1424 
1425 				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1426 			}
1427 		}
1428 	}
1429 
1430 	rt->dst.dev = dev;
1431 	rt->rt6i_idev = idev;
1432 	rt->rt6i_table = table;
1433 
1434 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1435 
1436 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1437 
1438 out:
1439 	if (dev)
1440 		dev_put(dev);
1441 	if (idev)
1442 		in6_dev_put(idev);
1443 	if (rt)
1444 		dst_free(&rt->dst);
1445 	return err;
1446 }
1447 
1448 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1449 {
1450 	int err;
1451 	struct fib6_table *table;
1452 	struct net *net = dev_net(rt->dst.dev);
1453 
1454 	if (rt == net->ipv6.ip6_null_entry)
1455 		return -ENOENT;
1456 
1457 	table = rt->rt6i_table;
1458 	write_lock_bh(&table->tb6_lock);
1459 
1460 	err = fib6_del(rt, info);
1461 	dst_release(&rt->dst);
1462 
1463 	write_unlock_bh(&table->tb6_lock);
1464 
1465 	return err;
1466 }
1467 
1468 int ip6_del_rt(struct rt6_info *rt)
1469 {
1470 	struct nl_info info = {
1471 		.nl_net = dev_net(rt->dst.dev),
1472 	};
1473 	return __ip6_del_rt(rt, &info);
1474 }
1475 
1476 static int ip6_route_del(struct fib6_config *cfg)
1477 {
1478 	struct fib6_table *table;
1479 	struct fib6_node *fn;
1480 	struct rt6_info *rt;
1481 	int err = -ESRCH;
1482 
1483 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1484 	if (!table)
1485 		return err;
1486 
1487 	read_lock_bh(&table->tb6_lock);
1488 
1489 	fn = fib6_locate(&table->tb6_root,
1490 			 &cfg->fc_dst, cfg->fc_dst_len,
1491 			 &cfg->fc_src, cfg->fc_src_len);
1492 
1493 	if (fn) {
1494 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1495 			if (cfg->fc_ifindex &&
1496 			    (!rt->dst.dev ||
1497 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
1498 				continue;
1499 			if (cfg->fc_flags & RTF_GATEWAY &&
1500 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1501 				continue;
1502 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1503 				continue;
1504 			dst_hold(&rt->dst);
1505 			read_unlock_bh(&table->tb6_lock);
1506 
1507 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1508 		}
1509 	}
1510 	read_unlock_bh(&table->tb6_lock);
1511 
1512 	return err;
1513 }
1514 
1515 /*
1516  *	Handle redirects
1517  */
1518 struct ip6rd_flowi {
1519 	struct flowi6 fl6;
1520 	struct in6_addr gateway;
1521 };
1522 
1523 static struct rt6_info *__ip6_route_redirect(struct net *net,
1524 					     struct fib6_table *table,
1525 					     struct flowi6 *fl6,
1526 					     int flags)
1527 {
1528 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1529 	struct rt6_info *rt;
1530 	struct fib6_node *fn;
1531 
1532 	/*
1533 	 * Get the "current" route for this destination and
1534 	 * check if the redirect has come from approriate router.
1535 	 *
1536 	 * RFC 2461 specifies that redirects should only be
1537 	 * accepted if they come from the nexthop to the target.
1538 	 * Due to the way the routes are chosen, this notion
1539 	 * is a bit fuzzy and one might need to check all possible
1540 	 * routes.
1541 	 */
1542 
1543 	read_lock_bh(&table->tb6_lock);
1544 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1545 restart:
1546 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1547 		/*
1548 		 * Current route is on-link; redirect is always invalid.
1549 		 *
1550 		 * Seems, previous statement is not true. It could
1551 		 * be node, which looks for us as on-link (f.e. proxy ndisc)
1552 		 * But then router serving it might decide, that we should
1553 		 * know truth 8)8) --ANK (980726).
1554 		 */
1555 		if (rt6_check_expired(rt))
1556 			continue;
1557 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1558 			continue;
1559 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1560 			continue;
1561 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1562 			continue;
1563 		break;
1564 	}
1565 
1566 	if (!rt)
1567 		rt = net->ipv6.ip6_null_entry;
1568 	BACKTRACK(net, &fl6->saddr);
1569 out:
1570 	dst_hold(&rt->dst);
1571 
1572 	read_unlock_bh(&table->tb6_lock);
1573 
1574 	return rt;
1575 };
1576 
1577 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1578 					   const struct in6_addr *src,
1579 					   const struct in6_addr *gateway,
1580 					   struct net_device *dev)
1581 {
1582 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1583 	struct net *net = dev_net(dev);
1584 	struct ip6rd_flowi rdfl = {
1585 		.fl6 = {
1586 			.flowi6_oif = dev->ifindex,
1587 			.daddr = *dest,
1588 			.saddr = *src,
1589 		},
1590 	};
1591 
1592 	rdfl.gateway = *gateway;
1593 
1594 	if (rt6_need_strict(dest))
1595 		flags |= RT6_LOOKUP_F_IFACE;
1596 
1597 	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1598 						   flags, __ip6_route_redirect);
1599 }
1600 
1601 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1602 		  const struct in6_addr *saddr,
1603 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1604 {
1605 	struct rt6_info *rt, *nrt = NULL;
1606 	struct netevent_redirect netevent;
1607 	struct net *net = dev_net(neigh->dev);
1608 
1609 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1610 
1611 	if (rt == net->ipv6.ip6_null_entry) {
1612 		if (net_ratelimit())
1613 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1614 			       "for redirect target\n");
1615 		goto out;
1616 	}
1617 
1618 	/*
1619 	 *	We have finally decided to accept it.
1620 	 */
1621 
1622 	neigh_update(neigh, lladdr, NUD_STALE,
1623 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1624 		     NEIGH_UPDATE_F_OVERRIDE|
1625 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1626 				     NEIGH_UPDATE_F_ISROUTER))
1627 		     );
1628 
1629 	/*
1630 	 * Redirect received -> path was valid.
1631 	 * Look, redirects are sent only in response to data packets,
1632 	 * so that this nexthop apparently is reachable. --ANK
1633 	 */
1634 	dst_confirm(&rt->dst);
1635 
1636 	/* Duplicate redirect: silently ignore. */
1637 	if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1638 		goto out;
1639 
1640 	nrt = ip6_rt_copy(rt, dest);
1641 	if (!nrt)
1642 		goto out;
1643 
1644 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1645 	if (on_link)
1646 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1647 
1648 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1649 	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1650 
1651 	if (ip6_ins_rt(nrt))
1652 		goto out;
1653 
1654 	netevent.old = &rt->dst;
1655 	netevent.new = &nrt->dst;
1656 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1657 
1658 	if (rt->rt6i_flags & RTF_CACHE) {
1659 		ip6_del_rt(rt);
1660 		return;
1661 	}
1662 
1663 out:
1664 	dst_release(&rt->dst);
1665 }
1666 
1667 /*
1668  *	Handle ICMP "packet too big" messages
1669  *	i.e. Path MTU discovery
1670  */
1671 
1672 static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1673 			     struct net *net, u32 pmtu, int ifindex)
1674 {
1675 	struct rt6_info *rt, *nrt;
1676 	int allfrag = 0;
1677 again:
1678 	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1679 	if (!rt)
1680 		return;
1681 
1682 	if (rt6_check_expired(rt)) {
1683 		ip6_del_rt(rt);
1684 		goto again;
1685 	}
1686 
1687 	if (pmtu >= dst_mtu(&rt->dst))
1688 		goto out;
1689 
1690 	if (pmtu < IPV6_MIN_MTU) {
1691 		/*
1692 		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1693 		 * MTU (1280) and a fragment header should always be included
1694 		 * after a node receiving Too Big message reporting PMTU is
1695 		 * less than the IPv6 Minimum Link MTU.
1696 		 */
1697 		pmtu = IPV6_MIN_MTU;
1698 		allfrag = 1;
1699 	}
1700 
1701 	/* New mtu received -> path was valid.
1702 	   They are sent only in response to data packets,
1703 	   so that this nexthop apparently is reachable. --ANK
1704 	 */
1705 	dst_confirm(&rt->dst);
1706 
1707 	/* Host route. If it is static, it would be better
1708 	   not to override it, but add new one, so that
1709 	   when cache entry will expire old pmtu
1710 	   would return automatically.
1711 	 */
1712 	if (rt->rt6i_flags & RTF_CACHE) {
1713 		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1714 		if (allfrag) {
1715 			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1716 			features |= RTAX_FEATURE_ALLFRAG;
1717 			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1718 		}
1719 		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1720 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1721 		goto out;
1722 	}
1723 
1724 	/* Network route.
1725 	   Two cases are possible:
1726 	   1. It is connected route. Action: COW
1727 	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1728 	 */
1729 	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1730 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1731 	else
1732 		nrt = rt6_alloc_clone(rt, daddr);
1733 
1734 	if (nrt) {
1735 		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1736 		if (allfrag) {
1737 			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1738 			features |= RTAX_FEATURE_ALLFRAG;
1739 			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1740 		}
1741 
1742 		/* According to RFC 1981, detecting PMTU increase shouldn't be
1743 		 * happened within 5 mins, the recommended timer is 10 mins.
1744 		 * Here this route expiration time is set to ip6_rt_mtu_expires
1745 		 * which is 10 mins. After 10 mins the decreased pmtu is expired
1746 		 * and detecting PMTU increase will be automatically happened.
1747 		 */
1748 		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1749 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1750 
1751 		ip6_ins_rt(nrt);
1752 	}
1753 out:
1754 	dst_release(&rt->dst);
1755 }
1756 
1757 void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1758 			struct net_device *dev, u32 pmtu)
1759 {
1760 	struct net *net = dev_net(dev);
1761 
1762 	/*
1763 	 * RFC 1981 states that a node "MUST reduce the size of the packets it
1764 	 * is sending along the path" that caused the Packet Too Big message.
1765 	 * Since it's not possible in the general case to determine which
1766 	 * interface was used to send the original packet, we update the MTU
1767 	 * on the interface that will be used to send future packets. We also
1768 	 * update the MTU on the interface that received the Packet Too Big in
1769 	 * case the original packet was forced out that interface with
1770 	 * SO_BINDTODEVICE or similar. This is the next best thing to the
1771 	 * correct behaviour, which would be to update the MTU on all
1772 	 * interfaces.
1773 	 */
1774 	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1775 	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1776 }
1777 
1778 /*
1779  *	Misc support functions
1780  */
1781 
1782 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1783 				    const struct in6_addr *dest)
1784 {
1785 	struct net *net = dev_net(ort->dst.dev);
1786 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1787 					    ort->dst.dev, 0);
1788 
1789 	if (rt) {
1790 		rt->dst.input = ort->dst.input;
1791 		rt->dst.output = ort->dst.output;
1792 		rt->dst.flags |= DST_HOST;
1793 
1794 		rt->rt6i_dst.addr = *dest;
1795 		rt->rt6i_dst.plen = 128;
1796 		dst_copy_metrics(&rt->dst, &ort->dst);
1797 		rt->dst.error = ort->dst.error;
1798 		rt->rt6i_idev = ort->rt6i_idev;
1799 		if (rt->rt6i_idev)
1800 			in6_dev_hold(rt->rt6i_idev);
1801 		rt->dst.lastuse = jiffies;
1802 		rt->dst.expires = 0;
1803 
1804 		rt->rt6i_gateway = ort->rt6i_gateway;
1805 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1806 		rt->rt6i_metric = 0;
1807 
1808 #ifdef CONFIG_IPV6_SUBTREES
1809 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1810 #endif
1811 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1812 		rt->rt6i_table = ort->rt6i_table;
1813 	}
1814 	return rt;
1815 }
1816 
1817 #ifdef CONFIG_IPV6_ROUTE_INFO
1818 static struct rt6_info *rt6_get_route_info(struct net *net,
1819 					   const struct in6_addr *prefix, int prefixlen,
1820 					   const struct in6_addr *gwaddr, int ifindex)
1821 {
1822 	struct fib6_node *fn;
1823 	struct rt6_info *rt = NULL;
1824 	struct fib6_table *table;
1825 
1826 	table = fib6_get_table(net, RT6_TABLE_INFO);
1827 	if (!table)
1828 		return NULL;
1829 
1830 	write_lock_bh(&table->tb6_lock);
1831 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1832 	if (!fn)
1833 		goto out;
1834 
1835 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1836 		if (rt->dst.dev->ifindex != ifindex)
1837 			continue;
1838 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1839 			continue;
1840 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1841 			continue;
1842 		dst_hold(&rt->dst);
1843 		break;
1844 	}
1845 out:
1846 	write_unlock_bh(&table->tb6_lock);
1847 	return rt;
1848 }
1849 
1850 static struct rt6_info *rt6_add_route_info(struct net *net,
1851 					   const struct in6_addr *prefix, int prefixlen,
1852 					   const struct in6_addr *gwaddr, int ifindex,
1853 					   unsigned pref)
1854 {
1855 	struct fib6_config cfg = {
1856 		.fc_table	= RT6_TABLE_INFO,
1857 		.fc_metric	= IP6_RT_PRIO_USER,
1858 		.fc_ifindex	= ifindex,
1859 		.fc_dst_len	= prefixlen,
1860 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1861 				  RTF_UP | RTF_PREF(pref),
1862 		.fc_nlinfo.pid = 0,
1863 		.fc_nlinfo.nlh = NULL,
1864 		.fc_nlinfo.nl_net = net,
1865 	};
1866 
1867 	cfg.fc_dst = *prefix;
1868 	cfg.fc_gateway = *gwaddr;
1869 
1870 	/* We should treat it as a default route if prefix length is 0. */
1871 	if (!prefixlen)
1872 		cfg.fc_flags |= RTF_DEFAULT;
1873 
1874 	ip6_route_add(&cfg);
1875 
1876 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1877 }
1878 #endif
1879 
1880 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1881 {
1882 	struct rt6_info *rt;
1883 	struct fib6_table *table;
1884 
1885 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1886 	if (!table)
1887 		return NULL;
1888 
1889 	write_lock_bh(&table->tb6_lock);
1890 	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1891 		if (dev == rt->dst.dev &&
1892 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1893 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1894 			break;
1895 	}
1896 	if (rt)
1897 		dst_hold(&rt->dst);
1898 	write_unlock_bh(&table->tb6_lock);
1899 	return rt;
1900 }
1901 
1902 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1903 				     struct net_device *dev,
1904 				     unsigned int pref)
1905 {
1906 	struct fib6_config cfg = {
1907 		.fc_table	= RT6_TABLE_DFLT,
1908 		.fc_metric	= IP6_RT_PRIO_USER,
1909 		.fc_ifindex	= dev->ifindex,
1910 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1911 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1912 		.fc_nlinfo.pid = 0,
1913 		.fc_nlinfo.nlh = NULL,
1914 		.fc_nlinfo.nl_net = dev_net(dev),
1915 	};
1916 
1917 	cfg.fc_gateway = *gwaddr;
1918 
1919 	ip6_route_add(&cfg);
1920 
1921 	return rt6_get_dflt_router(gwaddr, dev);
1922 }
1923 
1924 void rt6_purge_dflt_routers(struct net *net)
1925 {
1926 	struct rt6_info *rt;
1927 	struct fib6_table *table;
1928 
1929 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1930 	table = fib6_get_table(net, RT6_TABLE_DFLT);
1931 	if (!table)
1932 		return;
1933 
1934 restart:
1935 	read_lock_bh(&table->tb6_lock);
1936 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1937 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1938 			dst_hold(&rt->dst);
1939 			read_unlock_bh(&table->tb6_lock);
1940 			ip6_del_rt(rt);
1941 			goto restart;
1942 		}
1943 	}
1944 	read_unlock_bh(&table->tb6_lock);
1945 }
1946 
1947 static void rtmsg_to_fib6_config(struct net *net,
1948 				 struct in6_rtmsg *rtmsg,
1949 				 struct fib6_config *cfg)
1950 {
1951 	memset(cfg, 0, sizeof(*cfg));
1952 
1953 	cfg->fc_table = RT6_TABLE_MAIN;
1954 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1955 	cfg->fc_metric = rtmsg->rtmsg_metric;
1956 	cfg->fc_expires = rtmsg->rtmsg_info;
1957 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1958 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1959 	cfg->fc_flags = rtmsg->rtmsg_flags;
1960 
1961 	cfg->fc_nlinfo.nl_net = net;
1962 
1963 	cfg->fc_dst = rtmsg->rtmsg_dst;
1964 	cfg->fc_src = rtmsg->rtmsg_src;
1965 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
1966 }
1967 
1968 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1969 {
1970 	struct fib6_config cfg;
1971 	struct in6_rtmsg rtmsg;
1972 	int err;
1973 
1974 	switch(cmd) {
1975 	case SIOCADDRT:		/* Add a route */
1976 	case SIOCDELRT:		/* Delete a route */
1977 		if (!capable(CAP_NET_ADMIN))
1978 			return -EPERM;
1979 		err = copy_from_user(&rtmsg, arg,
1980 				     sizeof(struct in6_rtmsg));
1981 		if (err)
1982 			return -EFAULT;
1983 
1984 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1985 
1986 		rtnl_lock();
1987 		switch (cmd) {
1988 		case SIOCADDRT:
1989 			err = ip6_route_add(&cfg);
1990 			break;
1991 		case SIOCDELRT:
1992 			err = ip6_route_del(&cfg);
1993 			break;
1994 		default:
1995 			err = -EINVAL;
1996 		}
1997 		rtnl_unlock();
1998 
1999 		return err;
2000 	}
2001 
2002 	return -EINVAL;
2003 }
2004 
2005 /*
2006  *	Drop the packet on the floor
2007  */
2008 
2009 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2010 {
2011 	int type;
2012 	struct dst_entry *dst = skb_dst(skb);
2013 	switch (ipstats_mib_noroutes) {
2014 	case IPSTATS_MIB_INNOROUTES:
2015 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2016 		if (type == IPV6_ADDR_ANY) {
2017 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2018 				      IPSTATS_MIB_INADDRERRORS);
2019 			break;
2020 		}
2021 		/* FALLTHROUGH */
2022 	case IPSTATS_MIB_OUTNOROUTES:
2023 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2024 			      ipstats_mib_noroutes);
2025 		break;
2026 	}
2027 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2028 	kfree_skb(skb);
2029 	return 0;
2030 }
2031 
2032 static int ip6_pkt_discard(struct sk_buff *skb)
2033 {
2034 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2035 }
2036 
2037 static int ip6_pkt_discard_out(struct sk_buff *skb)
2038 {
2039 	skb->dev = skb_dst(skb)->dev;
2040 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2041 }
2042 
2043 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2044 
2045 static int ip6_pkt_prohibit(struct sk_buff *skb)
2046 {
2047 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2048 }
2049 
2050 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2051 {
2052 	skb->dev = skb_dst(skb)->dev;
2053 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2054 }
2055 
2056 #endif
2057 
2058 /*
2059  *	Allocate a dst for local (unicast / anycast) address.
2060  */
2061 
2062 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2063 				    const struct in6_addr *addr,
2064 				    bool anycast)
2065 {
2066 	struct net *net = dev_net(idev->dev);
2067 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2068 					    net->loopback_dev, 0);
2069 	int err;
2070 
2071 	if (!rt) {
2072 		if (net_ratelimit())
2073 			pr_warning("IPv6:  Maximum number of routes reached,"
2074 				   " consider increasing route/max_size.\n");
2075 		return ERR_PTR(-ENOMEM);
2076 	}
2077 
2078 	in6_dev_hold(idev);
2079 
2080 	rt->dst.flags |= DST_HOST;
2081 	rt->dst.input = ip6_input;
2082 	rt->dst.output = ip6_output;
2083 	rt->rt6i_idev = idev;
2084 	rt->dst.obsolete = -1;
2085 
2086 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2087 	if (anycast)
2088 		rt->rt6i_flags |= RTF_ANYCAST;
2089 	else
2090 		rt->rt6i_flags |= RTF_LOCAL;
2091 	err = rt6_bind_neighbour(rt, rt->dst.dev);
2092 	if (err) {
2093 		dst_free(&rt->dst);
2094 		return ERR_PTR(err);
2095 	}
2096 
2097 	rt->rt6i_dst.addr = *addr;
2098 	rt->rt6i_dst.plen = 128;
2099 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2100 
2101 	atomic_set(&rt->dst.__refcnt, 1);
2102 
2103 	return rt;
2104 }
2105 
2106 int ip6_route_get_saddr(struct net *net,
2107 			struct rt6_info *rt,
2108 			const struct in6_addr *daddr,
2109 			unsigned int prefs,
2110 			struct in6_addr *saddr)
2111 {
2112 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2113 	int err = 0;
2114 	if (rt->rt6i_prefsrc.plen)
2115 		*saddr = rt->rt6i_prefsrc.addr;
2116 	else
2117 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2118 					 daddr, prefs, saddr);
2119 	return err;
2120 }
2121 
2122 /* remove deleted ip from prefsrc entries */
2123 struct arg_dev_net_ip {
2124 	struct net_device *dev;
2125 	struct net *net;
2126 	struct in6_addr *addr;
2127 };
2128 
2129 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2130 {
2131 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2132 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2133 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2134 
2135 	if (((void *)rt->dst.dev == dev || !dev) &&
2136 	    rt != net->ipv6.ip6_null_entry &&
2137 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2138 		/* remove prefsrc entry */
2139 		rt->rt6i_prefsrc.plen = 0;
2140 	}
2141 	return 0;
2142 }
2143 
2144 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2145 {
2146 	struct net *net = dev_net(ifp->idev->dev);
2147 	struct arg_dev_net_ip adni = {
2148 		.dev = ifp->idev->dev,
2149 		.net = net,
2150 		.addr = &ifp->addr,
2151 	};
2152 	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2153 }
2154 
2155 struct arg_dev_net {
2156 	struct net_device *dev;
2157 	struct net *net;
2158 };
2159 
2160 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2161 {
2162 	const struct arg_dev_net *adn = arg;
2163 	const struct net_device *dev = adn->dev;
2164 
2165 	if ((rt->dst.dev == dev || !dev) &&
2166 	    rt != adn->net->ipv6.ip6_null_entry)
2167 		return -1;
2168 
2169 	return 0;
2170 }
2171 
2172 void rt6_ifdown(struct net *net, struct net_device *dev)
2173 {
2174 	struct arg_dev_net adn = {
2175 		.dev = dev,
2176 		.net = net,
2177 	};
2178 
2179 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2180 	icmp6_clean_all(fib6_ifdown, &adn);
2181 }
2182 
2183 struct rt6_mtu_change_arg
2184 {
2185 	struct net_device *dev;
2186 	unsigned mtu;
2187 };
2188 
2189 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2190 {
2191 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2192 	struct inet6_dev *idev;
2193 
2194 	/* In IPv6 pmtu discovery is not optional,
2195 	   so that RTAX_MTU lock cannot disable it.
2196 	   We still use this lock to block changes
2197 	   caused by addrconf/ndisc.
2198 	*/
2199 
2200 	idev = __in6_dev_get(arg->dev);
2201 	if (!idev)
2202 		return 0;
2203 
2204 	/* For administrative MTU increase, there is no way to discover
2205 	   IPv6 PMTU increase, so PMTU increase should be updated here.
2206 	   Since RFC 1981 doesn't include administrative MTU increase
2207 	   update PMTU increase is a MUST. (i.e. jumbo frame)
2208 	 */
2209 	/*
2210 	   If new MTU is less than route PMTU, this new MTU will be the
2211 	   lowest MTU in the path, update the route PMTU to reflect PMTU
2212 	   decreases; if new MTU is greater than route PMTU, and the
2213 	   old MTU is the lowest MTU in the path, update the route PMTU
2214 	   to reflect the increase. In this case if the other nodes' MTU
2215 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2216 	   PMTU discouvery.
2217 	 */
2218 	if (rt->dst.dev == arg->dev &&
2219 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2220 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2221 	     (dst_mtu(&rt->dst) < arg->mtu &&
2222 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2223 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2224 	}
2225 	return 0;
2226 }
2227 
2228 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2229 {
2230 	struct rt6_mtu_change_arg arg = {
2231 		.dev = dev,
2232 		.mtu = mtu,
2233 	};
2234 
2235 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2236 }
2237 
2238 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2239 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2240 	[RTA_OIF]               = { .type = NLA_U32 },
2241 	[RTA_IIF]		= { .type = NLA_U32 },
2242 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2243 	[RTA_METRICS]           = { .type = NLA_NESTED },
2244 };
2245 
2246 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2247 			      struct fib6_config *cfg)
2248 {
2249 	struct rtmsg *rtm;
2250 	struct nlattr *tb[RTA_MAX+1];
2251 	int err;
2252 
2253 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2254 	if (err < 0)
2255 		goto errout;
2256 
2257 	err = -EINVAL;
2258 	rtm = nlmsg_data(nlh);
2259 	memset(cfg, 0, sizeof(*cfg));
2260 
2261 	cfg->fc_table = rtm->rtm_table;
2262 	cfg->fc_dst_len = rtm->rtm_dst_len;
2263 	cfg->fc_src_len = rtm->rtm_src_len;
2264 	cfg->fc_flags = RTF_UP;
2265 	cfg->fc_protocol = rtm->rtm_protocol;
2266 
2267 	if (rtm->rtm_type == RTN_UNREACHABLE)
2268 		cfg->fc_flags |= RTF_REJECT;
2269 
2270 	if (rtm->rtm_type == RTN_LOCAL)
2271 		cfg->fc_flags |= RTF_LOCAL;
2272 
2273 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2274 	cfg->fc_nlinfo.nlh = nlh;
2275 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2276 
2277 	if (tb[RTA_GATEWAY]) {
2278 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2279 		cfg->fc_flags |= RTF_GATEWAY;
2280 	}
2281 
2282 	if (tb[RTA_DST]) {
2283 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2284 
2285 		if (nla_len(tb[RTA_DST]) < plen)
2286 			goto errout;
2287 
2288 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2289 	}
2290 
2291 	if (tb[RTA_SRC]) {
2292 		int plen = (rtm->rtm_src_len + 7) >> 3;
2293 
2294 		if (nla_len(tb[RTA_SRC]) < plen)
2295 			goto errout;
2296 
2297 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2298 	}
2299 
2300 	if (tb[RTA_PREFSRC])
2301 		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2302 
2303 	if (tb[RTA_OIF])
2304 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2305 
2306 	if (tb[RTA_PRIORITY])
2307 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2308 
2309 	if (tb[RTA_METRICS]) {
2310 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2311 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2312 	}
2313 
2314 	if (tb[RTA_TABLE])
2315 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2316 
2317 	err = 0;
2318 errout:
2319 	return err;
2320 }
2321 
2322 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2323 {
2324 	struct fib6_config cfg;
2325 	int err;
2326 
2327 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2328 	if (err < 0)
2329 		return err;
2330 
2331 	return ip6_route_del(&cfg);
2332 }
2333 
2334 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2335 {
2336 	struct fib6_config cfg;
2337 	int err;
2338 
2339 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2340 	if (err < 0)
2341 		return err;
2342 
2343 	return ip6_route_add(&cfg);
2344 }
2345 
2346 static inline size_t rt6_nlmsg_size(void)
2347 {
2348 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2349 	       + nla_total_size(16) /* RTA_SRC */
2350 	       + nla_total_size(16) /* RTA_DST */
2351 	       + nla_total_size(16) /* RTA_GATEWAY */
2352 	       + nla_total_size(16) /* RTA_PREFSRC */
2353 	       + nla_total_size(4) /* RTA_TABLE */
2354 	       + nla_total_size(4) /* RTA_IIF */
2355 	       + nla_total_size(4) /* RTA_OIF */
2356 	       + nla_total_size(4) /* RTA_PRIORITY */
2357 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2358 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2359 }
2360 
2361 static int rt6_fill_node(struct net *net,
2362 			 struct sk_buff *skb, struct rt6_info *rt,
2363 			 struct in6_addr *dst, struct in6_addr *src,
2364 			 int iif, int type, u32 pid, u32 seq,
2365 			 int prefix, int nowait, unsigned int flags)
2366 {
2367 	const struct inet_peer *peer;
2368 	struct rtmsg *rtm;
2369 	struct nlmsghdr *nlh;
2370 	long expires;
2371 	u32 table;
2372 	struct neighbour *n;
2373 	u32 ts, tsage;
2374 
2375 	if (prefix) {	/* user wants prefix routes only */
2376 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2377 			/* success since this is not a prefix route */
2378 			return 1;
2379 		}
2380 	}
2381 
2382 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2383 	if (!nlh)
2384 		return -EMSGSIZE;
2385 
2386 	rtm = nlmsg_data(nlh);
2387 	rtm->rtm_family = AF_INET6;
2388 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2389 	rtm->rtm_src_len = rt->rt6i_src.plen;
2390 	rtm->rtm_tos = 0;
2391 	if (rt->rt6i_table)
2392 		table = rt->rt6i_table->tb6_id;
2393 	else
2394 		table = RT6_TABLE_UNSPEC;
2395 	rtm->rtm_table = table;
2396 	NLA_PUT_U32(skb, RTA_TABLE, table);
2397 	if (rt->rt6i_flags & RTF_REJECT)
2398 		rtm->rtm_type = RTN_UNREACHABLE;
2399 	else if (rt->rt6i_flags & RTF_LOCAL)
2400 		rtm->rtm_type = RTN_LOCAL;
2401 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2402 		rtm->rtm_type = RTN_LOCAL;
2403 	else
2404 		rtm->rtm_type = RTN_UNICAST;
2405 	rtm->rtm_flags = 0;
2406 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2407 	rtm->rtm_protocol = rt->rt6i_protocol;
2408 	if (rt->rt6i_flags & RTF_DYNAMIC)
2409 		rtm->rtm_protocol = RTPROT_REDIRECT;
2410 	else if (rt->rt6i_flags & RTF_ADDRCONF)
2411 		rtm->rtm_protocol = RTPROT_KERNEL;
2412 	else if (rt->rt6i_flags & RTF_DEFAULT)
2413 		rtm->rtm_protocol = RTPROT_RA;
2414 
2415 	if (rt->rt6i_flags & RTF_CACHE)
2416 		rtm->rtm_flags |= RTM_F_CLONED;
2417 
2418 	if (dst) {
2419 		NLA_PUT(skb, RTA_DST, 16, dst);
2420 		rtm->rtm_dst_len = 128;
2421 	} else if (rtm->rtm_dst_len)
2422 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2423 #ifdef CONFIG_IPV6_SUBTREES
2424 	if (src) {
2425 		NLA_PUT(skb, RTA_SRC, 16, src);
2426 		rtm->rtm_src_len = 128;
2427 	} else if (rtm->rtm_src_len)
2428 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2429 #endif
2430 	if (iif) {
2431 #ifdef CONFIG_IPV6_MROUTE
2432 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2433 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2434 			if (err <= 0) {
2435 				if (!nowait) {
2436 					if (err == 0)
2437 						return 0;
2438 					goto nla_put_failure;
2439 				} else {
2440 					if (err == -EMSGSIZE)
2441 						goto nla_put_failure;
2442 				}
2443 			}
2444 		} else
2445 #endif
2446 			NLA_PUT_U32(skb, RTA_IIF, iif);
2447 	} else if (dst) {
2448 		struct in6_addr saddr_buf;
2449 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2450 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2451 	}
2452 
2453 	if (rt->rt6i_prefsrc.plen) {
2454 		struct in6_addr saddr_buf;
2455 		saddr_buf = rt->rt6i_prefsrc.addr;
2456 		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2457 	}
2458 
2459 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2460 		goto nla_put_failure;
2461 
2462 	rcu_read_lock();
2463 	n = dst_get_neighbour_noref(&rt->dst);
2464 	if (n)
2465 		NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2466 	rcu_read_unlock();
2467 
2468 	if (rt->dst.dev)
2469 		NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
2470 
2471 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2472 
2473 	if (!(rt->rt6i_flags & RTF_EXPIRES))
2474 		expires = 0;
2475 	else if (rt->dst.expires - jiffies < INT_MAX)
2476 		expires = rt->dst.expires - jiffies;
2477 	else
2478 		expires = INT_MAX;
2479 
2480 	peer = rt->rt6i_peer;
2481 	ts = tsage = 0;
2482 	if (peer && peer->tcp_ts_stamp) {
2483 		ts = peer->tcp_ts;
2484 		tsage = get_seconds() - peer->tcp_ts_stamp;
2485 	}
2486 
2487 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
2488 			       expires, rt->dst.error) < 0)
2489 		goto nla_put_failure;
2490 
2491 	return nlmsg_end(skb, nlh);
2492 
2493 nla_put_failure:
2494 	nlmsg_cancel(skb, nlh);
2495 	return -EMSGSIZE;
2496 }
2497 
2498 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2499 {
2500 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2501 	int prefix;
2502 
2503 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2504 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2505 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2506 	} else
2507 		prefix = 0;
2508 
2509 	return rt6_fill_node(arg->net,
2510 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2511 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2512 		     prefix, 0, NLM_F_MULTI);
2513 }
2514 
2515 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2516 {
2517 	struct net *net = sock_net(in_skb->sk);
2518 	struct nlattr *tb[RTA_MAX+1];
2519 	struct rt6_info *rt;
2520 	struct sk_buff *skb;
2521 	struct rtmsg *rtm;
2522 	struct flowi6 fl6;
2523 	int err, iif = 0;
2524 
2525 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2526 	if (err < 0)
2527 		goto errout;
2528 
2529 	err = -EINVAL;
2530 	memset(&fl6, 0, sizeof(fl6));
2531 
2532 	if (tb[RTA_SRC]) {
2533 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2534 			goto errout;
2535 
2536 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2537 	}
2538 
2539 	if (tb[RTA_DST]) {
2540 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2541 			goto errout;
2542 
2543 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2544 	}
2545 
2546 	if (tb[RTA_IIF])
2547 		iif = nla_get_u32(tb[RTA_IIF]);
2548 
2549 	if (tb[RTA_OIF])
2550 		fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2551 
2552 	if (iif) {
2553 		struct net_device *dev;
2554 		dev = __dev_get_by_index(net, iif);
2555 		if (!dev) {
2556 			err = -ENODEV;
2557 			goto errout;
2558 		}
2559 	}
2560 
2561 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2562 	if (!skb) {
2563 		err = -ENOBUFS;
2564 		goto errout;
2565 	}
2566 
2567 	/* Reserve room for dummy headers, this skb can pass
2568 	   through good chunk of routing engine.
2569 	 */
2570 	skb_reset_mac_header(skb);
2571 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2572 
2573 	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2574 	skb_dst_set(skb, &rt->dst);
2575 
2576 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2577 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2578 			    nlh->nlmsg_seq, 0, 0, 0);
2579 	if (err < 0) {
2580 		kfree_skb(skb);
2581 		goto errout;
2582 	}
2583 
2584 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2585 errout:
2586 	return err;
2587 }
2588 
2589 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2590 {
2591 	struct sk_buff *skb;
2592 	struct net *net = info->nl_net;
2593 	u32 seq;
2594 	int err;
2595 
2596 	err = -ENOBUFS;
2597 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2598 
2599 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2600 	if (!skb)
2601 		goto errout;
2602 
2603 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2604 				event, info->pid, seq, 0, 0, 0);
2605 	if (err < 0) {
2606 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2607 		WARN_ON(err == -EMSGSIZE);
2608 		kfree_skb(skb);
2609 		goto errout;
2610 	}
2611 	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2612 		    info->nlh, gfp_any());
2613 	return;
2614 errout:
2615 	if (err < 0)
2616 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2617 }
2618 
2619 static int ip6_route_dev_notify(struct notifier_block *this,
2620 				unsigned long event, void *data)
2621 {
2622 	struct net_device *dev = (struct net_device *)data;
2623 	struct net *net = dev_net(dev);
2624 
2625 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2626 		net->ipv6.ip6_null_entry->dst.dev = dev;
2627 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2628 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2629 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2630 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2631 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2632 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2633 #endif
2634 	}
2635 
2636 	return NOTIFY_OK;
2637 }
2638 
2639 /*
2640  *	/proc
2641  */
2642 
2643 #ifdef CONFIG_PROC_FS
2644 
/*
 * Buffer bookkeeping for a /proc read.
 * NOTE(review): nothing in the visible part of this file references this
 * structure; it looks like a leftover from before the seq_file interface --
 * confirm before removing.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2653 
2654 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2655 {
2656 	struct seq_file *m = p_arg;
2657 	struct neighbour *n;
2658 
2659 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2660 
2661 #ifdef CONFIG_IPV6_SUBTREES
2662 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2663 #else
2664 	seq_puts(m, "00000000000000000000000000000000 00 ");
2665 #endif
2666 	rcu_read_lock();
2667 	n = dst_get_neighbour_noref(&rt->dst);
2668 	if (n) {
2669 		seq_printf(m, "%pi6", n->primary_key);
2670 	} else {
2671 		seq_puts(m, "00000000000000000000000000000000");
2672 	}
2673 	rcu_read_unlock();
2674 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2675 		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2676 		   rt->dst.__use, rt->rt6i_flags,
2677 		   rt->dst.dev ? rt->dst.dev->name : "");
2678 	return 0;
2679 }
2680 
2681 static int ipv6_route_show(struct seq_file *m, void *v)
2682 {
2683 	struct net *net = (struct net *)m->private;
2684 	fib6_clean_all_ro(net, rt6_info_route, 0, m);
2685 	return 0;
2686 }
2687 
2688 static int ipv6_route_open(struct inode *inode, struct file *file)
2689 {
2690 	return single_open_net(inode, file, ipv6_route_show);
2691 }
2692 
2693 static const struct file_operations ipv6_route_proc_fops = {
2694 	.owner		= THIS_MODULE,
2695 	.open		= ipv6_route_open,
2696 	.read		= seq_read,
2697 	.llseek		= seq_lseek,
2698 	.release	= single_release_net,
2699 };
2700 
2701 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2702 {
2703 	struct net *net = (struct net *)seq->private;
2704 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2705 		   net->ipv6.rt6_stats->fib_nodes,
2706 		   net->ipv6.rt6_stats->fib_route_nodes,
2707 		   net->ipv6.rt6_stats->fib_rt_alloc,
2708 		   net->ipv6.rt6_stats->fib_rt_entries,
2709 		   net->ipv6.rt6_stats->fib_rt_cache,
2710 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2711 		   net->ipv6.rt6_stats->fib_discarded_routes);
2712 
2713 	return 0;
2714 }
2715 
2716 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2717 {
2718 	return single_open_net(inode, file, rt6_stats_seq_show);
2719 }
2720 
2721 static const struct file_operations rt6_stats_seq_fops = {
2722 	.owner	 = THIS_MODULE,
2723 	.open	 = rt6_stats_seq_open,
2724 	.read	 = seq_read,
2725 	.llseek	 = seq_lseek,
2726 	.release = single_release_net,
2727 };
2728 #endif	/* CONFIG_PROC_FS */
2729 
2730 #ifdef CONFIG_SYSCTL
2731 
2732 static
2733 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2734 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2735 {
2736 	struct net *net;
2737 	int delay;
2738 	if (!write)
2739 		return -EINVAL;
2740 
2741 	net = (struct net *)ctl->extra1;
2742 	delay = net->ipv6.sysctl.flush_delay;
2743 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2744 	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2745 	return 0;
2746 }
2747 
2748 ctl_table ipv6_route_table_template[] = {
2749 	{
2750 		.procname	=	"flush",
2751 		.data		=	&init_net.ipv6.sysctl.flush_delay,
2752 		.maxlen		=	sizeof(int),
2753 		.mode		=	0200,
2754 		.proc_handler	=	ipv6_sysctl_rtcache_flush
2755 	},
2756 	{
2757 		.procname	=	"gc_thresh",
2758 		.data		=	&ip6_dst_ops_template.gc_thresh,
2759 		.maxlen		=	sizeof(int),
2760 		.mode		=	0644,
2761 		.proc_handler	=	proc_dointvec,
2762 	},
2763 	{
2764 		.procname	=	"max_size",
2765 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2766 		.maxlen		=	sizeof(int),
2767 		.mode		=	0644,
2768 		.proc_handler	=	proc_dointvec,
2769 	},
2770 	{
2771 		.procname	=	"gc_min_interval",
2772 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2773 		.maxlen		=	sizeof(int),
2774 		.mode		=	0644,
2775 		.proc_handler	=	proc_dointvec_jiffies,
2776 	},
2777 	{
2778 		.procname	=	"gc_timeout",
2779 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2780 		.maxlen		=	sizeof(int),
2781 		.mode		=	0644,
2782 		.proc_handler	=	proc_dointvec_jiffies,
2783 	},
2784 	{
2785 		.procname	=	"gc_interval",
2786 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2787 		.maxlen		=	sizeof(int),
2788 		.mode		=	0644,
2789 		.proc_handler	=	proc_dointvec_jiffies,
2790 	},
2791 	{
2792 		.procname	=	"gc_elasticity",
2793 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2794 		.maxlen		=	sizeof(int),
2795 		.mode		=	0644,
2796 		.proc_handler	=	proc_dointvec,
2797 	},
2798 	{
2799 		.procname	=	"mtu_expires",
2800 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2801 		.maxlen		=	sizeof(int),
2802 		.mode		=	0644,
2803 		.proc_handler	=	proc_dointvec_jiffies,
2804 	},
2805 	{
2806 		.procname	=	"min_adv_mss",
2807 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2808 		.maxlen		=	sizeof(int),
2809 		.mode		=	0644,
2810 		.proc_handler	=	proc_dointvec,
2811 	},
2812 	{
2813 		.procname	=	"gc_min_interval_ms",
2814 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2815 		.maxlen		=	sizeof(int),
2816 		.mode		=	0644,
2817 		.proc_handler	=	proc_dointvec_ms_jiffies,
2818 	},
2819 	{ }
2820 };
2821 
2822 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2823 {
2824 	struct ctl_table *table;
2825 
2826 	table = kmemdup(ipv6_route_table_template,
2827 			sizeof(ipv6_route_table_template),
2828 			GFP_KERNEL);
2829 
2830 	if (table) {
2831 		table[0].data = &net->ipv6.sysctl.flush_delay;
2832 		table[0].extra1 = net;
2833 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2834 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2835 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2836 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2837 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2838 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2839 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2840 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2841 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2842 	}
2843 
2844 	return table;
2845 }
2846 #endif
2847 
2848 static int __net_init ip6_route_net_init(struct net *net)
2849 {
2850 	int ret = -ENOMEM;
2851 
2852 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2853 	       sizeof(net->ipv6.ip6_dst_ops));
2854 
2855 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2856 		goto out_ip6_dst_ops;
2857 
2858 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2859 					   sizeof(*net->ipv6.ip6_null_entry),
2860 					   GFP_KERNEL);
2861 	if (!net->ipv6.ip6_null_entry)
2862 		goto out_ip6_dst_entries;
2863 	net->ipv6.ip6_null_entry->dst.path =
2864 		(struct dst_entry *)net->ipv6.ip6_null_entry;
2865 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2866 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2867 			 ip6_template_metrics, true);
2868 
2869 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2870 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2871 					       sizeof(*net->ipv6.ip6_prohibit_entry),
2872 					       GFP_KERNEL);
2873 	if (!net->ipv6.ip6_prohibit_entry)
2874 		goto out_ip6_null_entry;
2875 	net->ipv6.ip6_prohibit_entry->dst.path =
2876 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2877 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2878 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2879 			 ip6_template_metrics, true);
2880 
2881 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2882 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2883 					       GFP_KERNEL);
2884 	if (!net->ipv6.ip6_blk_hole_entry)
2885 		goto out_ip6_prohibit_entry;
2886 	net->ipv6.ip6_blk_hole_entry->dst.path =
2887 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2888 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2889 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2890 			 ip6_template_metrics, true);
2891 #endif
2892 
2893 	net->ipv6.sysctl.flush_delay = 0;
2894 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2895 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2896 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2897 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2898 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2899 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2900 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2901 
2902 #ifdef CONFIG_PROC_FS
2903 	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2904 	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2905 #endif
2906 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2907 
2908 	ret = 0;
2909 out:
2910 	return ret;
2911 
2912 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2913 out_ip6_prohibit_entry:
2914 	kfree(net->ipv6.ip6_prohibit_entry);
2915 out_ip6_null_entry:
2916 	kfree(net->ipv6.ip6_null_entry);
2917 #endif
2918 out_ip6_dst_entries:
2919 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2920 out_ip6_dst_ops:
2921 	goto out;
2922 }
2923 
2924 static void __net_exit ip6_route_net_exit(struct net *net)
2925 {
2926 #ifdef CONFIG_PROC_FS
2927 	proc_net_remove(net, "ipv6_route");
2928 	proc_net_remove(net, "rt6_stats");
2929 #endif
2930 	kfree(net->ipv6.ip6_null_entry);
2931 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2932 	kfree(net->ipv6.ip6_prohibit_entry);
2933 	kfree(net->ipv6.ip6_blk_hole_entry);
2934 #endif
2935 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2936 }
2937 
2938 static struct pernet_operations ip6_route_net_ops = {
2939 	.init = ip6_route_net_init,
2940 	.exit = ip6_route_net_exit,
2941 };
2942 
2943 static struct notifier_block ip6_route_dev_notifier = {
2944 	.notifier_call = ip6_route_dev_notify,
2945 	.priority = 0,
2946 };
2947 
2948 int __init ip6_route_init(void)
2949 {
2950 	int ret;
2951 
2952 	ret = -ENOMEM;
2953 	ip6_dst_ops_template.kmem_cachep =
2954 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2955 				  SLAB_HWCACHE_ALIGN, NULL);
2956 	if (!ip6_dst_ops_template.kmem_cachep)
2957 		goto out;
2958 
2959 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
2960 	if (ret)
2961 		goto out_kmem_cache;
2962 
2963 	ret = register_pernet_subsys(&ip6_route_net_ops);
2964 	if (ret)
2965 		goto out_dst_entries;
2966 
2967 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2968 
2969 	/* Registering of the loopback is done before this portion of code,
2970 	 * the loopback reference in rt6_info will not be taken, do it
2971 	 * manually for init_net */
2972 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2973 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2974   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2975 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2976 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2977 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2978 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2979   #endif
2980 	ret = fib6_init();
2981 	if (ret)
2982 		goto out_register_subsys;
2983 
2984 	ret = xfrm6_init();
2985 	if (ret)
2986 		goto out_fib6_init;
2987 
2988 	ret = fib6_rules_init();
2989 	if (ret)
2990 		goto xfrm6_init;
2991 
2992 	ret = -ENOBUFS;
2993 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2994 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2995 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
2996 		goto fib6_rules_init;
2997 
2998 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2999 	if (ret)
3000 		goto fib6_rules_init;
3001 
3002 out:
3003 	return ret;
3004 
3005 fib6_rules_init:
3006 	fib6_rules_cleanup();
3007 xfrm6_init:
3008 	xfrm6_fini();
3009 out_fib6_init:
3010 	fib6_gc_cleanup();
3011 out_register_subsys:
3012 	unregister_pernet_subsys(&ip6_route_net_ops);
3013 out_dst_entries:
3014 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3015 out_kmem_cache:
3016 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3017 	goto out;
3018 }
3019 
3020 void ip6_route_cleanup(void)
3021 {
3022 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3023 	fib6_rules_cleanup();
3024 	xfrm6_fini();
3025 	fib6_gc_cleanup();
3026 	unregister_pernet_subsys(&ip6_route_net_ops);
3027 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3028 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3029 }
3030