xref: /openbmc/linux/net/ipv6/route.c (revision 1ab142d4)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/export.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/mroute6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <linux/nsproxy.h>
44 #include <linux/slab.h>
45 #include <net/net_namespace.h>
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56 #include <net/netevent.h>
57 #include <net/netlink.h>
58 
59 #include <asm/uaccess.h>
60 
61 #ifdef CONFIG_SYSCTL
62 #include <linux/sysctl.h>
63 #endif
64 
65 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
66 				    const struct in6_addr *dest);
67 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
68 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
69 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
70 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
71 static void		ip6_dst_destroy(struct dst_entry *);
72 static void		ip6_dst_ifdown(struct dst_entry *,
73 				       struct net_device *dev, int how);
74 static int		 ip6_dst_gc(struct dst_ops *ops);
75 
76 static int		ip6_pkt_discard(struct sk_buff *skb);
77 static int		ip6_pkt_discard_out(struct sk_buff *skb);
78 static void		ip6_link_failure(struct sk_buff *skb);
79 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
80 
81 #ifdef CONFIG_IPV6_ROUTE_INFO
82 static struct rt6_info *rt6_add_route_info(struct net *net,
83 					   const struct in6_addr *prefix, int prefixlen,
84 					   const struct in6_addr *gwaddr, int ifindex,
85 					   unsigned pref);
86 static struct rt6_info *rt6_get_route_info(struct net *net,
87 					   const struct in6_addr *prefix, int prefixlen,
88 					   const struct in6_addr *gwaddr, int ifindex);
89 #endif
90 
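/*
 * Copy-on-write of dst metrics for host routes: metrics live in the
 * per-destination inet_peer rather than in the shared, read-only
 * template.  On the first write the old values are copied into
 * peer->metrics, and the cmpxchg() on dst->_metrics resolves a race
 * with a concurrent writer; non-host routes and routes without a peer
 * get NULL, i.e. their metrics stay read-only.
 */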
91 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
92 {
93 	struct rt6_info *rt = (struct rt6_info *) dst;
94 	struct inet_peer *peer;
95 	u32 *p = NULL;
96 
97 	if (!(rt->dst.flags & DST_HOST))
98 		return NULL;
99 
100 	if (!rt->rt6i_peer)
101 		rt6_bind_peer(rt, 1);
102 
103 	peer = rt->rt6i_peer;
104 	if (peer) {
105 		u32 *old_p = __DST_METRICS_PTR(old);
106 		unsigned long prev, new;
107 
108 		p = peer->metrics;
109 		if (inet_metrics_new(peer))
110 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111 
112 		new = (unsigned long) p;
113 		prev = cmpxchg(&dst->_metrics, old, new);
114 
115 		if (prev != old) {
116 			p = __DST_METRICS_PTR(prev);
117 			if (prev & DST_METRICS_READ_ONLY)
118 				p = NULL;
119 		}
120 	}
121 	return p;
122 }
123 
124 static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
125 {
126 	struct in6_addr *p = &rt->rt6i_gateway;
127 
128 	if (!ipv6_addr_any(p))
129 		return (const void *) p;
130 	return daddr;
131 }
132 
133 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
134 {
135 	struct rt6_info *rt = (struct rt6_info *) dst;
136 	struct neighbour *n;
137 
138 	daddr = choose_neigh_daddr(rt, daddr);
139 	n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
140 	if (n)
141 		return n;
142 	return neigh_create(&nd_tbl, daddr, dst->dev);
143 }
144 
145 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
146 {
147 	struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
148 	if (!n) {
149 		n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
150 		if (IS_ERR(n))
151 			return PTR_ERR(n);
152 	}
153 	dst_set_neighbour(&rt->dst, n);
154 
155 	return 0;
156 }
157 
158 static struct dst_ops ip6_dst_ops_template = {
159 	.family			=	AF_INET6,
160 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
161 	.gc			=	ip6_dst_gc,
162 	.gc_thresh		=	1024,
163 	.check			=	ip6_dst_check,
164 	.default_advmss		=	ip6_default_advmss,
165 	.mtu			=	ip6_mtu,
166 	.cow_metrics		=	ipv6_cow_metrics,
167 	.destroy		=	ip6_dst_destroy,
168 	.ifdown			=	ip6_dst_ifdown,
169 	.negative_advice	=	ip6_negative_advice,
170 	.link_failure		=	ip6_link_failure,
171 	.update_pmtu		=	ip6_rt_update_pmtu,
172 	.local_out		=	__ip6_local_out,
173 	.neigh_lookup		=	ip6_neigh_lookup,
174 };
175 
176 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
177 {
178 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
179 
180 	return mtu ? : dst->dev->mtu;
181 }
182 
183 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
184 {
185 }
186 
187 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
188 					 unsigned long old)
189 {
190 	return NULL;
191 }
192 
193 static struct dst_ops ip6_dst_blackhole_ops = {
194 	.family			=	AF_INET6,
195 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
196 	.destroy		=	ip6_dst_destroy,
197 	.check			=	ip6_dst_check,
198 	.mtu			=	ip6_blackhole_mtu,
199 	.default_advmss		=	ip6_default_advmss,
200 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
201 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
202 	.neigh_lookup		=	ip6_neigh_lookup,
203 };
204 
205 static const u32 ip6_template_metrics[RTAX_MAX] = {
206 	[RTAX_HOPLIMIT - 1] = 255,
207 };
208 
209 static struct rt6_info ip6_null_entry_template = {
210 	.dst = {
211 		.__refcnt	= ATOMIC_INIT(1),
212 		.__use		= 1,
213 		.obsolete	= -1,
214 		.error		= -ENETUNREACH,
215 		.input		= ip6_pkt_discard,
216 		.output		= ip6_pkt_discard_out,
217 	},
218 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
219 	.rt6i_protocol  = RTPROT_KERNEL,
220 	.rt6i_metric	= ~(u32) 0,
221 	.rt6i_ref	= ATOMIC_INIT(1),
222 };
223 
224 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
225 
226 static int ip6_pkt_prohibit(struct sk_buff *skb);
227 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
228 
229 static struct rt6_info ip6_prohibit_entry_template = {
230 	.dst = {
231 		.__refcnt	= ATOMIC_INIT(1),
232 		.__use		= 1,
233 		.obsolete	= -1,
234 		.error		= -EACCES,
235 		.input		= ip6_pkt_prohibit,
236 		.output		= ip6_pkt_prohibit_out,
237 	},
238 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
239 	.rt6i_protocol  = RTPROT_KERNEL,
240 	.rt6i_metric	= ~(u32) 0,
241 	.rt6i_ref	= ATOMIC_INIT(1),
242 };
243 
244 static struct rt6_info ip6_blk_hole_entry_template = {
245 	.dst = {
246 		.__refcnt	= ATOMIC_INIT(1),
247 		.__use		= 1,
248 		.obsolete	= -1,
249 		.error		= -EINVAL,
250 		.input		= dst_discard,
251 		.output		= dst_discard,
252 	},
253 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
254 	.rt6i_protocol  = RTPROT_KERNEL,
255 	.rt6i_metric	= ~(u32) 0,
256 	.rt6i_ref	= ATOMIC_INIT(1),
257 };
258 
259 #endif
260 
261 /* allocate dst with ip6_dst_ops */
262 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
263 					     struct net_device *dev,
264 					     int flags)
265 {
266 	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
267 
268 	if (rt)
269 		memset(&rt->rt6i_table, 0,
270 		       sizeof(*rt) - sizeof(struct dst_entry));
271 
272 	return rt;
273 }
274 
275 static void ip6_dst_destroy(struct dst_entry *dst)
276 {
277 	struct rt6_info *rt = (struct rt6_info *)dst;
278 	struct inet6_dev *idev = rt->rt6i_idev;
279 	struct inet_peer *peer = rt->rt6i_peer;
280 
281 	if (!(rt->dst.flags & DST_HOST))
282 		dst_destroy_metrics_generic(dst);
283 
284 	if (idev) {
285 		rt->rt6i_idev = NULL;
286 		in6_dev_put(idev);
287 	}
288 	if (peer) {
289 		rt->rt6i_peer = NULL;
290 		inet_putpeer(peer);
291 	}
292 }
293 
294 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
295 
296 static u32 rt6_peer_genid(void)
297 {
298 	return atomic_read(&__rt6_peer_genid);
299 }
300 
301 void rt6_bind_peer(struct rt6_info *rt, int create)
302 {
303 	struct inet_peer *peer;
304 
305 	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
306 	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
307 		inet_putpeer(peer);
308 	else
309 		rt->rt6i_peer_genid = rt6_peer_genid();
310 }
311 
312 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
313 			   int how)
314 {
315 	struct rt6_info *rt = (struct rt6_info *)dst;
316 	struct inet6_dev *idev = rt->rt6i_idev;
317 	struct net_device *loopback_dev =
318 		dev_net(dev)->loopback_dev;
319 
320 	if (dev != loopback_dev && idev && idev->dev == dev) {
321 		struct inet6_dev *loopback_idev =
322 			in6_dev_get(loopback_dev);
323 		if (loopback_idev) {
324 			rt->rt6i_idev = loopback_idev;
325 			in6_dev_put(idev);
326 		}
327 	}
328 }
329 
330 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
331 {
332 	return (rt->rt6i_flags & RTF_EXPIRES) &&
333 		time_after(jiffies, rt->dst.expires);
334 }
335 
336 static inline int rt6_need_strict(const struct in6_addr *daddr)
337 {
338 	return ipv6_addr_type(daddr) &
339 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
340 }
341 
342 /*
343  *	Route lookup. Holding the relevant table->tb6_lock is implied.

344  */
345 
346 static inline struct rt6_info *rt6_device_match(struct net *net,
347 						    struct rt6_info *rt,
348 						    const struct in6_addr *saddr,
349 						    int oif,
350 						    int flags)
351 {
352 	struct rt6_info *local = NULL;
353 	struct rt6_info *sprt;
354 
355 	if (!oif && ipv6_addr_any(saddr))
356 		goto out;
357 
358 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
359 		struct net_device *dev = sprt->dst.dev;
360 
361 		if (oif) {
362 			if (dev->ifindex == oif)
363 				return sprt;
364 			if (dev->flags & IFF_LOOPBACK) {
365 				if (!sprt->rt6i_idev ||
366 				    sprt->rt6i_idev->dev->ifindex != oif) {
367 					if (flags & RT6_LOOKUP_F_IFACE && oif)
368 						continue;
369 					if (local && (!oif ||
370 						      local->rt6i_idev->dev->ifindex == oif))
371 						continue;
372 				}
373 				local = sprt;
374 			}
375 		} else {
376 			if (ipv6_chk_addr(net, saddr, dev,
377 					  flags & RT6_LOOKUP_F_IFACE))
378 				return sprt;
379 		}
380 	}
381 
382 	if (oif) {
383 		if (local)
384 			return local;
385 
386 		if (flags & RT6_LOOKUP_F_IFACE)
387 			return net->ipv6.ip6_null_entry;
388 	}
389 out:
390 	return rt;
391 }
392 
393 #ifdef CONFIG_IPV6_ROUTER_PREF
394 static void rt6_probe(struct rt6_info *rt)
395 {
396 	struct neighbour *neigh;
397 	/*
398 	 * Okay, this does not seem to be appropriate
399 	 * for now; however, we need to check whether it
400 	 * really is so - aka Router Reachability Probing.
401 	 *
402 	 * Router Reachability Probe MUST be rate-limited
403 	 * to no more than one per minute.
404 	 */
405 	rcu_read_lock();
406 	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
407 	if (!neigh || (neigh->nud_state & NUD_VALID))
408 		goto out;
409 	read_lock_bh(&neigh->lock);
410 	if (!(neigh->nud_state & NUD_VALID) &&
411 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
412 		struct in6_addr mcaddr;
413 		struct in6_addr *target;
414 
415 		neigh->updated = jiffies;
416 		read_unlock_bh(&neigh->lock);
417 
418 		target = (struct in6_addr *)&neigh->primary_key;
419 		addrconf_addr_solict_mult(target, &mcaddr);
420 		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
421 	} else {
422 		read_unlock_bh(&neigh->lock);
423 	}
424 out:
425 	rcu_read_unlock();
426 }
427 #else
428 static inline void rt6_probe(struct rt6_info *rt)
429 {
430 }
431 #endif
432 
433 /*
434  * Default Router Selection (RFC 2461 6.3.6)
435  */
436 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
437 {
438 	struct net_device *dev = rt->dst.dev;
439 	if (!oif || dev->ifindex == oif)
440 		return 2;
441 	if ((dev->flags & IFF_LOOPBACK) &&
442 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
443 		return 1;
444 	return 0;
445 }
446 
447 static inline int rt6_check_neigh(struct rt6_info *rt)
448 {
449 	struct neighbour *neigh;
450 	int m;
451 
452 	rcu_read_lock();
453 	neigh = dst_get_neighbour_noref(&rt->dst);
454 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
455 	    !(rt->rt6i_flags & RTF_GATEWAY))
456 		m = 1;
457 	else if (neigh) {
458 		read_lock_bh(&neigh->lock);
459 		if (neigh->nud_state & NUD_VALID)
460 			m = 2;
461 #ifdef CONFIG_IPV6_ROUTER_PREF
462 		else if (neigh->nud_state & NUD_FAILED)
463 			m = 0;
464 #endif
465 		else
466 			m = 1;
467 		read_unlock_bh(&neigh->lock);
468 	} else
469 		m = 0;
470 	rcu_read_unlock();
471 	return m;
472 }
473 
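/*
 * Score a candidate default router: rt6_check_dev() yields 2 for an
 * exact (or unrestricted) interface match, 1 for a loopback route bound
 * to the requested interface and 0 otherwise; the RA preference bits
 * are folded in above that, and under RT6_LOOKUP_F_REACHABLE a router
 * whose neighbour entry is not at least possibly reachable is rejected
 * (score -1).
 */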
474 static int rt6_score_route(struct rt6_info *rt, int oif,
475 			   int strict)
476 {
477 	int m, n;
478 
479 	m = rt6_check_dev(rt, oif);
480 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
481 		return -1;
482 #ifdef CONFIG_IPV6_ROUTER_PREF
483 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
484 #endif
485 	n = rt6_check_neigh(rt);
486 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
487 		return -1;
488 	return m;
489 }
490 
491 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
492 				   int *mpri, struct rt6_info *match)
493 {
494 	int m;
495 
496 	if (rt6_check_expired(rt))
497 		goto out;
498 
499 	m = rt6_score_route(rt, oif, strict);
500 	if (m < 0)
501 		goto out;
502 
503 	if (m > *mpri) {
504 		if (strict & RT6_LOOKUP_F_REACHABLE)
505 			rt6_probe(match);
506 		*mpri = m;
507 		match = rt;
508 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
509 		rt6_probe(rt);
510 	}
511 
512 out:
513 	return match;
514 }
515 
516 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
517 				     struct rt6_info *rr_head,
518 				     u32 metric, int oif, int strict)
519 {
520 	struct rt6_info *rt, *match;
521 	int mpri = -1;
522 
523 	match = NULL;
524 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
525 	     rt = rt->dst.rt6_next)
526 		match = find_match(rt, oif, strict, &mpri, match);
527 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
528 	     rt = rt->dst.rt6_next)
529 		match = find_match(rt, oif, strict, &mpri, match);
530 
531 	return match;
532 }
533 
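/*
 * rt6_select() implements the default router selection described in the
 * header of this file: all siblings with the same metric are scored via
 * rt6_score_route(), starting at fn->rr_ptr, and when no (probably)
 * reachable router is found under RT6_LOOKUP_F_REACHABLE the rr_ptr is
 * advanced so the next lookup round-robins to a different router.
 */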
534 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
535 {
536 	struct rt6_info *match, *rt0;
537 	struct net *net;
538 
539 	rt0 = fn->rr_ptr;
540 	if (!rt0)
541 		fn->rr_ptr = rt0 = fn->leaf;
542 
543 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
544 
545 	if (!match &&
546 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
547 		struct rt6_info *next = rt0->dst.rt6_next;
548 
549 		/* no entries matched; do round-robin */
550 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
551 			next = fn->leaf;
552 
553 		if (next != rt0)
554 			fn->rr_ptr = next;
555 	}
556 
557 	net = dev_net(rt0->dst.dev);
558 	return match ? match : net->ipv6.ip6_null_entry;
559 }
560 
561 #ifdef CONFIG_IPV6_ROUTE_INFO
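/*
 * Parse a Route Information option (RFC 4191) received in a Router
 * Advertisement: after validating the option length against prefix_len,
 * the corresponding RTF_ROUTEINFO route is added, refreshed (preference
 * and lifetime) or, when the advertised lifetime is zero, deleted.
 */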
562 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
563 		  const struct in6_addr *gwaddr)
564 {
565 	struct net *net = dev_net(dev);
566 	struct route_info *rinfo = (struct route_info *) opt;
567 	struct in6_addr prefix_buf, *prefix;
568 	unsigned int pref;
569 	unsigned long lifetime;
570 	struct rt6_info *rt;
571 
572 	if (len < sizeof(struct route_info)) {
573 		return -EINVAL;
574 	}
575 
576 	/* Sanity check for prefix_len and length */
577 	if (rinfo->length > 3) {
578 		return -EINVAL;
579 	} else if (rinfo->prefix_len > 128) {
580 		return -EINVAL;
581 	} else if (rinfo->prefix_len > 64) {
582 		if (rinfo->length < 2) {
583 			return -EINVAL;
584 		}
585 	} else if (rinfo->prefix_len > 0) {
586 		if (rinfo->length < 1) {
587 			return -EINVAL;
588 		}
589 	}
590 
591 	pref = rinfo->route_pref;
592 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
593 		return -EINVAL;
594 
595 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
596 
597 	if (rinfo->length == 3)
598 		prefix = (struct in6_addr *)rinfo->prefix;
599 	else {
600 		/* this function is safe */
601 		ipv6_addr_prefix(&prefix_buf,
602 				 (struct in6_addr *)rinfo->prefix,
603 				 rinfo->prefix_len);
604 		prefix = &prefix_buf;
605 	}
606 
607 	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
608 				dev->ifindex);
609 
610 	if (rt && !lifetime) {
611 		ip6_del_rt(rt);
612 		rt = NULL;
613 	}
614 
615 	if (!rt && lifetime)
616 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
617 					pref);
618 	else if (rt)
619 		rt->rt6i_flags = RTF_ROUTEINFO |
620 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
621 
622 	if (rt) {
623 		if (!addrconf_finite_timeout(lifetime)) {
624 			rt->rt6i_flags &= ~RTF_EXPIRES;
625 		} else {
626 			rt->dst.expires = jiffies + HZ * lifetime;
627 			rt->rt6i_flags |= RTF_EXPIRES;
628 		}
629 		dst_release(&rt->dst);
630 	}
631 	return 0;
632 }
633 #endif
634 
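/*
 * BACKTRACK() is used when a lookup resolved to the null entry: walk
 * back up the fib6 tree and, if the parent node has a source-address
 * subtree (CONFIG_IPV6_SUBTREES) other than the one we came from,
 * search that subtree by source address; the walk stops at the first
 * node carrying route info (goto restart) or gives up at the tree root
 * (goto out).
 */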
635 #define BACKTRACK(__net, saddr)			\
636 do { \
637 	if (rt == __net->ipv6.ip6_null_entry) {	\
638 		struct fib6_node *pn; \
639 		while (1) { \
640 			if (fn->fn_flags & RTN_TL_ROOT) \
641 				goto out; \
642 			pn = fn->parent; \
643 			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
644 				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
645 			else \
646 				fn = pn; \
647 			if (fn->fn_flags & RTN_RTINFO) \
648 				goto restart; \
649 		} \
650 	} \
651 } while (0)
652 
653 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
654 					     struct fib6_table *table,
655 					     struct flowi6 *fl6, int flags)
656 {
657 	struct fib6_node *fn;
658 	struct rt6_info *rt;
659 
660 	read_lock_bh(&table->tb6_lock);
661 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
662 restart:
663 	rt = fn->leaf;
664 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
665 	BACKTRACK(net, &fl6->saddr);
666 out:
667 	dst_use(&rt->dst, jiffies);
668 	read_unlock_bh(&table->tb6_lock);
669 	return rt;
670 
671 }
672 
673 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
674 				    int flags)
675 {
676 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
677 }
678 EXPORT_SYMBOL_GPL(ip6_route_lookup);
679 
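/*
 * rt6_lookup() is a convenience wrapper around fib6_rule_lookup(): it
 * builds a flowi6 from the addresses, takes a reference on the matching
 * rt6_info and returns it, or returns NULL (after dropping the error
 * dst) when the lookup resolves to an error entry.  A typical caller
 * pattern looks roughly like this (sketch):
 *
 *	rt = rt6_lookup(net, &daddr, NULL, skb->dev->ifindex, 0);
 *	if (rt) {
 *		... use rt->dst ...
 *		dst_release(&rt->dst);
 *	}
 */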
680 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
681 			    const struct in6_addr *saddr, int oif, int strict)
682 {
683 	struct flowi6 fl6 = {
684 		.flowi6_oif = oif,
685 		.daddr = *daddr,
686 	};
687 	struct dst_entry *dst;
688 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
689 
690 	if (saddr) {
691 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
692 		flags |= RT6_LOOKUP_F_HAS_SADDR;
693 	}
694 
695 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
696 	if (dst->error == 0)
697 		return (struct rt6_info *) dst;
698 
699 	dst_release(dst);
700 
701 	return NULL;
702 }
703 
704 EXPORT_SYMBOL(rt6_lookup);
705 
706 /* ip6_ins_rt is called with table->tb6_lock NOT held.
707    It takes a new route entry; if the addition fails for any reason the
708    route is freed. In any case, if the caller does not hold a reference,
709    it may be destroyed.
710  */
711 
712 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
713 {
714 	int err;
715 	struct fib6_table *table;
716 
717 	table = rt->rt6i_table;
718 	write_lock_bh(&table->tb6_lock);
719 	err = fib6_add(&table->tb6_root, rt, info);
720 	write_unlock_bh(&table->tb6_lock);
721 
722 	return err;
723 }
724 
725 int ip6_ins_rt(struct rt6_info *rt)
726 {
727 	struct nl_info info = {
728 		.nl_net = dev_net(rt->dst.dev),
729 	};
730 	return __ip6_ins_rt(rt, &info);
731 }
732 
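/*
 * rt6_alloc_cow() makes a private RTF_CACHE copy of a route for one
 * destination (and, with subtrees, one source).  If binding a neighbour
 * entry fails because the neighbour table is full, the routing cache is
 * garbage collected once with temporarily relaxed gc sysctls and the
 * bind is retried (only when not in softirq context); on final failure
 * the clone is freed and NULL is returned.
 */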
733 static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
734 				      const struct in6_addr *daddr,
735 				      const struct in6_addr *saddr)
736 {
737 	struct rt6_info *rt;
738 
739 	/*
740 	 *	Clone the route.
741 	 */
742 
743 	rt = ip6_rt_copy(ort, daddr);
744 
745 	if (rt) {
746 		int attempts = !in_softirq();
747 
748 		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
749 			if (ort->rt6i_dst.plen != 128 &&
750 			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
751 				rt->rt6i_flags |= RTF_ANYCAST;
752 			rt->rt6i_gateway = *daddr;
753 		}
754 
755 		rt->rt6i_flags |= RTF_CACHE;
756 
757 #ifdef CONFIG_IPV6_SUBTREES
758 		if (rt->rt6i_src.plen && saddr) {
759 			rt->rt6i_src.addr = *saddr;
760 			rt->rt6i_src.plen = 128;
761 		}
762 #endif
763 
764 	retry:
765 		if (rt6_bind_neighbour(rt, rt->dst.dev)) {
766 			struct net *net = dev_net(rt->dst.dev);
767 			int saved_rt_min_interval =
768 				net->ipv6.sysctl.ip6_rt_gc_min_interval;
769 			int saved_rt_elasticity =
770 				net->ipv6.sysctl.ip6_rt_gc_elasticity;
771 
772 			if (attempts-- > 0) {
773 				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
774 				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
775 
776 				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
777 
778 				net->ipv6.sysctl.ip6_rt_gc_elasticity =
779 					saved_rt_elasticity;
780 				net->ipv6.sysctl.ip6_rt_gc_min_interval =
781 					saved_rt_min_interval;
782 				goto retry;
783 			}
784 
785 			if (net_ratelimit())
786 				printk(KERN_WARNING
787 				       "ipv6: Neighbour table overflow.\n");
788 			dst_free(&rt->dst);
789 			return NULL;
790 		}
791 	}
792 
793 	return rt;
794 }
795 
796 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
797 					const struct in6_addr *daddr)
798 {
799 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
800 
801 	if (rt) {
802 		rt->rt6i_flags |= RTF_CACHE;
803 		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
804 	}
805 	return rt;
806 }
807 
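/*
 * ip6_pol_route() is the common policy lookup for input and output.
 * Unless forwarding is enabled, the first pass insists on a (probably)
 * reachable router (RT6_LOOKUP_F_REACHABLE) and falls back to a second
 * pass without that requirement.  A match that is neither the null
 * entry nor already an RTF_CACHE clone is copied into a cache entry
 * (rt6_alloc_cow() when a neighbour still has to be bound,
 * rt6_alloc_clone() for non-host routes) and inserted, retrying a few
 * times if another CPU won the race to insert the same clone.
 */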
808 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
809 				      struct flowi6 *fl6, int flags)
810 {
811 	struct fib6_node *fn;
812 	struct rt6_info *rt, *nrt;
813 	int strict = 0;
814 	int attempts = 3;
815 	int err;
816 	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
817 
818 	strict |= flags & RT6_LOOKUP_F_IFACE;
819 
820 relookup:
821 	read_lock_bh(&table->tb6_lock);
822 
823 restart_2:
824 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
825 
826 restart:
827 	rt = rt6_select(fn, oif, strict | reachable);
828 
829 	BACKTRACK(net, &fl6->saddr);
830 	if (rt == net->ipv6.ip6_null_entry ||
831 	    rt->rt6i_flags & RTF_CACHE)
832 		goto out;
833 
834 	dst_hold(&rt->dst);
835 	read_unlock_bh(&table->tb6_lock);
836 
837 	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
838 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
839 	else if (!(rt->dst.flags & DST_HOST))
840 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
841 	else
842 		goto out2;
843 
844 	dst_release(&rt->dst);
845 	rt = nrt ? : net->ipv6.ip6_null_entry;
846 
847 	dst_hold(&rt->dst);
848 	if (nrt) {
849 		err = ip6_ins_rt(nrt);
850 		if (!err)
851 			goto out2;
852 	}
853 
854 	if (--attempts <= 0)
855 		goto out2;
856 
857 	/*
858 	 * Race condition! In the gap while table->tb6_lock was
859 	 * released, someone could have inserted this route.  Relookup.
860 	 */
861 	dst_release(&rt->dst);
862 	goto relookup;
863 
864 out:
865 	if (reachable) {
866 		reachable = 0;
867 		goto restart_2;
868 	}
869 	dst_hold(&rt->dst);
870 	read_unlock_bh(&table->tb6_lock);
871 out2:
872 	rt->dst.lastuse = jiffies;
873 	rt->dst.__use++;
874 
875 	return rt;
876 }
877 
878 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
879 					    struct flowi6 *fl6, int flags)
880 {
881 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
882 }
883 
884 void ip6_route_input(struct sk_buff *skb)
885 {
886 	const struct ipv6hdr *iph = ipv6_hdr(skb);
887 	struct net *net = dev_net(skb->dev);
888 	int flags = RT6_LOOKUP_F_HAS_SADDR;
889 	struct flowi6 fl6 = {
890 		.flowi6_iif = skb->dev->ifindex,
891 		.daddr = iph->daddr,
892 		.saddr = iph->saddr,
893 		.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
894 		.flowi6_mark = skb->mark,
895 		.flowi6_proto = iph->nexthdr,
896 	};
897 
898 	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
899 		flags |= RT6_LOOKUP_F_IFACE;
900 
901 	skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
902 }
903 
904 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
905 					     struct flowi6 *fl6, int flags)
906 {
907 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
908 }
909 
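/*
 * Route lookup for locally generated packets.  Strict interface
 * matching is requested for sockets bound to a device and for
 * link-local, multicast and loopback destinations; source-address
 * preferences from the socket are passed along when no source address
 * is set yet.  The result is never NULL; callers are expected to check
 * dst->error, e.g. (sketch):
 *
 *	dst = ip6_route_output(net, sk, &fl6);
 *	if (dst->error) {
 *		err = dst->error;
 *		dst_release(dst);
 *		goto out;
 *	}
 */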
910 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
911 				    struct flowi6 *fl6)
912 {
913 	int flags = 0;
914 
915 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
916 		flags |= RT6_LOOKUP_F_IFACE;
917 
918 	if (!ipv6_addr_any(&fl6->saddr))
919 		flags |= RT6_LOOKUP_F_HAS_SADDR;
920 	else if (sk)
921 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
922 
923 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
924 }
925 
926 EXPORT_SYMBOL(ip6_route_output);
927 
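/*
 * ip6_blackhole_route() copies an existing route into a dst allocated
 * from ip6_dst_blackhole_ops: input and output simply discard packets,
 * metrics are never written (cow_metrics returns NULL) and PMTU updates
 * are ignored.  The reference on the original dst is dropped and the
 * copy (or ERR_PTR(-ENOMEM)) is returned.
 */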
928 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
929 {
930 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
931 	struct dst_entry *new = NULL;
932 
933 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
934 	if (rt) {
935 		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
936 
937 		new = &rt->dst;
938 
939 		new->__use = 1;
940 		new->input = dst_discard;
941 		new->output = dst_discard;
942 
943 		if (dst_metrics_read_only(&ort->dst))
944 			new->_metrics = ort->dst._metrics;
945 		else
946 			dst_copy_metrics(new, &ort->dst);
947 		rt->rt6i_idev = ort->rt6i_idev;
948 		if (rt->rt6i_idev)
949 			in6_dev_hold(rt->rt6i_idev);
950 		rt->dst.expires = 0;
951 
952 		rt->rt6i_gateway = ort->rt6i_gateway;
953 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
954 		rt->rt6i_metric = 0;
955 
956 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
957 #ifdef CONFIG_IPV6_SUBTREES
958 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
959 #endif
960 
961 		dst_free(new);
962 	}
963 
964 	dst_release(dst_orig);
965 	return new ? new : ERR_PTR(-ENOMEM);
966 }
967 
968 /*
969  *	Destination cache support functions
970  */
971 
972 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
973 {
974 	struct rt6_info *rt;
975 
976 	rt = (struct rt6_info *) dst;
977 
978 	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
979 		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
980 			if (!rt->rt6i_peer)
981 				rt6_bind_peer(rt, 0);
982 			rt->rt6i_peer_genid = rt6_peer_genid();
983 		}
984 		return dst;
985 	}
986 	return NULL;
987 }
988 
989 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
990 {
991 	struct rt6_info *rt = (struct rt6_info *) dst;
992 
993 	if (rt) {
994 		if (rt->rt6i_flags & RTF_CACHE) {
995 			if (rt6_check_expired(rt)) {
996 				ip6_del_rt(rt);
997 				dst = NULL;
998 			}
999 		} else {
1000 			dst_release(dst);
1001 			dst = NULL;
1002 		}
1003 	}
1004 	return dst;
1005 }
1006 
1007 static void ip6_link_failure(struct sk_buff *skb)
1008 {
1009 	struct rt6_info *rt;
1010 
1011 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1012 
1013 	rt = (struct rt6_info *) skb_dst(skb);
1014 	if (rt) {
1015 		if (rt->rt6i_flags & RTF_CACHE) {
1016 			dst_set_expires(&rt->dst, 0);
1017 			rt->rt6i_flags |= RTF_EXPIRES;
1018 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1019 			rt->rt6i_node->fn_sernum = -1;
1020 	}
1021 }
1022 
1023 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1024 {
1025 	struct rt6_info *rt6 = (struct rt6_info*)dst;
1026 
1027 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1028 		rt6->rt6i_flags |= RTF_MODIFIED;
1029 		if (mtu < IPV6_MIN_MTU) {
1030 			u32 features = dst_metric(dst, RTAX_FEATURES);
1031 			mtu = IPV6_MIN_MTU;
1032 			features |= RTAX_FEATURE_ALLFRAG;
1033 			dst_metric_set(dst, RTAX_FEATURES, features);
1034 		}
1035 		dst_metric_set(dst, RTAX_MTU, mtu);
1036 	}
1037 }
1038 
1039 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1040 {
1041 	struct net_device *dev = dst->dev;
1042 	unsigned int mtu = dst_mtu(dst);
1043 	struct net *net = dev_net(dev);
1044 
1045 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1046 
1047 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1048 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1049 
1050 	/*
1051 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1052 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1053 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1054 	 * rely only on pmtu discovery"
1055 	 */
1056 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1057 		mtu = IPV6_MAXPLEN;
1058 	return mtu;
1059 }
1060 
1061 static unsigned int ip6_mtu(const struct dst_entry *dst)
1062 {
1063 	struct inet6_dev *idev;
1064 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1065 
1066 	if (mtu)
1067 		return mtu;
1068 
1069 	mtu = IPV6_MIN_MTU;
1070 
1071 	rcu_read_lock();
1072 	idev = __in6_dev_get(dst->dev);
1073 	if (idev)
1074 		mtu = idev->cnf.mtu6;
1075 	rcu_read_unlock();
1076 
1077 	return mtu;
1078 }
1079 
1080 static struct dst_entry *icmp6_dst_gc_list;
1081 static DEFINE_SPINLOCK(icmp6_dst_lock);
1082 
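/*
 * icmp6_dst_alloc() builds a standalone dst for ndisc/icmpv6 output
 * that is never inserted into the FIB.  Entries are chained on
 * icmp6_dst_gc_list under icmp6_dst_lock and reaped by icmp6_dst_gc()
 * once their refcount drops to zero; fib6_force_start_gc() makes sure
 * the periodic gc is running to do that reaping.
 */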
1083 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1084 				  struct neighbour *neigh,
1085 				  struct flowi6 *fl6)
1086 {
1087 	struct dst_entry *dst;
1088 	struct rt6_info *rt;
1089 	struct inet6_dev *idev = in6_dev_get(dev);
1090 	struct net *net = dev_net(dev);
1091 
1092 	if (unlikely(!idev))
1093 		return ERR_PTR(-ENODEV);
1094 
1095 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1096 	if (unlikely(!rt)) {
1097 		in6_dev_put(idev);
1098 		dst = ERR_PTR(-ENOMEM);
1099 		goto out;
1100 	}
1101 
1102 	if (neigh)
1103 		neigh_hold(neigh);
1104 	else {
1105 		neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
1106 		if (IS_ERR(neigh)) {
1107 			in6_dev_put(idev);
1108 			dst_free(&rt->dst);
1109 			return ERR_CAST(neigh);
1110 		}
1111 	}
1112 
1113 	rt->dst.flags |= DST_HOST;
1114 	rt->dst.output  = ip6_output;
1115 	dst_set_neighbour(&rt->dst, neigh);
1116 	atomic_set(&rt->dst.__refcnt, 1);
1117 	rt->rt6i_dst.addr = fl6->daddr;
1118 	rt->rt6i_dst.plen = 128;
1119 	rt->rt6i_idev     = idev;
1120 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1121 
1122 	spin_lock_bh(&icmp6_dst_lock);
1123 	rt->dst.next = icmp6_dst_gc_list;
1124 	icmp6_dst_gc_list = &rt->dst;
1125 	spin_unlock_bh(&icmp6_dst_lock);
1126 
1127 	fib6_force_start_gc(net);
1128 
1129 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1130 
1131 out:
1132 	return dst;
1133 }
1134 
1135 int icmp6_dst_gc(void)
1136 {
1137 	struct dst_entry *dst, **pprev;
1138 	int more = 0;
1139 
1140 	spin_lock_bh(&icmp6_dst_lock);
1141 	pprev = &icmp6_dst_gc_list;
1142 
1143 	while ((dst = *pprev) != NULL) {
1144 		if (!atomic_read(&dst->__refcnt)) {
1145 			*pprev = dst->next;
1146 			dst_free(dst);
1147 		} else {
1148 			pprev = &dst->next;
1149 			++more;
1150 		}
1151 	}
1152 
1153 	spin_unlock_bh(&icmp6_dst_lock);
1154 
1155 	return more;
1156 }
1157 
1158 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1159 			    void *arg)
1160 {
1161 	struct dst_entry *dst, **pprev;
1162 
1163 	spin_lock_bh(&icmp6_dst_lock);
1164 	pprev = &icmp6_dst_gc_list;
1165 	while ((dst = *pprev) != NULL) {
1166 		struct rt6_info *rt = (struct rt6_info *) dst;
1167 		if (func(rt, arg)) {
1168 			*pprev = dst->next;
1169 			dst_free(dst);
1170 		} else {
1171 			pprev = &dst->next;
1172 		}
1173 	}
1174 	spin_unlock_bh(&icmp6_dst_lock);
1175 }
1176 
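/*
 * dst garbage collection, invoked from dst_alloc() when gc_thresh is
 * exceeded.  Nothing is done while the last run is more recent than
 * ip6_rt_gc_min_interval and the entry count is below ip6_rt_max_size;
 * otherwise fib6_run_gc() is run with an expiry argument
 * (ip6_rt_gc_expire) that grows under pressure and decays according to
 * ip6_rt_gc_elasticity.  The return value tells dst_alloc() whether the
 * allocation should fail (still more entries than ip6_rt_max_size).
 */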
1177 static int ip6_dst_gc(struct dst_ops *ops)
1178 {
1179 	unsigned long now = jiffies;
1180 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1181 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1182 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1183 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1184 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1185 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1186 	int entries;
1187 
1188 	entries = dst_entries_get_fast(ops);
1189 	if (time_after(rt_last_gc + rt_min_interval, now) &&
1190 	    entries <= rt_max_size)
1191 		goto out;
1192 
1193 	net->ipv6.ip6_rt_gc_expire++;
1194 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1195 	net->ipv6.ip6_rt_last_gc = now;
1196 	entries = dst_entries_get_slow(ops);
1197 	if (entries < ops->gc_thresh)
1198 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1199 out:
1200 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1201 	return entries > rt_max_size;
1202 }
1203 
1204 /* Clean the host part of a prefix. Not necessary in a radix tree,
1205    but it results in cleaner routing tables.
1206 
1207    Remove this only once everything is known to work!
1208  */
1209 
1210 int ip6_dst_hoplimit(struct dst_entry *dst)
1211 {
1212 	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1213 	if (hoplimit == 0) {
1214 		struct net_device *dev = dst->dev;
1215 		struct inet6_dev *idev;
1216 
1217 		rcu_read_lock();
1218 		idev = __in6_dev_get(dev);
1219 		if (idev)
1220 			hoplimit = idev->cnf.hop_limit;
1221 		else
1222 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1223 		rcu_read_unlock();
1224 	}
1225 	return hoplimit;
1226 }
1227 EXPORT_SYMBOL(ip6_dst_hoplimit);
1228 
1229 /*
1230  *	Routing table manipulation: add and delete routes.
1231  */
1232 
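/*
 * ip6_route_add() turns a fib6_config (from netlink or ioctl) into an
 * rt6_info and inserts it.  Reject routes and true routes via loopback
 * are turned into discard routes; a gateway must either be a link-local
 * unicast address or be resolvable through an existing non-gateway
 * route on the chosen interface.
 */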
1233 int ip6_route_add(struct fib6_config *cfg)
1234 {
1235 	int err;
1236 	struct net *net = cfg->fc_nlinfo.nl_net;
1237 	struct rt6_info *rt = NULL;
1238 	struct net_device *dev = NULL;
1239 	struct inet6_dev *idev = NULL;
1240 	struct fib6_table *table;
1241 	int addr_type;
1242 
1243 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1244 		return -EINVAL;
1245 #ifndef CONFIG_IPV6_SUBTREES
1246 	if (cfg->fc_src_len)
1247 		return -EINVAL;
1248 #endif
1249 	if (cfg->fc_ifindex) {
1250 		err = -ENODEV;
1251 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1252 		if (!dev)
1253 			goto out;
1254 		idev = in6_dev_get(dev);
1255 		if (!idev)
1256 			goto out;
1257 	}
1258 
1259 	if (cfg->fc_metric == 0)
1260 		cfg->fc_metric = IP6_RT_PRIO_USER;
1261 
1262 	err = -ENOBUFS;
1263 	if (cfg->fc_nlinfo.nlh &&
1264 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1265 		table = fib6_get_table(net, cfg->fc_table);
1266 		if (!table) {
1267 			printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1268 			table = fib6_new_table(net, cfg->fc_table);
1269 		}
1270 	} else {
1271 		table = fib6_new_table(net, cfg->fc_table);
1272 	}
1273 
1274 	if (!table)
1275 		goto out;
1276 
1277 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1278 
1279 	if (!rt) {
1280 		err = -ENOMEM;
1281 		goto out;
1282 	}
1283 
1284 	rt->dst.obsolete = -1;
1285 	rt->dst.expires = (cfg->fc_flags & RTF_EXPIRES) ?
1286 				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1287 				0;
1288 
1289 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1290 		cfg->fc_protocol = RTPROT_BOOT;
1291 	rt->rt6i_protocol = cfg->fc_protocol;
1292 
1293 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1294 
1295 	if (addr_type & IPV6_ADDR_MULTICAST)
1296 		rt->dst.input = ip6_mc_input;
1297 	else if (cfg->fc_flags & RTF_LOCAL)
1298 		rt->dst.input = ip6_input;
1299 	else
1300 		rt->dst.input = ip6_forward;
1301 
1302 	rt->dst.output = ip6_output;
1303 
1304 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1305 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1306 	if (rt->rt6i_dst.plen == 128)
1307 	       rt->dst.flags |= DST_HOST;
1308 
1309 	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1310 		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1311 		if (!metrics) {
1312 			err = -ENOMEM;
1313 			goto out;
1314 		}
1315 		dst_init_metrics(&rt->dst, metrics, 0);
1316 	}
1317 #ifdef CONFIG_IPV6_SUBTREES
1318 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1319 	rt->rt6i_src.plen = cfg->fc_src_len;
1320 #endif
1321 
1322 	rt->rt6i_metric = cfg->fc_metric;
1323 
1324 	/* We cannot add true routes via loopback here;
1325 	   they would result in kernel looping, so promote them to reject routes
1326 	 */
1327 	if ((cfg->fc_flags & RTF_REJECT) ||
1328 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1329 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1330 	     !(cfg->fc_flags & RTF_LOCAL))) {
1331 		/* hold loopback dev/idev if we haven't done so. */
1332 		if (dev != net->loopback_dev) {
1333 			if (dev) {
1334 				dev_put(dev);
1335 				in6_dev_put(idev);
1336 			}
1337 			dev = net->loopback_dev;
1338 			dev_hold(dev);
1339 			idev = in6_dev_get(dev);
1340 			if (!idev) {
1341 				err = -ENODEV;
1342 				goto out;
1343 			}
1344 		}
1345 		rt->dst.output = ip6_pkt_discard_out;
1346 		rt->dst.input = ip6_pkt_discard;
1347 		rt->dst.error = -ENETUNREACH;
1348 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1349 		goto install_route;
1350 	}
1351 
1352 	if (cfg->fc_flags & RTF_GATEWAY) {
1353 		const struct in6_addr *gw_addr;
1354 		int gwa_type;
1355 
1356 		gw_addr = &cfg->fc_gateway;
1357 		rt->rt6i_gateway = *gw_addr;
1358 		gwa_type = ipv6_addr_type(gw_addr);
1359 
1360 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1361 			struct rt6_info *grt;
1362 
1363 			/* IPv6 strictly inhibits using non-link-local
1364 			   addresses as the nexthop address.
1365 			   Otherwise, the router will not be able to send redirects.
1366 			   That is generally good, but in some (rare!) circumstances
1367 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1368 			   some exceptions. --ANK
1369 			 */
1370 			err = -EINVAL;
1371 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1372 				goto out;
1373 
1374 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1375 
1376 			err = -EHOSTUNREACH;
1377 			if (!grt)
1378 				goto out;
1379 			if (dev) {
1380 				if (dev != grt->dst.dev) {
1381 					dst_release(&grt->dst);
1382 					goto out;
1383 				}
1384 			} else {
1385 				dev = grt->dst.dev;
1386 				idev = grt->rt6i_idev;
1387 				dev_hold(dev);
1388 				in6_dev_hold(grt->rt6i_idev);
1389 			}
1390 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1391 				err = 0;
1392 			dst_release(&grt->dst);
1393 
1394 			if (err)
1395 				goto out;
1396 		}
1397 		err = -EINVAL;
1398 		if (!dev || (dev->flags & IFF_LOOPBACK))
1399 			goto out;
1400 	}
1401 
1402 	err = -ENODEV;
1403 	if (!dev)
1404 		goto out;
1405 
1406 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1407 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1408 			err = -EINVAL;
1409 			goto out;
1410 		}
1411 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1412 		rt->rt6i_prefsrc.plen = 128;
1413 	} else
1414 		rt->rt6i_prefsrc.plen = 0;
1415 
1416 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1417 		err = rt6_bind_neighbour(rt, dev);
1418 		if (err)
1419 			goto out;
1420 	}
1421 
1422 	rt->rt6i_flags = cfg->fc_flags;
1423 
1424 install_route:
1425 	if (cfg->fc_mx) {
1426 		struct nlattr *nla;
1427 		int remaining;
1428 
1429 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1430 			int type = nla_type(nla);
1431 
1432 			if (type) {
1433 				if (type > RTAX_MAX) {
1434 					err = -EINVAL;
1435 					goto out;
1436 				}
1437 
1438 				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1439 			}
1440 		}
1441 	}
1442 
1443 	rt->dst.dev = dev;
1444 	rt->rt6i_idev = idev;
1445 	rt->rt6i_table = table;
1446 
1447 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1448 
1449 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1450 
1451 out:
1452 	if (dev)
1453 		dev_put(dev);
1454 	if (idev)
1455 		in6_dev_put(idev);
1456 	if (rt)
1457 		dst_free(&rt->dst);
1458 	return err;
1459 }
1460 
1461 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1462 {
1463 	int err;
1464 	struct fib6_table *table;
1465 	struct net *net = dev_net(rt->dst.dev);
1466 
1467 	if (rt == net->ipv6.ip6_null_entry)
1468 		return -ENOENT;
1469 
1470 	table = rt->rt6i_table;
1471 	write_lock_bh(&table->tb6_lock);
1472 
1473 	err = fib6_del(rt, info);
1474 	dst_release(&rt->dst);
1475 
1476 	write_unlock_bh(&table->tb6_lock);
1477 
1478 	return err;
1479 }
1480 
1481 int ip6_del_rt(struct rt6_info *rt)
1482 {
1483 	struct nl_info info = {
1484 		.nl_net = dev_net(rt->dst.dev),
1485 	};
1486 	return __ip6_del_rt(rt, &info);
1487 }
1488 
1489 static int ip6_route_del(struct fib6_config *cfg)
1490 {
1491 	struct fib6_table *table;
1492 	struct fib6_node *fn;
1493 	struct rt6_info *rt;
1494 	int err = -ESRCH;
1495 
1496 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1497 	if (!table)
1498 		return err;
1499 
1500 	read_lock_bh(&table->tb6_lock);
1501 
1502 	fn = fib6_locate(&table->tb6_root,
1503 			 &cfg->fc_dst, cfg->fc_dst_len,
1504 			 &cfg->fc_src, cfg->fc_src_len);
1505 
1506 	if (fn) {
1507 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1508 			if (cfg->fc_ifindex &&
1509 			    (!rt->dst.dev ||
1510 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
1511 				continue;
1512 			if (cfg->fc_flags & RTF_GATEWAY &&
1513 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1514 				continue;
1515 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1516 				continue;
1517 			dst_hold(&rt->dst);
1518 			read_unlock_bh(&table->tb6_lock);
1519 
1520 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1521 		}
1522 	}
1523 	read_unlock_bh(&table->tb6_lock);
1524 
1525 	return err;
1526 }
1527 
1528 /*
1529  *	Handle redirects
1530  */
1531 struct ip6rd_flowi {
1532 	struct flowi6 fl6;
1533 	struct in6_addr gateway;
1534 };
1535 
1536 static struct rt6_info *__ip6_route_redirect(struct net *net,
1537 					     struct fib6_table *table,
1538 					     struct flowi6 *fl6,
1539 					     int flags)
1540 {
1541 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1542 	struct rt6_info *rt;
1543 	struct fib6_node *fn;
1544 
1545 	/*
1546 	 * Get the "current" route for this destination and
1547 	 * check if the redirect has come from the appropriate router.
1548 	 *
1549 	 * RFC 2461 specifies that redirects should only be
1550 	 * accepted if they come from the nexthop to the target.
1551 	 * Due to the way the routes are chosen, this notion
1552 	 * is a bit fuzzy and one might need to check all possible
1553 	 * routes.
1554 	 */
1555 
1556 	read_lock_bh(&table->tb6_lock);
1557 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1558 restart:
1559 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1560 		/*
1561 		 * Current route is on-link; redirect is always invalid.
1562 		 *
1563 		 * It seems the previous statement is not true. It could
1564 		 * be a node which regards us as on-link (e.g. proxy ndisc).
1565 		 * But then the router serving it might decide that we should
1566 		 * know the truth 8)8) --ANK (980726).
1567 		 */
1568 		if (rt6_check_expired(rt))
1569 			continue;
1570 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1571 			continue;
1572 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1573 			continue;
1574 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1575 			continue;
1576 		break;
1577 	}
1578 
1579 	if (!rt)
1580 		rt = net->ipv6.ip6_null_entry;
1581 	BACKTRACK(net, &fl6->saddr);
1582 out:
1583 	dst_hold(&rt->dst);
1584 
1585 	read_unlock_bh(&table->tb6_lock);
1586 
1587 	return rt;
1588 };
1589 
1590 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1591 					   const struct in6_addr *src,
1592 					   const struct in6_addr *gateway,
1593 					   struct net_device *dev)
1594 {
1595 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1596 	struct net *net = dev_net(dev);
1597 	struct ip6rd_flowi rdfl = {
1598 		.fl6 = {
1599 			.flowi6_oif = dev->ifindex,
1600 			.daddr = *dest,
1601 			.saddr = *src,
1602 		},
1603 	};
1604 
1605 	rdfl.gateway = *gateway;
1606 
1607 	if (rt6_need_strict(dest))
1608 		flags |= RT6_LOOKUP_F_IFACE;
1609 
1610 	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1611 						   flags, __ip6_route_redirect);
1612 }
1613 
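/*
 * rt6_redirect() is called from the ndisc code once a Redirect message
 * has passed its protocol checks: the redirect must come from the
 * current nexthop (enforced by __ip6_route_redirect() above), the
 * neighbour cache is updated with the target's link-layer address, and
 * an RTF_DYNAMIC RTF_CACHE host route toward the new first hop replaces
 * any previously cached entry for the destination.
 */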
1614 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1615 		  const struct in6_addr *saddr,
1616 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1617 {
1618 	struct rt6_info *rt, *nrt = NULL;
1619 	struct netevent_redirect netevent;
1620 	struct net *net = dev_net(neigh->dev);
1621 
1622 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1623 
1624 	if (rt == net->ipv6.ip6_null_entry) {
1625 		if (net_ratelimit())
1626 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1627 			       "for redirect target\n");
1628 		goto out;
1629 	}
1630 
1631 	/*
1632 	 *	We have finally decided to accept it.
1633 	 */
1634 
1635 	neigh_update(neigh, lladdr, NUD_STALE,
1636 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1637 		     NEIGH_UPDATE_F_OVERRIDE|
1638 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1639 				     NEIGH_UPDATE_F_ISROUTER))
1640 		     );
1641 
1642 	/*
1643 	 * Redirect received -> path was valid.
1644 	 * Look, redirects are sent only in response to data packets,
1645 	 * so this nexthop apparently is reachable. --ANK
1646 	 */
1647 	dst_confirm(&rt->dst);
1648 
1649 	/* Duplicate redirect: silently ignore. */
1650 	if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1651 		goto out;
1652 
1653 	nrt = ip6_rt_copy(rt, dest);
1654 	if (!nrt)
1655 		goto out;
1656 
1657 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1658 	if (on_link)
1659 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1660 
1661 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1662 	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1663 
1664 	if (ip6_ins_rt(nrt))
1665 		goto out;
1666 
1667 	netevent.old = &rt->dst;
1668 	netevent.new = &nrt->dst;
1669 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1670 
1671 	if (rt->rt6i_flags & RTF_CACHE) {
1672 		ip6_del_rt(rt);
1673 		return;
1674 	}
1675 
1676 out:
1677 	dst_release(&rt->dst);
1678 }
1679 
1680 /*
1681  *	Handle ICMP "packet too big" messages
1682  *	i.e. Path MTU discovery
1683  */
1684 
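/*
 * Apply a Packet Too Big report to one lookup (by incoming or outgoing
 * interface): expired routes are dropped and looked up again, the
 * reported pmtu is clamped to IPV6_MIN_MTU (setting
 * RTAX_FEATURE_ALLFRAG below that), RTF_CACHE host routes are updated
 * in place, and network routes are cloned into an expiring RTF_DYNAMIC
 * cache entry carrying the new mtu.
 */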
1685 static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1686 			     struct net *net, u32 pmtu, int ifindex)
1687 {
1688 	struct rt6_info *rt, *nrt;
1689 	int allfrag = 0;
1690 again:
1691 	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1692 	if (!rt)
1693 		return;
1694 
1695 	if (rt6_check_expired(rt)) {
1696 		ip6_del_rt(rt);
1697 		goto again;
1698 	}
1699 
1700 	if (pmtu >= dst_mtu(&rt->dst))
1701 		goto out;
1702 
1703 	if (pmtu < IPV6_MIN_MTU) {
1704 		/*
1705 		 * According to RFC 2460, the PMTU is set to the IPv6 Minimum Link
1706 		 * MTU (1280) and a fragment header must be included in every
1707 		 * packet once a node receives a Packet Too Big message reporting
1708 		 * a PMTU less than the IPv6 Minimum Link MTU.
1709 		 */
1710 		pmtu = IPV6_MIN_MTU;
1711 		allfrag = 1;
1712 	}
1713 
1714 	/* A new mtu was received -> the path was valid.
1715 	   Packet Too Big messages are sent only in response to data packets,
1716 	   so this nexthop apparently is reachable. --ANK
1717 	 */
1718 	dst_confirm(&rt->dst);
1719 
1720 	/* Host route. If it is static, it would be better
1721 	   not to override it but to add a new one, so that
1722 	   when the cache entry expires the old pmtu
1723 	   is restored automatically.
1724 	 */
1725 	if (rt->rt6i_flags & RTF_CACHE) {
1726 		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1727 		if (allfrag) {
1728 			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1729 			features |= RTAX_FEATURE_ALLFRAG;
1730 			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1731 		}
1732 		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1733 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1734 		goto out;
1735 	}
1736 
1737 	/* Network route.
1738 	   Two cases are possible:
1739 	   1. It is a connected route. Action: COW.
1740 	   2. It is a gatewayed or NONEXTHOP route. Action: clone it.
1741 	 */
1742 	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1743 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1744 	else
1745 		nrt = rt6_alloc_clone(rt, daddr);
1746 
1747 	if (nrt) {
1748 		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1749 		if (allfrag) {
1750 			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1751 			features |= RTAX_FEATURE_ALLFRAG;
1752 			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1753 		}
1754 
1755 		/* According to RFC 1981, a PMTU increase should not be probed for
1756 		 * within 5 minutes; the recommended timer is 10 minutes.
1757 		 * Here this route's expiration time is set to ip6_rt_mtu_expires,
1758 		 * which is 10 minutes. After 10 minutes the decreased pmtu expires
1759 		 * and detection of a PMTU increase happens automatically.
1760 		 */
1761 		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1762 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1763 
1764 		ip6_ins_rt(nrt);
1765 	}
1766 out:
1767 	dst_release(&rt->dst);
1768 }
1769 
1770 void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1771 			struct net_device *dev, u32 pmtu)
1772 {
1773 	struct net *net = dev_net(dev);
1774 
1775 	/*
1776 	 * RFC 1981 states that a node "MUST reduce the size of the packets it
1777 	 * is sending along the path" that caused the Packet Too Big message.
1778 	 * Since it's not possible in the general case to determine which
1779 	 * interface was used to send the original packet, we update the MTU
1780 	 * on the interface that will be used to send future packets. We also
1781 	 * update the MTU on the interface that received the Packet Too Big in
1782 	 * case the original packet was forced out that interface with
1783 	 * SO_BINDTODEVICE or similar. This is the next best thing to the
1784 	 * correct behaviour, which would be to update the MTU on all
1785 	 * interfaces.
1786 	 */
1787 	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1788 	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1789 }
1790 
1791 /*
1792  *	Misc support functions
1793  */
1794 
1795 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1796 				    const struct in6_addr *dest)
1797 {
1798 	struct net *net = dev_net(ort->dst.dev);
1799 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1800 					    ort->dst.dev, 0);
1801 
1802 	if (rt) {
1803 		rt->dst.input = ort->dst.input;
1804 		rt->dst.output = ort->dst.output;
1805 		rt->dst.flags |= DST_HOST;
1806 
1807 		rt->rt6i_dst.addr = *dest;
1808 		rt->rt6i_dst.plen = 128;
1809 		dst_copy_metrics(&rt->dst, &ort->dst);
1810 		rt->dst.error = ort->dst.error;
1811 		rt->rt6i_idev = ort->rt6i_idev;
1812 		if (rt->rt6i_idev)
1813 			in6_dev_hold(rt->rt6i_idev);
1814 		rt->dst.lastuse = jiffies;
1815 		rt->dst.expires = 0;
1816 
1817 		rt->rt6i_gateway = ort->rt6i_gateway;
1818 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1819 		rt->rt6i_metric = 0;
1820 
1821 #ifdef CONFIG_IPV6_SUBTREES
1822 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1823 #endif
1824 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1825 		rt->rt6i_table = ort->rt6i_table;
1826 	}
1827 	return rt;
1828 }
1829 
1830 #ifdef CONFIG_IPV6_ROUTE_INFO
1831 static struct rt6_info *rt6_get_route_info(struct net *net,
1832 					   const struct in6_addr *prefix, int prefixlen,
1833 					   const struct in6_addr *gwaddr, int ifindex)
1834 {
1835 	struct fib6_node *fn;
1836 	struct rt6_info *rt = NULL;
1837 	struct fib6_table *table;
1838 
1839 	table = fib6_get_table(net, RT6_TABLE_INFO);
1840 	if (!table)
1841 		return NULL;
1842 
1843 	write_lock_bh(&table->tb6_lock);
1844 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1845 	if (!fn)
1846 		goto out;
1847 
1848 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1849 		if (rt->dst.dev->ifindex != ifindex)
1850 			continue;
1851 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1852 			continue;
1853 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1854 			continue;
1855 		dst_hold(&rt->dst);
1856 		break;
1857 	}
1858 out:
1859 	write_unlock_bh(&table->tb6_lock);
1860 	return rt;
1861 }
1862 
1863 static struct rt6_info *rt6_add_route_info(struct net *net,
1864 					   const struct in6_addr *prefix, int prefixlen,
1865 					   const struct in6_addr *gwaddr, int ifindex,
1866 					   unsigned pref)
1867 {
1868 	struct fib6_config cfg = {
1869 		.fc_table	= RT6_TABLE_INFO,
1870 		.fc_metric	= IP6_RT_PRIO_USER,
1871 		.fc_ifindex	= ifindex,
1872 		.fc_dst_len	= prefixlen,
1873 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1874 				  RTF_UP | RTF_PREF(pref),
1875 		.fc_nlinfo.pid = 0,
1876 		.fc_nlinfo.nlh = NULL,
1877 		.fc_nlinfo.nl_net = net,
1878 	};
1879 
1880 	cfg.fc_dst = *prefix;
1881 	cfg.fc_gateway = *gwaddr;
1882 
1883 	/* We should treat it as a default route if prefix length is 0. */
1884 	if (!prefixlen)
1885 		cfg.fc_flags |= RTF_DEFAULT;
1886 
1887 	ip6_route_add(&cfg);
1888 
1889 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1890 }
1891 #endif
1892 
1893 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1894 {
1895 	struct rt6_info *rt;
1896 	struct fib6_table *table;
1897 
1898 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1899 	if (!table)
1900 		return NULL;
1901 
1902 	write_lock_bh(&table->tb6_lock);
1903 	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1904 		if (dev == rt->dst.dev &&
1905 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1906 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1907 			break;
1908 	}
1909 	if (rt)
1910 		dst_hold(&rt->dst);
1911 	write_unlock_bh(&table->tb6_lock);
1912 	return rt;
1913 }
1914 
1915 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1916 				     struct net_device *dev,
1917 				     unsigned int pref)
1918 {
1919 	struct fib6_config cfg = {
1920 		.fc_table	= RT6_TABLE_DFLT,
1921 		.fc_metric	= IP6_RT_PRIO_USER,
1922 		.fc_ifindex	= dev->ifindex,
1923 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1924 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1925 		.fc_nlinfo.pid = 0,
1926 		.fc_nlinfo.nlh = NULL,
1927 		.fc_nlinfo.nl_net = dev_net(dev),
1928 	};
1929 
1930 	cfg.fc_gateway = *gwaddr;
1931 
1932 	ip6_route_add(&cfg);
1933 
1934 	return rt6_get_dflt_router(gwaddr, dev);
1935 }
1936 
1937 void rt6_purge_dflt_routers(struct net *net)
1938 {
1939 	struct rt6_info *rt;
1940 	struct fib6_table *table;
1941 
1942 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1943 	table = fib6_get_table(net, RT6_TABLE_DFLT);
1944 	if (!table)
1945 		return;
1946 
1947 restart:
1948 	read_lock_bh(&table->tb6_lock);
1949 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1950 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1951 			dst_hold(&rt->dst);
1952 			read_unlock_bh(&table->tb6_lock);
1953 			ip6_del_rt(rt);
1954 			goto restart;
1955 		}
1956 	}
1957 	read_unlock_bh(&table->tb6_lock);
1958 }
1959 
1960 static void rtmsg_to_fib6_config(struct net *net,
1961 				 struct in6_rtmsg *rtmsg,
1962 				 struct fib6_config *cfg)
1963 {
1964 	memset(cfg, 0, sizeof(*cfg));
1965 
1966 	cfg->fc_table = RT6_TABLE_MAIN;
1967 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1968 	cfg->fc_metric = rtmsg->rtmsg_metric;
1969 	cfg->fc_expires = rtmsg->rtmsg_info;
1970 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1971 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1972 	cfg->fc_flags = rtmsg->rtmsg_flags;
1973 
1974 	cfg->fc_nlinfo.nl_net = net;
1975 
1976 	cfg->fc_dst = rtmsg->rtmsg_dst;
1977 	cfg->fc_src = rtmsg->rtmsg_src;
1978 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
1979 }
1980 
1981 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1982 {
1983 	struct fib6_config cfg;
1984 	struct in6_rtmsg rtmsg;
1985 	int err;
1986 
1987 	switch(cmd) {
1988 	case SIOCADDRT:		/* Add a route */
1989 	case SIOCDELRT:		/* Delete a route */
1990 		if (!capable(CAP_NET_ADMIN))
1991 			return -EPERM;
1992 		err = copy_from_user(&rtmsg, arg,
1993 				     sizeof(struct in6_rtmsg));
1994 		if (err)
1995 			return -EFAULT;
1996 
1997 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1998 
1999 		rtnl_lock();
2000 		switch (cmd) {
2001 		case SIOCADDRT:
2002 			err = ip6_route_add(&cfg);
2003 			break;
2004 		case SIOCDELRT:
2005 			err = ip6_route_del(&cfg);
2006 			break;
2007 		default:
2008 			err = -EINVAL;
2009 		}
2010 		rtnl_unlock();
2011 
2012 		return err;
2013 	}
2014 
2015 	return -EINVAL;
2016 }
2017 
2018 /*
2019  *	Drop the packet on the floor
2020  */
2021 
2022 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2023 {
2024 	int type;
2025 	struct dst_entry *dst = skb_dst(skb);
2026 	switch (ipstats_mib_noroutes) {
2027 	case IPSTATS_MIB_INNOROUTES:
2028 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2029 		if (type == IPV6_ADDR_ANY) {
2030 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2031 				      IPSTATS_MIB_INADDRERRORS);
2032 			break;
2033 		}
2034 		/* FALLTHROUGH */
2035 	case IPSTATS_MIB_OUTNOROUTES:
2036 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2037 			      ipstats_mib_noroutes);
2038 		break;
2039 	}
2040 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2041 	kfree_skb(skb);
2042 	return 0;
2043 }
2044 
2045 static int ip6_pkt_discard(struct sk_buff *skb)
2046 {
2047 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2048 }
2049 
2050 static int ip6_pkt_discard_out(struct sk_buff *skb)
2051 {
2052 	skb->dev = skb_dst(skb)->dev;
2053 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2054 }
2055 
2056 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2057 
2058 static int ip6_pkt_prohibit(struct sk_buff *skb)
2059 {
2060 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2061 }
2062 
2063 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2064 {
2065 	skb->dev = skb_dst(skb)->dev;
2066 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2067 }
2068 
2069 #endif
2070 
2071 /*
2072  *	Allocate a dst for local (unicast / anycast) address.
2073  */
2074 
2075 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2076 				    const struct in6_addr *addr,
2077 				    bool anycast)
2078 {
2079 	struct net *net = dev_net(idev->dev);
2080 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2081 					    net->loopback_dev, 0);
2082 	int err;
2083 
2084 	if (!rt) {
2085 		if (net_ratelimit())
2086 			pr_warning("IPv6:  Maximum number of routes reached,"
2087 				   " consider increasing route/max_size.\n");
2088 		return ERR_PTR(-ENOMEM);
2089 	}
2090 
2091 	in6_dev_hold(idev);
2092 
2093 	rt->dst.flags |= DST_HOST;
2094 	rt->dst.input = ip6_input;
2095 	rt->dst.output = ip6_output;
2096 	rt->rt6i_idev = idev;
2097 	rt->dst.obsolete = -1;
2098 
2099 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2100 	if (anycast)
2101 		rt->rt6i_flags |= RTF_ANYCAST;
2102 	else
2103 		rt->rt6i_flags |= RTF_LOCAL;
2104 	err = rt6_bind_neighbour(rt, rt->dst.dev);
2105 	if (err) {
2106 		dst_free(&rt->dst);
2107 		return ERR_PTR(err);
2108 	}
2109 
2110 	rt->rt6i_dst.addr = *addr;
2111 	rt->rt6i_dst.plen = 128;
2112 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2113 
2114 	atomic_set(&rt->dst.__refcnt, 1);
2115 
2116 	return rt;
2117 }
2118 
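/*
 * Choose a source address for traffic using @rt towards @daddr:
 * prefer the route's configured preferred source (rt6i_prefsrc) when
 * set, otherwise fall back to normal source address selection on the
 * route's device.
 */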
2119 int ip6_route_get_saddr(struct net *net,
2120 			struct rt6_info *rt,
2121 			const struct in6_addr *daddr,
2122 			unsigned int prefs,
2123 			struct in6_addr *saddr)
2124 {
2125 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
2126 	int err = 0;
2127 	if (rt->rt6i_prefsrc.plen)
2128 		*saddr = rt->rt6i_prefsrc.addr;
2129 	else
2130 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2131 					 daddr, prefs, saddr);
2132 	return err;
2133 }
2134 
2135 /* Remove a deleted address from prefsrc entries */
2136 struct arg_dev_net_ip {
2137 	struct net_device *dev;
2138 	struct net *net;
2139 	struct in6_addr *addr;
2140 };
2141 
2142 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2143 {
2144 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2145 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2146 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2147 
2148 	if ((rt->dst.dev == dev || !dev) &&
2149 	    rt != net->ipv6.ip6_null_entry &&
2150 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2151 		/* remove prefsrc entry */
2152 		rt->rt6i_prefsrc.plen = 0;
2153 	}
2154 	return 0;
2155 }
2156 
2157 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2158 {
2159 	struct net *net = dev_net(ifp->idev->dev);
2160 	struct arg_dev_net_ip adni = {
2161 		.dev = ifp->idev->dev,
2162 		.net = net,
2163 		.addr = &ifp->addr,
2164 	};
2165 	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2166 }
2167 
2168 struct arg_dev_net {
2169 	struct net_device *dev;
2170 	struct net *net;
2171 };
2172 
2173 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2174 {
2175 	const struct arg_dev_net *adn = arg;
2176 	const struct net_device *dev = adn->dev;
2177 
2178 	if ((rt->dst.dev == dev || !dev) &&
2179 	    rt != adn->net->ipv6.ip6_null_entry)
2180 		return -1;
2181 
2182 	return 0;
2183 }
2184 
2185 void rt6_ifdown(struct net *net, struct net_device *dev)
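/*
 * Remove all routes going through @dev (or every device when @dev is
 * NULL).  fib6_ifdown() returns non-zero for matching routes, which
 * tells the fib walker to delete them; the per-net null entry is
 * always kept.  icmp6_clean_all() applies the same check to routes
 * cached on ICMPv6 sockets.
 */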
2186 {
2187 	struct arg_dev_net adn = {
2188 		.dev = dev,
2189 		.net = net,
2190 	};
2191 
2192 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2193 	icmp6_clean_all(fib6_ifdown, &adn);
2194 }
2195 
2196 struct rt6_mtu_change_arg
2197 {
2198 	struct net_device *dev;
2199 	unsigned mtu;
2200 };
2201 
2202 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2203 {
2204 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2205 	struct inet6_dev *idev;
2206 
2207 	/* In IPv6, PMTU discovery is not optional,
2208 	   so locking RTAX_MTU cannot disable it.
2209 	   We still use this lock to block changes
2210 	   caused by addrconf/ndisc.
2211 	*/
2212 
2213 	idev = __in6_dev_get(arg->dev);
2214 	if (!idev)
2215 		return 0;
2216 
2217 	/* For an administrative MTU increase there is no way to discover
2218 	   an IPv6 PMTU increase, so the PMTU increase should be updated here.
2219 	   Since RFC 1981 doesn't cover administrative MTU increases, updating
2220 	   the PMTU on such an increase is a MUST (e.g. for jumbo frames).
2221 	 */
2222 	/*
2223 	   If the new MTU is less than the route PMTU, the new MTU will be the
2224 	   lowest MTU in the path; update the route PMTU to reflect the PMTU
2225 	   decrease.  If the new MTU is greater than the route PMTU, and the
2226 	   old MTU was the lowest MTU in the path, update the route PMTU to
2227 	   reflect the increase.  In that case, if another node's MTU is now
2228 	   the lowest in the path, a Packet Too Big message will trigger PMTU
2229 	   discovery again.
2230 	 */
2231 	if (rt->dst.dev == arg->dev &&
2232 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2233 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2234 	     (dst_mtu(&rt->dst) < arg->mtu &&
2235 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2236 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2237 	}
2238 	return 0;
2239 }
2240 
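/*
 * Device MTU change notification: walk the FIB and let
 * rt6_mtu_change_route() update RTAX_MTU on routes using @dev,
 * following the rules above (e.g. if the device MTU drops from 1500
 * to 1280, a route whose PMTU was 1500 is clamped to 1280).
 */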
2241 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2242 {
2243 	struct rt6_mtu_change_arg arg = {
2244 		.dev = dev,
2245 		.mtu = mtu,
2246 	};
2247 
2248 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2249 }
2250 
2251 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2252 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2253 	[RTA_OIF]               = { .type = NLA_U32 },
2254 	[RTA_IIF]		= { .type = NLA_U32 },
2255 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2256 	[RTA_METRICS]           = { .type = NLA_NESTED },
2257 };
2258 
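/*
 * Parse an RTM_NEWROUTE/RTM_DELROUTE request into a struct
 * fib6_config.  Attributes are validated against rtm_ipv6_policy
 * above, and the RTA_DST/RTA_SRC payloads are checked against the
 * prefix lengths from the rtmsg header.
 */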
2259 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2260 			      struct fib6_config *cfg)
2261 {
2262 	struct rtmsg *rtm;
2263 	struct nlattr *tb[RTA_MAX+1];
2264 	int err;
2265 
2266 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2267 	if (err < 0)
2268 		goto errout;
2269 
2270 	err = -EINVAL;
2271 	rtm = nlmsg_data(nlh);
2272 	memset(cfg, 0, sizeof(*cfg));
2273 
2274 	cfg->fc_table = rtm->rtm_table;
2275 	cfg->fc_dst_len = rtm->rtm_dst_len;
2276 	cfg->fc_src_len = rtm->rtm_src_len;
2277 	cfg->fc_flags = RTF_UP;
2278 	cfg->fc_protocol = rtm->rtm_protocol;
2279 
2280 	if (rtm->rtm_type == RTN_UNREACHABLE)
2281 		cfg->fc_flags |= RTF_REJECT;
2282 
2283 	if (rtm->rtm_type == RTN_LOCAL)
2284 		cfg->fc_flags |= RTF_LOCAL;
2285 
2286 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2287 	cfg->fc_nlinfo.nlh = nlh;
2288 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2289 
2290 	if (tb[RTA_GATEWAY]) {
2291 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2292 		cfg->fc_flags |= RTF_GATEWAY;
2293 	}
2294 
2295 	if (tb[RTA_DST]) {
2296 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2297 
2298 		if (nla_len(tb[RTA_DST]) < plen)
2299 			goto errout;
2300 
2301 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2302 	}
2303 
2304 	if (tb[RTA_SRC]) {
2305 		int plen = (rtm->rtm_src_len + 7) >> 3;
2306 
2307 		if (nla_len(tb[RTA_SRC]) < plen)
2308 			goto errout;
2309 
2310 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2311 	}
2312 
2313 	if (tb[RTA_PREFSRC])
2314 		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2315 
2316 	if (tb[RTA_OIF])
2317 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2318 
2319 	if (tb[RTA_PRIORITY])
2320 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2321 
2322 	if (tb[RTA_METRICS]) {
2323 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2324 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2325 	}
2326 
2327 	if (tb[RTA_TABLE])
2328 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2329 
2330 	err = 0;
2331 errout:
2332 	return err;
2333 }
2334 
2335 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2336 {
2337 	struct fib6_config cfg;
2338 	int err;
2339 
2340 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2341 	if (err < 0)
2342 		return err;
2343 
2344 	return ip6_route_del(&cfg);
2345 }
2346 
2347 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2348 {
2349 	struct fib6_config cfg;
2350 	int err;
2351 
2352 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2353 	if (err < 0)
2354 		return err;
2355 
2356 	return ip6_route_add(&cfg);
2357 }
2358 
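/* Worst-case netlink message size for a single route: the rtmsg
 * header plus every attribute rt6_fill_node() may emit.  Used to size
 * the skb allocated in inet6_rt_notify().
 */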
2359 static inline size_t rt6_nlmsg_size(void)
2360 {
2361 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2362 	       + nla_total_size(16) /* RTA_SRC */
2363 	       + nla_total_size(16) /* RTA_DST */
2364 	       + nla_total_size(16) /* RTA_GATEWAY */
2365 	       + nla_total_size(16) /* RTA_PREFSRC */
2366 	       + nla_total_size(4) /* RTA_TABLE */
2367 	       + nla_total_size(4) /* RTA_IIF */
2368 	       + nla_total_size(4) /* RTA_OIF */
2369 	       + nla_total_size(4) /* RTA_PRIORITY */
2370 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2371 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2372 }
2373 
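/*
 * Encode @rt as an rtnetlink route message into @skb.  When @prefix
 * is set, routes without RTF_PREFIX_RT are skipped (return 1).
 * Returns -EMSGSIZE if the skb runs out of tailroom.
 */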
2374 static int rt6_fill_node(struct net *net,
2375 			 struct sk_buff *skb, struct rt6_info *rt,
2376 			 struct in6_addr *dst, struct in6_addr *src,
2377 			 int iif, int type, u32 pid, u32 seq,
2378 			 int prefix, int nowait, unsigned int flags)
2379 {
2380 	const struct inet_peer *peer;
2381 	struct rtmsg *rtm;
2382 	struct nlmsghdr *nlh;
2383 	long expires;
2384 	u32 table;
2385 	struct neighbour *n;
2386 	u32 ts, tsage;
2387 
2388 	if (prefix) {	/* user wants prefix routes only */
2389 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2390 			/* success since this is not a prefix route */
2391 			return 1;
2392 		}
2393 	}
2394 
2395 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2396 	if (!nlh)
2397 		return -EMSGSIZE;
2398 
2399 	rtm = nlmsg_data(nlh);
2400 	rtm->rtm_family = AF_INET6;
2401 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2402 	rtm->rtm_src_len = rt->rt6i_src.plen;
2403 	rtm->rtm_tos = 0;
2404 	if (rt->rt6i_table)
2405 		table = rt->rt6i_table->tb6_id;
2406 	else
2407 		table = RT6_TABLE_UNSPEC;
2408 	rtm->rtm_table = table;
2409 	NLA_PUT_U32(skb, RTA_TABLE, table);
2410 	if (rt->rt6i_flags & RTF_REJECT)
2411 		rtm->rtm_type = RTN_UNREACHABLE;
2412 	else if (rt->rt6i_flags & RTF_LOCAL)
2413 		rtm->rtm_type = RTN_LOCAL;
2414 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2415 		rtm->rtm_type = RTN_LOCAL;
2416 	else
2417 		rtm->rtm_type = RTN_UNICAST;
2418 	rtm->rtm_flags = 0;
2419 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2420 	rtm->rtm_protocol = rt->rt6i_protocol;
2421 	if (rt->rt6i_flags & RTF_DYNAMIC)
2422 		rtm->rtm_protocol = RTPROT_REDIRECT;
2423 	else if (rt->rt6i_flags & RTF_ADDRCONF)
2424 		rtm->rtm_protocol = RTPROT_KERNEL;
2425 	else if (rt->rt6i_flags & RTF_DEFAULT)
2426 		rtm->rtm_protocol = RTPROT_RA;
2427 
2428 	if (rt->rt6i_flags & RTF_CACHE)
2429 		rtm->rtm_flags |= RTM_F_CLONED;
2430 
2431 	if (dst) {
2432 		NLA_PUT(skb, RTA_DST, 16, dst);
2433 		rtm->rtm_dst_len = 128;
2434 	} else if (rtm->rtm_dst_len)
2435 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2436 #ifdef CONFIG_IPV6_SUBTREES
2437 	if (src) {
2438 		NLA_PUT(skb, RTA_SRC, 16, src);
2439 		rtm->rtm_src_len = 128;
2440 	} else if (rtm->rtm_src_len)
2441 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2442 #endif
2443 	if (iif) {
2444 #ifdef CONFIG_IPV6_MROUTE
2445 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2446 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2447 			if (err <= 0) {
2448 				if (!nowait) {
2449 					if (err == 0)
2450 						return 0;
2451 					goto nla_put_failure;
2452 				} else {
2453 					if (err == -EMSGSIZE)
2454 						goto nla_put_failure;
2455 				}
2456 			}
2457 		} else
2458 #endif
2459 			NLA_PUT_U32(skb, RTA_IIF, iif);
2460 	} else if (dst) {
2461 		struct in6_addr saddr_buf;
2462 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2463 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2464 	}
2465 
2466 	if (rt->rt6i_prefsrc.plen) {
2467 		struct in6_addr saddr_buf;
2468 		saddr_buf = rt->rt6i_prefsrc.addr;
2469 		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2470 	}
2471 
2472 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2473 		goto nla_put_failure;
2474 
2475 	rcu_read_lock();
2476 	n = dst_get_neighbour_noref(&rt->dst);
2477 	if (n)
2478 		NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2479 	rcu_read_unlock();
2480 
2481 	if (rt->dst.dev)
2482 		NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
2483 
2484 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2485 
2486 	if (!(rt->rt6i_flags & RTF_EXPIRES))
2487 		expires = 0;
2488 	else if (rt->dst.expires - jiffies < INT_MAX)
2489 		expires = rt->dst.expires - jiffies;
2490 	else
2491 		expires = INT_MAX;
2492 
2493 	peer = rt->rt6i_peer;
2494 	ts = tsage = 0;
2495 	if (peer && peer->tcp_ts_stamp) {
2496 		ts = peer->tcp_ts;
2497 		tsage = get_seconds() - peer->tcp_ts_stamp;
2498 	}
2499 
2500 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
2501 			       expires, rt->dst.error) < 0)
2502 		goto nla_put_failure;
2503 
2504 	return nlmsg_end(skb, nlh);
2505 
2506 nla_put_failure:
2507 	nlmsg_cancel(skb, nlh);
2508 	return -EMSGSIZE;
2509 }
2510 
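/* Per-route callback for route dumps: honour the RTM_F_PREFIX filter
 * from the request and emit one NLM_F_MULTI message per route.
 */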
2511 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2512 {
2513 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2514 	int prefix;
2515 
2516 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2517 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2518 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2519 	} else
2520 		prefix = 0;
2521 
2522 	return rt6_fill_node(arg->net,
2523 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2524 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2525 		     prefix, 0, NLM_F_MULTI);
2526 }
2527 
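/*
 * RTM_GETROUTE handler: build a flow from the RTA_SRC/RTA_DST/
 * RTA_IIF/RTA_OIF attributes, resolve it via ip6_route_output() and
 * unicast the encoded result back to the requesting socket.
 */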
2528 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
2529 {
2530 	struct net *net = sock_net(in_skb->sk);
2531 	struct nlattr *tb[RTA_MAX+1];
2532 	struct rt6_info *rt;
2533 	struct sk_buff *skb;
2534 	struct rtmsg *rtm;
2535 	struct flowi6 fl6;
2536 	int err, iif = 0;
2537 
2538 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2539 	if (err < 0)
2540 		goto errout;
2541 
2542 	err = -EINVAL;
2543 	memset(&fl6, 0, sizeof(fl6));
2544 
2545 	if (tb[RTA_SRC]) {
2546 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2547 			goto errout;
2548 
2549 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2550 	}
2551 
2552 	if (tb[RTA_DST]) {
2553 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2554 			goto errout;
2555 
2556 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2557 	}
2558 
2559 	if (tb[RTA_IIF])
2560 		iif = nla_get_u32(tb[RTA_IIF]);
2561 
2562 	if (tb[RTA_OIF])
2563 		fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2564 
2565 	if (iif) {
2566 		struct net_device *dev;
2567 		dev = __dev_get_by_index(net, iif);
2568 		if (!dev) {
2569 			err = -ENODEV;
2570 			goto errout;
2571 		}
2572 	}
2573 
2574 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2575 	if (!skb) {
2576 		err = -ENOBUFS;
2577 		goto errout;
2578 	}
2579 
2580 	/* Reserve room for dummy headers; this skb can pass
2581 	   through a good chunk of the routing engine.
2582 	 */
2583 	skb_reset_mac_header(skb);
2584 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2585 
2586 	rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2587 	skb_dst_set(skb, &rt->dst);
2588 
2589 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2590 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2591 			    nlh->nlmsg_seq, 0, 0, 0);
2592 	if (err < 0) {
2593 		kfree_skb(skb);
2594 		goto errout;
2595 	}
2596 
2597 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2598 errout:
2599 	return err;
2600 }
2601 
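/*
 * Broadcast a route change (typically RTM_NEWROUTE or RTM_DELROUTE)
 * to RTNLGRP_IPV6_ROUTE listeners.
 */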
2602 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2603 {
2604 	struct sk_buff *skb;
2605 	struct net *net = info->nl_net;
2606 	u32 seq;
2607 	int err;
2608 
2609 	err = -ENOBUFS;
2610 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2611 
2612 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2613 	if (!skb)
2614 		goto errout;
2615 
2616 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2617 				event, info->pid, seq, 0, 0, 0);
2618 	if (err < 0) {
2619 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2620 		WARN_ON(err == -EMSGSIZE);
2621 		kfree_skb(skb);
2622 		goto errout;
2623 	}
2624 	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2625 		    info->nlh, gfp_any());
2626 	return;
2627 errout:
2628 	if (err < 0)
2629 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2630 }
2631 
2632 static int ip6_route_dev_notify(struct notifier_block *this,
2633 				unsigned long event, void *data)
2634 {
2635 	struct net_device *dev = (struct net_device *)data;
2636 	struct net *net = dev_net(dev);
2637 
2638 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2639 		net->ipv6.ip6_null_entry->dst.dev = dev;
2640 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2641 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2642 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2643 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2644 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2645 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2646 #endif
2647 	}
2648 
2649 	return NOTIFY_OK;
2650 }
2651 
2652 /*
2653  *	/proc
2654  */
2655 
2656 #ifdef CONFIG_PROC_FS
2657 
2658 struct rt6_proc_arg
2659 {
2660 	char *buffer;
2661 	int offset;
2662 	int length;
2663 	int skip;
2664 	int len;
2665 };
2666 
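/*
 * Emit one /proc/net/ipv6_route line per route:
 *   dst-addr dst-plen src-addr src-plen next-hop metric refcnt use flags dev
 * Addresses are printed as plain hex (%pi6, no colons); the numeric
 * fields are %08x.
 */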
2667 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2668 {
2669 	struct seq_file *m = p_arg;
2670 	struct neighbour *n;
2671 
2672 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2673 
2674 #ifdef CONFIG_IPV6_SUBTREES
2675 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2676 #else
2677 	seq_puts(m, "00000000000000000000000000000000 00 ");
2678 #endif
2679 	rcu_read_lock();
2680 	n = dst_get_neighbour_noref(&rt->dst);
2681 	if (n) {
2682 		seq_printf(m, "%pi6", n->primary_key);
2683 	} else {
2684 		seq_puts(m, "00000000000000000000000000000000");
2685 	}
2686 	rcu_read_unlock();
2687 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2688 		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2689 		   rt->dst.__use, rt->rt6i_flags,
2690 		   rt->dst.dev ? rt->dst.dev->name : "");
2691 	return 0;
2692 }
2693 
2694 static int ipv6_route_show(struct seq_file *m, void *v)
2695 {
2696 	struct net *net = (struct net *)m->private;
2697 	fib6_clean_all_ro(net, rt6_info_route, 0, m);
2698 	return 0;
2699 }
2700 
2701 static int ipv6_route_open(struct inode *inode, struct file *file)
2702 {
2703 	return single_open_net(inode, file, ipv6_route_show);
2704 }
2705 
2706 static const struct file_operations ipv6_route_proc_fops = {
2707 	.owner		= THIS_MODULE,
2708 	.open		= ipv6_route_open,
2709 	.read		= seq_read,
2710 	.llseek		= seq_lseek,
2711 	.release	= single_release_net,
2712 };
2713 
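/*
 * /proc/net/rt6_stats: seven hex fields, in order: fib nodes, route
 * nodes, route allocations, route entries, cached routes, dst entries
 * currently in use, and discarded routes.
 */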
2714 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2715 {
2716 	struct net *net = (struct net *)seq->private;
2717 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2718 		   net->ipv6.rt6_stats->fib_nodes,
2719 		   net->ipv6.rt6_stats->fib_route_nodes,
2720 		   net->ipv6.rt6_stats->fib_rt_alloc,
2721 		   net->ipv6.rt6_stats->fib_rt_entries,
2722 		   net->ipv6.rt6_stats->fib_rt_cache,
2723 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2724 		   net->ipv6.rt6_stats->fib_discarded_routes);
2725 
2726 	return 0;
2727 }
2728 
2729 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2730 {
2731 	return single_open_net(inode, file, rt6_stats_seq_show);
2732 }
2733 
2734 static const struct file_operations rt6_stats_seq_fops = {
2735 	.owner	 = THIS_MODULE,
2736 	.open	 = rt6_stats_seq_open,
2737 	.read	 = seq_read,
2738 	.llseek	 = seq_lseek,
2739 	.release = single_release_net,
2740 };
2741 #endif	/* CONFIG_PROC_FS */
2742 
2743 #ifdef CONFIG_SYSCTL
2744 
2745 static
2746 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2747 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2748 {
2749 	struct net *net;
2750 	int delay;
2751 	if (!write)
2752 		return -EINVAL;
2753 
2754 	net = (struct net *)ctl->extra1;
2755 	delay = net->ipv6.sysctl.flush_delay;
2756 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2757 	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2758 	return 0;
2759 }
2760 
2761 ctl_table ipv6_route_table_template[] = {
2762 	{
2763 		.procname	=	"flush",
2764 		.data		=	&init_net.ipv6.sysctl.flush_delay,
2765 		.maxlen		=	sizeof(int),
2766 		.mode		=	0200,
2767 		.proc_handler	=	ipv6_sysctl_rtcache_flush
2768 	},
2769 	{
2770 		.procname	=	"gc_thresh",
2771 		.data		=	&ip6_dst_ops_template.gc_thresh,
2772 		.maxlen		=	sizeof(int),
2773 		.mode		=	0644,
2774 		.proc_handler	=	proc_dointvec,
2775 	},
2776 	{
2777 		.procname	=	"max_size",
2778 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2779 		.maxlen		=	sizeof(int),
2780 		.mode		=	0644,
2781 		.proc_handler	=	proc_dointvec,
2782 	},
2783 	{
2784 		.procname	=	"gc_min_interval",
2785 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2786 		.maxlen		=	sizeof(int),
2787 		.mode		=	0644,
2788 		.proc_handler	=	proc_dointvec_jiffies,
2789 	},
2790 	{
2791 		.procname	=	"gc_timeout",
2792 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2793 		.maxlen		=	sizeof(int),
2794 		.mode		=	0644,
2795 		.proc_handler	=	proc_dointvec_jiffies,
2796 	},
2797 	{
2798 		.procname	=	"gc_interval",
2799 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2800 		.maxlen		=	sizeof(int),
2801 		.mode		=	0644,
2802 		.proc_handler	=	proc_dointvec_jiffies,
2803 	},
2804 	{
2805 		.procname	=	"gc_elasticity",
2806 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2807 		.maxlen		=	sizeof(int),
2808 		.mode		=	0644,
2809 		.proc_handler	=	proc_dointvec,
2810 	},
2811 	{
2812 		.procname	=	"mtu_expires",
2813 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2814 		.maxlen		=	sizeof(int),
2815 		.mode		=	0644,
2816 		.proc_handler	=	proc_dointvec_jiffies,
2817 	},
2818 	{
2819 		.procname	=	"min_adv_mss",
2820 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2821 		.maxlen		=	sizeof(int),
2822 		.mode		=	0644,
2823 		.proc_handler	=	proc_dointvec,
2824 	},
2825 	{
2826 		.procname	=	"gc_min_interval_ms",
2827 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2828 		.maxlen		=	sizeof(int),
2829 		.mode		=	0644,
2830 		.proc_handler	=	proc_dointvec_ms_jiffies,
2831 	},
2832 	{ }
2833 };
2834 
2835 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2836 {
2837 	struct ctl_table *table;
2838 
2839 	table = kmemdup(ipv6_route_table_template,
2840 			sizeof(ipv6_route_table_template),
2841 			GFP_KERNEL);
2842 
2843 	if (table) {
2844 		table[0].data = &net->ipv6.sysctl.flush_delay;
2845 		table[0].extra1 = net;
2846 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2847 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2848 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2849 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2850 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2851 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2852 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2853 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2854 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2855 	}
2856 
2857 	return table;
2858 }
2859 #endif
2860 
2861 static int __net_init ip6_route_net_init(struct net *net)
2862 {
2863 	int ret = -ENOMEM;
2864 
2865 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2866 	       sizeof(net->ipv6.ip6_dst_ops));
2867 
2868 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2869 		goto out_ip6_dst_ops;
2870 
2871 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2872 					   sizeof(*net->ipv6.ip6_null_entry),
2873 					   GFP_KERNEL);
2874 	if (!net->ipv6.ip6_null_entry)
2875 		goto out_ip6_dst_entries;
2876 	net->ipv6.ip6_null_entry->dst.path =
2877 		(struct dst_entry *)net->ipv6.ip6_null_entry;
2878 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2879 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2880 			 ip6_template_metrics, true);
2881 
2882 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2883 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2884 					       sizeof(*net->ipv6.ip6_prohibit_entry),
2885 					       GFP_KERNEL);
2886 	if (!net->ipv6.ip6_prohibit_entry)
2887 		goto out_ip6_null_entry;
2888 	net->ipv6.ip6_prohibit_entry->dst.path =
2889 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2890 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2891 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2892 			 ip6_template_metrics, true);
2893 
2894 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2895 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2896 					       GFP_KERNEL);
2897 	if (!net->ipv6.ip6_blk_hole_entry)
2898 		goto out_ip6_prohibit_entry;
2899 	net->ipv6.ip6_blk_hole_entry->dst.path =
2900 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2901 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2902 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2903 			 ip6_template_metrics, true);
2904 #endif
2905 
2906 	net->ipv6.sysctl.flush_delay = 0;
2907 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2908 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2909 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2910 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2911 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2912 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2913 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2914 
2915 #ifdef CONFIG_PROC_FS
2916 	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2917 	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2918 #endif
2919 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2920 
2921 	ret = 0;
2922 out:
2923 	return ret;
2924 
2925 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2926 out_ip6_prohibit_entry:
2927 	kfree(net->ipv6.ip6_prohibit_entry);
2928 out_ip6_null_entry:
2929 	kfree(net->ipv6.ip6_null_entry);
2930 #endif
2931 out_ip6_dst_entries:
2932 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2933 out_ip6_dst_ops:
2934 	goto out;
2935 }
2936 
2937 static void __net_exit ip6_route_net_exit(struct net *net)
2938 {
2939 #ifdef CONFIG_PROC_FS
2940 	proc_net_remove(net, "ipv6_route");
2941 	proc_net_remove(net, "rt6_stats");
2942 #endif
2943 	kfree(net->ipv6.ip6_null_entry);
2944 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2945 	kfree(net->ipv6.ip6_prohibit_entry);
2946 	kfree(net->ipv6.ip6_blk_hole_entry);
2947 #endif
2948 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2949 }
2950 
2951 static struct pernet_operations ip6_route_net_ops = {
2952 	.init = ip6_route_net_init,
2953 	.exit = ip6_route_net_exit,
2954 };
2955 
2956 static struct notifier_block ip6_route_dev_notifier = {
2957 	.notifier_call = ip6_route_dev_notify,
2958 	.priority = 0,
2959 };
2960 
2961 int __init ip6_route_init(void)
2962 {
2963 	int ret;
2964 
2965 	ret = -ENOMEM;
2966 	ip6_dst_ops_template.kmem_cachep =
2967 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2968 				  SLAB_HWCACHE_ALIGN, NULL);
2969 	if (!ip6_dst_ops_template.kmem_cachep)
2970 		goto out;
2971 
2972 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
2973 	if (ret)
2974 		goto out_kmem_cache;
2975 
2976 	ret = register_pernet_subsys(&ip6_route_net_ops);
2977 	if (ret)
2978 		goto out_dst_entries;
2979 
2980 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2981 
2982 	/* The loopback device is registered before this portion of code runs,
2983 	 * so the loopback reference in rt6_info is not taken automatically;
2984 	 * take it manually for init_net. */
2985 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2986 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2987 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2988 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2989 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2990 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2991 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2992 #endif
2993 	ret = fib6_init();
2994 	if (ret)
2995 		goto out_register_subsys;
2996 
2997 	ret = xfrm6_init();
2998 	if (ret)
2999 		goto out_fib6_init;
3000 
3001 	ret = fib6_rules_init();
3002 	if (ret)
3003 		goto xfrm6_init;
3004 
3005 	ret = -ENOBUFS;
3006 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3007 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3008 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3009 		goto fib6_rules_init;
3010 
3011 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3012 	if (ret)
3013 		goto fib6_rules_init;
3014 
3015 out:
3016 	return ret;
3017 
3018 fib6_rules_init:
3019 	fib6_rules_cleanup();
3020 xfrm6_init:
3021 	xfrm6_fini();
3022 out_fib6_init:
3023 	fib6_gc_cleanup();
3024 out_register_subsys:
3025 	unregister_pernet_subsys(&ip6_route_net_ops);
3026 out_dst_entries:
3027 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3028 out_kmem_cache:
3029 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3030 	goto out;
3031 }
3032 
3033 void ip6_route_cleanup(void)
3034 {
3035 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3036 	fib6_rules_cleanup();
3037 	xfrm6_fini();
3038 	fib6_gc_cleanup();
3039 	unregister_pernet_subsys(&ip6_route_net_ops);
3040 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3041 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3042 }
3043