xref: /openbmc/linux/net/ipv6/route.c (revision 97da55fc)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61 
62 #include <asm/uaccess.h>
63 
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67 
68 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
69 				    const struct in6_addr *dest);
70 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
71 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
72 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
73 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74 static void		ip6_dst_destroy(struct dst_entry *);
75 static void		ip6_dst_ifdown(struct dst_entry *,
76 				       struct net_device *dev, int how);
77 static int		 ip6_dst_gc(struct dst_ops *ops);
78 
79 static int		ip6_pkt_discard(struct sk_buff *skb);
80 static int		ip6_pkt_discard_out(struct sk_buff *skb);
81 static void		ip6_link_failure(struct sk_buff *skb);
82 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83 					   struct sk_buff *skb, u32 mtu);
84 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85 					struct sk_buff *skb);
86 
87 #ifdef CONFIG_IPV6_ROUTE_INFO
88 static struct rt6_info *rt6_add_route_info(struct net *net,
89 					   const struct in6_addr *prefix, int prefixlen,
90 					   const struct in6_addr *gwaddr, int ifindex,
91 					   unsigned int pref);
92 static struct rt6_info *rt6_get_route_info(struct net *net,
93 					   const struct in6_addr *prefix, int prefixlen,
94 					   const struct in6_addr *gwaddr, int ifindex);
95 #endif
96 
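/* Copy-on-write handling for dst metrics: host routes start out sharing a
 * read-only metrics array; on the first write the metrics are copied into
 * the route's inet_peer and dst->_metrics is switched over with cmpxchg().
 * If a concurrent writer got there first, its copy is used instead (or NULL
 * is returned if that copy is still read-only).
 */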
97 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
98 {
99 	struct rt6_info *rt = (struct rt6_info *) dst;
100 	struct inet_peer *peer;
101 	u32 *p = NULL;
102 
103 	if (!(rt->dst.flags & DST_HOST))
104 		return NULL;
105 
106 	peer = rt6_get_peer_create(rt);
107 	if (peer) {
108 		u32 *old_p = __DST_METRICS_PTR(old);
109 		unsigned long prev, new;
110 
111 		p = peer->metrics;
112 		if (inet_metrics_new(peer))
113 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
114 
115 		new = (unsigned long) p;
116 		prev = cmpxchg(&dst->_metrics, old, new);
117 
118 		if (prev != old) {
119 			p = __DST_METRICS_PTR(prev);
120 			if (prev & DST_METRICS_READ_ONLY)
121 				p = NULL;
122 		}
123 	}
124 	return p;
125 }
126 
127 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
128 					     struct sk_buff *skb,
129 					     const void *daddr)
130 {
131 	struct in6_addr *p = &rt->rt6i_gateway;
132 
133 	if (!ipv6_addr_any(p))
134 		return (const void *) p;
135 	else if (skb)
136 		return &ipv6_hdr(skb)->daddr;
137 	return daddr;
138 }
139 
140 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
141 					  struct sk_buff *skb,
142 					  const void *daddr)
143 {
144 	struct rt6_info *rt = (struct rt6_info *) dst;
145 	struct neighbour *n;
146 
147 	daddr = choose_neigh_daddr(rt, skb, daddr);
148 	n = __ipv6_neigh_lookup(dst->dev, daddr);
149 	if (n)
150 		return n;
151 	return neigh_create(&nd_tbl, daddr, dst->dev);
152 }
153 
154 static struct dst_ops ip6_dst_ops_template = {
155 	.family			=	AF_INET6,
156 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
157 	.gc			=	ip6_dst_gc,
158 	.gc_thresh		=	1024,
159 	.check			=	ip6_dst_check,
160 	.default_advmss		=	ip6_default_advmss,
161 	.mtu			=	ip6_mtu,
162 	.cow_metrics		=	ipv6_cow_metrics,
163 	.destroy		=	ip6_dst_destroy,
164 	.ifdown			=	ip6_dst_ifdown,
165 	.negative_advice	=	ip6_negative_advice,
166 	.link_failure		=	ip6_link_failure,
167 	.update_pmtu		=	ip6_rt_update_pmtu,
168 	.redirect		=	rt6_do_redirect,
169 	.local_out		=	__ip6_local_out,
170 	.neigh_lookup		=	ip6_neigh_lookup,
171 };
172 
173 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
174 {
175 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
176 
177 	return mtu ? : dst->dev->mtu;
178 }
179 
180 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
181 					 struct sk_buff *skb, u32 mtu)
182 {
183 }
184 
185 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
186 				      struct sk_buff *skb)
187 {
188 }
189 
190 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
191 					 unsigned long old)
192 {
193 	return NULL;
194 }
195 
196 static struct dst_ops ip6_dst_blackhole_ops = {
197 	.family			=	AF_INET6,
198 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
199 	.destroy		=	ip6_dst_destroy,
200 	.check			=	ip6_dst_check,
201 	.mtu			=	ip6_blackhole_mtu,
202 	.default_advmss		=	ip6_default_advmss,
203 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
204 	.redirect		=	ip6_rt_blackhole_redirect,
205 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
206 	.neigh_lookup		=	ip6_neigh_lookup,
207 };
208 
209 static const u32 ip6_template_metrics[RTAX_MAX] = {
210 	[RTAX_HOPLIMIT - 1] = 0,
211 };
212 
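/* Template for the "null" route: any lookup that ends up here is rejected
 * with -ENETUNREACH, and matching packets are dropped by
 * ip6_pkt_discard()/ip6_pkt_discard_out().
 */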
213 static const struct rt6_info ip6_null_entry_template = {
214 	.dst = {
215 		.__refcnt	= ATOMIC_INIT(1),
216 		.__use		= 1,
217 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
218 		.error		= -ENETUNREACH,
219 		.input		= ip6_pkt_discard,
220 		.output		= ip6_pkt_discard_out,
221 	},
222 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
223 	.rt6i_protocol  = RTPROT_KERNEL,
224 	.rt6i_metric	= ~(u32) 0,
225 	.rt6i_ref	= ATOMIC_INIT(1),
226 };
227 
228 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
229 
230 static int ip6_pkt_prohibit(struct sk_buff *skb);
231 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
232 
233 static const struct rt6_info ip6_prohibit_entry_template = {
234 	.dst = {
235 		.__refcnt	= ATOMIC_INIT(1),
236 		.__use		= 1,
237 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
238 		.error		= -EACCES,
239 		.input		= ip6_pkt_prohibit,
240 		.output		= ip6_pkt_prohibit_out,
241 	},
242 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
243 	.rt6i_protocol  = RTPROT_KERNEL,
244 	.rt6i_metric	= ~(u32) 0,
245 	.rt6i_ref	= ATOMIC_INIT(1),
246 };
247 
248 static const struct rt6_info ip6_blk_hole_entry_template = {
249 	.dst = {
250 		.__refcnt	= ATOMIC_INIT(1),
251 		.__use		= 1,
252 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
253 		.error		= -EINVAL,
254 		.input		= dst_discard,
255 		.output		= dst_discard,
256 	},
257 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
258 	.rt6i_protocol  = RTPROT_KERNEL,
259 	.rt6i_metric	= ~(u32) 0,
260 	.rt6i_ref	= ATOMIC_INIT(1),
261 };
262 
263 #endif
264 
265 /* allocate dst with ip6_dst_ops */
266 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
267 					     struct net_device *dev,
268 					     int flags,
269 					     struct fib6_table *table)
270 {
271 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
272 					0, DST_OBSOLETE_FORCE_CHK, flags);
273 
274 	if (rt) {
275 		struct dst_entry *dst = &rt->dst;
276 
277 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
278 		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
279 		rt->rt6i_genid = rt_genid(net);
280 		INIT_LIST_HEAD(&rt->rt6i_siblings);
281 		rt->rt6i_nsiblings = 0;
282 	}
283 	return rt;
284 }
285 
286 static void ip6_dst_destroy(struct dst_entry *dst)
287 {
288 	struct rt6_info *rt = (struct rt6_info *)dst;
289 	struct inet6_dev *idev = rt->rt6i_idev;
290 	struct dst_entry *from = dst->from;
291 
292 	if (!(rt->dst.flags & DST_HOST))
293 		dst_destroy_metrics_generic(dst);
294 
295 	if (idev) {
296 		rt->rt6i_idev = NULL;
297 		in6_dev_put(idev);
298 	}
299 
300 	dst->from = NULL;
301 	dst_release(from);
302 
303 	if (rt6_has_peer(rt)) {
304 		struct inet_peer *peer = rt6_peer_ptr(rt);
305 		inet_putpeer(peer);
306 	}
307 }
308 
309 void rt6_bind_peer(struct rt6_info *rt, int create)
310 {
311 	struct inet_peer_base *base;
312 	struct inet_peer *peer;
313 
314 	base = inetpeer_base_ptr(rt->_rt6i_peer);
315 	if (!base)
316 		return;
317 
318 	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
319 	if (peer) {
320 		if (!rt6_set_peer(rt, peer))
321 			inet_putpeer(peer);
322 	}
323 }
324 
325 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
326 			   int how)
327 {
328 	struct rt6_info *rt = (struct rt6_info *)dst;
329 	struct inet6_dev *idev = rt->rt6i_idev;
330 	struct net_device *loopback_dev =
331 		dev_net(dev)->loopback_dev;
332 
333 	if (dev != loopback_dev) {
334 		if (idev && idev->dev == dev) {
335 			struct inet6_dev *loopback_idev =
336 				in6_dev_get(loopback_dev);
337 			if (loopback_idev) {
338 				rt->rt6i_idev = loopback_idev;
339 				in6_dev_put(idev);
340 			}
341 		}
342 	}
343 }
344 
345 static bool rt6_check_expired(const struct rt6_info *rt)
346 {
347 	if (rt->rt6i_flags & RTF_EXPIRES) {
348 		if (time_after(jiffies, rt->dst.expires))
349 			return true;
350 	} else if (rt->dst.from) {
351 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
352 	}
353 	return false;
354 }
355 
356 static bool rt6_need_strict(const struct in6_addr *daddr)
357 {
358 	return ipv6_addr_type(daddr) &
359 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
360 }
361 
362 /* Multipath route selection:
363  *   Hash-based function using the packet header and flow label.
364  * Adapted from fib_info_hashfn().
365  */
366 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
367 			       const struct flowi6 *fl6)
368 {
369 	unsigned int val = fl6->flowi6_proto;
370 
371 	val ^= ipv6_addr_hash(&fl6->daddr);
372 	val ^= ipv6_addr_hash(&fl6->saddr);
373 
374 	/* Works only if this is not encapsulated */
375 	switch (fl6->flowi6_proto) {
376 	case IPPROTO_UDP:
377 	case IPPROTO_TCP:
378 	case IPPROTO_SCTP:
379 		val ^= (__force u16)fl6->fl6_sport;
380 		val ^= (__force u16)fl6->fl6_dport;
381 		break;
382 
383 	case IPPROTO_ICMPV6:
384 		val ^= (__force u16)fl6->fl6_icmp_type;
385 		val ^= (__force u16)fl6->fl6_icmp_code;
386 		break;
387 	}
388 	/* RFC 6438 recommends using the flow label */
389 	val ^= (__force u32)fl6->flowlabel;
390 
391 	/* Perhaps we need to tune this function? */
392 	val = val ^ (val >> 7) ^ (val >> 12);
393 	return val % candidate_count;
394 }
395 
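/* Pick one route out of an ECMP set.  rt6_info_hash_nhsfn() above folds the
 * flow identity (protocol, addresses, ports or ICMPv6 type/code, and the
 * RFC 6438 flow label) into a value reduced modulo the candidate count, so
 * every packet of a given flow keeps hitting the same sibling.  Illustrative
 * walk-through (hypothetical numbers): with three equal-cost routes the
 * candidate count is 3; a hash value of 7 gives 7 % 3 == 1, i.e. skip the
 * head entry and take the first route on match->rt6i_siblings.
 */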
396 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
397 					     struct flowi6 *fl6)
398 {
399 	struct rt6_info *sibling, *next_sibling;
400 	int route_choosen;
401 
402 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
403 	/* Don't change the route if route_choosen == 0
404 	 * (the siblings list does not include ourselves)
405 	 */
406 	if (route_choosen)
407 		list_for_each_entry_safe(sibling, next_sibling,
408 				&match->rt6i_siblings, rt6i_siblings) {
409 			route_choosen--;
410 			if (route_choosen == 0) {
411 				match = sibling;
412 				break;
413 			}
414 		}
415 	return match;
416 }
417 
418 /*
419  *	Route lookup. table->tb6_lock is assumed to be held by the caller.
420  */
421 
422 static inline struct rt6_info *rt6_device_match(struct net *net,
423 						    struct rt6_info *rt,
424 						    const struct in6_addr *saddr,
425 						    int oif,
426 						    int flags)
427 {
428 	struct rt6_info *local = NULL;
429 	struct rt6_info *sprt;
430 
431 	if (!oif && ipv6_addr_any(saddr))
432 		goto out;
433 
434 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
435 		struct net_device *dev = sprt->dst.dev;
436 
437 		if (oif) {
438 			if (dev->ifindex == oif)
439 				return sprt;
440 			if (dev->flags & IFF_LOOPBACK) {
441 				if (!sprt->rt6i_idev ||
442 				    sprt->rt6i_idev->dev->ifindex != oif) {
443 					if (flags & RT6_LOOKUP_F_IFACE && oif)
444 						continue;
445 					if (local && (!oif ||
446 						      local->rt6i_idev->dev->ifindex == oif))
447 						continue;
448 				}
449 				local = sprt;
450 			}
451 		} else {
452 			if (ipv6_chk_addr(net, saddr, dev,
453 					  flags & RT6_LOOKUP_F_IFACE))
454 				return sprt;
455 		}
456 	}
457 
458 	if (oif) {
459 		if (local)
460 			return local;
461 
462 		if (flags & RT6_LOOKUP_F_IFACE)
463 			return net->ipv6.ip6_null_entry;
464 	}
465 out:
466 	return rt;
467 }
468 
469 #ifdef CONFIG_IPV6_ROUTER_PREF
470 static void rt6_probe(struct rt6_info *rt)
471 {
472 	struct neighbour *neigh;
473 	/*
474 	 * Okay, this does not seem to be appropriate
475 	 * for now; however, we need to check whether it
476 	 * really is, aka Router Reachability Probing.
477 	 *
478 	 * Router Reachability Probe MUST be rate-limited
479 	 * to no more than one per minute.
480 	 */
481 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
482 		return;
483 	rcu_read_lock_bh();
484 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
485 	if (neigh) {
486 		write_lock(&neigh->lock);
487 		if (neigh->nud_state & NUD_VALID)
488 			goto out;
489 	}
490 
491 	if (!neigh ||
492 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
493 		struct in6_addr mcaddr;
494 		struct in6_addr *target;
495 
496 		if (neigh) {
497 			neigh->updated = jiffies;
498 			write_unlock(&neigh->lock);
499 		}
500 
501 		target = (struct in6_addr *)&rt->rt6i_gateway;
502 		addrconf_addr_solict_mult(target, &mcaddr);
503 		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
504 	} else {
505 out:
506 		write_unlock(&neigh->lock);
507 	}
508 	rcu_read_unlock_bh();
509 }
510 #else
511 static inline void rt6_probe(struct rt6_info *rt)
512 {
513 }
514 #endif
515 
516 /*
517  * Default Router Selection (RFC 2461 6.3.6)
518  */
519 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
520 {
521 	struct net_device *dev = rt->dst.dev;
522 	if (!oif || dev->ifindex == oif)
523 		return 2;
524 	if ((dev->flags & IFF_LOOPBACK) &&
525 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
526 		return 1;
527 	return 0;
528 }
529 
530 static inline bool rt6_check_neigh(struct rt6_info *rt)
531 {
532 	struct neighbour *neigh;
533 	bool ret = false;
534 
535 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
536 	    !(rt->rt6i_flags & RTF_GATEWAY))
537 		return true;
538 
539 	rcu_read_lock_bh();
540 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
541 	if (neigh) {
542 		read_lock(&neigh->lock);
543 		if (neigh->nud_state & NUD_VALID)
544 			ret = true;
545 #ifdef CONFIG_IPV6_ROUTER_PREF
546 		else if (!(neigh->nud_state & NUD_FAILED))
547 			ret = true;
548 #endif
549 		read_unlock(&neigh->lock);
550 	}
551 	rcu_read_unlock_bh();
552 
553 	return ret;
554 }
555 
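/* Score a candidate router: rt6_check_dev() contributes 2 for an exact
 * outgoing-interface match (or when no oif is given) and 1 for a loopback
 * route whose idev matches the oif; with CONFIG_IPV6_ROUTER_PREF the decoded
 * router-preference bits are shifted in above that; and a route whose
 * neighbour looks unreachable is disqualified (-1) when
 * RT6_LOOKUP_F_REACHABLE is requested.
 */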
556 static int rt6_score_route(struct rt6_info *rt, int oif,
557 			   int strict)
558 {
559 	int m;
560 
561 	m = rt6_check_dev(rt, oif);
562 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
563 		return -1;
564 #ifdef CONFIG_IPV6_ROUTER_PREF
565 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
566 #endif
567 	if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE))
568 		return -1;
569 	return m;
570 }
571 
572 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
573 				   int *mpri, struct rt6_info *match)
574 {
575 	int m;
576 
577 	if (rt6_check_expired(rt))
578 		goto out;
579 
580 	m = rt6_score_route(rt, oif, strict);
581 	if (m < 0)
582 		goto out;
583 
584 	if (m > *mpri) {
585 		if (strict & RT6_LOOKUP_F_REACHABLE)
586 			rt6_probe(match);
587 		*mpri = m;
588 		match = rt;
589 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
590 		rt6_probe(rt);
591 	}
592 
593 out:
594 	return match;
595 }
596 
597 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
598 				     struct rt6_info *rr_head,
599 				     u32 metric, int oif, int strict)
600 {
601 	struct rt6_info *rt, *match;
602 	int mpri = -1;
603 
604 	match = NULL;
605 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
606 	     rt = rt->dst.rt6_next)
607 		match = find_match(rt, oif, strict, &mpri, match);
608 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
609 	     rt = rt->dst.rt6_next)
610 		match = find_match(rt, oif, strict, &mpri, match);
611 
612 	return match;
613 }
614 
615 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
616 {
617 	struct rt6_info *match, *rt0;
618 	struct net *net;
619 
620 	rt0 = fn->rr_ptr;
621 	if (!rt0)
622 		fn->rr_ptr = rt0 = fn->leaf;
623 
624 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
625 
626 	if (!match &&
627 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
628 		struct rt6_info *next = rt0->dst.rt6_next;
629 
630 		/* no entries matched; do round-robin */
631 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
632 			next = fn->leaf;
633 
634 		if (next != rt0)
635 			fn->rr_ptr = next;
636 	}
637 
638 	net = dev_net(rt0->dst.dev);
639 	return match ? match : net->ipv6.ip6_null_entry;
640 }
641 
642 #ifdef CONFIG_IPV6_ROUTE_INFO
643 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
644 		  const struct in6_addr *gwaddr)
645 {
646 	struct net *net = dev_net(dev);
647 	struct route_info *rinfo = (struct route_info *) opt;
648 	struct in6_addr prefix_buf, *prefix;
649 	unsigned int pref;
650 	unsigned long lifetime;
651 	struct rt6_info *rt;
652 
653 	if (len < sizeof(struct route_info)) {
654 		return -EINVAL;
655 	}
656 
657 	/* Sanity check for prefix_len and length */
658 	if (rinfo->length > 3) {
659 		return -EINVAL;
660 	} else if (rinfo->prefix_len > 128) {
661 		return -EINVAL;
662 	} else if (rinfo->prefix_len > 64) {
663 		if (rinfo->length < 2) {
664 			return -EINVAL;
665 		}
666 	} else if (rinfo->prefix_len > 0) {
667 		if (rinfo->length < 1) {
668 			return -EINVAL;
669 		}
670 	}
671 
672 	pref = rinfo->route_pref;
673 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
674 		return -EINVAL;
675 
676 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
677 
678 	if (rinfo->length == 3)
679 		prefix = (struct in6_addr *)rinfo->prefix;
680 	else {
681 		/* ipv6_addr_prefix() is safe here; prefix_len was validated above */
682 		ipv6_addr_prefix(&prefix_buf,
683 				 (struct in6_addr *)rinfo->prefix,
684 				 rinfo->prefix_len);
685 		prefix = &prefix_buf;
686 	}
687 
688 	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
689 				dev->ifindex);
690 
691 	if (rt && !lifetime) {
692 		ip6_del_rt(rt);
693 		rt = NULL;
694 	}
695 
696 	if (!rt && lifetime)
697 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
698 					pref);
699 	else if (rt)
700 		rt->rt6i_flags = RTF_ROUTEINFO |
701 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
702 
703 	if (rt) {
704 		if (!addrconf_finite_timeout(lifetime))
705 			rt6_clean_expires(rt);
706 		else
707 			rt6_set_expires(rt, jiffies + HZ * lifetime);
708 
709 		ip6_rt_put(rt);
710 	}
711 	return 0;
712 }
713 #endif
714 
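/* Backtrack helper used by the lookup routines below: when the chosen route
 * turns out to be the null entry, walk back up the fib6 tree.  A parent that
 * owns a source-address subtree (FIB6_SUBTREE()) is re-searched by source
 * address; the lookup restarts at the first node carrying route information
 * (RTN_RTINFO) and gives up once the tree root (RTN_TL_ROOT) is reached.
 */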
715 #define BACKTRACK(__net, saddr)			\
716 do { \
717 	if (rt == __net->ipv6.ip6_null_entry) {	\
718 		struct fib6_node *pn; \
719 		while (1) { \
720 			if (fn->fn_flags & RTN_TL_ROOT) \
721 				goto out; \
722 			pn = fn->parent; \
723 			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
724 				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
725 			else \
726 				fn = pn; \
727 			if (fn->fn_flags & RTN_RTINFO) \
728 				goto restart; \
729 		} \
730 	} \
731 } while (0)
732 
733 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
734 					     struct fib6_table *table,
735 					     struct flowi6 *fl6, int flags)
736 {
737 	struct fib6_node *fn;
738 	struct rt6_info *rt;
739 
740 	read_lock_bh(&table->tb6_lock);
741 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
742 restart:
743 	rt = fn->leaf;
744 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
745 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
746 		rt = rt6_multipath_select(rt, fl6);
747 	BACKTRACK(net, &fl6->saddr);
748 out:
749 	dst_use(&rt->dst, jiffies);
750 	read_unlock_bh(&table->tb6_lock);
751 	return rt;
752 
753 }
754 
755 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
756 				    int flags)
757 {
758 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
759 }
760 EXPORT_SYMBOL_GPL(ip6_route_lookup);
761 
762 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
763 			    const struct in6_addr *saddr, int oif, int strict)
764 {
765 	struct flowi6 fl6 = {
766 		.flowi6_oif = oif,
767 		.daddr = *daddr,
768 	};
769 	struct dst_entry *dst;
770 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
771 
772 	if (saddr) {
773 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
774 		flags |= RT6_LOOKUP_F_HAS_SADDR;
775 	}
776 
777 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
778 	if (dst->error == 0)
779 		return (struct rt6_info *) dst;
780 
781 	dst_release(dst);
782 
783 	return NULL;
784 }
785 
786 EXPORT_SYMBOL(rt6_lookup);
787 
788 /* ip6_ins_rt is called with table->tb6_lock NOT held.
789    It takes a new route entry; if the addition fails for any reason, the
790    route is freed. In any case, if the caller does not hold a reference,
791    it may be destroyed.
792  */
793 
794 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
795 {
796 	int err;
797 	struct fib6_table *table;
798 
799 	table = rt->rt6i_table;
800 	write_lock_bh(&table->tb6_lock);
801 	err = fib6_add(&table->tb6_root, rt, info);
802 	write_unlock_bh(&table->tb6_lock);
803 
804 	return err;
805 }
806 
807 int ip6_ins_rt(struct rt6_info *rt)
808 {
809 	struct nl_info info = {
810 		.nl_net = dev_net(rt->dst.dev),
811 	};
812 	return __ip6_ins_rt(rt, &info);
813 }
814 
815 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
816 				      const struct in6_addr *daddr,
817 				      const struct in6_addr *saddr)
818 {
819 	struct rt6_info *rt;
820 
821 	/*
822 	 *	Clone the route.
823 	 */
824 
825 	rt = ip6_rt_copy(ort, daddr);
826 
827 	if (rt) {
828 		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
829 			if (ort->rt6i_dst.plen != 128 &&
830 			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
831 				rt->rt6i_flags |= RTF_ANYCAST;
832 			rt->rt6i_gateway = *daddr;
833 		}
834 
835 		rt->rt6i_flags |= RTF_CACHE;
836 
837 #ifdef CONFIG_IPV6_SUBTREES
838 		if (rt->rt6i_src.plen && saddr) {
839 			rt->rt6i_src.addr = *saddr;
840 			rt->rt6i_src.plen = 128;
841 		}
842 #endif
843 	}
844 
845 	return rt;
846 }
847 
848 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
849 					const struct in6_addr *daddr)
850 {
851 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
852 
853 	if (rt)
854 		rt->rt6i_flags |= RTF_CACHE;
855 	return rt;
856 }
857 
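/* Slow-path policy lookup.  A result that is neither the null entry nor an
 * RTF_CACHE clone is copied before use: rt6_alloc_cow() for a directly
 * connected (non-gateway) route, rt6_alloc_clone() for a non-host route.
 * The clone is inserted into the table after tb6_lock has been dropped, so
 * a few attempts are made in case a concurrent insertion wins the race.
 */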
858 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
859 				      struct flowi6 *fl6, int flags)
860 {
861 	struct fib6_node *fn;
862 	struct rt6_info *rt, *nrt;
863 	int strict = 0;
864 	int attempts = 3;
865 	int err;
866 	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
867 
868 	strict |= flags & RT6_LOOKUP_F_IFACE;
869 
870 relookup:
871 	read_lock_bh(&table->tb6_lock);
872 
873 restart_2:
874 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
875 
876 restart:
877 	rt = rt6_select(fn, oif, strict | reachable);
878 	if (rt->rt6i_nsiblings && oif == 0)
879 		rt = rt6_multipath_select(rt, fl6);
880 	BACKTRACK(net, &fl6->saddr);
881 	if (rt == net->ipv6.ip6_null_entry ||
882 	    rt->rt6i_flags & RTF_CACHE)
883 		goto out;
884 
885 	dst_hold(&rt->dst);
886 	read_unlock_bh(&table->tb6_lock);
887 
888 	if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
889 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
890 	else if (!(rt->dst.flags & DST_HOST))
891 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
892 	else
893 		goto out2;
894 
895 	ip6_rt_put(rt);
896 	rt = nrt ? : net->ipv6.ip6_null_entry;
897 
898 	dst_hold(&rt->dst);
899 	if (nrt) {
900 		err = ip6_ins_rt(nrt);
901 		if (!err)
902 			goto out2;
903 	}
904 
905 	if (--attempts <= 0)
906 		goto out2;
907 
908 	/*
909 	 * Race condition! In the gap while table->tb6_lock was
910 	 * released, someone could have inserted this route.  Relookup.
911 	 */
912 	ip6_rt_put(rt);
913 	goto relookup;
914 
915 out:
916 	if (reachable) {
917 		reachable = 0;
918 		goto restart_2;
919 	}
920 	dst_hold(&rt->dst);
921 	read_unlock_bh(&table->tb6_lock);
922 out2:
923 	rt->dst.lastuse = jiffies;
924 	rt->dst.__use++;
925 
926 	return rt;
927 }
928 
929 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
930 					    struct flowi6 *fl6, int flags)
931 {
932 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
933 }
934 
935 static struct dst_entry *ip6_route_input_lookup(struct net *net,
936 						struct net_device *dev,
937 						struct flowi6 *fl6, int flags)
938 {
939 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
940 		flags |= RT6_LOOKUP_F_IFACE;
941 
942 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
943 }
944 
945 void ip6_route_input(struct sk_buff *skb)
946 {
947 	const struct ipv6hdr *iph = ipv6_hdr(skb);
948 	struct net *net = dev_net(skb->dev);
949 	int flags = RT6_LOOKUP_F_HAS_SADDR;
950 	struct flowi6 fl6 = {
951 		.flowi6_iif = skb->dev->ifindex,
952 		.daddr = iph->daddr,
953 		.saddr = iph->saddr,
954 		.flowlabel = ip6_flowinfo(iph),
955 		.flowi6_mark = skb->mark,
956 		.flowi6_proto = iph->nexthdr,
957 	};
958 
959 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
960 }
961 
962 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
963 					     struct flowi6 *fl6, int flags)
964 {
965 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
966 }
967 
968 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
969 				    struct flowi6 *fl6)
970 {
971 	int flags = 0;
972 
973 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
974 
975 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
976 		flags |= RT6_LOOKUP_F_IFACE;
977 
978 	if (!ipv6_addr_any(&fl6->saddr))
979 		flags |= RT6_LOOKUP_F_HAS_SADDR;
980 	else if (sk)
981 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
982 
983 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
984 }
985 
986 EXPORT_SYMBOL(ip6_route_output);
987 
988 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
989 {
990 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
991 	struct dst_entry *new = NULL;
992 
993 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
994 	if (rt) {
995 		new = &rt->dst;
996 
997 		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
998 		rt6_init_peer(rt, net->ipv6.peers);
999 
1000 		new->__use = 1;
1001 		new->input = dst_discard;
1002 		new->output = dst_discard;
1003 
1004 		if (dst_metrics_read_only(&ort->dst))
1005 			new->_metrics = ort->dst._metrics;
1006 		else
1007 			dst_copy_metrics(new, &ort->dst);
1008 		rt->rt6i_idev = ort->rt6i_idev;
1009 		if (rt->rt6i_idev)
1010 			in6_dev_hold(rt->rt6i_idev);
1011 
1012 		rt->rt6i_gateway = ort->rt6i_gateway;
1013 		rt->rt6i_flags = ort->rt6i_flags;
1014 		rt->rt6i_metric = 0;
1015 
1016 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1017 #ifdef CONFIG_IPV6_SUBTREES
1018 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1019 #endif
1020 
1021 		dst_free(new);
1022 	}
1023 
1024 	dst_release(dst_orig);
1025 	return new ? new : ERR_PTR(-ENOMEM);
1026 }
1027 
1028 /*
1029  *	Destination cache support functions
1030  */
1031 
1032 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1033 {
1034 	struct rt6_info *rt;
1035 
1036 	rt = (struct rt6_info *) dst;
1037 
1038 	/* All IPv6 dsts are created with ->obsolete set to the value
1039 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1040 	 * into this function always.
1041 	 */
1042 	if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1043 		return NULL;
1044 
1045 	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1046 		return dst;
1047 
1048 	return NULL;
1049 }
1050 
1051 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1052 {
1053 	struct rt6_info *rt = (struct rt6_info *) dst;
1054 
1055 	if (rt) {
1056 		if (rt->rt6i_flags & RTF_CACHE) {
1057 			if (rt6_check_expired(rt)) {
1058 				ip6_del_rt(rt);
1059 				dst = NULL;
1060 			}
1061 		} else {
1062 			dst_release(dst);
1063 			dst = NULL;
1064 		}
1065 	}
1066 	return dst;
1067 }
1068 
1069 static void ip6_link_failure(struct sk_buff *skb)
1070 {
1071 	struct rt6_info *rt;
1072 
1073 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1074 
1075 	rt = (struct rt6_info *) skb_dst(skb);
1076 	if (rt) {
1077 		if (rt->rt6i_flags & RTF_CACHE)
1078 			rt6_update_expires(rt, 0);
1079 		else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1080 			rt->rt6i_node->fn_sernum = -1;
1081 	}
1082 }
1083 
1084 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1085 			       struct sk_buff *skb, u32 mtu)
1086 {
1087 	struct rt6_info *rt6 = (struct rt6_info *)dst;
1088 
1089 	dst_confirm(dst);
1090 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1091 		struct net *net = dev_net(dst->dev);
1092 
1093 		rt6->rt6i_flags |= RTF_MODIFIED;
1094 		if (mtu < IPV6_MIN_MTU) {
1095 			u32 features = dst_metric(dst, RTAX_FEATURES);
1096 			mtu = IPV6_MIN_MTU;
1097 			features |= RTAX_FEATURE_ALLFRAG;
1098 			dst_metric_set(dst, RTAX_FEATURES, features);
1099 		}
1100 		dst_metric_set(dst, RTAX_MTU, mtu);
1101 		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1102 	}
1103 }
1104 
1105 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1106 		     int oif, u32 mark)
1107 {
1108 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1109 	struct dst_entry *dst;
1110 	struct flowi6 fl6;
1111 
1112 	memset(&fl6, 0, sizeof(fl6));
1113 	fl6.flowi6_oif = oif;
1114 	fl6.flowi6_mark = mark;
1115 	fl6.flowi6_flags = 0;
1116 	fl6.daddr = iph->daddr;
1117 	fl6.saddr = iph->saddr;
1118 	fl6.flowlabel = ip6_flowinfo(iph);
1119 
1120 	dst = ip6_route_output(net, NULL, &fl6);
1121 	if (!dst->error)
1122 		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1123 	dst_release(dst);
1124 }
1125 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1126 
1127 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1128 {
1129 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1130 			sk->sk_bound_dev_if, sk->sk_mark);
1131 }
1132 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1133 
1134 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1135 {
1136 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1137 	struct dst_entry *dst;
1138 	struct flowi6 fl6;
1139 
1140 	memset(&fl6, 0, sizeof(fl6));
1141 	fl6.flowi6_oif = oif;
1142 	fl6.flowi6_mark = mark;
1143 	fl6.flowi6_flags = 0;
1144 	fl6.daddr = iph->daddr;
1145 	fl6.saddr = iph->saddr;
1146 	fl6.flowlabel = ip6_flowinfo(iph);
1147 
1148 	dst = ip6_route_output(net, NULL, &fl6);
1149 	if (!dst->error)
1150 		rt6_do_redirect(dst, NULL, skb);
1151 	dst_release(dst);
1152 }
1153 EXPORT_SYMBOL_GPL(ip6_redirect);
1154 
1155 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1156 {
1157 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1158 }
1159 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1160 
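/* Advertised MSS derived from the path MTU.  For example, a standard
 * 1500-byte Ethernet MTU yields 1500 - 40 (IPv6) - 20 (TCP) = 1440, clamped
 * from below by the ip6_rt_min_advmss sysctl; anything above
 * IPV6_MAXPLEN - tcp header is reported as IPV6_MAXPLEN, i.e. "any MSS,
 * rely on PMTU discovery".
 */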
1161 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1162 {
1163 	struct net_device *dev = dst->dev;
1164 	unsigned int mtu = dst_mtu(dst);
1165 	struct net *net = dev_net(dev);
1166 
1167 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1168 
1169 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1170 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1171 
1172 	/*
1173 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1174 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1175 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1176 	 * rely only on pmtu discovery"
1177 	 */
1178 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1179 		mtu = IPV6_MAXPLEN;
1180 	return mtu;
1181 }
1182 
1183 static unsigned int ip6_mtu(const struct dst_entry *dst)
1184 {
1185 	struct inet6_dev *idev;
1186 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1187 
1188 	if (mtu)
1189 		return mtu;
1190 
1191 	mtu = IPV6_MIN_MTU;
1192 
1193 	rcu_read_lock();
1194 	idev = __in6_dev_get(dst->dev);
1195 	if (idev)
1196 		mtu = idev->cnf.mtu6;
1197 	rcu_read_unlock();
1198 
1199 	return mtu;
1200 }
1201 
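/* dst entries allocated for outgoing ICMPv6 errors are not inserted into
 * the FIB; they are strung on this private list and reaped by
 * icmp6_dst_gc() once their refcount drops to zero.
 */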
1202 static struct dst_entry *icmp6_dst_gc_list;
1203 static DEFINE_SPINLOCK(icmp6_dst_lock);
1204 
1205 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1206 				  struct flowi6 *fl6)
1207 {
1208 	struct dst_entry *dst;
1209 	struct rt6_info *rt;
1210 	struct inet6_dev *idev = in6_dev_get(dev);
1211 	struct net *net = dev_net(dev);
1212 
1213 	if (unlikely(!idev))
1214 		return ERR_PTR(-ENODEV);
1215 
1216 	rt = ip6_dst_alloc(net, dev, 0, NULL);
1217 	if (unlikely(!rt)) {
1218 		in6_dev_put(idev);
1219 		dst = ERR_PTR(-ENOMEM);
1220 		goto out;
1221 	}
1222 
1223 	rt->dst.flags |= DST_HOST;
1224 	rt->dst.output  = ip6_output;
1225 	atomic_set(&rt->dst.__refcnt, 1);
1226 	rt->rt6i_dst.addr = fl6->daddr;
1227 	rt->rt6i_dst.plen = 128;
1228 	rt->rt6i_idev     = idev;
1229 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1230 
1231 	spin_lock_bh(&icmp6_dst_lock);
1232 	rt->dst.next = icmp6_dst_gc_list;
1233 	icmp6_dst_gc_list = &rt->dst;
1234 	spin_unlock_bh(&icmp6_dst_lock);
1235 
1236 	fib6_force_start_gc(net);
1237 
1238 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1239 
1240 out:
1241 	return dst;
1242 }
1243 
1244 int icmp6_dst_gc(void)
1245 {
1246 	struct dst_entry *dst, **pprev;
1247 	int more = 0;
1248 
1249 	spin_lock_bh(&icmp6_dst_lock);
1250 	pprev = &icmp6_dst_gc_list;
1251 
1252 	while ((dst = *pprev) != NULL) {
1253 		if (!atomic_read(&dst->__refcnt)) {
1254 			*pprev = dst->next;
1255 			dst_free(dst);
1256 		} else {
1257 			pprev = &dst->next;
1258 			++more;
1259 		}
1260 	}
1261 
1262 	spin_unlock_bh(&icmp6_dst_lock);
1263 
1264 	return more;
1265 }
1266 
1267 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1268 			    void *arg)
1269 {
1270 	struct dst_entry *dst, **pprev;
1271 
1272 	spin_lock_bh(&icmp6_dst_lock);
1273 	pprev = &icmp6_dst_gc_list;
1274 	while ((dst = *pprev) != NULL) {
1275 		struct rt6_info *rt = (struct rt6_info *) dst;
1276 		if (func(rt, arg)) {
1277 			*pprev = dst->next;
1278 			dst_free(dst);
1279 		} else {
1280 			pprev = &dst->next;
1281 		}
1282 	}
1283 	spin_unlock_bh(&icmp6_dst_lock);
1284 }
1285 
1286 static int ip6_dst_gc(struct dst_ops *ops)
1287 {
1288 	unsigned long now = jiffies;
1289 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1290 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1291 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1292 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1293 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1294 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1295 	int entries;
1296 
1297 	entries = dst_entries_get_fast(ops);
1298 	if (time_after(rt_last_gc + rt_min_interval, now) &&
1299 	    entries <= rt_max_size)
1300 		goto out;
1301 
1302 	net->ipv6.ip6_rt_gc_expire++;
1303 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1304 	net->ipv6.ip6_rt_last_gc = now;
1305 	entries = dst_entries_get_slow(ops);
1306 	if (entries < ops->gc_thresh)
1307 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1308 out:
1309 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1310 	return entries > rt_max_size;
1311 }
1312 
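/* Hop limit to use for a dst: the per-route RTAX_HOPLIMIT metric when set,
 * otherwise the outgoing device's configured hop_limit, falling back to the
 * namespace-wide "all" devconf value.
 */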
1313 int ip6_dst_hoplimit(struct dst_entry *dst)
1314 {
1315 	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1316 	if (hoplimit == 0) {
1317 		struct net_device *dev = dst->dev;
1318 		struct inet6_dev *idev;
1319 
1320 		rcu_read_lock();
1321 		idev = __in6_dev_get(dev);
1322 		if (idev)
1323 			hoplimit = idev->cnf.hop_limit;
1324 		else
1325 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1326 		rcu_read_unlock();
1327 	}
1328 	return hoplimit;
1329 }
1330 EXPORT_SYMBOL(ip6_dst_hoplimit);
1331 
1332 /*
1333  *	Routing table manipulation: adding and removing routes.
1334  */
1335 
1336 int ip6_route_add(struct fib6_config *cfg)
1337 {
1338 	int err;
1339 	struct net *net = cfg->fc_nlinfo.nl_net;
1340 	struct rt6_info *rt = NULL;
1341 	struct net_device *dev = NULL;
1342 	struct inet6_dev *idev = NULL;
1343 	struct fib6_table *table;
1344 	int addr_type;
1345 
1346 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1347 		return -EINVAL;
1348 #ifndef CONFIG_IPV6_SUBTREES
1349 	if (cfg->fc_src_len)
1350 		return -EINVAL;
1351 #endif
1352 	if (cfg->fc_ifindex) {
1353 		err = -ENODEV;
1354 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1355 		if (!dev)
1356 			goto out;
1357 		idev = in6_dev_get(dev);
1358 		if (!idev)
1359 			goto out;
1360 	}
1361 
1362 	if (cfg->fc_metric == 0)
1363 		cfg->fc_metric = IP6_RT_PRIO_USER;
1364 
1365 	err = -ENOBUFS;
1366 	if (cfg->fc_nlinfo.nlh &&
1367 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1368 		table = fib6_get_table(net, cfg->fc_table);
1369 		if (!table) {
1370 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1371 			table = fib6_new_table(net, cfg->fc_table);
1372 		}
1373 	} else {
1374 		table = fib6_new_table(net, cfg->fc_table);
1375 	}
1376 
1377 	if (!table)
1378 		goto out;
1379 
1380 	rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1381 
1382 	if (!rt) {
1383 		err = -ENOMEM;
1384 		goto out;
1385 	}
1386 
1387 	if (cfg->fc_flags & RTF_EXPIRES)
1388 		rt6_set_expires(rt, jiffies +
1389 				clock_t_to_jiffies(cfg->fc_expires));
1390 	else
1391 		rt6_clean_expires(rt);
1392 
1393 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1394 		cfg->fc_protocol = RTPROT_BOOT;
1395 	rt->rt6i_protocol = cfg->fc_protocol;
1396 
1397 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1398 
1399 	if (addr_type & IPV6_ADDR_MULTICAST)
1400 		rt->dst.input = ip6_mc_input;
1401 	else if (cfg->fc_flags & RTF_LOCAL)
1402 		rt->dst.input = ip6_input;
1403 	else
1404 		rt->dst.input = ip6_forward;
1405 
1406 	rt->dst.output = ip6_output;
1407 
1408 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1409 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1410 	if (rt->rt6i_dst.plen == 128)
1411 		rt->dst.flags |= DST_HOST;
1412 
1413 	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1414 		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1415 		if (!metrics) {
1416 			err = -ENOMEM;
1417 			goto out;
1418 		}
1419 		dst_init_metrics(&rt->dst, metrics, 0);
1420 	}
1421 #ifdef CONFIG_IPV6_SUBTREES
1422 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1423 	rt->rt6i_src.plen = cfg->fc_src_len;
1424 #endif
1425 
1426 	rt->rt6i_metric = cfg->fc_metric;
1427 
1428 	/* We cannot add true routes via loopback here;
1429 	   they would result in kernel looping, so promote them to reject routes
1430 	 */
1431 	if ((cfg->fc_flags & RTF_REJECT) ||
1432 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1433 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1434 	     !(cfg->fc_flags & RTF_LOCAL))) {
1435 		/* hold loopback dev/idev if we haven't done so. */
1436 		if (dev != net->loopback_dev) {
1437 			if (dev) {
1438 				dev_put(dev);
1439 				in6_dev_put(idev);
1440 			}
1441 			dev = net->loopback_dev;
1442 			dev_hold(dev);
1443 			idev = in6_dev_get(dev);
1444 			if (!idev) {
1445 				err = -ENODEV;
1446 				goto out;
1447 			}
1448 		}
1449 		rt->dst.output = ip6_pkt_discard_out;
1450 		rt->dst.input = ip6_pkt_discard;
1451 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1452 		switch (cfg->fc_type) {
1453 		case RTN_BLACKHOLE:
1454 			rt->dst.error = -EINVAL;
1455 			break;
1456 		case RTN_PROHIBIT:
1457 			rt->dst.error = -EACCES;
1458 			break;
1459 		case RTN_THROW:
1460 			rt->dst.error = -EAGAIN;
1461 			break;
1462 		default:
1463 			rt->dst.error = -ENETUNREACH;
1464 			break;
1465 		}
1466 		goto install_route;
1467 	}
1468 
1469 	if (cfg->fc_flags & RTF_GATEWAY) {
1470 		const struct in6_addr *gw_addr;
1471 		int gwa_type;
1472 
1473 		gw_addr = &cfg->fc_gateway;
1474 		rt->rt6i_gateway = *gw_addr;
1475 		gwa_type = ipv6_addr_type(gw_addr);
1476 
1477 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1478 			struct rt6_info *grt;
1479 
1480 			/* IPv6 strictly inhibits using non-link-local
1481 			   addresses as nexthop addresses.
1482 			   Otherwise, the router will not be able to send redirects.
1483 			   That is very good, but in some (rare!) circumstances
1484 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1485 			   some exceptions. --ANK
1486 			 */
1487 			err = -EINVAL;
1488 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1489 				goto out;
1490 
1491 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1492 
1493 			err = -EHOSTUNREACH;
1494 			if (!grt)
1495 				goto out;
1496 			if (dev) {
1497 				if (dev != grt->dst.dev) {
1498 					ip6_rt_put(grt);
1499 					goto out;
1500 				}
1501 			} else {
1502 				dev = grt->dst.dev;
1503 				idev = grt->rt6i_idev;
1504 				dev_hold(dev);
1505 				in6_dev_hold(grt->rt6i_idev);
1506 			}
1507 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1508 				err = 0;
1509 			ip6_rt_put(grt);
1510 
1511 			if (err)
1512 				goto out;
1513 		}
1514 		err = -EINVAL;
1515 		if (!dev || (dev->flags & IFF_LOOPBACK))
1516 			goto out;
1517 	}
1518 
1519 	err = -ENODEV;
1520 	if (!dev)
1521 		goto out;
1522 
1523 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1524 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1525 			err = -EINVAL;
1526 			goto out;
1527 		}
1528 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1529 		rt->rt6i_prefsrc.plen = 128;
1530 	} else
1531 		rt->rt6i_prefsrc.plen = 0;
1532 
1533 	rt->rt6i_flags = cfg->fc_flags;
1534 
1535 install_route:
1536 	if (cfg->fc_mx) {
1537 		struct nlattr *nla;
1538 		int remaining;
1539 
1540 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1541 			int type = nla_type(nla);
1542 
1543 			if (type) {
1544 				if (type > RTAX_MAX) {
1545 					err = -EINVAL;
1546 					goto out;
1547 				}
1548 
1549 				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1550 			}
1551 		}
1552 	}
1553 
1554 	rt->dst.dev = dev;
1555 	rt->rt6i_idev = idev;
1556 	rt->rt6i_table = table;
1557 
1558 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1559 
1560 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1561 
1562 out:
1563 	if (dev)
1564 		dev_put(dev);
1565 	if (idev)
1566 		in6_dev_put(idev);
1567 	if (rt)
1568 		dst_free(&rt->dst);
1569 	return err;
1570 }
1571 
1572 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1573 {
1574 	int err;
1575 	struct fib6_table *table;
1576 	struct net *net = dev_net(rt->dst.dev);
1577 
1578 	if (rt == net->ipv6.ip6_null_entry) {
1579 		err = -ENOENT;
1580 		goto out;
1581 	}
1582 
1583 	table = rt->rt6i_table;
1584 	write_lock_bh(&table->tb6_lock);
1585 	err = fib6_del(rt, info);
1586 	write_unlock_bh(&table->tb6_lock);
1587 
1588 out:
1589 	ip6_rt_put(rt);
1590 	return err;
1591 }
1592 
1593 int ip6_del_rt(struct rt6_info *rt)
1594 {
1595 	struct nl_info info = {
1596 		.nl_net = dev_net(rt->dst.dev),
1597 	};
1598 	return __ip6_del_rt(rt, &info);
1599 }
1600 
1601 static int ip6_route_del(struct fib6_config *cfg)
1602 {
1603 	struct fib6_table *table;
1604 	struct fib6_node *fn;
1605 	struct rt6_info *rt;
1606 	int err = -ESRCH;
1607 
1608 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1609 	if (!table)
1610 		return err;
1611 
1612 	read_lock_bh(&table->tb6_lock);
1613 
1614 	fn = fib6_locate(&table->tb6_root,
1615 			 &cfg->fc_dst, cfg->fc_dst_len,
1616 			 &cfg->fc_src, cfg->fc_src_len);
1617 
1618 	if (fn) {
1619 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1620 			if (cfg->fc_ifindex &&
1621 			    (!rt->dst.dev ||
1622 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
1623 				continue;
1624 			if (cfg->fc_flags & RTF_GATEWAY &&
1625 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1626 				continue;
1627 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1628 				continue;
1629 			dst_hold(&rt->dst);
1630 			read_unlock_bh(&table->tb6_lock);
1631 
1632 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1633 		}
1634 	}
1635 	read_unlock_bh(&table->tb6_lock);
1636 
1637 	return err;
1638 }
1639 
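/* Handle a received ICMPv6 Redirect: validate the message and its target
 * link-layer address option, confirm the path, update the neighbour cache
 * entry for the new first hop, then install an RTF_CACHE|RTF_DYNAMIC clone
 * of the route pointing at that gateway and drop any stale cached entry for
 * the destination.
 */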
1640 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1641 {
1642 	struct net *net = dev_net(skb->dev);
1643 	struct netevent_redirect netevent;
1644 	struct rt6_info *rt, *nrt = NULL;
1645 	struct ndisc_options ndopts;
1646 	struct inet6_dev *in6_dev;
1647 	struct neighbour *neigh;
1648 	struct rd_msg *msg;
1649 	int optlen, on_link;
1650 	u8 *lladdr;
1651 
1652 	optlen = skb->tail - skb->transport_header;
1653 	optlen -= sizeof(*msg);
1654 
1655 	if (optlen < 0) {
1656 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1657 		return;
1658 	}
1659 
1660 	msg = (struct rd_msg *)icmp6_hdr(skb);
1661 
1662 	if (ipv6_addr_is_multicast(&msg->dest)) {
1663 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1664 		return;
1665 	}
1666 
1667 	on_link = 0;
1668 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1669 		on_link = 1;
1670 	} else if (ipv6_addr_type(&msg->target) !=
1671 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1672 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1673 		return;
1674 	}
1675 
1676 	in6_dev = __in6_dev_get(skb->dev);
1677 	if (!in6_dev)
1678 		return;
1679 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1680 		return;
1681 
1682 	/* RFC2461 8.1:
1683 	 *	The IP source address of the Redirect MUST be the same as the current
1684 	 *	first-hop router for the specified ICMP Destination Address.
1685 	 */
1686 
1687 	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1688 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1689 		return;
1690 	}
1691 
1692 	lladdr = NULL;
1693 	if (ndopts.nd_opts_tgt_lladdr) {
1694 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1695 					     skb->dev);
1696 		if (!lladdr) {
1697 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1698 			return;
1699 		}
1700 	}
1701 
1702 	rt = (struct rt6_info *) dst;
1703 	if (rt == net->ipv6.ip6_null_entry) {
1704 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1705 		return;
1706 	}
1707 
1708 	/* Redirect received -> path was valid.
1709 	 * Look, redirects are sent only in response to data packets,
1710 	 * so this nexthop apparently is reachable. --ANK
1711 	 */
1712 	dst_confirm(&rt->dst);
1713 
1714 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1715 	if (!neigh)
1716 		return;
1717 
1718 	/*
1719 	 *	We have finally decided to accept it.
1720 	 */
1721 
1722 	neigh_update(neigh, lladdr, NUD_STALE,
1723 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1724 		     NEIGH_UPDATE_F_OVERRIDE|
1725 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1726 				     NEIGH_UPDATE_F_ISROUTER))
1727 		     );
1728 
1729 	nrt = ip6_rt_copy(rt, &msg->dest);
1730 	if (!nrt)
1731 		goto out;
1732 
1733 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1734 	if (on_link)
1735 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1736 
1737 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1738 
1739 	if (ip6_ins_rt(nrt))
1740 		goto out;
1741 
1742 	netevent.old = &rt->dst;
1743 	netevent.new = &nrt->dst;
1744 	netevent.daddr = &msg->dest;
1745 	netevent.neigh = neigh;
1746 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1747 
1748 	if (rt->rt6i_flags & RTF_CACHE) {
1749 		rt = (struct rt6_info *) dst_clone(&rt->dst);
1750 		ip6_del_rt(rt);
1751 	}
1752 
1753 out:
1754 	neigh_release(neigh);
1755 }
1756 
1757 /*
1758  *	Misc support functions
1759  */
1760 
1761 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1762 				    const struct in6_addr *dest)
1763 {
1764 	struct net *net = dev_net(ort->dst.dev);
1765 	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1766 					    ort->rt6i_table);
1767 
1768 	if (rt) {
1769 		rt->dst.input = ort->dst.input;
1770 		rt->dst.output = ort->dst.output;
1771 		rt->dst.flags |= DST_HOST;
1772 
1773 		rt->rt6i_dst.addr = *dest;
1774 		rt->rt6i_dst.plen = 128;
1775 		dst_copy_metrics(&rt->dst, &ort->dst);
1776 		rt->dst.error = ort->dst.error;
1777 		rt->rt6i_idev = ort->rt6i_idev;
1778 		if (rt->rt6i_idev)
1779 			in6_dev_hold(rt->rt6i_idev);
1780 		rt->dst.lastuse = jiffies;
1781 
1782 		rt->rt6i_gateway = ort->rt6i_gateway;
1783 		rt->rt6i_flags = ort->rt6i_flags;
1784 		if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1785 		    (RTF_DEFAULT | RTF_ADDRCONF))
1786 			rt6_set_from(rt, ort);
1787 		rt->rt6i_metric = 0;
1788 
1789 #ifdef CONFIG_IPV6_SUBTREES
1790 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1791 #endif
1792 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1793 		rt->rt6i_table = ort->rt6i_table;
1794 	}
1795 	return rt;
1796 }
1797 
1798 #ifdef CONFIG_IPV6_ROUTE_INFO
1799 static struct rt6_info *rt6_get_route_info(struct net *net,
1800 					   const struct in6_addr *prefix, int prefixlen,
1801 					   const struct in6_addr *gwaddr, int ifindex)
1802 {
1803 	struct fib6_node *fn;
1804 	struct rt6_info *rt = NULL;
1805 	struct fib6_table *table;
1806 
1807 	table = fib6_get_table(net, RT6_TABLE_INFO);
1808 	if (!table)
1809 		return NULL;
1810 
1811 	read_lock_bh(&table->tb6_lock);
1812 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1813 	if (!fn)
1814 		goto out;
1815 
1816 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1817 		if (rt->dst.dev->ifindex != ifindex)
1818 			continue;
1819 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1820 			continue;
1821 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1822 			continue;
1823 		dst_hold(&rt->dst);
1824 		break;
1825 	}
1826 out:
1827 	read_unlock_bh(&table->tb6_lock);
1828 	return rt;
1829 }
1830 
1831 static struct rt6_info *rt6_add_route_info(struct net *net,
1832 					   const struct in6_addr *prefix, int prefixlen,
1833 					   const struct in6_addr *gwaddr, int ifindex,
1834 					   unsigned int pref)
1835 {
1836 	struct fib6_config cfg = {
1837 		.fc_table	= RT6_TABLE_INFO,
1838 		.fc_metric	= IP6_RT_PRIO_USER,
1839 		.fc_ifindex	= ifindex,
1840 		.fc_dst_len	= prefixlen,
1841 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1842 				  RTF_UP | RTF_PREF(pref),
1843 		.fc_nlinfo.portid = 0,
1844 		.fc_nlinfo.nlh = NULL,
1845 		.fc_nlinfo.nl_net = net,
1846 	};
1847 
1848 	cfg.fc_dst = *prefix;
1849 	cfg.fc_gateway = *gwaddr;
1850 
1851 	/* We should treat it as a default route if prefix length is 0. */
1852 	if (!prefixlen)
1853 		cfg.fc_flags |= RTF_DEFAULT;
1854 
1855 	ip6_route_add(&cfg);
1856 
1857 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1858 }
1859 #endif
1860 
1861 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1862 {
1863 	struct rt6_info *rt;
1864 	struct fib6_table *table;
1865 
1866 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1867 	if (!table)
1868 		return NULL;
1869 
1870 	read_lock_bh(&table->tb6_lock);
1871 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1872 		if (dev == rt->dst.dev &&
1873 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1874 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1875 			break;
1876 	}
1877 	if (rt)
1878 		dst_hold(&rt->dst);
1879 	read_unlock_bh(&table->tb6_lock);
1880 	return rt;
1881 }
1882 
1883 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1884 				     struct net_device *dev,
1885 				     unsigned int pref)
1886 {
1887 	struct fib6_config cfg = {
1888 		.fc_table	= RT6_TABLE_DFLT,
1889 		.fc_metric	= IP6_RT_PRIO_USER,
1890 		.fc_ifindex	= dev->ifindex,
1891 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1892 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1893 		.fc_nlinfo.portid = 0,
1894 		.fc_nlinfo.nlh = NULL,
1895 		.fc_nlinfo.nl_net = dev_net(dev),
1896 	};
1897 
1898 	cfg.fc_gateway = *gwaddr;
1899 
1900 	ip6_route_add(&cfg);
1901 
1902 	return rt6_get_dflt_router(gwaddr, dev);
1903 }
1904 
1905 void rt6_purge_dflt_routers(struct net *net)
1906 {
1907 	struct rt6_info *rt;
1908 	struct fib6_table *table;
1909 
1910 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1911 	table = fib6_get_table(net, RT6_TABLE_DFLT);
1912 	if (!table)
1913 		return;
1914 
1915 restart:
1916 	read_lock_bh(&table->tb6_lock);
1917 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1918 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
1919 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
1920 			dst_hold(&rt->dst);
1921 			read_unlock_bh(&table->tb6_lock);
1922 			ip6_del_rt(rt);
1923 			goto restart;
1924 		}
1925 	}
1926 	read_unlock_bh(&table->tb6_lock);
1927 }
1928 
1929 static void rtmsg_to_fib6_config(struct net *net,
1930 				 struct in6_rtmsg *rtmsg,
1931 				 struct fib6_config *cfg)
1932 {
1933 	memset(cfg, 0, sizeof(*cfg));
1934 
1935 	cfg->fc_table = RT6_TABLE_MAIN;
1936 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1937 	cfg->fc_metric = rtmsg->rtmsg_metric;
1938 	cfg->fc_expires = rtmsg->rtmsg_info;
1939 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1940 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1941 	cfg->fc_flags = rtmsg->rtmsg_flags;
1942 
1943 	cfg->fc_nlinfo.nl_net = net;
1944 
1945 	cfg->fc_dst = rtmsg->rtmsg_dst;
1946 	cfg->fc_src = rtmsg->rtmsg_src;
1947 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
1948 }
1949 
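/* Legacy SIOCADDRT/SIOCDELRT entry point.  A minimal userspace sketch
 * (illustrative only; prefix, gw and ifindex are assumed variables, error
 * handling omitted):
 *
 *	struct in6_rtmsg rtm = { 0 };
 *	rtm.rtmsg_dst = prefix;			// struct in6_addr
 *	rtm.rtmsg_dst_len = 64;
 *	rtm.rtmsg_gateway = gw;
 *	rtm.rtmsg_ifindex = ifindex;
 *	rtm.rtmsg_flags = RTF_UP | RTF_GATEWAY;
 *	rtm.rtmsg_metric = 1;
 *	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *	ioctl(fd, SIOCADDRT, &rtm);		// requires CAP_NET_ADMIN
 *
 * The rtmsg is translated by rtmsg_to_fib6_config() above and handed to
 * ip6_route_add()/ip6_route_del() under the RTNL lock.
 */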
1950 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1951 {
1952 	struct fib6_config cfg;
1953 	struct in6_rtmsg rtmsg;
1954 	int err;
1955 
1956 	switch (cmd) {
1957 	case SIOCADDRT:		/* Add a route */
1958 	case SIOCDELRT:		/* Delete a route */
1959 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1960 			return -EPERM;
1961 		err = copy_from_user(&rtmsg, arg,
1962 				     sizeof(struct in6_rtmsg));
1963 		if (err)
1964 			return -EFAULT;
1965 
1966 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1967 
1968 		rtnl_lock();
1969 		switch (cmd) {
1970 		case SIOCADDRT:
1971 			err = ip6_route_add(&cfg);
1972 			break;
1973 		case SIOCDELRT:
1974 			err = ip6_route_del(&cfg);
1975 			break;
1976 		default:
1977 			err = -EINVAL;
1978 		}
1979 		rtnl_unlock();
1980 
1981 		return err;
1982 	}
1983 
1984 	return -EINVAL;
1985 }
1986 
1987 /*
1988  *	Drop the packet on the floor
1989  */
1990 
1991 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1992 {
1993 	int type;
1994 	struct dst_entry *dst = skb_dst(skb);
1995 	switch (ipstats_mib_noroutes) {
1996 	case IPSTATS_MIB_INNOROUTES:
1997 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1998 		if (type == IPV6_ADDR_ANY) {
1999 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2000 				      IPSTATS_MIB_INADDRERRORS);
2001 			break;
2002 		}
2003 		/* FALLTHROUGH */
2004 	case IPSTATS_MIB_OUTNOROUTES:
2005 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2006 			      ipstats_mib_noroutes);
2007 		break;
2008 	}
2009 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2010 	kfree_skb(skb);
2011 	return 0;
2012 }
2013 
2014 static int ip6_pkt_discard(struct sk_buff *skb)
2015 {
2016 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2017 }
2018 
2019 static int ip6_pkt_discard_out(struct sk_buff *skb)
2020 {
2021 	skb->dev = skb_dst(skb)->dev;
2022 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2023 }
2024 
2025 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2026 
2027 static int ip6_pkt_prohibit(struct sk_buff *skb)
2028 {
2029 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2030 }
2031 
2032 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2033 {
2034 	skb->dev = skb_dst(skb)->dev;
2035 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2036 }
2037 
2038 #endif
2039 
2040 /*
2041  *	Allocate a dst for local (unicast / anycast) address.
2042  */
2043 
2044 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2045 				    const struct in6_addr *addr,
2046 				    bool anycast)
2047 {
2048 	struct net *net = dev_net(idev->dev);
2049 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2050 
2051 	if (!rt) {
2052 		net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2053 		return ERR_PTR(-ENOMEM);
2054 	}
2055 
2056 	in6_dev_hold(idev);
2057 
2058 	rt->dst.flags |= DST_HOST;
2059 	rt->dst.input = ip6_input;
2060 	rt->dst.output = ip6_output;
2061 	rt->rt6i_idev = idev;
2062 
2063 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2064 	if (anycast)
2065 		rt->rt6i_flags |= RTF_ANYCAST;
2066 	else
2067 		rt->rt6i_flags |= RTF_LOCAL;
2068 
2069 	rt->rt6i_dst.addr = *addr;
2070 	rt->rt6i_dst.plen = 128;
2071 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2072 
2073 	atomic_set(&rt->dst.__refcnt, 1);
2074 
2075 	return rt;
2076 }
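/*
 * Hedged note (not from the original file): the /128 entries built
 * here are the host routes visible in the local table, along the
 * lines of (addresses are documentation examples only):
 *
 *	# ip -6 route show table local
 *	local 2001:db8::1 dev lo ...
 *	anycast 2001:db8:: dev lo ...
 */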
2077 
2078 int ip6_route_get_saddr(struct net *net,
2079 			struct rt6_info *rt,
2080 			const struct in6_addr *daddr,
2081 			unsigned int prefs,
2082 			struct in6_addr *saddr)
2083 {
2084 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2085 	int err = 0;
2086 	if (rt->rt6i_prefsrc.plen)
2087 		*saddr = rt->rt6i_prefsrc.addr;
2088 	else
2089 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2090 					 daddr, prefs, saddr);
2091 	return err;
2092 }
2093 
2094 /* remove deleted ip from prefsrc entries */
2095 struct arg_dev_net_ip {
2096 	struct net_device *dev;
2097 	struct net *net;
2098 	struct in6_addr *addr;
2099 };
2100 
2101 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2102 {
2103 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2104 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2105 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2106 
2107 	if (((void *)rt->dst.dev == dev || !dev) &&
2108 	    rt != net->ipv6.ip6_null_entry &&
2109 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2110 		/* remove prefsrc entry */
2111 		rt->rt6i_prefsrc.plen = 0;
2112 	}
2113 	return 0;
2114 }
2115 
2116 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2117 {
2118 	struct net *net = dev_net(ifp->idev->dev);
2119 	struct arg_dev_net_ip adni = {
2120 		.dev = ifp->idev->dev,
2121 		.net = net,
2122 		.addr = &ifp->addr,
2123 	};
2124 	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2125 }
2126 
2127 struct arg_dev_net {
2128 	struct net_device *dev;
2129 	struct net *net;
2130 };
2131 
2132 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2133 {
2134 	const struct arg_dev_net *adn = arg;
2135 	const struct net_device *dev = adn->dev;
2136 
2137 	if ((rt->dst.dev == dev || !dev) &&
2138 	    rt != adn->net->ipv6.ip6_null_entry)
2139 		return -1;
2140 
2141 	return 0;
2142 }
2143 
2144 void rt6_ifdown(struct net *net, struct net_device *dev)
2145 {
2146 	struct arg_dev_net adn = {
2147 		.dev = dev,
2148 		.net = net,
2149 	};
2150 
2151 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2152 	icmp6_clean_all(fib6_ifdown, &adn);
2153 }
2154 
2155 struct rt6_mtu_change_arg {
2156 	struct net_device *dev;
2157 	unsigned int mtu;
2158 };
2159 
2160 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2161 {
2162 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2163 	struct inet6_dev *idev;
2164 
2165 	/* In IPv6, PMTU discovery is not optional,
2166 	   so the RTAX_MTU lock cannot disable it.
2167 	   We still use this lock to block changes
2168 	   caused by addrconf/ndisc.
2169 	*/
2170 
2171 	idev = __in6_dev_get(arg->dev);
2172 	if (!idev)
2173 		return 0;
2174 
2175 	/* An administrative MTU increase cannot be discovered by
2176 	   IPv6 PMTU discovery, so a PMTU increase must be applied here.
2177 	   Since RFC 1981 does not cover administrative MTU increases,
2178 	   updating the PMTU on increase is a MUST (e.g. jumbo frames).
2179 	 */
2180 	/*
2181 	   If the new MTU is less than the route PMTU, the new MTU will
2182 	   be the lowest MTU on the path; update the route PMTU to
2183 	   reflect the decrease.  If the new MTU is greater than the
2184 	   route PMTU and the old MTU was the lowest MTU on the path,
2185 	   update the route PMTU to reflect the increase.  If another
2186 	   node on the path still has a lower MTU, its Packet Too Big
2187 	   message will trigger PMTU discovery again.
2188 	 */
2189 	if (rt->dst.dev == arg->dev &&
2190 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2191 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2192 	     (dst_mtu(&rt->dst) < arg->mtu &&
2193 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2194 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2195 	}
2196 	return 0;
2197 }
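/*
 * Hedged worked example (not from the original file), assuming
 * idev->cnf.mtu6 still holds the old device MTU when this runs: if the
 * device MTU is raised from 1500 to 9000, a route with dst_mtu() ==
 * 1500 matches the second half of the condition and its PMTU is raised
 * to 9000, while a route whose cached PMTU is 1400 (learned from a
 * Packet Too Big) is left alone.  If the device MTU drops to 1280,
 * dst_mtu() >= arg->mtu holds and the route PMTU is lowered to 1280.
 */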
2198 
2199 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2200 {
2201 	struct rt6_mtu_change_arg arg = {
2202 		.dev = dev,
2203 		.mtu = mtu,
2204 	};
2205 
2206 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2207 }
2208 
2209 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2210 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2211 	[RTA_OIF]               = { .type = NLA_U32 },
2212 	[RTA_IIF]		= { .type = NLA_U32 },
2213 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2214 	[RTA_METRICS]           = { .type = NLA_NESTED },
2215 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2216 };
2217 
2218 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2219 			      struct fib6_config *cfg)
2220 {
2221 	struct rtmsg *rtm;
2222 	struct nlattr *tb[RTA_MAX+1];
2223 	int err;
2224 
2225 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2226 	if (err < 0)
2227 		goto errout;
2228 
2229 	err = -EINVAL;
2230 	rtm = nlmsg_data(nlh);
2231 	memset(cfg, 0, sizeof(*cfg));
2232 
2233 	cfg->fc_table = rtm->rtm_table;
2234 	cfg->fc_dst_len = rtm->rtm_dst_len;
2235 	cfg->fc_src_len = rtm->rtm_src_len;
2236 	cfg->fc_flags = RTF_UP;
2237 	cfg->fc_protocol = rtm->rtm_protocol;
2238 	cfg->fc_type = rtm->rtm_type;
2239 
2240 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2241 	    rtm->rtm_type == RTN_BLACKHOLE ||
2242 	    rtm->rtm_type == RTN_PROHIBIT ||
2243 	    rtm->rtm_type == RTN_THROW)
2244 		cfg->fc_flags |= RTF_REJECT;
2245 
2246 	if (rtm->rtm_type == RTN_LOCAL)
2247 		cfg->fc_flags |= RTF_LOCAL;
2248 
2249 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2250 	cfg->fc_nlinfo.nlh = nlh;
2251 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2252 
2253 	if (tb[RTA_GATEWAY]) {
2254 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2255 		cfg->fc_flags |= RTF_GATEWAY;
2256 	}
2257 
2258 	if (tb[RTA_DST]) {
2259 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2260 
2261 		if (nla_len(tb[RTA_DST]) < plen)
2262 			goto errout;
2263 
2264 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2265 	}
2266 
2267 	if (tb[RTA_SRC]) {
2268 		int plen = (rtm->rtm_src_len + 7) >> 3;
2269 
2270 		if (nla_len(tb[RTA_SRC]) < plen)
2271 			goto errout;
2272 
2273 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2274 	}
2275 
2276 	if (tb[RTA_PREFSRC])
2277 		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2278 
2279 	if (tb[RTA_OIF])
2280 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2281 
2282 	if (tb[RTA_PRIORITY])
2283 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2284 
2285 	if (tb[RTA_METRICS]) {
2286 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2287 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2288 	}
2289 
2290 	if (tb[RTA_TABLE])
2291 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2292 
2293 	if (tb[RTA_MULTIPATH]) {
2294 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2295 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2296 	}
2297 
2298 	err = 0;
2299 errout:
2300 	return err;
2301 }
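/*
 * Hedged illustration (not from the original file): a request such as
 * "ip -6 route add 2001:db8::/64 via fe80::1 dev eth0 metric 1024"
 * arrives as an RTM_NEWROUTE message with rtm_dst_len = 64 and the
 * attributes RTA_DST (2001:db8::), RTA_GATEWAY (fe80::1, which also
 * sets RTF_GATEWAY), RTA_OIF (ifindex of eth0) and RTA_PRIORITY
 * (1024); they end up in fc_dst/fc_dst_len, fc_gateway, fc_ifindex
 * and fc_metric respectively.
 */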
2302 
2303 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2304 {
2305 	struct fib6_config r_cfg;
2306 	struct rtnexthop *rtnh;
2307 	int remaining;
2308 	int attrlen;
2309 	int err = 0, last_err = 0;
2310 
2311 beginning:
2312 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2313 	remaining = cfg->fc_mp_len;
2314 
2315 	/* Parse a Multipath Entry */
2316 	while (rtnh_ok(rtnh, remaining)) {
2317 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2318 		if (rtnh->rtnh_ifindex)
2319 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2320 
2321 		attrlen = rtnh_attrlen(rtnh);
2322 		if (attrlen > 0) {
2323 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2324 
2325 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2326 			if (nla) {
2327 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2328 				r_cfg.fc_flags |= RTF_GATEWAY;
2329 			}
2330 		}
2331 		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2332 		if (err) {
2333 			last_err = err;
2334 			/* When removing a route, do not stop the loop if
2335 			 * ip6_route_del() fails (the next hop may already be
2336 			 * gone); try to remove all next hops.
2337 			 */
2338 			if (add) {
2339 				/* If add fails, we should try to delete all
2340 				 * next hops that have been already added.
2341 				 */
2342 				add = 0;
2343 				goto beginning;
2344 			}
2345 		}
2346 		/* Because each route is added as a single route, we clear
2347 		 * this flag after the first nexthop: if there were a
2348 		 * collision, we would already have failed to add the first
2349 		 * nexthop (fib6_add_rt2node() would have rejected it).
2350 		 */
2351 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2352 		rtnh = rtnh_next(rtnh, &remaining);
2353 	}
2354 
2355 	return last_err;
2356 }
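/*
 * Hedged illustration (not from the original file): a request like
 * "ip -6 route add 2001:db8::/64 nexthop via fe80::1 dev eth0 nexthop
 * via fe80::2 dev eth1" carries one RTA_MULTIPATH attribute holding
 * two rtnexthop entries.  Each entry becomes its own fib6_config
 * (r_cfg) and is added as a separate route; if a later add fails, the
 * loop restarts with add = 0 and deletes the nexthops that were
 * already inserted, so the table is not left half-updated.
 */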
2357 
2358 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2359 {
2360 	struct fib6_config cfg;
2361 	int err;
2362 
2363 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2364 	if (err < 0)
2365 		return err;
2366 
2367 	if (cfg.fc_mp)
2368 		return ip6_route_multipath(&cfg, 0);
2369 	else
2370 		return ip6_route_del(&cfg);
2371 }
2372 
2373 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2374 {
2375 	struct fib6_config cfg;
2376 	int err;
2377 
2378 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2379 	if (err < 0)
2380 		return err;
2381 
2382 	if (cfg.fc_mp)
2383 		return ip6_route_multipath(&cfg, 1);
2384 	else
2385 		return ip6_route_add(&cfg);
2386 }
2387 
2388 static inline size_t rt6_nlmsg_size(void)
2389 {
2390 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2391 	       + nla_total_size(16) /* RTA_SRC */
2392 	       + nla_total_size(16) /* RTA_DST */
2393 	       + nla_total_size(16) /* RTA_GATEWAY */
2394 	       + nla_total_size(16) /* RTA_PREFSRC */
2395 	       + nla_total_size(4) /* RTA_TABLE */
2396 	       + nla_total_size(4) /* RTA_IIF */
2397 	       + nla_total_size(4) /* RTA_OIF */
2398 	       + nla_total_size(4) /* RTA_PRIORITY */
2399 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2400 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2401 }
2402 
2403 static int rt6_fill_node(struct net *net,
2404 			 struct sk_buff *skb, struct rt6_info *rt,
2405 			 struct in6_addr *dst, struct in6_addr *src,
2406 			 int iif, int type, u32 portid, u32 seq,
2407 			 int prefix, int nowait, unsigned int flags)
2408 {
2409 	struct rtmsg *rtm;
2410 	struct nlmsghdr *nlh;
2411 	long expires;
2412 	u32 table;
2413 
2414 	if (prefix) {	/* user wants prefix routes only */
2415 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2416 			/* success since this is not a prefix route */
2417 			return 1;
2418 		}
2419 	}
2420 
2421 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2422 	if (!nlh)
2423 		return -EMSGSIZE;
2424 
2425 	rtm = nlmsg_data(nlh);
2426 	rtm->rtm_family = AF_INET6;
2427 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2428 	rtm->rtm_src_len = rt->rt6i_src.plen;
2429 	rtm->rtm_tos = 0;
2430 	if (rt->rt6i_table)
2431 		table = rt->rt6i_table->tb6_id;
2432 	else
2433 		table = RT6_TABLE_UNSPEC;
2434 	rtm->rtm_table = table;
2435 	if (nla_put_u32(skb, RTA_TABLE, table))
2436 		goto nla_put_failure;
2437 	if (rt->rt6i_flags & RTF_REJECT) {
2438 		switch (rt->dst.error) {
2439 		case -EINVAL:
2440 			rtm->rtm_type = RTN_BLACKHOLE;
2441 			break;
2442 		case -EACCES:
2443 			rtm->rtm_type = RTN_PROHIBIT;
2444 			break;
2445 		case -EAGAIN:
2446 			rtm->rtm_type = RTN_THROW;
2447 			break;
2448 		default:
2449 			rtm->rtm_type = RTN_UNREACHABLE;
2450 			break;
2451 		}
2452 	}
2453 	else if (rt->rt6i_flags & RTF_LOCAL)
2454 		rtm->rtm_type = RTN_LOCAL;
2455 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2456 		rtm->rtm_type = RTN_LOCAL;
2457 	else
2458 		rtm->rtm_type = RTN_UNICAST;
2459 	rtm->rtm_flags = 0;
2460 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2461 	rtm->rtm_protocol = rt->rt6i_protocol;
2462 	if (rt->rt6i_flags & RTF_DYNAMIC)
2463 		rtm->rtm_protocol = RTPROT_REDIRECT;
2464 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
2465 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2466 			rtm->rtm_protocol = RTPROT_RA;
2467 		else
2468 			rtm->rtm_protocol = RTPROT_KERNEL;
2469 	}
2470 
2471 	if (rt->rt6i_flags & RTF_CACHE)
2472 		rtm->rtm_flags |= RTM_F_CLONED;
2473 
2474 	if (dst) {
2475 		if (nla_put(skb, RTA_DST, 16, dst))
2476 			goto nla_put_failure;
2477 		rtm->rtm_dst_len = 128;
2478 	} else if (rtm->rtm_dst_len)
2479 		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2480 			goto nla_put_failure;
2481 #ifdef CONFIG_IPV6_SUBTREES
2482 	if (src) {
2483 		if (nla_put(skb, RTA_SRC, 16, src))
2484 			goto nla_put_failure;
2485 		rtm->rtm_src_len = 128;
2486 	} else if (rtm->rtm_src_len &&
2487 		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2488 		goto nla_put_failure;
2489 #endif
2490 	if (iif) {
2491 #ifdef CONFIG_IPV6_MROUTE
2492 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2493 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2494 			if (err <= 0) {
2495 				if (!nowait) {
2496 					if (err == 0)
2497 						return 0;
2498 					goto nla_put_failure;
2499 				} else {
2500 					if (err == -EMSGSIZE)
2501 						goto nla_put_failure;
2502 				}
2503 			}
2504 		} else
2505 #endif
2506 			if (nla_put_u32(skb, RTA_IIF, iif))
2507 				goto nla_put_failure;
2508 	} else if (dst) {
2509 		struct in6_addr saddr_buf;
2510 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2511 		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2512 			goto nla_put_failure;
2513 	}
2514 
2515 	if (rt->rt6i_prefsrc.plen) {
2516 		struct in6_addr saddr_buf;
2517 		saddr_buf = rt->rt6i_prefsrc.addr;
2518 		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2519 			goto nla_put_failure;
2520 	}
2521 
2522 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2523 		goto nla_put_failure;
2524 
2525 	if (rt->rt6i_flags & RTF_GATEWAY) {
2526 		if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2527 			goto nla_put_failure;
2528 	}
2529 
2530 	if (rt->dst.dev &&
2531 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2532 		goto nla_put_failure;
2533 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2534 		goto nla_put_failure;
2535 
2536 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2537 
2538 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2539 		goto nla_put_failure;
2540 
2541 	return nlmsg_end(skb, nlh);
2542 
2543 nla_put_failure:
2544 	nlmsg_cancel(skb, nlh);
2545 	return -EMSGSIZE;
2546 }
2547 
2548 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2549 {
2550 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2551 	int prefix;
2552 
2553 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2554 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2555 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2556 	} else
2557 		prefix = 0;
2558 
2559 	return rt6_fill_node(arg->net,
2560 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2561 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2562 		     prefix, 0, NLM_F_MULTI);
2563 }
2564 
2565 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
2566 {
2567 	struct net *net = sock_net(in_skb->sk);
2568 	struct nlattr *tb[RTA_MAX+1];
2569 	struct rt6_info *rt;
2570 	struct sk_buff *skb;
2571 	struct rtmsg *rtm;
2572 	struct flowi6 fl6;
2573 	int err, iif = 0, oif = 0;
2574 
2575 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2576 	if (err < 0)
2577 		goto errout;
2578 
2579 	err = -EINVAL;
2580 	memset(&fl6, 0, sizeof(fl6));
2581 
2582 	if (tb[RTA_SRC]) {
2583 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2584 			goto errout;
2585 
2586 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2587 	}
2588 
2589 	if (tb[RTA_DST]) {
2590 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2591 			goto errout;
2592 
2593 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2594 	}
2595 
2596 	if (tb[RTA_IIF])
2597 		iif = nla_get_u32(tb[RTA_IIF]);
2598 
2599 	if (tb[RTA_OIF])
2600 		oif = nla_get_u32(tb[RTA_OIF]);
2601 
2602 	if (iif) {
2603 		struct net_device *dev;
2604 		int flags = 0;
2605 
2606 		dev = __dev_get_by_index(net, iif);
2607 		if (!dev) {
2608 			err = -ENODEV;
2609 			goto errout;
2610 		}
2611 
2612 		fl6.flowi6_iif = iif;
2613 
2614 		if (!ipv6_addr_any(&fl6.saddr))
2615 			flags |= RT6_LOOKUP_F_HAS_SADDR;
2616 
2617 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2618 							       flags);
2619 	} else {
2620 		fl6.flowi6_oif = oif;
2621 
2622 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2623 	}
2624 
2625 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2626 	if (!skb) {
2627 		ip6_rt_put(rt);
2628 		err = -ENOBUFS;
2629 		goto errout;
2630 	}
2631 
2632 	/* Reserve room for dummy headers; this skb can pass
2633 	   through a good chunk of the routing engine.
2634 	 */
2635 	skb_reset_mac_header(skb);
2636 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2637 
2638 	skb_dst_set(skb, &rt->dst);
2639 
2640 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2641 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2642 			    nlh->nlmsg_seq, 0, 0, 0);
2643 	if (err < 0) {
2644 		kfree_skb(skb);
2645 		goto errout;
2646 	}
2647 
2648 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2649 errout:
2650 	return err;
2651 }
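/*
 * Hedged illustration (not from the original file): "ip -6 route get
 * 2001:db8::1" sends an RTM_GETROUTE request; its RTA_DST is resolved
 * through ip6_route_output() above and the matching rt6_info is
 * serialized by rt6_fill_node() into a single RTM_NEWROUTE reply,
 * unicast back to the requesting socket.
 */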
2652 
2653 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2654 {
2655 	struct sk_buff *skb;
2656 	struct net *net = info->nl_net;
2657 	u32 seq;
2658 	int err;
2659 
2660 	err = -ENOBUFS;
2661 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2662 
2663 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2664 	if (!skb)
2665 		goto errout;
2666 
2667 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2668 				event, info->portid, seq, 0, 0, 0);
2669 	if (err < 0) {
2670 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2671 		WARN_ON(err == -EMSGSIZE);
2672 		kfree_skb(skb);
2673 		goto errout;
2674 	}
2675 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2676 		    info->nlh, gfp_any());
2677 	return;
2678 errout:
2679 	if (err < 0)
2680 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2681 }
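/*
 * Hedged note (not from the original file): the messages built here go
 * to multicast listeners on RTNLGRP_IPV6_ROUTE; "ip -6 monitor route"
 * prints one line per RTM_NEWROUTE/RTM_DELROUTE notification.
 */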
2682 
2683 static int ip6_route_dev_notify(struct notifier_block *this,
2684 				unsigned long event, void *data)
2685 {
2686 	struct net_device *dev = (struct net_device *)data;
2687 	struct net *net = dev_net(dev);
2688 
2689 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2690 		net->ipv6.ip6_null_entry->dst.dev = dev;
2691 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2692 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2693 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2694 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2695 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2696 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2697 #endif
2698 	}
2699 
2700 	return NOTIFY_OK;
2701 }
2702 
2703 /*
2704  *	/proc
2705  */
2706 
2707 #ifdef CONFIG_PROC_FS
2708 
2709 struct rt6_proc_arg
2710 {
2711 	char *buffer;
2712 	int offset;
2713 	int length;
2714 	int skip;
2715 	int len;
2716 };
2717 
2718 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2719 {
2720 	struct seq_file *m = p_arg;
2721 
2722 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2723 
2724 #ifdef CONFIG_IPV6_SUBTREES
2725 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2726 #else
2727 	seq_puts(m, "00000000000000000000000000000000 00 ");
2728 #endif
2729 	if (rt->rt6i_flags & RTF_GATEWAY) {
2730 		seq_printf(m, "%pi6", &rt->rt6i_gateway);
2731 	} else {
2732 		seq_puts(m, "00000000000000000000000000000000");
2733 	}
2734 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2735 		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2736 		   rt->dst.__use, rt->rt6i_flags,
2737 		   rt->dst.dev ? rt->dst.dev->name : "");
2738 	return 0;
2739 }
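/*
 * Hedged sample /proc/net/ipv6_route line matching the format above
 * (values are made up): destination/plen, source/plen, gateway,
 * then metric, refcnt, use, flags and device:
 *
 * 20010db8000000000000000000000000 40 00000000000000000000000000000000 00 fe800000000000000000000000000001 00000400 00000001 00000000 00000003     eth0
 */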
2740 
2741 static int ipv6_route_show(struct seq_file *m, void *v)
2742 {
2743 	struct net *net = (struct net *)m->private;
2744 	fib6_clean_all_ro(net, rt6_info_route, 0, m);
2745 	return 0;
2746 }
2747 
2748 static int ipv6_route_open(struct inode *inode, struct file *file)
2749 {
2750 	return single_open_net(inode, file, ipv6_route_show);
2751 }
2752 
2753 static const struct file_operations ipv6_route_proc_fops = {
2754 	.owner		= THIS_MODULE,
2755 	.open		= ipv6_route_open,
2756 	.read		= seq_read,
2757 	.llseek		= seq_lseek,
2758 	.release	= single_release_net,
2759 };
2760 
2761 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2762 {
2763 	struct net *net = (struct net *)seq->private;
2764 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2765 		   net->ipv6.rt6_stats->fib_nodes,
2766 		   net->ipv6.rt6_stats->fib_route_nodes,
2767 		   net->ipv6.rt6_stats->fib_rt_alloc,
2768 		   net->ipv6.rt6_stats->fib_rt_entries,
2769 		   net->ipv6.rt6_stats->fib_rt_cache,
2770 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2771 		   net->ipv6.rt6_stats->fib_discarded_routes);
2772 
2773 	return 0;
2774 }
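/*
 * Hedged sample /proc/net/rt6_stats line for the format above
 * (made-up values): fib nodes, route nodes, rt alloc, rt entries,
 * rt cache, dst entries in use, discarded routes:
 *
 * 001e 000c 0002 0014 0000 0019 0003
 */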
2775 
2776 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2777 {
2778 	return single_open_net(inode, file, rt6_stats_seq_show);
2779 }
2780 
2781 static const struct file_operations rt6_stats_seq_fops = {
2782 	.owner	 = THIS_MODULE,
2783 	.open	 = rt6_stats_seq_open,
2784 	.read	 = seq_read,
2785 	.llseek	 = seq_lseek,
2786 	.release = single_release_net,
2787 };
2788 #endif	/* CONFIG_PROC_FS */
2789 
2790 #ifdef CONFIG_SYSCTL
2791 
2792 static
2793 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2794 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2795 {
2796 	struct net *net;
2797 	int delay;
2798 	if (!write)
2799 		return -EINVAL;
2800 
2801 	net = (struct net *)ctl->extra1;
2802 	delay = net->ipv6.sysctl.flush_delay;
2803 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2804 	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2805 	return 0;
2806 }
2807 
2808 ctl_table ipv6_route_table_template[] = {
2809 	{
2810 		.procname	=	"flush",
2811 		.data		=	&init_net.ipv6.sysctl.flush_delay,
2812 		.maxlen		=	sizeof(int),
2813 		.mode		=	0200,
2814 		.proc_handler	=	ipv6_sysctl_rtcache_flush
2815 	},
2816 	{
2817 		.procname	=	"gc_thresh",
2818 		.data		=	&ip6_dst_ops_template.gc_thresh,
2819 		.maxlen		=	sizeof(int),
2820 		.mode		=	0644,
2821 		.proc_handler	=	proc_dointvec,
2822 	},
2823 	{
2824 		.procname	=	"max_size",
2825 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2826 		.maxlen		=	sizeof(int),
2827 		.mode		=	0644,
2828 		.proc_handler	=	proc_dointvec,
2829 	},
2830 	{
2831 		.procname	=	"gc_min_interval",
2832 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2833 		.maxlen		=	sizeof(int),
2834 		.mode		=	0644,
2835 		.proc_handler	=	proc_dointvec_jiffies,
2836 	},
2837 	{
2838 		.procname	=	"gc_timeout",
2839 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2840 		.maxlen		=	sizeof(int),
2841 		.mode		=	0644,
2842 		.proc_handler	=	proc_dointvec_jiffies,
2843 	},
2844 	{
2845 		.procname	=	"gc_interval",
2846 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2847 		.maxlen		=	sizeof(int),
2848 		.mode		=	0644,
2849 		.proc_handler	=	proc_dointvec_jiffies,
2850 	},
2851 	{
2852 		.procname	=	"gc_elasticity",
2853 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2854 		.maxlen		=	sizeof(int),
2855 		.mode		=	0644,
2856 		.proc_handler	=	proc_dointvec,
2857 	},
2858 	{
2859 		.procname	=	"mtu_expires",
2860 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2861 		.maxlen		=	sizeof(int),
2862 		.mode		=	0644,
2863 		.proc_handler	=	proc_dointvec_jiffies,
2864 	},
2865 	{
2866 		.procname	=	"min_adv_mss",
2867 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2868 		.maxlen		=	sizeof(int),
2869 		.mode		=	0644,
2870 		.proc_handler	=	proc_dointvec,
2871 	},
2872 	{
2873 		.procname	=	"gc_min_interval_ms",
2874 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2875 		.maxlen		=	sizeof(int),
2876 		.mode		=	0644,
2877 		.proc_handler	=	proc_dointvec_ms_jiffies,
2878 	},
2879 	{ }
2880 };
2881 
2882 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2883 {
2884 	struct ctl_table *table;
2885 
2886 	table = kmemdup(ipv6_route_table_template,
2887 			sizeof(ipv6_route_table_template),
2888 			GFP_KERNEL);
2889 
2890 	if (table) {
2891 		table[0].data = &net->ipv6.sysctl.flush_delay;
2892 		table[0].extra1 = net;
2893 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2894 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2895 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2896 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2897 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2898 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2899 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2900 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2901 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2902 
2903 		/* Don't export sysctls to unprivileged users */
2904 		if (net->user_ns != &init_user_ns)
2905 			table[0].procname = NULL;
2906 	}
2907 
2908 	return table;
2909 }
2910 #endif
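/*
 * Hedged note (not from the original file): when registered for a
 * namespace, the template above appears under /proc/sys/net/ipv6/route/,
 * e.g.
 *
 *	sysctl -w net.ipv6.route.gc_thresh=2048
 *	echo 1 > /proc/sys/net/ipv6/route/flush	(write-only, runs fib6_run_gc)
 *
 * Setting table[0].procname to NULL above truncates the table, so none
 * of these knobs are exported in namespaces owned by an unprivileged
 * user namespace.
 */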
2911 
2912 static int __net_init ip6_route_net_init(struct net *net)
2913 {
2914 	int ret = -ENOMEM;
2915 
2916 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2917 	       sizeof(net->ipv6.ip6_dst_ops));
2918 
2919 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2920 		goto out_ip6_dst_ops;
2921 
2922 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2923 					   sizeof(*net->ipv6.ip6_null_entry),
2924 					   GFP_KERNEL);
2925 	if (!net->ipv6.ip6_null_entry)
2926 		goto out_ip6_dst_entries;
2927 	net->ipv6.ip6_null_entry->dst.path =
2928 		(struct dst_entry *)net->ipv6.ip6_null_entry;
2929 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2930 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2931 			 ip6_template_metrics, true);
2932 
2933 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2934 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2935 					       sizeof(*net->ipv6.ip6_prohibit_entry),
2936 					       GFP_KERNEL);
2937 	if (!net->ipv6.ip6_prohibit_entry)
2938 		goto out_ip6_null_entry;
2939 	net->ipv6.ip6_prohibit_entry->dst.path =
2940 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2941 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2942 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2943 			 ip6_template_metrics, true);
2944 
2945 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2946 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2947 					       GFP_KERNEL);
2948 	if (!net->ipv6.ip6_blk_hole_entry)
2949 		goto out_ip6_prohibit_entry;
2950 	net->ipv6.ip6_blk_hole_entry->dst.path =
2951 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2952 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2953 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2954 			 ip6_template_metrics, true);
2955 #endif
2956 
2957 	net->ipv6.sysctl.flush_delay = 0;
2958 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2959 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2960 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2961 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2962 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2963 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2964 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2965 
2966 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2967 
2968 	ret = 0;
2969 out:
2970 	return ret;
2971 
2972 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2973 out_ip6_prohibit_entry:
2974 	kfree(net->ipv6.ip6_prohibit_entry);
2975 out_ip6_null_entry:
2976 	kfree(net->ipv6.ip6_null_entry);
2977 #endif
2978 out_ip6_dst_entries:
2979 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2980 out_ip6_dst_ops:
2981 	goto out;
2982 }
2983 
2984 static void __net_exit ip6_route_net_exit(struct net *net)
2985 {
2986 	kfree(net->ipv6.ip6_null_entry);
2987 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2988 	kfree(net->ipv6.ip6_prohibit_entry);
2989 	kfree(net->ipv6.ip6_blk_hole_entry);
2990 #endif
2991 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2992 }
2993 
2994 static int __net_init ip6_route_net_init_late(struct net *net)
2995 {
2996 #ifdef CONFIG_PROC_FS
2997 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
2998 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
2999 #endif
3000 	return 0;
3001 }
3002 
3003 static void __net_exit ip6_route_net_exit_late(struct net *net)
3004 {
3005 #ifdef CONFIG_PROC_FS
3006 	remove_proc_entry("ipv6_route", net->proc_net);
3007 	remove_proc_entry("rt6_stats", net->proc_net);
3008 #endif
3009 }
3010 
3011 static struct pernet_operations ip6_route_net_ops = {
3012 	.init = ip6_route_net_init,
3013 	.exit = ip6_route_net_exit,
3014 };
3015 
3016 static int __net_init ipv6_inetpeer_init(struct net *net)
3017 {
3018 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3019 
3020 	if (!bp)
3021 		return -ENOMEM;
3022 	inet_peer_base_init(bp);
3023 	net->ipv6.peers = bp;
3024 	return 0;
3025 }
3026 
3027 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3028 {
3029 	struct inet_peer_base *bp = net->ipv6.peers;
3030 
3031 	net->ipv6.peers = NULL;
3032 	inetpeer_invalidate_tree(bp);
3033 	kfree(bp);
3034 }
3035 
3036 static struct pernet_operations ipv6_inetpeer_ops = {
3037 	.init	=	ipv6_inetpeer_init,
3038 	.exit	=	ipv6_inetpeer_exit,
3039 };
3040 
3041 static struct pernet_operations ip6_route_net_late_ops = {
3042 	.init = ip6_route_net_init_late,
3043 	.exit = ip6_route_net_exit_late,
3044 };
3045 
3046 static struct notifier_block ip6_route_dev_notifier = {
3047 	.notifier_call = ip6_route_dev_notify,
3048 	.priority = 0,
3049 };
3050 
3051 int __init ip6_route_init(void)
3052 {
3053 	int ret;
3054 
3055 	ret = -ENOMEM;
3056 	ip6_dst_ops_template.kmem_cachep =
3057 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3058 				  SLAB_HWCACHE_ALIGN, NULL);
3059 	if (!ip6_dst_ops_template.kmem_cachep)
3060 		goto out;
3061 
3062 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3063 	if (ret)
3064 		goto out_kmem_cache;
3065 
3066 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3067 	if (ret)
3068 		goto out_dst_entries;
3069 
3070 	ret = register_pernet_subsys(&ip6_route_net_ops);
3071 	if (ret)
3072 		goto out_register_inetpeer;
3073 
3074 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3075 
3076 	/* The loopback device is registered before this portion of code,
3077 	 * so the loopback reference in rt6_info will not be taken; do it
3078 	 * manually for init_net */
3079 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3080 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3081 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3082 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3083 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3084 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3085 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3086 #endif
3087 	ret = fib6_init();
3088 	if (ret)
3089 		goto out_register_subsys;
3090 
3091 	ret = xfrm6_init();
3092 	if (ret)
3093 		goto out_fib6_init;
3094 
3095 	ret = fib6_rules_init();
3096 	if (ret)
3097 		goto xfrm6_init;
3098 
3099 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3100 	if (ret)
3101 		goto fib6_rules_init;
3102 
3103 	ret = -ENOBUFS;
3104 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3105 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3106 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3107 		goto out_register_late_subsys;
3108 
3109 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3110 	if (ret)
3111 		goto out_register_late_subsys;
3112 
3113 out:
3114 	return ret;
3115 
3116 out_register_late_subsys:
3117 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3118 fib6_rules_init:
3119 	fib6_rules_cleanup();
3120 xfrm6_init:
3121 	xfrm6_fini();
3122 out_fib6_init:
3123 	fib6_gc_cleanup();
3124 out_register_subsys:
3125 	unregister_pernet_subsys(&ip6_route_net_ops);
3126 out_register_inetpeer:
3127 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3128 out_dst_entries:
3129 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3130 out_kmem_cache:
3131 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3132 	goto out;
3133 }
3134 
3135 void ip6_route_cleanup(void)
3136 {
3137 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3138 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3139 	fib6_rules_cleanup();
3140 	xfrm6_fini();
3141 	fib6_gc_cleanup();
3142 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3143 	unregister_pernet_subsys(&ip6_route_net_ops);
3144 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3145 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3146 }
3147