xref: /openbmc/linux/net/ipv6/route.c (revision 840ef8b7cc584a23c4f9d05352f4dbaf8e56e5ab)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		  reachable; otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61 
62 #include <asm/uaccess.h>
63 
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67 
68 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
69 				    const struct in6_addr *dest);
70 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
71 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
72 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
73 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74 static void		ip6_dst_destroy(struct dst_entry *);
75 static void		ip6_dst_ifdown(struct dst_entry *,
76 				       struct net_device *dev, int how);
77 static int		 ip6_dst_gc(struct dst_ops *ops);
78 
79 static int		ip6_pkt_discard(struct sk_buff *skb);
80 static int		ip6_pkt_discard_out(struct sk_buff *skb);
81 static void		ip6_link_failure(struct sk_buff *skb);
82 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83 					   struct sk_buff *skb, u32 mtu);
84 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85 					struct sk_buff *skb);
86 
87 #ifdef CONFIG_IPV6_ROUTE_INFO
88 static struct rt6_info *rt6_add_route_info(struct net *net,
89 					   const struct in6_addr *prefix, int prefixlen,
90 					   const struct in6_addr *gwaddr, int ifindex,
91 					   unsigned int pref);
92 static struct rt6_info *rt6_get_route_info(struct net *net,
93 					   const struct in6_addr *prefix, int prefixlen,
94 					   const struct in6_addr *gwaddr, int ifindex);
95 #endif
96 
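/*
 * Note on ipv6_cow_metrics() below (a descriptive sketch, not new behaviour):
 * host routes start out pointing at shared, read-only template metrics.  On
 * the first write this hook copies those metrics into the route's inet_peer
 * and swings dst->_metrics over with cmpxchg(); if another CPU won the race,
 * the winner's writable copy is used instead.
 */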
97 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
98 {
99 	struct rt6_info *rt = (struct rt6_info *) dst;
100 	struct inet_peer *peer;
101 	u32 *p = NULL;
102 
103 	if (!(rt->dst.flags & DST_HOST))
104 		return NULL;
105 
106 	peer = rt6_get_peer_create(rt);
107 	if (peer) {
108 		u32 *old_p = __DST_METRICS_PTR(old);
109 		unsigned long prev, new;
110 
111 		p = peer->metrics;
112 		if (inet_metrics_new(peer))
113 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
114 
115 		new = (unsigned long) p;
116 		prev = cmpxchg(&dst->_metrics, old, new);
117 
118 		if (prev != old) {
119 			p = __DST_METRICS_PTR(prev);
120 			if (prev & DST_METRICS_READ_ONLY)
121 				p = NULL;
122 		}
123 	}
124 	return p;
125 }
126 
127 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
128 					     struct sk_buff *skb,
129 					     const void *daddr)
130 {
131 	struct in6_addr *p = &rt->rt6i_gateway;
132 
133 	if (!ipv6_addr_any(p))
134 		return (const void *) p;
135 	else if (skb)
136 		return &ipv6_hdr(skb)->daddr;
137 	return daddr;
138 }
139 
140 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
141 					  struct sk_buff *skb,
142 					  const void *daddr)
143 {
144 	struct rt6_info *rt = (struct rt6_info *) dst;
145 	struct neighbour *n;
146 
147 	daddr = choose_neigh_daddr(rt, skb, daddr);
148 	n = __ipv6_neigh_lookup(dst->dev, daddr);
149 	if (n)
150 		return n;
151 	return neigh_create(&nd_tbl, daddr, dst->dev);
152 }
153 
154 static struct dst_ops ip6_dst_ops_template = {
155 	.family			=	AF_INET6,
156 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
157 	.gc			=	ip6_dst_gc,
158 	.gc_thresh		=	1024,
159 	.check			=	ip6_dst_check,
160 	.default_advmss		=	ip6_default_advmss,
161 	.mtu			=	ip6_mtu,
162 	.cow_metrics		=	ipv6_cow_metrics,
163 	.destroy		=	ip6_dst_destroy,
164 	.ifdown			=	ip6_dst_ifdown,
165 	.negative_advice	=	ip6_negative_advice,
166 	.link_failure		=	ip6_link_failure,
167 	.update_pmtu		=	ip6_rt_update_pmtu,
168 	.redirect		=	rt6_do_redirect,
169 	.local_out		=	__ip6_local_out,
170 	.neigh_lookup		=	ip6_neigh_lookup,
171 };
172 
173 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
174 {
175 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
176 
177 	return mtu ? : dst->dev->mtu;
178 }
179 
180 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
181 					 struct sk_buff *skb, u32 mtu)
182 {
183 }
184 
185 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
186 				      struct sk_buff *skb)
187 {
188 }
189 
190 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
191 					 unsigned long old)
192 {
193 	return NULL;
194 }
195 
196 static struct dst_ops ip6_dst_blackhole_ops = {
197 	.family			=	AF_INET6,
198 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
199 	.destroy		=	ip6_dst_destroy,
200 	.check			=	ip6_dst_check,
201 	.mtu			=	ip6_blackhole_mtu,
202 	.default_advmss		=	ip6_default_advmss,
203 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
204 	.redirect		=	ip6_rt_blackhole_redirect,
205 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
206 	.neigh_lookup		=	ip6_neigh_lookup,
207 };
208 
209 static const u32 ip6_template_metrics[RTAX_MAX] = {
210 	[RTAX_HOPLIMIT - 1] = 0,
211 };
212 
213 static const struct rt6_info ip6_null_entry_template = {
214 	.dst = {
215 		.__refcnt	= ATOMIC_INIT(1),
216 		.__use		= 1,
217 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
218 		.error		= -ENETUNREACH,
219 		.input		= ip6_pkt_discard,
220 		.output		= ip6_pkt_discard_out,
221 	},
222 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
223 	.rt6i_protocol  = RTPROT_KERNEL,
224 	.rt6i_metric	= ~(u32) 0,
225 	.rt6i_ref	= ATOMIC_INIT(1),
226 };
227 
228 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
229 
230 static int ip6_pkt_prohibit(struct sk_buff *skb);
231 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
232 
233 static const struct rt6_info ip6_prohibit_entry_template = {
234 	.dst = {
235 		.__refcnt	= ATOMIC_INIT(1),
236 		.__use		= 1,
237 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
238 		.error		= -EACCES,
239 		.input		= ip6_pkt_prohibit,
240 		.output		= ip6_pkt_prohibit_out,
241 	},
242 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
243 	.rt6i_protocol  = RTPROT_KERNEL,
244 	.rt6i_metric	= ~(u32) 0,
245 	.rt6i_ref	= ATOMIC_INIT(1),
246 };
247 
248 static const struct rt6_info ip6_blk_hole_entry_template = {
249 	.dst = {
250 		.__refcnt	= ATOMIC_INIT(1),
251 		.__use		= 1,
252 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
253 		.error		= -EINVAL,
254 		.input		= dst_discard,
255 		.output		= dst_discard,
256 	},
257 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
258 	.rt6i_protocol  = RTPROT_KERNEL,
259 	.rt6i_metric	= ~(u32) 0,
260 	.rt6i_ref	= ATOMIC_INIT(1),
261 };
262 
263 #endif
264 
265 /* allocate dst with ip6_dst_ops */
266 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
267 					     struct net_device *dev,
268 					     int flags,
269 					     struct fib6_table *table)
270 {
271 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
272 					0, DST_OBSOLETE_FORCE_CHK, flags);
273 
274 	if (rt) {
275 		struct dst_entry *dst = &rt->dst;
276 
277 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
278 		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
279 		rt->rt6i_genid = rt_genid(net);
280 		INIT_LIST_HEAD(&rt->rt6i_siblings);
281 		rt->rt6i_nsiblings = 0;
282 	}
283 	return rt;
284 }
285 
286 static void ip6_dst_destroy(struct dst_entry *dst)
287 {
288 	struct rt6_info *rt = (struct rt6_info *)dst;
289 	struct inet6_dev *idev = rt->rt6i_idev;
290 	struct dst_entry *from = dst->from;
291 
292 	if (!(rt->dst.flags & DST_HOST))
293 		dst_destroy_metrics_generic(dst);
294 
295 	if (idev) {
296 		rt->rt6i_idev = NULL;
297 		in6_dev_put(idev);
298 	}
299 
300 	dst->from = NULL;
301 	dst_release(from);
302 
303 	if (rt6_has_peer(rt)) {
304 		struct inet_peer *peer = rt6_peer_ptr(rt);
305 		inet_putpeer(peer);
306 	}
307 }
308 
309 void rt6_bind_peer(struct rt6_info *rt, int create)
310 {
311 	struct inet_peer_base *base;
312 	struct inet_peer *peer;
313 
314 	base = inetpeer_base_ptr(rt->_rt6i_peer);
315 	if (!base)
316 		return;
317 
318 	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
319 	if (peer) {
320 		if (!rt6_set_peer(rt, peer))
321 			inet_putpeer(peer);
322 	}
323 }
324 
325 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
326 			   int how)
327 {
328 	struct rt6_info *rt = (struct rt6_info *)dst;
329 	struct inet6_dev *idev = rt->rt6i_idev;
330 	struct net_device *loopback_dev =
331 		dev_net(dev)->loopback_dev;
332 
333 	if (dev != loopback_dev) {
334 		if (idev && idev->dev == dev) {
335 			struct inet6_dev *loopback_idev =
336 				in6_dev_get(loopback_dev);
337 			if (loopback_idev) {
338 				rt->rt6i_idev = loopback_idev;
339 				in6_dev_put(idev);
340 			}
341 		}
342 	}
343 }
344 
345 static bool rt6_check_expired(const struct rt6_info *rt)
346 {
347 	if (rt->rt6i_flags & RTF_EXPIRES) {
348 		if (time_after(jiffies, rt->dst.expires))
349 			return true;
350 	} else if (rt->dst.from) {
351 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
352 	}
353 	return false;
354 }
355 
356 static bool rt6_need_strict(const struct in6_addr *daddr)
357 {
358 	return ipv6_addr_type(daddr) &
359 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
360 }
361 
362 /* Multipath route selection:
363  *   Hash-based function using the packet header and flow label.
364  * Adapted from fib_info_hashfn().
365  */
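/* Informally, for a TCP or UDP flow the chosen sibling index is roughly
 *
 *	(proto ^ hash(daddr) ^ hash(saddr) ^ sport ^ dport ^ flowlabel)
 *
 * folded and taken modulo the candidate count, so all packets of one flow
 * consistently map to the same next hop.
 */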
366 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
367 			       const struct flowi6 *fl6)
368 {
369 	unsigned int val = fl6->flowi6_proto;
370 
371 	val ^= ipv6_addr_hash(&fl6->daddr);
372 	val ^= ipv6_addr_hash(&fl6->saddr);
373 
374 	/* This only works if the packet is not encapsulated */
375 	switch (fl6->flowi6_proto) {
376 	case IPPROTO_UDP:
377 	case IPPROTO_TCP:
378 	case IPPROTO_SCTP:
379 		val ^= (__force u16)fl6->fl6_sport;
380 		val ^= (__force u16)fl6->fl6_dport;
381 		break;
382 
383 	case IPPROTO_ICMPV6:
384 		val ^= (__force u16)fl6->fl6_icmp_type;
385 		val ^= (__force u16)fl6->fl6_icmp_code;
386 		break;
387 	}
388 	/* RFC 6438 recommends using the flow label */
389 	val ^= (__force u32)fl6->flowlabel;
390 
391 	/* Perhaps this function needs further tuning */
392 	val = val ^ (val >> 7) ^ (val >> 12);
393 	return val % candidate_count;
394 }
395 
396 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
397 					     struct flowi6 *fl6)
398 {
399 	struct rt6_info *sibling, *next_sibling;
400 	int route_choosen;
401 
402 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
403 	/* Don't change the route if route_choosen == 0
404 	 * (the siblings list does not include ourselves)
405 	 */
406 	if (route_choosen)
407 		list_for_each_entry_safe(sibling, next_sibling,
408 				&match->rt6i_siblings, rt6i_siblings) {
409 			route_choosen--;
410 			if (route_choosen == 0) {
411 				match = sibling;
412 				break;
413 			}
414 		}
415 	return match;
416 }
417 
418 /*
419  *	Route lookup. Any table->tb6_lock is implied.
420  */
421 
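/*
 * rt6_device_match() (below), in short: it prefers the first route whose
 * device matches @oif (or, with no @oif, whose device owns @saddr); a route
 * through the loopback device is remembered as a fallback, and with
 * RT6_LOOKUP_F_IFACE an unmatched @oif yields ip6_null_entry.
 */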
422 static inline struct rt6_info *rt6_device_match(struct net *net,
423 						    struct rt6_info *rt,
424 						    const struct in6_addr *saddr,
425 						    int oif,
426 						    int flags)
427 {
428 	struct rt6_info *local = NULL;
429 	struct rt6_info *sprt;
430 
431 	if (!oif && ipv6_addr_any(saddr))
432 		goto out;
433 
434 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
435 		struct net_device *dev = sprt->dst.dev;
436 
437 		if (oif) {
438 			if (dev->ifindex == oif)
439 				return sprt;
440 			if (dev->flags & IFF_LOOPBACK) {
441 				if (!sprt->rt6i_idev ||
442 				    sprt->rt6i_idev->dev->ifindex != oif) {
443 					if (flags & RT6_LOOKUP_F_IFACE && oif)
444 						continue;
445 					if (local && (!oif ||
446 						      local->rt6i_idev->dev->ifindex == oif))
447 						continue;
448 				}
449 				local = sprt;
450 			}
451 		} else {
452 			if (ipv6_chk_addr(net, saddr, dev,
453 					  flags & RT6_LOOKUP_F_IFACE))
454 				return sprt;
455 		}
456 	}
457 
458 	if (oif) {
459 		if (local)
460 			return local;
461 
462 		if (flags & RT6_LOOKUP_F_IFACE)
463 			return net->ipv6.ip6_null_entry;
464 	}
465 out:
466 	return rt;
467 }
468 
469 #ifdef CONFIG_IPV6_ROUTER_PREF
470 static void rt6_probe(struct rt6_info *rt)
471 {
472 	struct neighbour *neigh;
473 	/*
474 	 * Okay, this does not seem to be appropriate
475 	 * for now; however, we need to check whether it
476 	 * really is, a.k.a. Router Reachability Probing.
477 	 *
478 	 * A Router Reachability Probe MUST be rate-limited
479 	 * to no more than one per minute.
480 	 */
481 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
482 		return;
483 	rcu_read_lock_bh();
484 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
485 	if (neigh) {
486 		write_lock(&neigh->lock);
487 		if (neigh->nud_state & NUD_VALID)
488 			goto out;
489 	}
490 
491 	if (!neigh ||
492 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
493 		struct in6_addr mcaddr;
494 		struct in6_addr *target;
495 
496 		if (neigh) {
497 			neigh->updated = jiffies;
498 			write_unlock(&neigh->lock);
499 		}
500 
501 		target = (struct in6_addr *)&rt->rt6i_gateway;
502 		addrconf_addr_solict_mult(target, &mcaddr);
503 		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
504 	} else {
505 out:
506 		write_unlock(&neigh->lock);
507 	}
508 	rcu_read_unlock_bh();
509 }
510 #else
511 static inline void rt6_probe(struct rt6_info *rt)
512 {
513 }
514 #endif
515 
516 /*
517  * Default Router Selection (RFC 2461 6.3.6)
518  */
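/*
 * Rough scoring scheme used below: rt6_check_dev() contributes 2 for an
 * exact interface match (or when no interface is forced) and 1 for a
 * loopback route bound to @oif; with CONFIG_IPV6_ROUTER_PREF the decoded
 * RTF_PREF() bits are or-ed in above those bits.  A route whose neighbour is
 * not (probably) reachable is rejected outright when RT6_LOOKUP_F_REACHABLE
 * is requested.
 */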
519 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
520 {
521 	struct net_device *dev = rt->dst.dev;
522 	if (!oif || dev->ifindex == oif)
523 		return 2;
524 	if ((dev->flags & IFF_LOOPBACK) &&
525 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
526 		return 1;
527 	return 0;
528 }
529 
530 static inline bool rt6_check_neigh(struct rt6_info *rt)
531 {
532 	struct neighbour *neigh;
533 	bool ret = false;
534 
535 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
536 	    !(rt->rt6i_flags & RTF_GATEWAY))
537 		return true;
538 
539 	rcu_read_lock_bh();
540 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
541 	if (neigh) {
542 		read_lock(&neigh->lock);
543 		if (neigh->nud_state & NUD_VALID)
544 			ret = true;
545 #ifdef CONFIG_IPV6_ROUTER_PREF
546 		else if (!(neigh->nud_state & NUD_FAILED))
547 			ret = true;
548 #endif
549 		read_unlock(&neigh->lock);
550 	}
551 	rcu_read_unlock_bh();
552 
553 	return ret;
554 }
555 
556 static int rt6_score_route(struct rt6_info *rt, int oif,
557 			   int strict)
558 {
559 	int m;
560 
561 	m = rt6_check_dev(rt, oif);
562 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
563 		return -1;
564 #ifdef CONFIG_IPV6_ROUTER_PREF
565 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
566 #endif
567 	if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE))
568 		return -1;
569 	return m;
570 }
571 
572 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
573 				   int *mpri, struct rt6_info *match)
574 {
575 	int m;
576 
577 	if (rt6_check_expired(rt))
578 		goto out;
579 
580 	m = rt6_score_route(rt, oif, strict);
581 	if (m < 0)
582 		goto out;
583 
584 	if (m > *mpri) {
585 		if (strict & RT6_LOOKUP_F_REACHABLE)
586 			rt6_probe(match);
587 		*mpri = m;
588 		match = rt;
589 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
590 		rt6_probe(rt);
591 	}
592 
593 out:
594 	return match;
595 }
596 
597 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
598 				     struct rt6_info *rr_head,
599 				     u32 metric, int oif, int strict)
600 {
601 	struct rt6_info *rt, *match;
602 	int mpri = -1;
603 
604 	match = NULL;
605 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
606 	     rt = rt->dst.rt6_next)
607 		match = find_match(rt, oif, strict, &mpri, match);
608 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
609 	     rt = rt->dst.rt6_next)
610 		match = find_match(rt, oif, strict, &mpri, match);
611 
612 	return match;
613 }
614 
615 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
616 {
617 	struct rt6_info *match, *rt0;
618 	struct net *net;
619 
620 	rt0 = fn->rr_ptr;
621 	if (!rt0)
622 		fn->rr_ptr = rt0 = fn->leaf;
623 
624 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
625 
626 	if (!match &&
627 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
628 		struct rt6_info *next = rt0->dst.rt6_next;
629 
630 		/* no entries matched; do round-robin */
631 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
632 			next = fn->leaf;
633 
634 		if (next != rt0)
635 			fn->rr_ptr = next;
636 	}
637 
638 	net = dev_net(rt0->dst.dev);
639 	return match ? match : net->ipv6.ip6_null_entry;
640 }
641 
642 #ifdef CONFIG_IPV6_ROUTE_INFO
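/*
 * rt6_route_rcv() processes a Route Information option from a Router
 * Advertisement (RFC 4191): the option length is given in units of 8 octets
 * (1..3, depending on how much of the prefix is carried), a zero lifetime
 * removes any matching RTF_ROUTEINFO route, and a non-zero lifetime adds or
 * refreshes one with the advertised preference and expiry.
 */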
643 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
644 		  const struct in6_addr *gwaddr)
645 {
646 	struct net *net = dev_net(dev);
647 	struct route_info *rinfo = (struct route_info *) opt;
648 	struct in6_addr prefix_buf, *prefix;
649 	unsigned int pref;
650 	unsigned long lifetime;
651 	struct rt6_info *rt;
652 
653 	if (len < sizeof(struct route_info)) {
654 		return -EINVAL;
655 	}
656 
657 	/* Sanity check for prefix_len and length */
658 	if (rinfo->length > 3) {
659 		return -EINVAL;
660 	} else if (rinfo->prefix_len > 128) {
661 		return -EINVAL;
662 	} else if (rinfo->prefix_len > 64) {
663 		if (rinfo->length < 2) {
664 			return -EINVAL;
665 		}
666 	} else if (rinfo->prefix_len > 0) {
667 		if (rinfo->length < 1) {
668 			return -EINVAL;
669 		}
670 	}
671 
672 	pref = rinfo->route_pref;
673 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
674 		return -EINVAL;
675 
676 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
677 
678 	if (rinfo->length == 3)
679 		prefix = (struct in6_addr *)rinfo->prefix;
680 	else {
681 		/* safe: ipv6_addr_prefix() copies only prefix_len bits */
682 		ipv6_addr_prefix(&prefix_buf,
683 				 (struct in6_addr *)rinfo->prefix,
684 				 rinfo->prefix_len);
685 		prefix = &prefix_buf;
686 	}
687 
688 	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
689 				dev->ifindex);
690 
691 	if (rt && !lifetime) {
692 		ip6_del_rt(rt);
693 		rt = NULL;
694 	}
695 
696 	if (!rt && lifetime)
697 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
698 					pref);
699 	else if (rt)
700 		rt->rt6i_flags = RTF_ROUTEINFO |
701 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
702 
703 	if (rt) {
704 		if (!addrconf_finite_timeout(lifetime))
705 			rt6_clean_expires(rt);
706 		else
707 			rt6_set_expires(rt, jiffies + HZ * lifetime);
708 
709 		ip6_rt_put(rt);
710 	}
711 	return 0;
712 }
713 #endif
714 
715 #define BACKTRACK(__net, saddr)			\
716 do { \
717 	if (rt == __net->ipv6.ip6_null_entry) {	\
718 		struct fib6_node *pn; \
719 		while (1) { \
720 			if (fn->fn_flags & RTN_TL_ROOT) \
721 				goto out; \
722 			pn = fn->parent; \
723 			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
724 				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
725 			else \
726 				fn = pn; \
727 			if (fn->fn_flags & RTN_RTINFO) \
728 				goto restart; \
729 		} \
730 	} \
731 } while (0)
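/*
 * BACKTRACK() is expanded inside the lookup functions below and relies on
 * their local 'fn'/'rt' variables and 'restart'/'out' labels: when the
 * selected route is ip6_null_entry it climbs back towards the tree root,
 * descending into a parent's source-routing subtree (FIB6_SUBTREE) where one
 * exists, and jumps to 'restart' at the first ancestor that carries routes,
 * or to 'out' once the top-level root is reached.
 */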
732 
733 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
734 					     struct fib6_table *table,
735 					     struct flowi6 *fl6, int flags)
736 {
737 	struct fib6_node *fn;
738 	struct rt6_info *rt;
739 
740 	read_lock_bh(&table->tb6_lock);
741 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
742 restart:
743 	rt = fn->leaf;
744 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
745 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
746 		rt = rt6_multipath_select(rt, fl6);
747 	BACKTRACK(net, &fl6->saddr);
748 out:
749 	dst_use(&rt->dst, jiffies);
750 	read_unlock_bh(&table->tb6_lock);
751 	return rt;
752 
753 }
754 
755 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
756 				    int flags)
757 {
758 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
759 }
760 EXPORT_SYMBOL_GPL(ip6_route_lookup);
761 
762 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
763 			    const struct in6_addr *saddr, int oif, int strict)
764 {
765 	struct flowi6 fl6 = {
766 		.flowi6_oif = oif,
767 		.daddr = *daddr,
768 	};
769 	struct dst_entry *dst;
770 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
771 
772 	if (saddr) {
773 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
774 		flags |= RT6_LOOKUP_F_HAS_SADDR;
775 	}
776 
777 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
778 	if (dst->error == 0)
779 		return (struct rt6_info *) dst;
780 
781 	dst_release(dst);
782 
783 	return NULL;
784 }
785 
786 EXPORT_SYMBOL(rt6_lookup);
787 
788 /* ip6_ins_rt is called with table->tb6_lock not held.
789    It takes a new route entry; if the addition fails for any reason, the
790    route is freed. In any case, if the caller does not hold a reference to
791    it, it may be destroyed.
792  */
793 
794 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
795 {
796 	int err;
797 	struct fib6_table *table;
798 
799 	table = rt->rt6i_table;
800 	write_lock_bh(&table->tb6_lock);
801 	err = fib6_add(&table->tb6_root, rt, info);
802 	write_unlock_bh(&table->tb6_lock);
803 
804 	return err;
805 }
806 
807 int ip6_ins_rt(struct rt6_info *rt)
808 {
809 	struct nl_info info = {
810 		.nl_net = dev_net(rt->dst.dev),
811 	};
812 	return __ip6_ins_rt(rt, &info);
813 }
814 
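/*
 * Roughly: rt6_alloc_cow() below builds an RTF_CACHE clone for routes that
 * have no gateway, recording the destination itself in rt6i_gateway (and
 * marking it RTF_ANYCAST when it matches a non-host prefix address) and,
 * with CONFIG_IPV6_SUBTREES, pinning the source address; rt6_alloc_clone()
 * is the simpler RTF_CACHE copy used for non-host routes whose next hop is
 * already settled.
 */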
815 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
816 				      const struct in6_addr *daddr,
817 				      const struct in6_addr *saddr)
818 {
819 	struct rt6_info *rt;
820 
821 	/*
822 	 *	Clone the route.
823 	 */
824 
825 	rt = ip6_rt_copy(ort, daddr);
826 
827 	if (rt) {
828 		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
829 			if (ort->rt6i_dst.plen != 128 &&
830 			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
831 				rt->rt6i_flags |= RTF_ANYCAST;
832 			rt->rt6i_gateway = *daddr;
833 		}
834 
835 		rt->rt6i_flags |= RTF_CACHE;
836 
837 #ifdef CONFIG_IPV6_SUBTREES
838 		if (rt->rt6i_src.plen && saddr) {
839 			rt->rt6i_src.addr = *saddr;
840 			rt->rt6i_src.plen = 128;
841 		}
842 #endif
843 	}
844 
845 	return rt;
846 }
847 
848 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
849 					const struct in6_addr *daddr)
850 {
851 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
852 
853 	if (rt)
854 		rt->rt6i_flags |= RTF_CACHE;
855 	return rt;
856 }
857 
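/*
 * Rough flow of ip6_pol_route(): select the best route under tb6_lock
 * (preferring probably-reachable routers when this node is not forwarding,
 * and retrying without that restriction if nothing qualifies), then, unless
 * the result is already an RTF_CACHE entry or the null route, create a
 * per-destination clone, insert it into the table and return it; on an
 * insertion race the lookup is retried a bounded number of times.
 */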
858 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
859 				      struct flowi6 *fl6, int flags)
860 {
861 	struct fib6_node *fn;
862 	struct rt6_info *rt, *nrt;
863 	int strict = 0;
864 	int attempts = 3;
865 	int err;
866 	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
867 
868 	strict |= flags & RT6_LOOKUP_F_IFACE;
869 
870 relookup:
871 	read_lock_bh(&table->tb6_lock);
872 
873 restart_2:
874 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
875 
876 restart:
877 	rt = rt6_select(fn, oif, strict | reachable);
878 	if (rt->rt6i_nsiblings && oif == 0)
879 		rt = rt6_multipath_select(rt, fl6);
880 	BACKTRACK(net, &fl6->saddr);
881 	if (rt == net->ipv6.ip6_null_entry ||
882 	    rt->rt6i_flags & RTF_CACHE)
883 		goto out;
884 
885 	dst_hold(&rt->dst);
886 	read_unlock_bh(&table->tb6_lock);
887 
888 	if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
889 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
890 	else if (!(rt->dst.flags & DST_HOST))
891 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
892 	else
893 		goto out2;
894 
895 	ip6_rt_put(rt);
896 	rt = nrt ? : net->ipv6.ip6_null_entry;
897 
898 	dst_hold(&rt->dst);
899 	if (nrt) {
900 		err = ip6_ins_rt(nrt);
901 		if (!err)
902 			goto out2;
903 	}
904 
905 	if (--attempts <= 0)
906 		goto out2;
907 
908 	/*
909 	 * Race condition! In the window while table->tb6_lock was
910 	 * released, someone else could have inserted this route.  Relookup.
911 	 */
912 	ip6_rt_put(rt);
913 	goto relookup;
914 
915 out:
916 	if (reachable) {
917 		reachable = 0;
918 		goto restart_2;
919 	}
920 	dst_hold(&rt->dst);
921 	read_unlock_bh(&table->tb6_lock);
922 out2:
923 	rt->dst.lastuse = jiffies;
924 	rt->dst.__use++;
925 
926 	return rt;
927 }
928 
929 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
930 					    struct flowi6 *fl6, int flags)
931 {
932 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
933 }
934 
935 static struct dst_entry *ip6_route_input_lookup(struct net *net,
936 						struct net_device *dev,
937 						struct flowi6 *fl6, int flags)
938 {
939 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
940 		flags |= RT6_LOOKUP_F_IFACE;
941 
942 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
943 }
944 
945 void ip6_route_input(struct sk_buff *skb)
946 {
947 	const struct ipv6hdr *iph = ipv6_hdr(skb);
948 	struct net *net = dev_net(skb->dev);
949 	int flags = RT6_LOOKUP_F_HAS_SADDR;
950 	struct flowi6 fl6 = {
951 		.flowi6_iif = skb->dev->ifindex,
952 		.daddr = iph->daddr,
953 		.saddr = iph->saddr,
954 		.flowlabel = ip6_flowinfo(iph),
955 		.flowi6_mark = skb->mark,
956 		.flowi6_proto = iph->nexthdr,
957 	};
958 
959 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
960 }
961 
962 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
963 					     struct flowi6 *fl6, int flags)
964 {
965 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
966 }
967 
968 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
969 				    struct flowi6 *fl6)
970 {
971 	int flags = 0;
972 
973 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
974 
975 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
976 		flags |= RT6_LOOKUP_F_IFACE;
977 
978 	if (!ipv6_addr_any(&fl6->saddr))
979 		flags |= RT6_LOOKUP_F_HAS_SADDR;
980 	else if (sk)
981 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
982 
983 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
984 }
985 
986 EXPORT_SYMBOL(ip6_route_output);
987 
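/*
 * ip6_blackhole_route() (below) re-wraps an existing route in a dst backed
 * by ip6_dst_blackhole_ops, whose input/output handlers simply discard
 * packets and which refuses to COW its metrics; callers presumably use it
 * when they need a placeholder dst that must not emit traffic.
 */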
988 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
989 {
990 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
991 	struct dst_entry *new = NULL;
992 
993 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
994 	if (rt) {
995 		new = &rt->dst;
996 
997 		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
998 		rt6_init_peer(rt, net->ipv6.peers);
999 
1000 		new->__use = 1;
1001 		new->input = dst_discard;
1002 		new->output = dst_discard;
1003 
1004 		if (dst_metrics_read_only(&ort->dst))
1005 			new->_metrics = ort->dst._metrics;
1006 		else
1007 			dst_copy_metrics(new, &ort->dst);
1008 		rt->rt6i_idev = ort->rt6i_idev;
1009 		if (rt->rt6i_idev)
1010 			in6_dev_hold(rt->rt6i_idev);
1011 
1012 		rt->rt6i_gateway = ort->rt6i_gateway;
1013 		rt->rt6i_flags = ort->rt6i_flags;
1014 		rt->rt6i_metric = 0;
1015 
1016 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1017 #ifdef CONFIG_IPV6_SUBTREES
1018 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1019 #endif
1020 
1021 		dst_free(new);
1022 	}
1023 
1024 	dst_release(dst_orig);
1025 	return new ? new : ERR_PTR(-ENOMEM);
1026 }
1027 
1028 /*
1029  *	Destination cache support functions
1030  */
1031 
1032 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1033 {
1034 	struct rt6_info *rt;
1035 
1036 	rt = (struct rt6_info *) dst;
1037 
1038 	/* All IPv6 dsts are created with ->obsolete set to
1039 	 * DST_OBSOLETE_FORCE_CHK, which forces validation calls down
1040 	 * into this function every time.
1041 	 */
1042 	if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1043 		return NULL;
1044 
1045 	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1046 		return dst;
1047 
1048 	return NULL;
1049 }
1050 
1051 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1052 {
1053 	struct rt6_info *rt = (struct rt6_info *) dst;
1054 
1055 	if (rt) {
1056 		if (rt->rt6i_flags & RTF_CACHE) {
1057 			if (rt6_check_expired(rt)) {
1058 				ip6_del_rt(rt);
1059 				dst = NULL;
1060 			}
1061 		} else {
1062 			dst_release(dst);
1063 			dst = NULL;
1064 		}
1065 	}
1066 	return dst;
1067 }
1068 
1069 static void ip6_link_failure(struct sk_buff *skb)
1070 {
1071 	struct rt6_info *rt;
1072 
1073 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1074 
1075 	rt = (struct rt6_info *) skb_dst(skb);
1076 	if (rt) {
1077 		if (rt->rt6i_flags & RTF_CACHE)
1078 			rt6_update_expires(rt, 0);
1079 		else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1080 			rt->rt6i_node->fn_sernum = -1;
1081 	}
1082 }
1083 
1084 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1085 			       struct sk_buff *skb, u32 mtu)
1086 {
1087 	struct rt6_info *rt6 = (struct rt6_info *)dst;
1088 
1089 	dst_confirm(dst);
1090 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1091 		struct net *net = dev_net(dst->dev);
1092 
1093 		rt6->rt6i_flags |= RTF_MODIFIED;
1094 		if (mtu < IPV6_MIN_MTU) {
1095 			u32 features = dst_metric(dst, RTAX_FEATURES);
1096 			mtu = IPV6_MIN_MTU;
1097 			features |= RTAX_FEATURE_ALLFRAG;
1098 			dst_metric_set(dst, RTAX_FEATURES, features);
1099 		}
1100 		dst_metric_set(dst, RTAX_MTU, mtu);
1101 		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1102 	}
1103 }
1104 
1105 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1106 		     int oif, u32 mark)
1107 {
1108 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1109 	struct dst_entry *dst;
1110 	struct flowi6 fl6;
1111 
1112 	memset(&fl6, 0, sizeof(fl6));
1113 	fl6.flowi6_oif = oif;
1114 	fl6.flowi6_mark = mark;
1115 	fl6.flowi6_flags = 0;
1116 	fl6.daddr = iph->daddr;
1117 	fl6.saddr = iph->saddr;
1118 	fl6.flowlabel = ip6_flowinfo(iph);
1119 
1120 	dst = ip6_route_output(net, NULL, &fl6);
1121 	if (!dst->error)
1122 		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1123 	dst_release(dst);
1124 }
1125 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1126 
1127 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1128 {
1129 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1130 			sk->sk_bound_dev_if, sk->sk_mark);
1131 }
1132 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1133 
1134 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1135 {
1136 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1137 	struct dst_entry *dst;
1138 	struct flowi6 fl6;
1139 
1140 	memset(&fl6, 0, sizeof(fl6));
1141 	fl6.flowi6_oif = oif;
1142 	fl6.flowi6_mark = mark;
1143 	fl6.flowi6_flags = 0;
1144 	fl6.daddr = iph->daddr;
1145 	fl6.saddr = iph->saddr;
1146 	fl6.flowlabel = ip6_flowinfo(iph);
1147 
1148 	dst = ip6_route_output(net, NULL, &fl6);
1149 	if (!dst->error)
1150 		rt6_do_redirect(dst, NULL, skb);
1151 	dst_release(dst);
1152 }
1153 EXPORT_SYMBOL_GPL(ip6_redirect);
1154 
1155 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1156 {
1157 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1158 }
1159 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1160 
1161 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1162 {
1163 	struct net_device *dev = dst->dev;
1164 	unsigned int mtu = dst_mtu(dst);
1165 	struct net *net = dev_net(dev);
1166 
1167 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1168 
1169 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1170 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1171 
1172 	/*
1173 	 * The maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and the
1174 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1175 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1176 	 * rely only on PMTU discovery".
1177 	 */
1178 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1179 		mtu = IPV6_MAXPLEN;
1180 	return mtu;
1181 }
1182 
1183 static unsigned int ip6_mtu(const struct dst_entry *dst)
1184 {
1185 	struct inet6_dev *idev;
1186 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1187 
1188 	if (mtu)
1189 		return mtu;
1190 
1191 	mtu = IPV6_MIN_MTU;
1192 
1193 	rcu_read_lock();
1194 	idev = __in6_dev_get(dst->dev);
1195 	if (idev)
1196 		mtu = idev->cnf.mtu6;
1197 	rcu_read_unlock();
1198 
1199 	return mtu;
1200 }
1201 
1202 static struct dst_entry *icmp6_dst_gc_list;
1203 static DEFINE_SPINLOCK(icmp6_dst_lock);
1204 
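/*
 * dsts allocated by icmp6_dst_alloc() are never inserted into the FIB;
 * instead they are chained on icmp6_dst_gc_list and reaped by icmp6_dst_gc()
 * once unreferenced, or by icmp6_clean_all() when its cleanup callback
 * matches them.
 */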
1205 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1206 				  struct flowi6 *fl6)
1207 {
1208 	struct dst_entry *dst;
1209 	struct rt6_info *rt;
1210 	struct inet6_dev *idev = in6_dev_get(dev);
1211 	struct net *net = dev_net(dev);
1212 
1213 	if (unlikely(!idev))
1214 		return ERR_PTR(-ENODEV);
1215 
1216 	rt = ip6_dst_alloc(net, dev, 0, NULL);
1217 	if (unlikely(!rt)) {
1218 		in6_dev_put(idev);
1219 		dst = ERR_PTR(-ENOMEM);
1220 		goto out;
1221 	}
1222 
1223 	rt->dst.flags |= DST_HOST;
1224 	rt->dst.output  = ip6_output;
1225 	atomic_set(&rt->dst.__refcnt, 1);
1226 	rt->rt6i_dst.addr = fl6->daddr;
1227 	rt->rt6i_dst.plen = 128;
1228 	rt->rt6i_idev     = idev;
1229 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1230 
1231 	spin_lock_bh(&icmp6_dst_lock);
1232 	rt->dst.next = icmp6_dst_gc_list;
1233 	icmp6_dst_gc_list = &rt->dst;
1234 	spin_unlock_bh(&icmp6_dst_lock);
1235 
1236 	fib6_force_start_gc(net);
1237 
1238 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1239 
1240 out:
1241 	return dst;
1242 }
1243 
1244 int icmp6_dst_gc(void)
1245 {
1246 	struct dst_entry *dst, **pprev;
1247 	int more = 0;
1248 
1249 	spin_lock_bh(&icmp6_dst_lock);
1250 	pprev = &icmp6_dst_gc_list;
1251 
1252 	while ((dst = *pprev) != NULL) {
1253 		if (!atomic_read(&dst->__refcnt)) {
1254 			*pprev = dst->next;
1255 			dst_free(dst);
1256 		} else {
1257 			pprev = &dst->next;
1258 			++more;
1259 		}
1260 	}
1261 
1262 	spin_unlock_bh(&icmp6_dst_lock);
1263 
1264 	return more;
1265 }
1266 
1267 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1268 			    void *arg)
1269 {
1270 	struct dst_entry *dst, **pprev;
1271 
1272 	spin_lock_bh(&icmp6_dst_lock);
1273 	pprev = &icmp6_dst_gc_list;
1274 	while ((dst = *pprev) != NULL) {
1275 		struct rt6_info *rt = (struct rt6_info *) dst;
1276 		if (func(rt, arg)) {
1277 			*pprev = dst->next;
1278 			dst_free(dst);
1279 		} else {
1280 			pprev = &dst->next;
1281 		}
1282 	}
1283 	spin_unlock_bh(&icmp6_dst_lock);
1284 }
1285 
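/*
 * ip6_dst_gc() throttles itself: it does nothing while the entry count is
 * below ip6_rt_max_size and the minimum GC interval has not yet elapsed.
 * Under sustained pressure ip6_rt_gc_expire decays (see the elasticity
 * shift at 'out:'), shortening the age threshold handed to fib6_run_gc()
 * and so collecting cached routes more aggressively; once the table drops
 * below gc_thresh it is reset to half of ip6_rt_gc_timeout.
 */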
1286 static int ip6_dst_gc(struct dst_ops *ops)
1287 {
1288 	unsigned long now = jiffies;
1289 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1290 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1291 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1292 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1293 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1294 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1295 	int entries;
1296 
1297 	entries = dst_entries_get_fast(ops);
1298 	if (time_after(rt_last_gc + rt_min_interval, now) &&
1299 	    entries <= rt_max_size)
1300 		goto out;
1301 
1302 	net->ipv6.ip6_rt_gc_expire++;
1303 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1304 	net->ipv6.ip6_rt_last_gc = now;
1305 	entries = dst_entries_get_slow(ops);
1306 	if (entries < ops->gc_thresh)
1307 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1308 out:
1309 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1310 	return entries > rt_max_size;
1311 }
1312 
1313 int ip6_dst_hoplimit(struct dst_entry *dst)
1314 {
1315 	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1316 	if (hoplimit == 0) {
1317 		struct net_device *dev = dst->dev;
1318 		struct inet6_dev *idev;
1319 
1320 		rcu_read_lock();
1321 		idev = __in6_dev_get(dev);
1322 		if (idev)
1323 			hoplimit = idev->cnf.hop_limit;
1324 		else
1325 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1326 		rcu_read_unlock();
1327 	}
1328 	return hoplimit;
1329 }
1330 EXPORT_SYMBOL(ip6_dst_hoplimit);
1331 
1332 /*
1333  *	Add a route described by a fib6_config (netlink / ioctl path).
1334  */
1335 
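/*
 * Informally, a request such as "ip -6 route add 2001:db8::/32 via fe80::1
 * dev eth0" arrives here as a fib6_config with fc_dst/fc_dst_len describing
 * the prefix, fc_gateway plus RTF_GATEWAY in fc_flags, and fc_ifindex naming
 * the device (the concrete addresses above are only an illustration).
 */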
1336 int ip6_route_add(struct fib6_config *cfg)
1337 {
1338 	int err;
1339 	struct net *net = cfg->fc_nlinfo.nl_net;
1340 	struct rt6_info *rt = NULL;
1341 	struct net_device *dev = NULL;
1342 	struct inet6_dev *idev = NULL;
1343 	struct fib6_table *table;
1344 	int addr_type;
1345 
1346 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1347 		return -EINVAL;
1348 #ifndef CONFIG_IPV6_SUBTREES
1349 	if (cfg->fc_src_len)
1350 		return -EINVAL;
1351 #endif
1352 	if (cfg->fc_ifindex) {
1353 		err = -ENODEV;
1354 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1355 		if (!dev)
1356 			goto out;
1357 		idev = in6_dev_get(dev);
1358 		if (!idev)
1359 			goto out;
1360 	}
1361 
1362 	if (cfg->fc_metric == 0)
1363 		cfg->fc_metric = IP6_RT_PRIO_USER;
1364 
1365 	err = -ENOBUFS;
1366 	if (cfg->fc_nlinfo.nlh &&
1367 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1368 		table = fib6_get_table(net, cfg->fc_table);
1369 		if (!table) {
1370 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1371 			table = fib6_new_table(net, cfg->fc_table);
1372 		}
1373 	} else {
1374 		table = fib6_new_table(net, cfg->fc_table);
1375 	}
1376 
1377 	if (!table)
1378 		goto out;
1379 
1380 	rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1381 
1382 	if (!rt) {
1383 		err = -ENOMEM;
1384 		goto out;
1385 	}
1386 
1387 	if (cfg->fc_flags & RTF_EXPIRES)
1388 		rt6_set_expires(rt, jiffies +
1389 				clock_t_to_jiffies(cfg->fc_expires));
1390 	else
1391 		rt6_clean_expires(rt);
1392 
1393 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1394 		cfg->fc_protocol = RTPROT_BOOT;
1395 	rt->rt6i_protocol = cfg->fc_protocol;
1396 
1397 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1398 
1399 	if (addr_type & IPV6_ADDR_MULTICAST)
1400 		rt->dst.input = ip6_mc_input;
1401 	else if (cfg->fc_flags & RTF_LOCAL)
1402 		rt->dst.input = ip6_input;
1403 	else
1404 		rt->dst.input = ip6_forward;
1405 
1406 	rt->dst.output = ip6_output;
1407 
1408 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1409 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1410 	if (rt->rt6i_dst.plen == 128)
1411 		rt->dst.flags |= DST_HOST;
1412 
1413 	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1414 		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1415 		if (!metrics) {
1416 			err = -ENOMEM;
1417 			goto out;
1418 		}
1419 		dst_init_metrics(&rt->dst, metrics, 0);
1420 	}
1421 #ifdef CONFIG_IPV6_SUBTREES
1422 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1423 	rt->rt6i_src.plen = cfg->fc_src_len;
1424 #endif
1425 
1426 	rt->rt6i_metric = cfg->fc_metric;
1427 
1428 	/* We cannot add true routes via loopback here;
1429 	   they would result in kernel looping.  Promote them to reject routes.
1430 	 */
1431 	if ((cfg->fc_flags & RTF_REJECT) ||
1432 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1433 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1434 	     !(cfg->fc_flags & RTF_LOCAL))) {
1435 		/* hold loopback dev/idev if we haven't done so. */
1436 		if (dev != net->loopback_dev) {
1437 			if (dev) {
1438 				dev_put(dev);
1439 				in6_dev_put(idev);
1440 			}
1441 			dev = net->loopback_dev;
1442 			dev_hold(dev);
1443 			idev = in6_dev_get(dev);
1444 			if (!idev) {
1445 				err = -ENODEV;
1446 				goto out;
1447 			}
1448 		}
1449 		rt->dst.output = ip6_pkt_discard_out;
1450 		rt->dst.input = ip6_pkt_discard;
1451 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1452 		switch (cfg->fc_type) {
1453 		case RTN_BLACKHOLE:
1454 			rt->dst.error = -EINVAL;
1455 			break;
1456 		case RTN_PROHIBIT:
1457 			rt->dst.error = -EACCES;
1458 			break;
1459 		case RTN_THROW:
1460 			rt->dst.error = -EAGAIN;
1461 			break;
1462 		default:
1463 			rt->dst.error = -ENETUNREACH;
1464 			break;
1465 		}
1466 		goto install_route;
1467 	}
1468 
1469 	if (cfg->fc_flags & RTF_GATEWAY) {
1470 		const struct in6_addr *gw_addr;
1471 		int gwa_type;
1472 
1473 		gw_addr = &cfg->fc_gateway;
1474 		rt->rt6i_gateway = *gw_addr;
1475 		gwa_type = ipv6_addr_type(gw_addr);
1476 
1477 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1478 			struct rt6_info *grt;
1479 
1480 			/* IPv6 strictly forbids using non-link-local
1481 			   addresses as the nexthop address.
1482 			   Otherwise, the router will not be able to send redirects.
1483 			   That is very good, but in some (rare!) circumstances
1484 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1485 			   some exceptions. --ANK
1486 			 */
1487 			err = -EINVAL;
1488 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1489 				goto out;
1490 
1491 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1492 
1493 			err = -EHOSTUNREACH;
1494 			if (!grt)
1495 				goto out;
1496 			if (dev) {
1497 				if (dev != grt->dst.dev) {
1498 					ip6_rt_put(grt);
1499 					goto out;
1500 				}
1501 			} else {
1502 				dev = grt->dst.dev;
1503 				idev = grt->rt6i_idev;
1504 				dev_hold(dev);
1505 				in6_dev_hold(grt->rt6i_idev);
1506 			}
1507 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1508 				err = 0;
1509 			ip6_rt_put(grt);
1510 
1511 			if (err)
1512 				goto out;
1513 		}
1514 		err = -EINVAL;
1515 		if (!dev || (dev->flags & IFF_LOOPBACK))
1516 			goto out;
1517 	}
1518 
1519 	err = -ENODEV;
1520 	if (!dev)
1521 		goto out;
1522 
1523 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1524 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1525 			err = -EINVAL;
1526 			goto out;
1527 		}
1528 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1529 		rt->rt6i_prefsrc.plen = 128;
1530 	} else
1531 		rt->rt6i_prefsrc.plen = 0;
1532 
1533 	rt->rt6i_flags = cfg->fc_flags;
1534 
1535 install_route:
1536 	if (cfg->fc_mx) {
1537 		struct nlattr *nla;
1538 		int remaining;
1539 
1540 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1541 			int type = nla_type(nla);
1542 
1543 			if (type) {
1544 				if (type > RTAX_MAX) {
1545 					err = -EINVAL;
1546 					goto out;
1547 				}
1548 
1549 				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1550 			}
1551 		}
1552 	}
1553 
1554 	rt->dst.dev = dev;
1555 	rt->rt6i_idev = idev;
1556 	rt->rt6i_table = table;
1557 
1558 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1559 
1560 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1561 
1562 out:
1563 	if (dev)
1564 		dev_put(dev);
1565 	if (idev)
1566 		in6_dev_put(idev);
1567 	if (rt)
1568 		dst_free(&rt->dst);
1569 	return err;
1570 }
1571 
1572 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1573 {
1574 	int err;
1575 	struct fib6_table *table;
1576 	struct net *net = dev_net(rt->dst.dev);
1577 
1578 	if (rt == net->ipv6.ip6_null_entry) {
1579 		err = -ENOENT;
1580 		goto out;
1581 	}
1582 
1583 	table = rt->rt6i_table;
1584 	write_lock_bh(&table->tb6_lock);
1585 	err = fib6_del(rt, info);
1586 	write_unlock_bh(&table->tb6_lock);
1587 
1588 out:
1589 	ip6_rt_put(rt);
1590 	return err;
1591 }
1592 
1593 int ip6_del_rt(struct rt6_info *rt)
1594 {
1595 	struct nl_info info = {
1596 		.nl_net = dev_net(rt->dst.dev),
1597 	};
1598 	return __ip6_del_rt(rt, &info);
1599 }
1600 
1601 static int ip6_route_del(struct fib6_config *cfg)
1602 {
1603 	struct fib6_table *table;
1604 	struct fib6_node *fn;
1605 	struct rt6_info *rt;
1606 	int err = -ESRCH;
1607 
1608 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1609 	if (!table)
1610 		return err;
1611 
1612 	read_lock_bh(&table->tb6_lock);
1613 
1614 	fn = fib6_locate(&table->tb6_root,
1615 			 &cfg->fc_dst, cfg->fc_dst_len,
1616 			 &cfg->fc_src, cfg->fc_src_len);
1617 
1618 	if (fn) {
1619 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1620 			if (cfg->fc_ifindex &&
1621 			    (!rt->dst.dev ||
1622 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
1623 				continue;
1624 			if (cfg->fc_flags & RTF_GATEWAY &&
1625 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1626 				continue;
1627 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1628 				continue;
1629 			dst_hold(&rt->dst);
1630 			read_unlock_bh(&table->tb6_lock);
1631 
1632 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1633 		}
1634 	}
1635 	read_unlock_bh(&table->tb6_lock);
1636 
1637 	return err;
1638 }
1639 
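/*
 * Sketch of rt6_do_redirect(): validate the ICMPv6 Redirect (sane option
 * length, non-multicast destination, link-local unicast target unless the
 * redirect is on-link), make sure this host accepts redirects, update the
 * neighbour entry for the new first hop, then install an RTF_CACHE/
 * RTF_DYNAMIC host route towards the redirected destination and drop any
 * old cached entry for it.
 */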
1640 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1641 {
1642 	struct net *net = dev_net(skb->dev);
1643 	struct netevent_redirect netevent;
1644 	struct rt6_info *rt, *nrt = NULL;
1645 	struct ndisc_options ndopts;
1646 	struct inet6_dev *in6_dev;
1647 	struct neighbour *neigh;
1648 	struct rd_msg *msg;
1649 	int optlen, on_link;
1650 	u8 *lladdr;
1651 
1652 	optlen = skb->tail - skb->transport_header;
1653 	optlen -= sizeof(*msg);
1654 
1655 	if (optlen < 0) {
1656 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1657 		return;
1658 	}
1659 
1660 	msg = (struct rd_msg *)icmp6_hdr(skb);
1661 
1662 	if (ipv6_addr_is_multicast(&msg->dest)) {
1663 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1664 		return;
1665 	}
1666 
1667 	on_link = 0;
1668 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1669 		on_link = 1;
1670 	} else if (ipv6_addr_type(&msg->target) !=
1671 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1672 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1673 		return;
1674 	}
1675 
1676 	in6_dev = __in6_dev_get(skb->dev);
1677 	if (!in6_dev)
1678 		return;
1679 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1680 		return;
1681 
1682 	/* RFC2461 8.1:
1683 	 *	The IP source address of the Redirect MUST be the same as the current
1684 	 *	first-hop router for the specified ICMP Destination Address.
1685 	 */
1686 
1687 	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1688 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1689 		return;
1690 	}
1691 
1692 	lladdr = NULL;
1693 	if (ndopts.nd_opts_tgt_lladdr) {
1694 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1695 					     skb->dev);
1696 		if (!lladdr) {
1697 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1698 			return;
1699 		}
1700 	}
1701 
1702 	rt = (struct rt6_info *) dst;
1703 	if (rt == net->ipv6.ip6_null_entry) {
1704 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1705 		return;
1706 	}
1707 
1708 	/* Redirect received -> path was valid.
1709 	 * Look, redirects are sent only in response to data packets,
1710 	 * so this nexthop is apparently reachable. --ANK
1711 	 */
1712 	dst_confirm(&rt->dst);
1713 
1714 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1715 	if (!neigh)
1716 		return;
1717 
1718 	/*
1719 	 *	We have finally decided to accept it.
1720 	 */
1721 
1722 	neigh_update(neigh, lladdr, NUD_STALE,
1723 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1724 		     NEIGH_UPDATE_F_OVERRIDE|
1725 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1726 				     NEIGH_UPDATE_F_ISROUTER))
1727 		     );
1728 
1729 	nrt = ip6_rt_copy(rt, &msg->dest);
1730 	if (!nrt)
1731 		goto out;
1732 
1733 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1734 	if (on_link)
1735 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1736 
1737 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1738 
1739 	if (ip6_ins_rt(nrt))
1740 		goto out;
1741 
1742 	netevent.old = &rt->dst;
1743 	netevent.new = &nrt->dst;
1744 	netevent.daddr = &msg->dest;
1745 	netevent.neigh = neigh;
1746 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1747 
1748 	if (rt->rt6i_flags & RTF_CACHE) {
1749 		rt = (struct rt6_info *) dst_clone(&rt->dst);
1750 		ip6_del_rt(rt);
1751 	}
1752 
1753 out:
1754 	neigh_release(neigh);
1755 }
1756 
1757 /*
1758  *	Misc support functions
1759  */
1760 
1761 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1762 				    const struct in6_addr *dest)
1763 {
1764 	struct net *net = dev_net(ort->dst.dev);
1765 	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1766 					    ort->rt6i_table);
1767 
1768 	if (rt) {
1769 		rt->dst.input = ort->dst.input;
1770 		rt->dst.output = ort->dst.output;
1771 		rt->dst.flags |= DST_HOST;
1772 
1773 		rt->rt6i_dst.addr = *dest;
1774 		rt->rt6i_dst.plen = 128;
1775 		dst_copy_metrics(&rt->dst, &ort->dst);
1776 		rt->dst.error = ort->dst.error;
1777 		rt->rt6i_idev = ort->rt6i_idev;
1778 		if (rt->rt6i_idev)
1779 			in6_dev_hold(rt->rt6i_idev);
1780 		rt->dst.lastuse = jiffies;
1781 
1782 		rt->rt6i_gateway = ort->rt6i_gateway;
1783 		rt->rt6i_flags = ort->rt6i_flags;
1784 		if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1785 		    (RTF_DEFAULT | RTF_ADDRCONF))
1786 			rt6_set_from(rt, ort);
1787 		rt->rt6i_metric = 0;
1788 
1789 #ifdef CONFIG_IPV6_SUBTREES
1790 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1791 #endif
1792 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1793 		rt->rt6i_table = ort->rt6i_table;
1794 	}
1795 	return rt;
1796 }
1797 
1798 #ifdef CONFIG_IPV6_ROUTE_INFO
1799 static struct rt6_info *rt6_get_route_info(struct net *net,
1800 					   const struct in6_addr *prefix, int prefixlen,
1801 					   const struct in6_addr *gwaddr, int ifindex)
1802 {
1803 	struct fib6_node *fn;
1804 	struct rt6_info *rt = NULL;
1805 	struct fib6_table *table;
1806 
1807 	table = fib6_get_table(net, RT6_TABLE_INFO);
1808 	if (!table)
1809 		return NULL;
1810 
1811 	read_lock_bh(&table->tb6_lock);
1812 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1813 	if (!fn)
1814 		goto out;
1815 
1816 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1817 		if (rt->dst.dev->ifindex != ifindex)
1818 			continue;
1819 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1820 			continue;
1821 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1822 			continue;
1823 		dst_hold(&rt->dst);
1824 		break;
1825 	}
1826 out:
1827 	read_unlock_bh(&table->tb6_lock);
1828 	return rt;
1829 }
1830 
1831 static struct rt6_info *rt6_add_route_info(struct net *net,
1832 					   const struct in6_addr *prefix, int prefixlen,
1833 					   const struct in6_addr *gwaddr, int ifindex,
1834 					   unsigned int pref)
1835 {
1836 	struct fib6_config cfg = {
1837 		.fc_table	= RT6_TABLE_INFO,
1838 		.fc_metric	= IP6_RT_PRIO_USER,
1839 		.fc_ifindex	= ifindex,
1840 		.fc_dst_len	= prefixlen,
1841 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1842 				  RTF_UP | RTF_PREF(pref),
1843 		.fc_nlinfo.portid = 0,
1844 		.fc_nlinfo.nlh = NULL,
1845 		.fc_nlinfo.nl_net = net,
1846 	};
1847 
1848 	cfg.fc_dst = *prefix;
1849 	cfg.fc_gateway = *gwaddr;
1850 
1851 	/* We should treat it as a default route if prefix length is 0. */
1852 	if (!prefixlen)
1853 		cfg.fc_flags |= RTF_DEFAULT;
1854 
1855 	ip6_route_add(&cfg);
1856 
1857 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1858 }
1859 #endif
1860 
1861 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1862 {
1863 	struct rt6_info *rt;
1864 	struct fib6_table *table;
1865 
1866 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1867 	if (!table)
1868 		return NULL;
1869 
1870 	read_lock_bh(&table->tb6_lock);
1871 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1872 		if (dev == rt->dst.dev &&
1873 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1874 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1875 			break;
1876 	}
1877 	if (rt)
1878 		dst_hold(&rt->dst);
1879 	read_unlock_bh(&table->tb6_lock);
1880 	return rt;
1881 }
1882 
1883 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1884 				     struct net_device *dev,
1885 				     unsigned int pref)
1886 {
1887 	struct fib6_config cfg = {
1888 		.fc_table	= RT6_TABLE_DFLT,
1889 		.fc_metric	= IP6_RT_PRIO_USER,
1890 		.fc_ifindex	= dev->ifindex,
1891 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1892 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1893 		.fc_nlinfo.portid = 0,
1894 		.fc_nlinfo.nlh = NULL,
1895 		.fc_nlinfo.nl_net = dev_net(dev),
1896 	};
1897 
1898 	cfg.fc_gateway = *gwaddr;
1899 
1900 	ip6_route_add(&cfg);
1901 
1902 	return rt6_get_dflt_router(gwaddr, dev);
1903 }
1904 
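/*
 * rt6_purge_dflt_routers() below deletes every route flagged RTF_DEFAULT or
 * RTF_ADDRCONF from the default-router table; because ip6_del_rt() must run
 * without tb6_lock held, the scan is simply restarted from the top after
 * each deletion.
 */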
1905 void rt6_purge_dflt_routers(struct net *net)
1906 {
1907 	struct rt6_info *rt;
1908 	struct fib6_table *table;
1909 
1910 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1911 	table = fib6_get_table(net, RT6_TABLE_DFLT);
1912 	if (!table)
1913 		return;
1914 
1915 restart:
1916 	read_lock_bh(&table->tb6_lock);
1917 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1918 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1919 			dst_hold(&rt->dst);
1920 			read_unlock_bh(&table->tb6_lock);
1921 			ip6_del_rt(rt);
1922 			goto restart;
1923 		}
1924 	}
1925 	read_unlock_bh(&table->tb6_lock);
1926 }
1927 
1928 static void rtmsg_to_fib6_config(struct net *net,
1929 				 struct in6_rtmsg *rtmsg,
1930 				 struct fib6_config *cfg)
1931 {
1932 	memset(cfg, 0, sizeof(*cfg));
1933 
1934 	cfg->fc_table = RT6_TABLE_MAIN;
1935 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1936 	cfg->fc_metric = rtmsg->rtmsg_metric;
1937 	cfg->fc_expires = rtmsg->rtmsg_info;
1938 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1939 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1940 	cfg->fc_flags = rtmsg->rtmsg_flags;
1941 
1942 	cfg->fc_nlinfo.nl_net = net;
1943 
1944 	cfg->fc_dst = rtmsg->rtmsg_dst;
1945 	cfg->fc_src = rtmsg->rtmsg_src;
1946 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
1947 }
1948 
1949 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1950 {
1951 	struct fib6_config cfg;
1952 	struct in6_rtmsg rtmsg;
1953 	int err;
1954 
1955 	switch (cmd) {
1956 	case SIOCADDRT:		/* Add a route */
1957 	case SIOCDELRT:		/* Delete a route */
1958 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1959 			return -EPERM;
1960 		err = copy_from_user(&rtmsg, arg,
1961 				     sizeof(struct in6_rtmsg));
1962 		if (err)
1963 			return -EFAULT;
1964 
1965 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1966 
1967 		rtnl_lock();
1968 		switch (cmd) {
1969 		case SIOCADDRT:
1970 			err = ip6_route_add(&cfg);
1971 			break;
1972 		case SIOCDELRT:
1973 			err = ip6_route_del(&cfg);
1974 			break;
1975 		default:
1976 			err = -EINVAL;
1977 		}
1978 		rtnl_unlock();
1979 
1980 		return err;
1981 	}
1982 
1983 	return -EINVAL;
1984 }
1985 
1986 /*
1987  *	Drop the packet on the floor
1988  */
1989 
1990 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1991 {
1992 	int type;
1993 	struct dst_entry *dst = skb_dst(skb);
1994 	switch (ipstats_mib_noroutes) {
1995 	case IPSTATS_MIB_INNOROUTES:
1996 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1997 		if (type == IPV6_ADDR_ANY) {
1998 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1999 				      IPSTATS_MIB_INADDRERRORS);
2000 			break;
2001 		}
2002 		/* FALLTHROUGH */
2003 	case IPSTATS_MIB_OUTNOROUTES:
2004 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2005 			      ipstats_mib_noroutes);
2006 		break;
2007 	}
2008 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2009 	kfree_skb(skb);
2010 	return 0;
2011 }
2012 
2013 static int ip6_pkt_discard(struct sk_buff *skb)
2014 {
2015 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2016 }
2017 
2018 static int ip6_pkt_discard_out(struct sk_buff *skb)
2019 {
2020 	skb->dev = skb_dst(skb)->dev;
2021 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2022 }
2023 
2024 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2025 
2026 static int ip6_pkt_prohibit(struct sk_buff *skb)
2027 {
2028 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2029 }
2030 
2031 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2032 {
2033 	skb->dev = skb_dst(skb)->dev;
2034 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2035 }
2036 
2037 #endif
2038 
2039 /*
2040  *	Allocate a dst for local (unicast / anycast) address.
2041  */
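/*
 * Illustrative note (not part of the original source): these are the /128
 * host routes that addrconf installs in RT6_TABLE_LOCAL when an address is
 * configured, visible from userspace roughly as
 *
 *	# ip -6 route show table local
 *	local ::1 dev lo ...
 *	anycast fe80:: dev eth0 ...
 */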
2042 
2043 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2044 				    const struct in6_addr *addr,
2045 				    bool anycast)
2046 {
2047 	struct net *net = dev_net(idev->dev);
2048 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2049 
2050 	if (!rt) {
2051 		net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2052 		return ERR_PTR(-ENOMEM);
2053 	}
2054 
2055 	in6_dev_hold(idev);
2056 
2057 	rt->dst.flags |= DST_HOST;
2058 	rt->dst.input = ip6_input;
2059 	rt->dst.output = ip6_output;
2060 	rt->rt6i_idev = idev;
2061 
2062 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2063 	if (anycast)
2064 		rt->rt6i_flags |= RTF_ANYCAST;
2065 	else
2066 		rt->rt6i_flags |= RTF_LOCAL;
2067 
2068 	rt->rt6i_dst.addr = *addr;
2069 	rt->rt6i_dst.plen = 128;
2070 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2071 
2072 	atomic_set(&rt->dst.__refcnt, 1);
2073 
2074 	return rt;
2075 }
2076 
2077 int ip6_route_get_saddr(struct net *net,
2078 			struct rt6_info *rt,
2079 			const struct in6_addr *daddr,
2080 			unsigned int prefs,
2081 			struct in6_addr *saddr)
2082 {
2083 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2084 	int err = 0;
2085 	if (rt->rt6i_prefsrc.plen)
2086 		*saddr = rt->rt6i_prefsrc.addr;
2087 	else
2088 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2089 					 daddr, prefs, saddr);
2090 	return err;
2091 }
2092 
2093 /* remove deleted IP from prefsrc entries */
2094 struct arg_dev_net_ip {
2095 	struct net_device *dev;
2096 	struct net *net;
2097 	struct in6_addr *addr;
2098 };
2099 
2100 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2101 {
2102 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2103 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2104 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2105 
2106 	if (((void *)rt->dst.dev == dev || !dev) &&
2107 	    rt != net->ipv6.ip6_null_entry &&
2108 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2109 		/* remove prefsrc entry */
2110 		rt->rt6i_prefsrc.plen = 0;
2111 	}
2112 	return 0;
2113 }
2114 
2115 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2116 {
2117 	struct net *net = dev_net(ifp->idev->dev);
2118 	struct arg_dev_net_ip adni = {
2119 		.dev = ifp->idev->dev,
2120 		.net = net,
2121 		.addr = &ifp->addr,
2122 	};
2123 	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2124 }
2125 
2126 struct arg_dev_net {
2127 	struct net_device *dev;
2128 	struct net *net;
2129 };
2130 
2131 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2132 {
2133 	const struct arg_dev_net *adn = arg;
2134 	const struct net_device *dev = adn->dev;
2135 
2136 	if ((rt->dst.dev == dev || !dev) &&
2137 	    rt != adn->net->ipv6.ip6_null_entry)
2138 		return -1;
2139 
2140 	return 0;
2141 }
2142 
2143 void rt6_ifdown(struct net *net, struct net_device *dev)
2144 {
2145 	struct arg_dev_net adn = {
2146 		.dev = dev,
2147 		.net = net,
2148 	};
2149 
2150 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2151 	icmp6_clean_all(fib6_ifdown, &adn);
2152 }
2153 
2154 struct rt6_mtu_change_arg {
2155 	struct net_device *dev;
2156 	unsigned int mtu;
2157 };
2158 
2159 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2160 {
2161 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2162 	struct inet6_dev *idev;
2163 
2164 	/* In IPv6, PMTU discovery is not optional, so an RTAX_MTU
2165 	   lock cannot disable it.
2166 	   We still use the lock to block changes
2167 	   caused by addrconf/ndisc.
2168 	*/
2169 
2170 	idev = __in6_dev_get(arg->dev);
2171 	if (!idev)
2172 		return 0;
2173 
2174 	/* For an administrative MTU increase there is no way to discover
2175 	   an IPv6 PMTU increase, so the PMTU must be updated here.
2176 	   Since RFC 1981 doesn't cover administrative MTU increases,
2177 	   updating the PMTU on increase is a MUST (e.g. jumbo frames).
2178 	 */
2179 	/*
2180 	   If the new MTU is less than the route PMTU, the new MTU will be
2181 	   the lowest MTU in the path; update the route PMTU to reflect the
2182 	   decrease.  If the new MTU is greater than the route PMTU, and the
2183 	   old MTU was the lowest MTU in the path, update the route PMTU
2184 	   to reflect the increase.  In that case, if another node in the
2185 	   path has an even lower MTU, a Packet Too Big message will
2186 	   trigger PMTU discovery again.
2187 	 */
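	/* Worked example (not from the original source, and assuming
	 * idev->cnf.mtu6 still holds the pre-change device MTU when this
	 * runs): if the device MTU is raised from 1500 to 9000, only routes
	 * whose PMTU still equals 1500 (dst_mtu == idev->cnf.mtu6) are
	 * bumped to 9000; a route with a smaller PMTU learned from a
	 * Packet Too Big message (say 1400) is left alone.  If the device
	 * MTU is lowered to 1280, every unlocked route on the device with
	 * a PMTU >= 1280 is clamped down to 1280.
	 */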
2188 	if (rt->dst.dev == arg->dev &&
2189 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2190 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2191 	     (dst_mtu(&rt->dst) < arg->mtu &&
2192 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2193 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2194 	}
2195 	return 0;
2196 }
2197 
2198 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2199 {
2200 	struct rt6_mtu_change_arg arg = {
2201 		.dev = dev,
2202 		.mtu = mtu,
2203 	};
2204 
2205 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2206 }
2207 
2208 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2209 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2210 	[RTA_OIF]               = { .type = NLA_U32 },
2211 	[RTA_IIF]		= { .type = NLA_U32 },
2212 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2213 	[RTA_METRICS]           = { .type = NLA_NESTED },
2214 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2215 };
2216 
2217 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2218 			      struct fib6_config *cfg)
2219 {
2220 	struct rtmsg *rtm;
2221 	struct nlattr *tb[RTA_MAX+1];
2222 	int err;
2223 
2224 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2225 	if (err < 0)
2226 		goto errout;
2227 
2228 	err = -EINVAL;
2229 	rtm = nlmsg_data(nlh);
2230 	memset(cfg, 0, sizeof(*cfg));
2231 
2232 	cfg->fc_table = rtm->rtm_table;
2233 	cfg->fc_dst_len = rtm->rtm_dst_len;
2234 	cfg->fc_src_len = rtm->rtm_src_len;
2235 	cfg->fc_flags = RTF_UP;
2236 	cfg->fc_protocol = rtm->rtm_protocol;
2237 	cfg->fc_type = rtm->rtm_type;
2238 
2239 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2240 	    rtm->rtm_type == RTN_BLACKHOLE ||
2241 	    rtm->rtm_type == RTN_PROHIBIT ||
2242 	    rtm->rtm_type == RTN_THROW)
2243 		cfg->fc_flags |= RTF_REJECT;
2244 
2245 	if (rtm->rtm_type == RTN_LOCAL)
2246 		cfg->fc_flags |= RTF_LOCAL;
2247 
2248 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2249 	cfg->fc_nlinfo.nlh = nlh;
2250 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2251 
2252 	if (tb[RTA_GATEWAY]) {
2253 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2254 		cfg->fc_flags |= RTF_GATEWAY;
2255 	}
2256 
2257 	if (tb[RTA_DST]) {
2258 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2259 
2260 		if (nla_len(tb[RTA_DST]) < plen)
2261 			goto errout;
2262 
2263 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2264 	}
2265 
2266 	if (tb[RTA_SRC]) {
2267 		int plen = (rtm->rtm_src_len + 7) >> 3;
2268 
2269 		if (nla_len(tb[RTA_SRC]) < plen)
2270 			goto errout;
2271 
2272 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2273 	}
2274 
2275 	if (tb[RTA_PREFSRC])
2276 		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2277 
2278 	if (tb[RTA_OIF])
2279 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2280 
2281 	if (tb[RTA_PRIORITY])
2282 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2283 
2284 	if (tb[RTA_METRICS]) {
2285 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2286 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2287 	}
2288 
2289 	if (tb[RTA_TABLE])
2290 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2291 
2292 	if (tb[RTA_MULTIPATH]) {
2293 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2294 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2295 	}
2296 
2297 	err = 0;
2298 errout:
2299 	return err;
2300 }
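/*
 * Illustrative mapping (not part of the original source): a command such as
 *
 *	ip -6 route add 2001:db8::/64 via 2001:db8:1::1 dev eth0 metric 1024
 *
 * roughly arrives here as rtm_dst_len = 64 with RTA_DST, RTA_GATEWAY,
 * RTA_OIF and RTA_PRIORITY attributes, and ends up as fc_dst/fc_dst_len,
 * fc_gateway (plus RTF_GATEWAY), fc_ifindex and fc_metric in the
 * fib6_config filled in above.
 */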
2301 
2302 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2303 {
2304 	struct fib6_config r_cfg;
2305 	struct rtnexthop *rtnh;
2306 	int remaining;
2307 	int attrlen;
2308 	int err = 0, last_err = 0;
2309 
2310 beginning:
2311 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2312 	remaining = cfg->fc_mp_len;
2313 
2314 	/* Parse a Multipath Entry */
2315 	while (rtnh_ok(rtnh, remaining)) {
2316 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2317 		if (rtnh->rtnh_ifindex)
2318 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2319 
2320 		attrlen = rtnh_attrlen(rtnh);
2321 		if (attrlen > 0) {
2322 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2323 
2324 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2325 			if (nla) {
2326 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2327 				r_cfg.fc_flags |= RTF_GATEWAY;
2328 			}
2329 		}
2330 		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2331 		if (err) {
2332 			last_err = err;
2333 			/* If we are trying to remove a route, do not stop the
2334 			 * loop when ip6_route_del() fails (because the next hop
2335 			 * is already gone); we should try to remove all next hops.
2336 			 */
2337 			if (add) {
2338 				/* If add fails, we should try to delete all
2339 				 * next hops that have been already added.
2340 				 */
2341 				add = 0;
2342 				goto beginning;
2343 			}
2344 		}
2345 		/* Because each route is added as a single route, we remove
2346 		 * this flag after the first nexthop (if there is a collision,
2347 		 * we have already failed to add the first nexthop:
2348 		 * fib6_add_rt2node() has rejected it).
2349 		 */
2350 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2351 		rtnh = rtnh_next(rtnh, &remaining);
2352 	}
2353 
2354 	return last_err;
2355 }
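/*
 * Illustrative note (not part of the original source): a command such as
 *
 *	ip -6 route add 2001:db8::/64 nexthop via fe80::1 dev eth0 \
 *					nexthop via fe80::2 dev eth1
 *
 * encodes its nexthops in RTA_MULTIPATH; the loop above turns each
 * rtnexthop into its own ip6_route_add()/ip6_route_del() call, clearing
 * NLM_F_EXCL after the first so the remaining nexthops are not rejected
 * as duplicates.
 */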
2356 
2357 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2358 {
2359 	struct fib6_config cfg;
2360 	int err;
2361 
2362 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2363 	if (err < 0)
2364 		return err;
2365 
2366 	if (cfg.fc_mp)
2367 		return ip6_route_multipath(&cfg, 0);
2368 	else
2369 		return ip6_route_del(&cfg);
2370 }
2371 
2372 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2373 {
2374 	struct fib6_config cfg;
2375 	int err;
2376 
2377 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2378 	if (err < 0)
2379 		return err;
2380 
2381 	if (cfg.fc_mp)
2382 		return ip6_route_multipath(&cfg, 1);
2383 	else
2384 		return ip6_route_add(&cfg);
2385 }
2386 
2387 static inline size_t rt6_nlmsg_size(void)
2388 {
2389 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2390 	       + nla_total_size(16) /* RTA_SRC */
2391 	       + nla_total_size(16) /* RTA_DST */
2392 	       + nla_total_size(16) /* RTA_GATEWAY */
2393 	       + nla_total_size(16) /* RTA_PREFSRC */
2394 	       + nla_total_size(4) /* RTA_TABLE */
2395 	       + nla_total_size(4) /* RTA_IIF */
2396 	       + nla_total_size(4) /* RTA_OIF */
2397 	       + nla_total_size(4) /* RTA_PRIORITY */
2398 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2399 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2400 }
2401 
2402 static int rt6_fill_node(struct net *net,
2403 			 struct sk_buff *skb, struct rt6_info *rt,
2404 			 struct in6_addr *dst, struct in6_addr *src,
2405 			 int iif, int type, u32 portid, u32 seq,
2406 			 int prefix, int nowait, unsigned int flags)
2407 {
2408 	struct rtmsg *rtm;
2409 	struct nlmsghdr *nlh;
2410 	long expires;
2411 	u32 table;
2412 
2413 	if (prefix) {	/* user wants prefix routes only */
2414 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2415 			/* success since this is not a prefix route */
2416 			return 1;
2417 		}
2418 	}
2419 
2420 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2421 	if (!nlh)
2422 		return -EMSGSIZE;
2423 
2424 	rtm = nlmsg_data(nlh);
2425 	rtm->rtm_family = AF_INET6;
2426 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2427 	rtm->rtm_src_len = rt->rt6i_src.plen;
2428 	rtm->rtm_tos = 0;
2429 	if (rt->rt6i_table)
2430 		table = rt->rt6i_table->tb6_id;
2431 	else
2432 		table = RT6_TABLE_UNSPEC;
2433 	rtm->rtm_table = table;
2434 	if (nla_put_u32(skb, RTA_TABLE, table))
2435 		goto nla_put_failure;
2436 	if (rt->rt6i_flags & RTF_REJECT) {
2437 		switch (rt->dst.error) {
2438 		case -EINVAL:
2439 			rtm->rtm_type = RTN_BLACKHOLE;
2440 			break;
2441 		case -EACCES:
2442 			rtm->rtm_type = RTN_PROHIBIT;
2443 			break;
2444 		case -EAGAIN:
2445 			rtm->rtm_type = RTN_THROW;
2446 			break;
2447 		default:
2448 			rtm->rtm_type = RTN_UNREACHABLE;
2449 			break;
2450 		}
2451 	}
2452 	else if (rt->rt6i_flags & RTF_LOCAL)
2453 		rtm->rtm_type = RTN_LOCAL;
2454 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2455 		rtm->rtm_type = RTN_LOCAL;
2456 	else
2457 		rtm->rtm_type = RTN_UNICAST;
2458 	rtm->rtm_flags = 0;
2459 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2460 	rtm->rtm_protocol = rt->rt6i_protocol;
2461 	if (rt->rt6i_flags & RTF_DYNAMIC)
2462 		rtm->rtm_protocol = RTPROT_REDIRECT;
2463 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
2464 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2465 			rtm->rtm_protocol = RTPROT_RA;
2466 		else
2467 			rtm->rtm_protocol = RTPROT_KERNEL;
2468 	}
2469 
2470 	if (rt->rt6i_flags & RTF_CACHE)
2471 		rtm->rtm_flags |= RTM_F_CLONED;
2472 
2473 	if (dst) {
2474 		if (nla_put(skb, RTA_DST, 16, dst))
2475 			goto nla_put_failure;
2476 		rtm->rtm_dst_len = 128;
2477 	} else if (rtm->rtm_dst_len)
2478 		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2479 			goto nla_put_failure;
2480 #ifdef CONFIG_IPV6_SUBTREES
2481 	if (src) {
2482 		if (nla_put(skb, RTA_SRC, 16, src))
2483 			goto nla_put_failure;
2484 		rtm->rtm_src_len = 128;
2485 	} else if (rtm->rtm_src_len &&
2486 		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2487 		goto nla_put_failure;
2488 #endif
2489 	if (iif) {
2490 #ifdef CONFIG_IPV6_MROUTE
2491 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2492 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2493 			if (err <= 0) {
2494 				if (!nowait) {
2495 					if (err == 0)
2496 						return 0;
2497 					goto nla_put_failure;
2498 				} else {
2499 					if (err == -EMSGSIZE)
2500 						goto nla_put_failure;
2501 				}
2502 			}
2503 		} else
2504 #endif
2505 			if (nla_put_u32(skb, RTA_IIF, iif))
2506 				goto nla_put_failure;
2507 	} else if (dst) {
2508 		struct in6_addr saddr_buf;
2509 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2510 		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2511 			goto nla_put_failure;
2512 	}
2513 
2514 	if (rt->rt6i_prefsrc.plen) {
2515 		struct in6_addr saddr_buf;
2516 		saddr_buf = rt->rt6i_prefsrc.addr;
2517 		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2518 			goto nla_put_failure;
2519 	}
2520 
2521 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2522 		goto nla_put_failure;
2523 
2524 	if (rt->rt6i_flags & RTF_GATEWAY) {
2525 		if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2526 			goto nla_put_failure;
2527 	}
2528 
2529 	if (rt->dst.dev &&
2530 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2531 		goto nla_put_failure;
2532 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2533 		goto nla_put_failure;
2534 
2535 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2536 
2537 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2538 		goto nla_put_failure;
2539 
2540 	return nlmsg_end(skb, nlh);
2541 
2542 nla_put_failure:
2543 	nlmsg_cancel(skb, nlh);
2544 	return -EMSGSIZE;
2545 }
2546 
2547 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2548 {
2549 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2550 	int prefix;
2551 
2552 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2553 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2554 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2555 	} else
2556 		prefix = 0;
2557 
2558 	return rt6_fill_node(arg->net,
2559 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2560 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2561 		     prefix, 0, NLM_F_MULTI);
2562 }
2563 
2564 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2565 {
2566 	struct net *net = sock_net(in_skb->sk);
2567 	struct nlattr *tb[RTA_MAX+1];
2568 	struct rt6_info *rt;
2569 	struct sk_buff *skb;
2570 	struct rtmsg *rtm;
2571 	struct flowi6 fl6;
2572 	int err, iif = 0, oif = 0;
2573 
2574 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2575 	if (err < 0)
2576 		goto errout;
2577 
2578 	err = -EINVAL;
2579 	memset(&fl6, 0, sizeof(fl6));
2580 
2581 	if (tb[RTA_SRC]) {
2582 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2583 			goto errout;
2584 
2585 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2586 	}
2587 
2588 	if (tb[RTA_DST]) {
2589 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2590 			goto errout;
2591 
2592 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2593 	}
2594 
2595 	if (tb[RTA_IIF])
2596 		iif = nla_get_u32(tb[RTA_IIF]);
2597 
2598 	if (tb[RTA_OIF])
2599 		oif = nla_get_u32(tb[RTA_OIF]);
2600 
2601 	if (iif) {
2602 		struct net_device *dev;
2603 		int flags = 0;
2604 
2605 		dev = __dev_get_by_index(net, iif);
2606 		if (!dev) {
2607 			err = -ENODEV;
2608 			goto errout;
2609 		}
2610 
2611 		fl6.flowi6_iif = iif;
2612 
2613 		if (!ipv6_addr_any(&fl6.saddr))
2614 			flags |= RT6_LOOKUP_F_HAS_SADDR;
2615 
2616 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2617 							       flags);
2618 	} else {
2619 		fl6.flowi6_oif = oif;
2620 
2621 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2622 	}
2623 
2624 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2625 	if (!skb) {
2626 		ip6_rt_put(rt);
2627 		err = -ENOBUFS;
2628 		goto errout;
2629 	}
2630 
2631 	/* Reserve room for dummy headers; this skb can pass
2632 	   through a good chunk of the routing engine.
2633 	 */
2634 	skb_reset_mac_header(skb);
2635 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2636 
2637 	skb_dst_set(skb, &rt->dst);
2638 
2639 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2640 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2641 			    nlh->nlmsg_seq, 0, 0, 0);
2642 	if (err < 0) {
2643 		kfree_skb(skb);
2644 		goto errout;
2645 	}
2646 
2647 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2648 errout:
2649 	return err;
2650 }
2651 
2652 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2653 {
2654 	struct sk_buff *skb;
2655 	struct net *net = info->nl_net;
2656 	u32 seq;
2657 	int err;
2658 
2659 	err = -ENOBUFS;
2660 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2661 
2662 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2663 	if (!skb)
2664 		goto errout;
2665 
2666 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2667 				event, info->portid, seq, 0, 0, 0);
2668 	if (err < 0) {
2669 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2670 		WARN_ON(err == -EMSGSIZE);
2671 		kfree_skb(skb);
2672 		goto errout;
2673 	}
2674 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2675 		    info->nlh, gfp_any());
2676 	return;
2677 errout:
2678 	if (err < 0)
2679 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2680 }
2681 
2682 static int ip6_route_dev_notify(struct notifier_block *this,
2683 				unsigned long event, void *data)
2684 {
2685 	struct net_device *dev = (struct net_device *)data;
2686 	struct net *net = dev_net(dev);
2687 
2688 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2689 		net->ipv6.ip6_null_entry->dst.dev = dev;
2690 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2691 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2692 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2693 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2694 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2695 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2696 #endif
2697 	}
2698 
2699 	return NOTIFY_OK;
2700 }
2701 
2702 /*
2703  *	/proc
2704  */
2705 
2706 #ifdef CONFIG_PROC_FS
2707 
2708 struct rt6_proc_arg
2709 {
2710 	char *buffer;
2711 	int offset;
2712 	int length;
2713 	int skip;
2714 	int len;
2715 };
2716 
2717 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2718 {
2719 	struct seq_file *m = p_arg;
2720 
2721 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2722 
2723 #ifdef CONFIG_IPV6_SUBTREES
2724 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2725 #else
2726 	seq_puts(m, "00000000000000000000000000000000 00 ");
2727 #endif
2728 	if (rt->rt6i_flags & RTF_GATEWAY) {
2729 		seq_printf(m, "%pi6", &rt->rt6i_gateway);
2730 	} else {
2731 		seq_puts(m, "00000000000000000000000000000000");
2732 	}
2733 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2734 		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2735 		   rt->dst.__use, rt->rt6i_flags,
2736 		   rt->dst.dev ? rt->dst.dev->name : "");
2737 	return 0;
2738 }
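/*
 * Illustrative note (not part of the original source): each call above
 * emits one /proc/net/ipv6_route line of the form
 *
 *	<dst> <dst plen> <src> <src plen> <gateway> <metric> <refcnt>
 *		<use> <flags> <device>
 *
 * where the three addresses are printed by %pi6 as 32 hex digits without
 * colons, the prefix lengths and the metric/refcnt/use/flags fields are
 * hex, and <device> is blank for routes without a device.
 */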
2739 
2740 static int ipv6_route_show(struct seq_file *m, void *v)
2741 {
2742 	struct net *net = (struct net *)m->private;
2743 	fib6_clean_all_ro(net, rt6_info_route, 0, m);
2744 	return 0;
2745 }
2746 
2747 static int ipv6_route_open(struct inode *inode, struct file *file)
2748 {
2749 	return single_open_net(inode, file, ipv6_route_show);
2750 }
2751 
2752 static const struct file_operations ipv6_route_proc_fops = {
2753 	.owner		= THIS_MODULE,
2754 	.open		= ipv6_route_open,
2755 	.read		= seq_read,
2756 	.llseek		= seq_lseek,
2757 	.release	= single_release_net,
2758 };
2759 
2760 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2761 {
2762 	struct net *net = (struct net *)seq->private;
2763 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2764 		   net->ipv6.rt6_stats->fib_nodes,
2765 		   net->ipv6.rt6_stats->fib_route_nodes,
2766 		   net->ipv6.rt6_stats->fib_rt_alloc,
2767 		   net->ipv6.rt6_stats->fib_rt_entries,
2768 		   net->ipv6.rt6_stats->fib_rt_cache,
2769 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2770 		   net->ipv6.rt6_stats->fib_discarded_routes);
2771 
2772 	return 0;
2773 }
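/*
 * Usage sketch (illustrative): "cat /proc/net/rt6_stats" prints the seven
 * counters above as space-separated hex words, in the order fib_nodes,
 * fib_route_nodes, fib_rt_alloc, fib_rt_entries, fib_rt_cache, current
 * dst entries and fib_discarded_routes.
 */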
2774 
2775 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2776 {
2777 	return single_open_net(inode, file, rt6_stats_seq_show);
2778 }
2779 
2780 static const struct file_operations rt6_stats_seq_fops = {
2781 	.owner	 = THIS_MODULE,
2782 	.open	 = rt6_stats_seq_open,
2783 	.read	 = seq_read,
2784 	.llseek	 = seq_lseek,
2785 	.release = single_release_net,
2786 };
2787 #endif	/* CONFIG_PROC_FS */
2788 
2789 #ifdef CONFIG_SYSCTL
2790 
2791 static
2792 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2793 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2794 {
2795 	struct net *net;
2796 	int delay;
2797 	if (!write)
2798 		return -EINVAL;
2799 
2800 	net = (struct net *)ctl->extra1;
2801 	delay = net->ipv6.sysctl.flush_delay;
2802 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2803 	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2804 	return 0;
2805 }
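/*
 * Usage sketch (illustrative): this handler is write-only, so
 *
 *	echo 1 > /proc/sys/net/ipv6/route/flush
 *
 * triggers a fib6_run_gc() pass for the corresponding net namespace,
 * while reading the file returns -EINVAL.
 */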
2806 
2807 ctl_table ipv6_route_table_template[] = {
2808 	{
2809 		.procname	=	"flush",
2810 		.data		=	&init_net.ipv6.sysctl.flush_delay,
2811 		.maxlen		=	sizeof(int),
2812 		.mode		=	0200,
2813 		.proc_handler	=	ipv6_sysctl_rtcache_flush
2814 	},
2815 	{
2816 		.procname	=	"gc_thresh",
2817 		.data		=	&ip6_dst_ops_template.gc_thresh,
2818 		.maxlen		=	sizeof(int),
2819 		.mode		=	0644,
2820 		.proc_handler	=	proc_dointvec,
2821 	},
2822 	{
2823 		.procname	=	"max_size",
2824 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2825 		.maxlen		=	sizeof(int),
2826 		.mode		=	0644,
2827 		.proc_handler	=	proc_dointvec,
2828 	},
2829 	{
2830 		.procname	=	"gc_min_interval",
2831 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2832 		.maxlen		=	sizeof(int),
2833 		.mode		=	0644,
2834 		.proc_handler	=	proc_dointvec_jiffies,
2835 	},
2836 	{
2837 		.procname	=	"gc_timeout",
2838 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2839 		.maxlen		=	sizeof(int),
2840 		.mode		=	0644,
2841 		.proc_handler	=	proc_dointvec_jiffies,
2842 	},
2843 	{
2844 		.procname	=	"gc_interval",
2845 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2846 		.maxlen		=	sizeof(int),
2847 		.mode		=	0644,
2848 		.proc_handler	=	proc_dointvec_jiffies,
2849 	},
2850 	{
2851 		.procname	=	"gc_elasticity",
2852 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2853 		.maxlen		=	sizeof(int),
2854 		.mode		=	0644,
2855 		.proc_handler	=	proc_dointvec,
2856 	},
2857 	{
2858 		.procname	=	"mtu_expires",
2859 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2860 		.maxlen		=	sizeof(int),
2861 		.mode		=	0644,
2862 		.proc_handler	=	proc_dointvec_jiffies,
2863 	},
2864 	{
2865 		.procname	=	"min_adv_mss",
2866 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2867 		.maxlen		=	sizeof(int),
2868 		.mode		=	0644,
2869 		.proc_handler	=	proc_dointvec,
2870 	},
2871 	{
2872 		.procname	=	"gc_min_interval_ms",
2873 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2874 		.maxlen		=	sizeof(int),
2875 		.mode		=	0644,
2876 		.proc_handler	=	proc_dointvec_ms_jiffies,
2877 	},
2878 	{ }
2879 };
2880 
2881 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2882 {
2883 	struct ctl_table *table;
2884 
2885 	table = kmemdup(ipv6_route_table_template,
2886 			sizeof(ipv6_route_table_template),
2887 			GFP_KERNEL);
2888 
2889 	if (table) {
2890 		table[0].data = &net->ipv6.sysctl.flush_delay;
2891 		table[0].extra1 = net;
2892 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2893 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2894 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2895 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2896 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2897 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2898 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2899 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2900 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2901 
2902 		/* Don't export sysctls to unprivileged users */
2903 		if (net->user_ns != &init_user_ns)
2904 			table[0].procname = NULL;
2905 	}
2906 
2907 	return table;
2908 }
2909 #endif
2910 
2911 static int __net_init ip6_route_net_init(struct net *net)
2912 {
2913 	int ret = -ENOMEM;
2914 
2915 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2916 	       sizeof(net->ipv6.ip6_dst_ops));
2917 
2918 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2919 		goto out_ip6_dst_ops;
2920 
2921 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2922 					   sizeof(*net->ipv6.ip6_null_entry),
2923 					   GFP_KERNEL);
2924 	if (!net->ipv6.ip6_null_entry)
2925 		goto out_ip6_dst_entries;
2926 	net->ipv6.ip6_null_entry->dst.path =
2927 		(struct dst_entry *)net->ipv6.ip6_null_entry;
2928 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2929 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2930 			 ip6_template_metrics, true);
2931 
2932 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2933 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2934 					       sizeof(*net->ipv6.ip6_prohibit_entry),
2935 					       GFP_KERNEL);
2936 	if (!net->ipv6.ip6_prohibit_entry)
2937 		goto out_ip6_null_entry;
2938 	net->ipv6.ip6_prohibit_entry->dst.path =
2939 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2940 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2941 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2942 			 ip6_template_metrics, true);
2943 
2944 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2945 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2946 					       GFP_KERNEL);
2947 	if (!net->ipv6.ip6_blk_hole_entry)
2948 		goto out_ip6_prohibit_entry;
2949 	net->ipv6.ip6_blk_hole_entry->dst.path =
2950 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2951 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2952 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2953 			 ip6_template_metrics, true);
2954 #endif
2955 
2956 	net->ipv6.sysctl.flush_delay = 0;
2957 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2958 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2959 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2960 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2961 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2962 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2963 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2964 
2965 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2966 
2967 	ret = 0;
2968 out:
2969 	return ret;
2970 
2971 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2972 out_ip6_prohibit_entry:
2973 	kfree(net->ipv6.ip6_prohibit_entry);
2974 out_ip6_null_entry:
2975 	kfree(net->ipv6.ip6_null_entry);
2976 #endif
2977 out_ip6_dst_entries:
2978 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2979 out_ip6_dst_ops:
2980 	goto out;
2981 }
2982 
2983 static void __net_exit ip6_route_net_exit(struct net *net)
2984 {
2985 	kfree(net->ipv6.ip6_null_entry);
2986 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2987 	kfree(net->ipv6.ip6_prohibit_entry);
2988 	kfree(net->ipv6.ip6_blk_hole_entry);
2989 #endif
2990 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2991 }
2992 
2993 static int __net_init ip6_route_net_init_late(struct net *net)
2994 {
2995 #ifdef CONFIG_PROC_FS
2996 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
2997 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
2998 #endif
2999 	return 0;
3000 }
3001 
3002 static void __net_exit ip6_route_net_exit_late(struct net *net)
3003 {
3004 #ifdef CONFIG_PROC_FS
3005 	remove_proc_entry("ipv6_route", net->proc_net);
3006 	remove_proc_entry("rt6_stats", net->proc_net);
3007 #endif
3008 }
3009 
3010 static struct pernet_operations ip6_route_net_ops = {
3011 	.init = ip6_route_net_init,
3012 	.exit = ip6_route_net_exit,
3013 };
3014 
3015 static int __net_init ipv6_inetpeer_init(struct net *net)
3016 {
3017 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3018 
3019 	if (!bp)
3020 		return -ENOMEM;
3021 	inet_peer_base_init(bp);
3022 	net->ipv6.peers = bp;
3023 	return 0;
3024 }
3025 
3026 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3027 {
3028 	struct inet_peer_base *bp = net->ipv6.peers;
3029 
3030 	net->ipv6.peers = NULL;
3031 	inetpeer_invalidate_tree(bp);
3032 	kfree(bp);
3033 }
3034 
3035 static struct pernet_operations ipv6_inetpeer_ops = {
3036 	.init	=	ipv6_inetpeer_init,
3037 	.exit	=	ipv6_inetpeer_exit,
3038 };
3039 
3040 static struct pernet_operations ip6_route_net_late_ops = {
3041 	.init = ip6_route_net_init_late,
3042 	.exit = ip6_route_net_exit_late,
3043 };
3044 
3045 static struct notifier_block ip6_route_dev_notifier = {
3046 	.notifier_call = ip6_route_dev_notify,
3047 	.priority = 0,
3048 };
3049 
3050 int __init ip6_route_init(void)
3051 {
3052 	int ret;
3053 
3054 	ret = -ENOMEM;
3055 	ip6_dst_ops_template.kmem_cachep =
3056 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3057 				  SLAB_HWCACHE_ALIGN, NULL);
3058 	if (!ip6_dst_ops_template.kmem_cachep)
3059 		goto out;
3060 
3061 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3062 	if (ret)
3063 		goto out_kmem_cache;
3064 
3065 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3066 	if (ret)
3067 		goto out_dst_entries;
3068 
3069 	ret = register_pernet_subsys(&ip6_route_net_ops);
3070 	if (ret)
3071 		goto out_register_inetpeer;
3072 
3073 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3074 
3075 	/* The loopback device is registered before this code runs, so the
3076 	 * loopback reference in rt6_info is not taken automatically; do it
3077 	 * manually for init_net */
3078 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3079 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3080 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3081 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3082 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3083 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3084 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3085 #endif
3086 	ret = fib6_init();
3087 	if (ret)
3088 		goto out_register_subsys;
3089 
3090 	ret = xfrm6_init();
3091 	if (ret)
3092 		goto out_fib6_init;
3093 
3094 	ret = fib6_rules_init();
3095 	if (ret)
3096 		goto xfrm6_init;
3097 
3098 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3099 	if (ret)
3100 		goto fib6_rules_init;
3101 
3102 	ret = -ENOBUFS;
3103 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3104 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3105 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3106 		goto out_register_late_subsys;
3107 
3108 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3109 	if (ret)
3110 		goto out_register_late_subsys;
3111 
3112 out:
3113 	return ret;
3114 
3115 out_register_late_subsys:
3116 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3117 fib6_rules_init:
3118 	fib6_rules_cleanup();
3119 xfrm6_init:
3120 	xfrm6_fini();
3121 out_fib6_init:
3122 	fib6_gc_cleanup();
3123 out_register_subsys:
3124 	unregister_pernet_subsys(&ip6_route_net_ops);
3125 out_register_inetpeer:
3126 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3127 out_dst_entries:
3128 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3129 out_kmem_cache:
3130 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3131 	goto out;
3132 }
3133 
3134 void ip6_route_cleanup(void)
3135 {
3136 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3137 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3138 	fib6_rules_cleanup();
3139 	xfrm6_fini();
3140 	fib6_gc_cleanup();
3141 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3142 	unregister_pernet_subsys(&ip6_route_net_ops);
3143 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3144 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3145 }
3146