xref: /openbmc/linux/net/ipv6/route.c (revision 161f4089)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61 
62 #include <asm/uaccess.h>
63 
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67 
68 enum rt6_nud_state {
69 	RT6_NUD_FAIL_HARD = -2,
70 	RT6_NUD_FAIL_SOFT = -1,
71 	RT6_NUD_SUCCEED = 1
72 };
73 
74 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
75 				    const struct in6_addr *dest);
76 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
77 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
78 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void		ip6_dst_destroy(struct dst_entry *);
81 static void		ip6_dst_ifdown(struct dst_entry *,
82 				       struct net_device *dev, int how);
83 static int		 ip6_dst_gc(struct dst_ops *ops);
84 
85 static int		ip6_pkt_discard(struct sk_buff *skb);
86 static int		ip6_pkt_discard_out(struct sk_buff *skb);
87 static void		ip6_link_failure(struct sk_buff *skb);
88 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
89 					   struct sk_buff *skb, u32 mtu);
90 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
91 					struct sk_buff *skb);
92 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
93 
94 #ifdef CONFIG_IPV6_ROUTE_INFO
95 static struct rt6_info *rt6_add_route_info(struct net *net,
96 					   const struct in6_addr *prefix, int prefixlen,
97 					   const struct in6_addr *gwaddr, int ifindex,
98 					   unsigned int pref);
99 static struct rt6_info *rt6_get_route_info(struct net *net,
100 					   const struct in6_addr *prefix, int prefixlen,
101 					   const struct in6_addr *gwaddr, int ifindex);
102 #endif
103 
104 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
105 {
106 	struct rt6_info *rt = (struct rt6_info *) dst;
107 	struct inet_peer *peer;
108 	u32 *p = NULL;
109 
110 	if (!(rt->dst.flags & DST_HOST))
111 		return NULL;
112 
113 	peer = rt6_get_peer_create(rt);
114 	if (peer) {
115 		u32 *old_p = __DST_METRICS_PTR(old);
116 		unsigned long prev, new;
117 
118 		p = peer->metrics;
119 		if (inet_metrics_new(peer))
120 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
121 
122 		new = (unsigned long) p;
123 		prev = cmpxchg(&dst->_metrics, old, new);
124 
125 		if (prev != old) {
126 			p = __DST_METRICS_PTR(prev);
127 			if (prev & DST_METRICS_READ_ONLY)
128 				p = NULL;
129 		}
130 	}
131 	return p;
132 }
133 
134 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
135 					     struct sk_buff *skb,
136 					     const void *daddr)
137 {
138 	struct in6_addr *p = &rt->rt6i_gateway;
139 
140 	if (!ipv6_addr_any(p))
141 		return (const void *) p;
142 	else if (skb)
143 		return &ipv6_hdr(skb)->daddr;
144 	return daddr;
145 }
146 
147 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
148 					  struct sk_buff *skb,
149 					  const void *daddr)
150 {
151 	struct rt6_info *rt = (struct rt6_info *) dst;
152 	struct neighbour *n;
153 
154 	daddr = choose_neigh_daddr(rt, skb, daddr);
155 	n = __ipv6_neigh_lookup(dst->dev, daddr);
156 	if (n)
157 		return n;
158 	return neigh_create(&nd_tbl, daddr, dst->dev);
159 }
160 
161 static struct dst_ops ip6_dst_ops_template = {
162 	.family			=	AF_INET6,
163 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
164 	.gc			=	ip6_dst_gc,
165 	.gc_thresh		=	1024,
166 	.check			=	ip6_dst_check,
167 	.default_advmss		=	ip6_default_advmss,
168 	.mtu			=	ip6_mtu,
169 	.cow_metrics		=	ipv6_cow_metrics,
170 	.destroy		=	ip6_dst_destroy,
171 	.ifdown			=	ip6_dst_ifdown,
172 	.negative_advice	=	ip6_negative_advice,
173 	.link_failure		=	ip6_link_failure,
174 	.update_pmtu		=	ip6_rt_update_pmtu,
175 	.redirect		=	rt6_do_redirect,
176 	.local_out		=	__ip6_local_out,
177 	.neigh_lookup		=	ip6_neigh_lookup,
178 };
179 
180 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
181 {
182 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
183 
184 	return mtu ? : dst->dev->mtu;
185 }
186 
187 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
188 					 struct sk_buff *skb, u32 mtu)
189 {
190 }
191 
192 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
193 				      struct sk_buff *skb)
194 {
195 }
196 
197 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
198 					 unsigned long old)
199 {
200 	return NULL;
201 }
202 
203 static struct dst_ops ip6_dst_blackhole_ops = {
204 	.family			=	AF_INET6,
205 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
206 	.destroy		=	ip6_dst_destroy,
207 	.check			=	ip6_dst_check,
208 	.mtu			=	ip6_blackhole_mtu,
209 	.default_advmss		=	ip6_default_advmss,
210 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
211 	.redirect		=	ip6_rt_blackhole_redirect,
212 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
213 	.neigh_lookup		=	ip6_neigh_lookup,
214 };
215 
216 static const u32 ip6_template_metrics[RTAX_MAX] = {
217 	[RTAX_HOPLIMIT - 1] = 0,
218 };
219 
220 static const struct rt6_info ip6_null_entry_template = {
221 	.dst = {
222 		.__refcnt	= ATOMIC_INIT(1),
223 		.__use		= 1,
224 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
225 		.error		= -ENETUNREACH,
226 		.input		= ip6_pkt_discard,
227 		.output		= ip6_pkt_discard_out,
228 	},
229 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
230 	.rt6i_protocol  = RTPROT_KERNEL,
231 	.rt6i_metric	= ~(u32) 0,
232 	.rt6i_ref	= ATOMIC_INIT(1),
233 };
234 
235 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
236 
237 static int ip6_pkt_prohibit(struct sk_buff *skb);
238 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
239 
240 static const struct rt6_info ip6_prohibit_entry_template = {
241 	.dst = {
242 		.__refcnt	= ATOMIC_INIT(1),
243 		.__use		= 1,
244 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
245 		.error		= -EACCES,
246 		.input		= ip6_pkt_prohibit,
247 		.output		= ip6_pkt_prohibit_out,
248 	},
249 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
250 	.rt6i_protocol  = RTPROT_KERNEL,
251 	.rt6i_metric	= ~(u32) 0,
252 	.rt6i_ref	= ATOMIC_INIT(1),
253 };
254 
255 static const struct rt6_info ip6_blk_hole_entry_template = {
256 	.dst = {
257 		.__refcnt	= ATOMIC_INIT(1),
258 		.__use		= 1,
259 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
260 		.error		= -EINVAL,
261 		.input		= dst_discard,
262 		.output		= dst_discard,
263 	},
264 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
265 	.rt6i_protocol  = RTPROT_KERNEL,
266 	.rt6i_metric	= ~(u32) 0,
267 	.rt6i_ref	= ATOMIC_INIT(1),
268 };
269 
270 #endif
271 
272 /* allocate dst with ip6_dst_ops */
273 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
274 					     struct net_device *dev,
275 					     int flags,
276 					     struct fib6_table *table)
277 {
278 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
279 					0, DST_OBSOLETE_FORCE_CHK, flags);
280 
281 	if (rt) {
282 		struct dst_entry *dst = &rt->dst;
283 
284 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
285 		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
286 		rt->rt6i_genid = rt_genid_ipv6(net);
287 		INIT_LIST_HEAD(&rt->rt6i_siblings);
288 	}
289 	return rt;
290 }
291 
292 static void ip6_dst_destroy(struct dst_entry *dst)
293 {
294 	struct rt6_info *rt = (struct rt6_info *)dst;
295 	struct inet6_dev *idev = rt->rt6i_idev;
296 	struct dst_entry *from = dst->from;
297 
298 	if (!(rt->dst.flags & DST_HOST))
299 		dst_destroy_metrics_generic(dst);
300 
301 	if (idev) {
302 		rt->rt6i_idev = NULL;
303 		in6_dev_put(idev);
304 	}
305 
306 	dst->from = NULL;
307 	dst_release(from);
308 
309 	if (rt6_has_peer(rt)) {
310 		struct inet_peer *peer = rt6_peer_ptr(rt);
311 		inet_putpeer(peer);
312 	}
313 }
314 
315 void rt6_bind_peer(struct rt6_info *rt, int create)
316 {
317 	struct inet_peer_base *base;
318 	struct inet_peer *peer;
319 
320 	base = inetpeer_base_ptr(rt->_rt6i_peer);
321 	if (!base)
322 		return;
323 
324 	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
325 	if (peer) {
326 		if (!rt6_set_peer(rt, peer))
327 			inet_putpeer(peer);
328 	}
329 }
330 
331 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
332 			   int how)
333 {
334 	struct rt6_info *rt = (struct rt6_info *)dst;
335 	struct inet6_dev *idev = rt->rt6i_idev;
336 	struct net_device *loopback_dev =
337 		dev_net(dev)->loopback_dev;
338 
339 	if (dev != loopback_dev) {
340 		if (idev && idev->dev == dev) {
341 			struct inet6_dev *loopback_idev =
342 				in6_dev_get(loopback_dev);
343 			if (loopback_idev) {
344 				rt->rt6i_idev = loopback_idev;
345 				in6_dev_put(idev);
346 			}
347 		}
348 	}
349 }
350 
351 static bool rt6_check_expired(const struct rt6_info *rt)
352 {
353 	if (rt->rt6i_flags & RTF_EXPIRES) {
354 		if (time_after(jiffies, rt->dst.expires))
355 			return true;
356 	} else if (rt->dst.from) {
357 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
358 	}
359 	return false;
360 }
361 
362 static bool rt6_need_strict(const struct in6_addr *daddr)
363 {
364 	return ipv6_addr_type(daddr) &
365 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
366 }
367 
368 /* Multipath route selection:
369  *   Hash based function using packet header and flowlabel.
370  * Adapted from fib_info_hashfn()
371  */
372 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
373 			       const struct flowi6 *fl6)
374 {
375 	unsigned int val = fl6->flowi6_proto;
376 
377 	val ^= ipv6_addr_hash(&fl6->daddr);
378 	val ^= ipv6_addr_hash(&fl6->saddr);
379 
380 	/* Works only if this is not encapsulated */
381 	switch (fl6->flowi6_proto) {
382 	case IPPROTO_UDP:
383 	case IPPROTO_TCP:
384 	case IPPROTO_SCTP:
385 		val ^= (__force u16)fl6->fl6_sport;
386 		val ^= (__force u16)fl6->fl6_dport;
387 		break;
388 
389 	case IPPROTO_ICMPV6:
390 		val ^= (__force u16)fl6->fl6_icmp_type;
391 		val ^= (__force u16)fl6->fl6_icmp_code;
392 		break;
393 	}
394 	/* RFC 6438 recommends using the flow label */
395 	val ^= (__force u32)fl6->flowlabel;
396 
397 	/* Perhaps we need to tune this function? */
398 	val = val ^ (val >> 7) ^ (val >> 12);
399 	return val % candidate_count;
400 }
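
/*
 * Worked example (illustrative): for a TCP flow, val is the XOR of the
 * protocol number, ipv6_addr_hash() of both addresses, the two ports and
 * the flow label, folded with (val >> 7) ^ (val >> 12).  With a matched
 * route that has two siblings (candidate_count == 3), the result modulo 3
 * is 0, 1 or 2: 0 keeps the matched route, 1 or 2 walks that many entries
 * into the sibling list in rt6_multipath_select() below.
 */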
401 
402 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
403 					     struct flowi6 *fl6, int oif,
404 					     int strict)
405 {
406 	struct rt6_info *sibling, *next_sibling;
407 	int route_choosen;
408 
409 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
410 	/* Don't change the route if route_choosen == 0
411 	 * (the sibling list does not include ourselves)
412 	 */
413 	if (route_choosen)
414 		list_for_each_entry_safe(sibling, next_sibling,
415 				&match->rt6i_siblings, rt6i_siblings) {
416 			route_choosen--;
417 			if (route_choosen == 0) {
418 				if (rt6_score_route(sibling, oif, strict) < 0)
419 					break;
420 				match = sibling;
421 				break;
422 			}
423 		}
424 	return match;
425 }
426 
427 /*
428  *	Route lookup. Any table->tb6_lock is implied.
429  */
430 
431 static inline struct rt6_info *rt6_device_match(struct net *net,
432 						    struct rt6_info *rt,
433 						    const struct in6_addr *saddr,
434 						    int oif,
435 						    int flags)
436 {
437 	struct rt6_info *local = NULL;
438 	struct rt6_info *sprt;
439 
440 	if (!oif && ipv6_addr_any(saddr))
441 		goto out;
442 
443 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
444 		struct net_device *dev = sprt->dst.dev;
445 
446 		if (oif) {
447 			if (dev->ifindex == oif)
448 				return sprt;
449 			if (dev->flags & IFF_LOOPBACK) {
450 				if (!sprt->rt6i_idev ||
451 				    sprt->rt6i_idev->dev->ifindex != oif) {
452 					if (flags & RT6_LOOKUP_F_IFACE && oif)
453 						continue;
454 					if (local && (!oif ||
455 						      local->rt6i_idev->dev->ifindex == oif))
456 						continue;
457 				}
458 				local = sprt;
459 			}
460 		} else {
461 			if (ipv6_chk_addr(net, saddr, dev,
462 					  flags & RT6_LOOKUP_F_IFACE))
463 				return sprt;
464 		}
465 	}
466 
467 	if (oif) {
468 		if (local)
469 			return local;
470 
471 		if (flags & RT6_LOOKUP_F_IFACE)
472 			return net->ipv6.ip6_null_entry;
473 	}
474 out:
475 	return rt;
476 }
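
/*
 * Matching precedence in rt6_device_match(): with an oif given, a route
 * whose device index equals oif wins immediately; routes on a loopback
 * device bound to that interface are remembered as a weaker "local"
 * fallback.  If nothing matches and RT6_LOOKUP_F_IFACE is set, the
 * unreachable null entry is returned instead of the head route.
 */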
477 
478 #ifdef CONFIG_IPV6_ROUTER_PREF
479 struct __rt6_probe_work {
480 	struct work_struct work;
481 	struct in6_addr target;
482 	struct net_device *dev;
483 };
484 
485 static void rt6_probe_deferred(struct work_struct *w)
486 {
487 	struct in6_addr mcaddr;
488 	struct __rt6_probe_work *work =
489 		container_of(w, struct __rt6_probe_work, work);
490 
491 	addrconf_addr_solict_mult(&work->target, &mcaddr);
492 	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
493 	dev_put(work->dev);
494 	kfree(w);
495 }
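
/*
 * Example: probing a gateway fe80::1 sends a Neighbour Solicitation with
 * target fe80::1 to its solicited-node multicast group ff02::1:ff00:1
 * (ff02::1:ff00:0/104 plus the low 24 bits of the target address).
 */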
496 
497 static void rt6_probe(struct rt6_info *rt)
498 {
499 	struct neighbour *neigh;
500 	/*
501 	 * Okay, this does not seem to be appropriate
502 	 * for now; however, we need to check whether it
503 	 * really is, i.e. Router Reachability Probing.
504 	 *
505 	 * Router Reachability Probe MUST be rate-limited
506 	 * to no more than one per minute.
507 	 */
508 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
509 		return;
510 	rcu_read_lock_bh();
511 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
512 	if (neigh) {
513 		write_lock(&neigh->lock);
514 		if (neigh->nud_state & NUD_VALID)
515 			goto out;
516 	}
517 
518 	if (!neigh ||
519 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
520 		struct __rt6_probe_work *work;
521 
522 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
523 
524 		if (neigh && work)
525 			neigh->updated = jiffies;
526 
527 		if (neigh)
528 			write_unlock(&neigh->lock);
529 
530 		if (work) {
531 			INIT_WORK(&work->work, rt6_probe_deferred);
532 			work->target = rt->rt6i_gateway;
533 			dev_hold(rt->dst.dev);
534 			work->dev = rt->dst.dev;
535 			schedule_work(&work->work);
536 		}
537 	} else {
538 out:
539 		write_unlock(&neigh->lock);
540 	}
541 	rcu_read_unlock_bh();
542 }
543 #else
544 static inline void rt6_probe(struct rt6_info *rt)
545 {
546 }
547 #endif
548 
549 /*
550  * Default Router Selection (RFC 2461 6.3.6)
551  */
552 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
553 {
554 	struct net_device *dev = rt->dst.dev;
555 	if (!oif || dev->ifindex == oif)
556 		return 2;
557 	if ((dev->flags & IFF_LOOPBACK) &&
558 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
559 		return 1;
560 	return 0;
561 }
562 
563 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
564 {
565 	struct neighbour *neigh;
566 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
567 
568 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
569 	    !(rt->rt6i_flags & RTF_GATEWAY))
570 		return RT6_NUD_SUCCEED;
571 
572 	rcu_read_lock_bh();
573 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
574 	if (neigh) {
575 		read_lock(&neigh->lock);
576 		if (neigh->nud_state & NUD_VALID)
577 			ret = RT6_NUD_SUCCEED;
578 #ifdef CONFIG_IPV6_ROUTER_PREF
579 		else if (!(neigh->nud_state & NUD_FAILED))
580 			ret = RT6_NUD_SUCCEED;
581 #endif
582 		read_unlock(&neigh->lock);
583 	} else {
584 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
585 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT;
586 	}
587 	rcu_read_unlock_bh();
588 
589 	return ret;
590 }
591 
592 static int rt6_score_route(struct rt6_info *rt, int oif,
593 			   int strict)
594 {
595 	int m;
596 
597 	m = rt6_check_dev(rt, oif);
598 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
599 		return RT6_NUD_FAIL_HARD;
600 #ifdef CONFIG_IPV6_ROUTER_PREF
601 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
602 #endif
603 	if (strict & RT6_LOOKUP_F_REACHABLE) {
604 		int n = rt6_check_neigh(rt);
605 		if (n < 0)
606 			return n;
607 	}
608 	return m;
609 }
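
/*
 * Example: rt6_check_dev() contributes 2 for a route on the requested
 * interface (or when no oif is given) and 1 for a loopback route bound
 * to it.  With CONFIG_IPV6_ROUTER_PREF the decoded router preference is
 * shifted left by two, so it outranks the interface score.  Under
 * RT6_LOOKUP_F_REACHABLE a negative rt6_check_neigh() result
 * disqualifies the route altogether.
 */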
610 
611 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
612 				   int *mpri, struct rt6_info *match,
613 				   bool *do_rr)
614 {
615 	int m;
616 	bool match_do_rr = false;
617 
618 	if (rt6_check_expired(rt))
619 		goto out;
620 
621 	m = rt6_score_route(rt, oif, strict);
622 	if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) {
623 		match_do_rr = true;
624 		m = 0; /* lowest valid score */
625 	} else if (m < 0) {
626 		goto out;
627 	}
628 
629 	if (strict & RT6_LOOKUP_F_REACHABLE)
630 		rt6_probe(rt);
631 
632 	if (m > *mpri) {
633 		*do_rr = match_do_rr;
634 		*mpri = m;
635 		match = rt;
636 	}
637 out:
638 	return match;
639 }
640 
641 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
642 				     struct rt6_info *rr_head,
643 				     u32 metric, int oif, int strict,
644 				     bool *do_rr)
645 {
646 	struct rt6_info *rt, *match;
647 	int mpri = -1;
648 
649 	match = NULL;
650 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
651 	     rt = rt->dst.rt6_next)
652 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
653 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
654 	     rt = rt->dst.rt6_next)
655 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
656 
657 	return match;
658 }
659 
660 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
661 {
662 	struct rt6_info *match, *rt0;
663 	struct net *net;
664 	bool do_rr = false;
665 
666 	rt0 = fn->rr_ptr;
667 	if (!rt0)
668 		fn->rr_ptr = rt0 = fn->leaf;
669 
670 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
671 			     &do_rr);
672 
673 	if (do_rr) {
674 		struct rt6_info *next = rt0->dst.rt6_next;
675 
676 		/* no entries matched; do round-robin */
677 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
678 			next = fn->leaf;
679 
680 		if (next != rt0)
681 			fn->rr_ptr = next;
682 	}
683 
684 	net = dev_net(rt0->dst.dev);
685 	return match ? match : net->ipv6.ip6_null_entry;
686 }
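
/*
 * Round-robin note: do_rr is set when the best candidate only scored
 * RT6_NUD_FAIL_SOFT (gateway not verified reachable) and router
 * preferences are compiled out.  fn->rr_ptr then advances to the next
 * route of equal metric, so successive lookups rotate through
 * equal-metric (e.g. default) routes.
 */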
687 
688 #ifdef CONFIG_IPV6_ROUTE_INFO
689 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
690 		  const struct in6_addr *gwaddr)
691 {
692 	struct net *net = dev_net(dev);
693 	struct route_info *rinfo = (struct route_info *) opt;
694 	struct in6_addr prefix_buf, *prefix;
695 	unsigned int pref;
696 	unsigned long lifetime;
697 	struct rt6_info *rt;
698 
699 	if (len < sizeof(struct route_info)) {
700 		return -EINVAL;
701 	}
702 
703 	/* Sanity check for prefix_len and length */
704 	if (rinfo->length > 3) {
705 		return -EINVAL;
706 	} else if (rinfo->prefix_len > 128) {
707 		return -EINVAL;
708 	} else if (rinfo->prefix_len > 64) {
709 		if (rinfo->length < 2) {
710 			return -EINVAL;
711 		}
712 	} else if (rinfo->prefix_len > 0) {
713 		if (rinfo->length < 1) {
714 			return -EINVAL;
715 		}
716 	}
717 
718 	pref = rinfo->route_pref;
719 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
720 		return -EINVAL;
721 
722 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
723 
724 	if (rinfo->length == 3)
725 		prefix = (struct in6_addr *)rinfo->prefix;
726 	else {
727 		/* this function is safe */
728 		ipv6_addr_prefix(&prefix_buf,
729 				 (struct in6_addr *)rinfo->prefix,
730 				 rinfo->prefix_len);
731 		prefix = &prefix_buf;
732 	}
733 
734 	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
735 				dev->ifindex);
736 
737 	if (rt && !lifetime) {
738 		ip6_del_rt(rt);
739 		rt = NULL;
740 	}
741 
742 	if (!rt && lifetime)
743 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
744 					pref);
745 	else if (rt)
746 		rt->rt6i_flags = RTF_ROUTEINFO |
747 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
748 
749 	if (rt) {
750 		if (!addrconf_finite_timeout(lifetime))
751 			rt6_clean_expires(rt);
752 		else
753 			rt6_set_expires(rt, jiffies + HZ * lifetime);
754 
755 		ip6_rt_put(rt);
756 	}
757 	return 0;
758 }
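
/*
 * Note on the sanity checks above: rinfo->length counts 8-octet units
 * (RFC 4191), so the option carries 0, 8 or 16 prefix octets for lengths
 * 1, 2 and 3.  Only a length-3 option holds a full 128-bit prefix;
 * shorter options are copied into a zero-padded prefix_buf via
 * ipv6_addr_prefix() before the table lookup.
 */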
759 #endif
760 
761 #define BACKTRACK(__net, saddr)			\
762 do { \
763 	if (rt == __net->ipv6.ip6_null_entry) {	\
764 		struct fib6_node *pn; \
765 		while (1) { \
766 			if (fn->fn_flags & RTN_TL_ROOT) \
767 				goto out; \
768 			pn = fn->parent; \
769 			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
770 				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
771 			else \
772 				fn = pn; \
773 			if (fn->fn_flags & RTN_RTINFO) \
774 				goto restart; \
775 		} \
776 	} \
777 } while (0)
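
/*
 * BACKTRACK example: if the node chosen for 2001:db8:1::/64 yields only
 * the null entry, the macro climbs toward the tree root, descending into
 * a parent's source-address subtree (FIB6_SUBTREE) when one exists, and
 * jumps back to the restart label at the first node that actually
 * carries routes (RTN_RTINFO), e.g. a covering /48 or the default route.
 */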
778 
779 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
780 					     struct fib6_table *table,
781 					     struct flowi6 *fl6, int flags)
782 {
783 	struct fib6_node *fn;
784 	struct rt6_info *rt;
785 
786 	read_lock_bh(&table->tb6_lock);
787 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
788 restart:
789 	rt = fn->leaf;
790 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
791 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
792 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
793 	BACKTRACK(net, &fl6->saddr);
794 out:
795 	dst_use(&rt->dst, jiffies);
796 	read_unlock_bh(&table->tb6_lock);
797 	return rt;
798 
799 }
800 
801 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
802 				    int flags)
803 {
804 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
805 }
806 EXPORT_SYMBOL_GPL(ip6_route_lookup);
807 
808 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
809 			    const struct in6_addr *saddr, int oif, int strict)
810 {
811 	struct flowi6 fl6 = {
812 		.flowi6_oif = oif,
813 		.daddr = *daddr,
814 	};
815 	struct dst_entry *dst;
816 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
817 
818 	if (saddr) {
819 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
820 		flags |= RT6_LOOKUP_F_HAS_SADDR;
821 	}
822 
823 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
824 	if (dst->error == 0)
825 		return (struct rt6_info *) dst;
826 
827 	dst_release(dst);
828 
829 	return NULL;
830 }
831 
832 EXPORT_SYMBOL(rt6_lookup);
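
/*
 * Typical usage (as in ip6_route_add() below when validating a gateway):
 *
 *	grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
 *	if (grt) {
 *		...
 *		ip6_rt_put(grt);
 *	}
 *
 * strict != 0 adds RT6_LOOKUP_F_IFACE, and the caller owns a reference
 * on the returned route.
 */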
833 
834 /* ip6_ins_rt is called with table->tb6_lock free (not held).
835    It takes a new route entry; if the addition fails for any reason,
836    the route is freed.  In any case, if the caller does not hold a
837    reference, it may be destroyed.
838  */
839 
840 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
841 {
842 	int err;
843 	struct fib6_table *table;
844 
845 	table = rt->rt6i_table;
846 	write_lock_bh(&table->tb6_lock);
847 	err = fib6_add(&table->tb6_root, rt, info);
848 	write_unlock_bh(&table->tb6_lock);
849 
850 	return err;
851 }
852 
853 int ip6_ins_rt(struct rt6_info *rt)
854 {
855 	struct nl_info info = {
856 		.nl_net = dev_net(rt->dst.dev),
857 	};
858 	return __ip6_ins_rt(rt, &info);
859 }
860 
861 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
862 				      const struct in6_addr *daddr,
863 				      const struct in6_addr *saddr)
864 {
865 	struct rt6_info *rt;
866 
867 	/*
868 	 *	Clone the route.
869 	 */
870 
871 	rt = ip6_rt_copy(ort, daddr);
872 
873 	if (rt) {
874 		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
875 			if (ort->rt6i_dst.plen != 128 &&
876 			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
877 				rt->rt6i_flags |= RTF_ANYCAST;
878 		}
879 
880 		rt->rt6i_flags |= RTF_CACHE;
881 
882 #ifdef CONFIG_IPV6_SUBTREES
883 		if (rt->rt6i_src.plen && saddr) {
884 			rt->rt6i_src.addr = *saddr;
885 			rt->rt6i_src.plen = 128;
886 		}
887 #endif
888 	}
889 
890 	return rt;
891 }
892 
893 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
894 					const struct in6_addr *daddr)
895 {
896 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
897 
898 	if (rt)
899 		rt->rt6i_flags |= RTF_CACHE;
900 	return rt;
901 }
902 
903 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
904 				      struct flowi6 *fl6, int flags)
905 {
906 	struct fib6_node *fn;
907 	struct rt6_info *rt, *nrt;
908 	int strict = 0;
909 	int attempts = 3;
910 	int err;
911 	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
912 
913 	strict |= flags & RT6_LOOKUP_F_IFACE;
914 
915 relookup:
916 	read_lock_bh(&table->tb6_lock);
917 
918 restart_2:
919 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
920 
921 restart:
922 	rt = rt6_select(fn, oif, strict | reachable);
923 	if (rt->rt6i_nsiblings)
924 		rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
925 	BACKTRACK(net, &fl6->saddr);
926 	if (rt == net->ipv6.ip6_null_entry ||
927 	    rt->rt6i_flags & RTF_CACHE)
928 		goto out;
929 
930 	dst_hold(&rt->dst);
931 	read_unlock_bh(&table->tb6_lock);
932 
933 	if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
934 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
935 	else if (!(rt->dst.flags & DST_HOST))
936 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
937 	else
938 		goto out2;
939 
940 	ip6_rt_put(rt);
941 	rt = nrt ? : net->ipv6.ip6_null_entry;
942 
943 	dst_hold(&rt->dst);
944 	if (nrt) {
945 		err = ip6_ins_rt(nrt);
946 		if (!err)
947 			goto out2;
948 	}
949 
950 	if (--attempts <= 0)
951 		goto out2;
952 
953 	/*
954 	 * Race condition! In the gap while table->tb6_lock was
955 	 * released, someone else could have inserted this route.  Relookup.
956 	 */
957 	ip6_rt_put(rt);
958 	goto relookup;
959 
960 out:
961 	if (reachable) {
962 		reachable = 0;
963 		goto restart_2;
964 	}
965 	dst_hold(&rt->dst);
966 	read_unlock_bh(&table->tb6_lock);
967 out2:
968 	rt->dst.lastuse = jiffies;
969 	rt->dst.__use++;
970 
971 	return rt;
972 }
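
/*
 * Summary of the slow path above: a matching non-RTF_CACHE route is
 * cloned on demand - rt6_alloc_cow() for routes without a gateway
 * (producing a /128 RTF_CACHE entry, flagged RTF_ANYCAST when the
 * destination equals the prefix address of a non-/128 route),
 * rt6_alloc_clone() for non-host gateway routes.  Insertion races with
 * other CPUs are retried up to three times before the clone is returned
 * uninserted.
 */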
973 
974 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
975 					    struct flowi6 *fl6, int flags)
976 {
977 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
978 }
979 
980 static struct dst_entry *ip6_route_input_lookup(struct net *net,
981 						struct net_device *dev,
982 						struct flowi6 *fl6, int flags)
983 {
984 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
985 		flags |= RT6_LOOKUP_F_IFACE;
986 
987 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
988 }
989 
990 void ip6_route_input(struct sk_buff *skb)
991 {
992 	const struct ipv6hdr *iph = ipv6_hdr(skb);
993 	struct net *net = dev_net(skb->dev);
994 	int flags = RT6_LOOKUP_F_HAS_SADDR;
995 	struct flowi6 fl6 = {
996 		.flowi6_iif = skb->dev->ifindex,
997 		.daddr = iph->daddr,
998 		.saddr = iph->saddr,
999 		.flowlabel = ip6_flowinfo(iph),
1000 		.flowi6_mark = skb->mark,
1001 		.flowi6_proto = iph->nexthdr,
1002 	};
1003 
1004 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1005 }
1006 
1007 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1008 					     struct flowi6 *fl6, int flags)
1009 {
1010 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1011 }
1012 
1013 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1014 				    struct flowi6 *fl6)
1015 {
1016 	int flags = 0;
1017 
1018 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1019 
1020 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1021 		flags |= RT6_LOOKUP_F_IFACE;
1022 
1023 	if (!ipv6_addr_any(&fl6->saddr))
1024 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1025 	else if (sk)
1026 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1027 
1028 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1029 }
1030 
1031 EXPORT_SYMBOL(ip6_route_output);
1032 
1033 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1034 {
1035 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1036 	struct dst_entry *new = NULL;
1037 
1038 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1039 	if (rt) {
1040 		new = &rt->dst;
1041 
1042 		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1043 		rt6_init_peer(rt, net->ipv6.peers);
1044 
1045 		new->__use = 1;
1046 		new->input = dst_discard;
1047 		new->output = dst_discard;
1048 
1049 		if (dst_metrics_read_only(&ort->dst))
1050 			new->_metrics = ort->dst._metrics;
1051 		else
1052 			dst_copy_metrics(new, &ort->dst);
1053 		rt->rt6i_idev = ort->rt6i_idev;
1054 		if (rt->rt6i_idev)
1055 			in6_dev_hold(rt->rt6i_idev);
1056 
1057 		rt->rt6i_gateway = ort->rt6i_gateway;
1058 		rt->rt6i_flags = ort->rt6i_flags;
1059 		rt->rt6i_metric = 0;
1060 
1061 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1062 #ifdef CONFIG_IPV6_SUBTREES
1063 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1064 #endif
1065 
1066 		dst_free(new);
1067 	}
1068 
1069 	dst_release(dst_orig);
1070 	return new ? new : ERR_PTR(-ENOMEM);
1071 }
1072 
1073 /*
1074  *	Destination cache support functions
1075  */
1076 
1077 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1078 {
1079 	struct rt6_info *rt;
1080 
1081 	rt = (struct rt6_info *) dst;
1082 
1083 	/* All IPv6 dsts are created with ->obsolete set to the value
1084 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1085 	 * into this function always.
1086 	 */
1087 	if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
1088 		return NULL;
1089 
1090 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1091 		return NULL;
1092 
1093 	if (rt6_check_expired(rt))
1094 		return NULL;
1095 
1096 	return dst;
1097 }
1098 
1099 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1100 {
1101 	struct rt6_info *rt = (struct rt6_info *) dst;
1102 
1103 	if (rt) {
1104 		if (rt->rt6i_flags & RTF_CACHE) {
1105 			if (rt6_check_expired(rt)) {
1106 				ip6_del_rt(rt);
1107 				dst = NULL;
1108 			}
1109 		} else {
1110 			dst_release(dst);
1111 			dst = NULL;
1112 		}
1113 	}
1114 	return dst;
1115 }
1116 
1117 static void ip6_link_failure(struct sk_buff *skb)
1118 {
1119 	struct rt6_info *rt;
1120 
1121 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1122 
1123 	rt = (struct rt6_info *) skb_dst(skb);
1124 	if (rt) {
1125 		if (rt->rt6i_flags & RTF_CACHE) {
1126 			dst_hold(&rt->dst);
1127 			if (ip6_del_rt(rt))
1128 				dst_free(&rt->dst);
1129 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1130 			rt->rt6i_node->fn_sernum = -1;
1131 		}
1132 	}
1133 }
1134 
1135 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1136 			       struct sk_buff *skb, u32 mtu)
1137 {
1138 	struct rt6_info *rt6 = (struct rt6_info *)dst;
1139 
1140 	dst_confirm(dst);
1141 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1142 		struct net *net = dev_net(dst->dev);
1143 
1144 		rt6->rt6i_flags |= RTF_MODIFIED;
1145 		if (mtu < IPV6_MIN_MTU) {
1146 			u32 features = dst_metric(dst, RTAX_FEATURES);
1147 			mtu = IPV6_MIN_MTU;
1148 			features |= RTAX_FEATURE_ALLFRAG;
1149 			dst_metric_set(dst, RTAX_FEATURES, features);
1150 		}
1151 		dst_metric_set(dst, RTAX_MTU, mtu);
1152 		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1153 	}
1154 }
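
/*
 * Example: a Packet Too Big reporting an MTU of 1000 on a host (/128)
 * route clamps the dst MTU to IPV6_MIN_MTU (1280) and sets
 * RTAX_FEATURE_ALLFRAG, so every packet carries a fragment header; the
 * learned MTU then ages out after ip6_rt_mtu_expires
 * (net.ipv6.route.mtu_expires).
 */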
1155 
1156 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1157 		     int oif, u32 mark)
1158 {
1159 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1160 	struct dst_entry *dst;
1161 	struct flowi6 fl6;
1162 
1163 	memset(&fl6, 0, sizeof(fl6));
1164 	fl6.flowi6_oif = oif;
1165 	fl6.flowi6_mark = mark;
1166 	fl6.flowi6_flags = 0;
1167 	fl6.daddr = iph->daddr;
1168 	fl6.saddr = iph->saddr;
1169 	fl6.flowlabel = ip6_flowinfo(iph);
1170 
1171 	dst = ip6_route_output(net, NULL, &fl6);
1172 	if (!dst->error)
1173 		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1174 	dst_release(dst);
1175 }
1176 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1177 
1178 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1179 {
1180 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1181 			sk->sk_bound_dev_if, sk->sk_mark);
1182 }
1183 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1184 
1185 /* Handle redirects */
1186 struct ip6rd_flowi {
1187 	struct flowi6 fl6;
1188 	struct in6_addr gateway;
1189 };
1190 
1191 static struct rt6_info *__ip6_route_redirect(struct net *net,
1192 					     struct fib6_table *table,
1193 					     struct flowi6 *fl6,
1194 					     int flags)
1195 {
1196 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1197 	struct rt6_info *rt;
1198 	struct fib6_node *fn;
1199 
1200 	/* Get the "current" route for this destination and
1201 	 * check if the redirect has come from the appropriate router.
1202 	 *
1203 	 * RFC 4861 specifies that redirects should only be
1204 	 * accepted if they come from the nexthop to the target.
1205 	 * Due to the way the routes are chosen, this notion
1206 	 * is a bit fuzzy and one might need to check all possible
1207 	 * routes.
1208 	 */
1209 
1210 	read_lock_bh(&table->tb6_lock);
1211 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1212 restart:
1213 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1214 		if (rt6_check_expired(rt))
1215 			continue;
1216 		if (rt->dst.error)
1217 			break;
1218 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1219 			continue;
1220 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1221 			continue;
1222 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1223 			continue;
1224 		break;
1225 	}
1226 
1227 	if (!rt)
1228 		rt = net->ipv6.ip6_null_entry;
1229 	else if (rt->dst.error) {
1230 		rt = net->ipv6.ip6_null_entry;
1231 		goto out;
1232 	}
1233 	BACKTRACK(net, &fl6->saddr);
1234 out:
1235 	dst_hold(&rt->dst);
1236 
1237 	read_unlock_bh(&table->tb6_lock);
1238 
1239 	return rt;
1240 };
1241 
1242 static struct dst_entry *ip6_route_redirect(struct net *net,
1243 					const struct flowi6 *fl6,
1244 					const struct in6_addr *gateway)
1245 {
1246 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1247 	struct ip6rd_flowi rdfl;
1248 
1249 	rdfl.fl6 = *fl6;
1250 	rdfl.gateway = *gateway;
1251 
1252 	return fib6_rule_lookup(net, &rdfl.fl6,
1253 				flags, __ip6_route_redirect);
1254 }
1255 
1256 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1257 {
1258 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1259 	struct dst_entry *dst;
1260 	struct flowi6 fl6;
1261 
1262 	memset(&fl6, 0, sizeof(fl6));
1263 	fl6.flowi6_oif = oif;
1264 	fl6.flowi6_mark = mark;
1265 	fl6.flowi6_flags = 0;
1266 	fl6.daddr = iph->daddr;
1267 	fl6.saddr = iph->saddr;
1268 	fl6.flowlabel = ip6_flowinfo(iph);
1269 
1270 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1271 	rt6_do_redirect(dst, NULL, skb);
1272 	dst_release(dst);
1273 }
1274 EXPORT_SYMBOL_GPL(ip6_redirect);
1275 
1276 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1277 			    u32 mark)
1278 {
1279 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1280 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1281 	struct dst_entry *dst;
1282 	struct flowi6 fl6;
1283 
1284 	memset(&fl6, 0, sizeof(fl6));
1285 	fl6.flowi6_oif = oif;
1286 	fl6.flowi6_mark = mark;
1287 	fl6.flowi6_flags = 0;
1288 	fl6.daddr = msg->dest;
1289 	fl6.saddr = iph->daddr;
1290 
1291 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1292 	rt6_do_redirect(dst, NULL, skb);
1293 	dst_release(dst);
1294 }
1295 
1296 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1297 {
1298 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1299 }
1300 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1301 
1302 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1303 {
1304 	struct net_device *dev = dst->dev;
1305 	unsigned int mtu = dst_mtu(dst);
1306 	struct net *net = dev_net(dev);
1307 
1308 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1309 
1310 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1311 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1312 
1313 	/*
1314 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1315 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1316 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1317 	 * rely only on pmtu discovery"
1318 	 */
1319 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1320 		mtu = IPV6_MAXPLEN;
1321 	return mtu;
1322 }
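
/*
 * Example: with a path MTU of 1500 the advertised MSS is
 * 1500 - sizeof(struct ipv6hdr) - sizeof(struct tcphdr) = 1500 - 40 - 20
 * = 1440, clamped below by ip6_rt_min_advmss and capped at IPV6_MAXPLEN
 * for jumbo-capable paths ("any MSS, rely on PMTU discovery").
 */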
1323 
1324 static unsigned int ip6_mtu(const struct dst_entry *dst)
1325 {
1326 	struct inet6_dev *idev;
1327 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1328 
1329 	if (mtu)
1330 		return mtu;
1331 
1332 	mtu = IPV6_MIN_MTU;
1333 
1334 	rcu_read_lock();
1335 	idev = __in6_dev_get(dst->dev);
1336 	if (idev)
1337 		mtu = idev->cnf.mtu6;
1338 	rcu_read_unlock();
1339 
1340 	return mtu;
1341 }
1342 
1343 static struct dst_entry *icmp6_dst_gc_list;
1344 static DEFINE_SPINLOCK(icmp6_dst_lock);
1345 
1346 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1347 				  struct flowi6 *fl6)
1348 {
1349 	struct dst_entry *dst;
1350 	struct rt6_info *rt;
1351 	struct inet6_dev *idev = in6_dev_get(dev);
1352 	struct net *net = dev_net(dev);
1353 
1354 	if (unlikely(!idev))
1355 		return ERR_PTR(-ENODEV);
1356 
1357 	rt = ip6_dst_alloc(net, dev, 0, NULL);
1358 	if (unlikely(!rt)) {
1359 		in6_dev_put(idev);
1360 		dst = ERR_PTR(-ENOMEM);
1361 		goto out;
1362 	}
1363 
1364 	rt->dst.flags |= DST_HOST;
1365 	rt->dst.output  = ip6_output;
1366 	atomic_set(&rt->dst.__refcnt, 1);
1367 	rt->rt6i_gateway  = fl6->daddr;
1368 	rt->rt6i_dst.addr = fl6->daddr;
1369 	rt->rt6i_dst.plen = 128;
1370 	rt->rt6i_idev     = idev;
1371 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1372 
1373 	spin_lock_bh(&icmp6_dst_lock);
1374 	rt->dst.next = icmp6_dst_gc_list;
1375 	icmp6_dst_gc_list = &rt->dst;
1376 	spin_unlock_bh(&icmp6_dst_lock);
1377 
1378 	fib6_force_start_gc(net);
1379 
1380 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1381 
1382 out:
1383 	return dst;
1384 }
1385 
1386 int icmp6_dst_gc(void)
1387 {
1388 	struct dst_entry *dst, **pprev;
1389 	int more = 0;
1390 
1391 	spin_lock_bh(&icmp6_dst_lock);
1392 	pprev = &icmp6_dst_gc_list;
1393 
1394 	while ((dst = *pprev) != NULL) {
1395 		if (!atomic_read(&dst->__refcnt)) {
1396 			*pprev = dst->next;
1397 			dst_free(dst);
1398 		} else {
1399 			pprev = &dst->next;
1400 			++more;
1401 		}
1402 	}
1403 
1404 	spin_unlock_bh(&icmp6_dst_lock);
1405 
1406 	return more;
1407 }
1408 
1409 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1410 			    void *arg)
1411 {
1412 	struct dst_entry *dst, **pprev;
1413 
1414 	spin_lock_bh(&icmp6_dst_lock);
1415 	pprev = &icmp6_dst_gc_list;
1416 	while ((dst = *pprev) != NULL) {
1417 		struct rt6_info *rt = (struct rt6_info *) dst;
1418 		if (func(rt, arg)) {
1419 			*pprev = dst->next;
1420 			dst_free(dst);
1421 		} else {
1422 			pprev = &dst->next;
1423 		}
1424 	}
1425 	spin_unlock_bh(&icmp6_dst_lock);
1426 }
1427 
1428 static int ip6_dst_gc(struct dst_ops *ops)
1429 {
1430 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1431 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1432 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1433 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1434 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1435 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1436 	int entries;
1437 
1438 	entries = dst_entries_get_fast(ops);
1439 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1440 	    entries <= rt_max_size)
1441 		goto out;
1442 
1443 	net->ipv6.ip6_rt_gc_expire++;
1444 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
1445 	entries = dst_entries_get_slow(ops);
1446 	if (entries < ops->gc_thresh)
1447 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1448 out:
1449 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1450 	return entries > rt_max_size;
1451 }
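
/*
 * GC heuristics: garbage collection is skipped while the entry count is
 * at or below ip6_rt_max_size and less than ip6_rt_gc_min_interval has
 * passed since the last run.  ip6_rt_gc_expire decays by 1/2^elasticity
 * on every invocation and is reset to half of ip6_rt_gc_timeout once the
 * count drops back under gc_thresh.
 */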
1452 
1453 /*
1454  *	Route addition and deletion
1455  */
1456 
1457 int ip6_route_add(struct fib6_config *cfg)
1458 {
1459 	int err;
1460 	struct net *net = cfg->fc_nlinfo.nl_net;
1461 	struct rt6_info *rt = NULL;
1462 	struct net_device *dev = NULL;
1463 	struct inet6_dev *idev = NULL;
1464 	struct fib6_table *table;
1465 	int addr_type;
1466 
1467 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1468 		return -EINVAL;
1469 #ifndef CONFIG_IPV6_SUBTREES
1470 	if (cfg->fc_src_len)
1471 		return -EINVAL;
1472 #endif
1473 	if (cfg->fc_ifindex) {
1474 		err = -ENODEV;
1475 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1476 		if (!dev)
1477 			goto out;
1478 		idev = in6_dev_get(dev);
1479 		if (!idev)
1480 			goto out;
1481 	}
1482 
1483 	if (cfg->fc_metric == 0)
1484 		cfg->fc_metric = IP6_RT_PRIO_USER;
1485 
1486 	err = -ENOBUFS;
1487 	if (cfg->fc_nlinfo.nlh &&
1488 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1489 		table = fib6_get_table(net, cfg->fc_table);
1490 		if (!table) {
1491 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1492 			table = fib6_new_table(net, cfg->fc_table);
1493 		}
1494 	} else {
1495 		table = fib6_new_table(net, cfg->fc_table);
1496 	}
1497 
1498 	if (!table)
1499 		goto out;
1500 
1501 	rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1502 
1503 	if (!rt) {
1504 		err = -ENOMEM;
1505 		goto out;
1506 	}
1507 
1508 	if (cfg->fc_flags & RTF_EXPIRES)
1509 		rt6_set_expires(rt, jiffies +
1510 				clock_t_to_jiffies(cfg->fc_expires));
1511 	else
1512 		rt6_clean_expires(rt);
1513 
1514 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1515 		cfg->fc_protocol = RTPROT_BOOT;
1516 	rt->rt6i_protocol = cfg->fc_protocol;
1517 
1518 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1519 
1520 	if (addr_type & IPV6_ADDR_MULTICAST)
1521 		rt->dst.input = ip6_mc_input;
1522 	else if (cfg->fc_flags & RTF_LOCAL)
1523 		rt->dst.input = ip6_input;
1524 	else
1525 		rt->dst.input = ip6_forward;
1526 
1527 	rt->dst.output = ip6_output;
1528 
1529 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1530 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1531 	if (rt->rt6i_dst.plen == 128)
1532 	       rt->dst.flags |= DST_HOST;
1533 
1534 	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1535 		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1536 		if (!metrics) {
1537 			err = -ENOMEM;
1538 			goto out;
1539 		}
1540 		dst_init_metrics(&rt->dst, metrics, 0);
1541 	}
1542 #ifdef CONFIG_IPV6_SUBTREES
1543 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1544 	rt->rt6i_src.plen = cfg->fc_src_len;
1545 #endif
1546 
1547 	rt->rt6i_metric = cfg->fc_metric;
1548 
1549 	/* We cannot add true routes via loopback here,
1550 	   they would result in kernel looping; promote them to reject routes
1551 	 */
1552 	if ((cfg->fc_flags & RTF_REJECT) ||
1553 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1554 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1555 	     !(cfg->fc_flags & RTF_LOCAL))) {
1556 		/* hold loopback dev/idev if we haven't done so. */
1557 		if (dev != net->loopback_dev) {
1558 			if (dev) {
1559 				dev_put(dev);
1560 				in6_dev_put(idev);
1561 			}
1562 			dev = net->loopback_dev;
1563 			dev_hold(dev);
1564 			idev = in6_dev_get(dev);
1565 			if (!idev) {
1566 				err = -ENODEV;
1567 				goto out;
1568 			}
1569 		}
1570 		rt->dst.output = ip6_pkt_discard_out;
1571 		rt->dst.input = ip6_pkt_discard;
1572 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1573 		switch (cfg->fc_type) {
1574 		case RTN_BLACKHOLE:
1575 			rt->dst.error = -EINVAL;
1576 			break;
1577 		case RTN_PROHIBIT:
1578 			rt->dst.error = -EACCES;
1579 			break;
1580 		case RTN_THROW:
1581 			rt->dst.error = -EAGAIN;
1582 			break;
1583 		default:
1584 			rt->dst.error = -ENETUNREACH;
1585 			break;
1586 		}
1587 		goto install_route;
1588 	}
1589 
1590 	if (cfg->fc_flags & RTF_GATEWAY) {
1591 		const struct in6_addr *gw_addr;
1592 		int gwa_type;
1593 
1594 		gw_addr = &cfg->fc_gateway;
1595 		rt->rt6i_gateway = *gw_addr;
1596 		gwa_type = ipv6_addr_type(gw_addr);
1597 
1598 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1599 			struct rt6_info *grt;
1600 
1601 			/* IPv6 strictly prohibits using non-link-local
1602 			   addresses as the nexthop address.
1603 			   Otherwise, the router will not be able to send redirects.
1604 			   That is usually right, but in some (rare!) circumstances
1605 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1606 			   some exceptions. --ANK
1607 			 */
1608 			err = -EINVAL;
1609 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1610 				goto out;
1611 
1612 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1613 
1614 			err = -EHOSTUNREACH;
1615 			if (!grt)
1616 				goto out;
1617 			if (dev) {
1618 				if (dev != grt->dst.dev) {
1619 					ip6_rt_put(grt);
1620 					goto out;
1621 				}
1622 			} else {
1623 				dev = grt->dst.dev;
1624 				idev = grt->rt6i_idev;
1625 				dev_hold(dev);
1626 				in6_dev_hold(grt->rt6i_idev);
1627 			}
1628 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1629 				err = 0;
1630 			ip6_rt_put(grt);
1631 
1632 			if (err)
1633 				goto out;
1634 		}
1635 		err = -EINVAL;
1636 		if (!dev || (dev->flags & IFF_LOOPBACK))
1637 			goto out;
1638 	}
1639 
1640 	err = -ENODEV;
1641 	if (!dev)
1642 		goto out;
1643 
1644 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1645 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1646 			err = -EINVAL;
1647 			goto out;
1648 		}
1649 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1650 		rt->rt6i_prefsrc.plen = 128;
1651 	} else
1652 		rt->rt6i_prefsrc.plen = 0;
1653 
1654 	rt->rt6i_flags = cfg->fc_flags;
1655 
1656 install_route:
1657 	if (cfg->fc_mx) {
1658 		struct nlattr *nla;
1659 		int remaining;
1660 
1661 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1662 			int type = nla_type(nla);
1663 
1664 			if (type) {
1665 				if (type > RTAX_MAX) {
1666 					err = -EINVAL;
1667 					goto out;
1668 				}
1669 
1670 				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1671 			}
1672 		}
1673 	}
1674 
1675 	rt->dst.dev = dev;
1676 	rt->rt6i_idev = idev;
1677 	rt->rt6i_table = table;
1678 
1679 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1680 
1681 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1682 
1683 out:
1684 	if (dev)
1685 		dev_put(dev);
1686 	if (idev)
1687 		in6_dev_put(idev);
1688 	if (rt)
1689 		dst_free(&rt->dst);
1690 	return err;
1691 }
1692 
1693 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1694 {
1695 	int err;
1696 	struct fib6_table *table;
1697 	struct net *net = dev_net(rt->dst.dev);
1698 
1699 	if (rt == net->ipv6.ip6_null_entry) {
1700 		err = -ENOENT;
1701 		goto out;
1702 	}
1703 
1704 	table = rt->rt6i_table;
1705 	write_lock_bh(&table->tb6_lock);
1706 	err = fib6_del(rt, info);
1707 	write_unlock_bh(&table->tb6_lock);
1708 
1709 out:
1710 	ip6_rt_put(rt);
1711 	return err;
1712 }
1713 
1714 int ip6_del_rt(struct rt6_info *rt)
1715 {
1716 	struct nl_info info = {
1717 		.nl_net = dev_net(rt->dst.dev),
1718 	};
1719 	return __ip6_del_rt(rt, &info);
1720 }
1721 
1722 static int ip6_route_del(struct fib6_config *cfg)
1723 {
1724 	struct fib6_table *table;
1725 	struct fib6_node *fn;
1726 	struct rt6_info *rt;
1727 	int err = -ESRCH;
1728 
1729 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1730 	if (!table)
1731 		return err;
1732 
1733 	read_lock_bh(&table->tb6_lock);
1734 
1735 	fn = fib6_locate(&table->tb6_root,
1736 			 &cfg->fc_dst, cfg->fc_dst_len,
1737 			 &cfg->fc_src, cfg->fc_src_len);
1738 
1739 	if (fn) {
1740 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1741 			if (cfg->fc_ifindex &&
1742 			    (!rt->dst.dev ||
1743 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
1744 				continue;
1745 			if (cfg->fc_flags & RTF_GATEWAY &&
1746 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1747 				continue;
1748 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1749 				continue;
1750 			dst_hold(&rt->dst);
1751 			read_unlock_bh(&table->tb6_lock);
1752 
1753 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1754 		}
1755 	}
1756 	read_unlock_bh(&table->tb6_lock);
1757 
1758 	return err;
1759 }
1760 
1761 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1762 {
1763 	struct net *net = dev_net(skb->dev);
1764 	struct netevent_redirect netevent;
1765 	struct rt6_info *rt, *nrt = NULL;
1766 	struct ndisc_options ndopts;
1767 	struct inet6_dev *in6_dev;
1768 	struct neighbour *neigh;
1769 	struct rd_msg *msg;
1770 	int optlen, on_link;
1771 	u8 *lladdr;
1772 
1773 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1774 	optlen -= sizeof(*msg);
1775 
1776 	if (optlen < 0) {
1777 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1778 		return;
1779 	}
1780 
1781 	msg = (struct rd_msg *)icmp6_hdr(skb);
1782 
1783 	if (ipv6_addr_is_multicast(&msg->dest)) {
1784 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1785 		return;
1786 	}
1787 
1788 	on_link = 0;
1789 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1790 		on_link = 1;
1791 	} else if (ipv6_addr_type(&msg->target) !=
1792 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1793 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1794 		return;
1795 	}
1796 
1797 	in6_dev = __in6_dev_get(skb->dev);
1798 	if (!in6_dev)
1799 		return;
1800 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1801 		return;
1802 
1803 	/* RFC2461 8.1:
1804 	 *	The IP source address of the Redirect MUST be the same as the current
1805 	 *	first-hop router for the specified ICMP Destination Address.
1806 	 */
1807 
1808 	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1809 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1810 		return;
1811 	}
1812 
1813 	lladdr = NULL;
1814 	if (ndopts.nd_opts_tgt_lladdr) {
1815 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1816 					     skb->dev);
1817 		if (!lladdr) {
1818 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1819 			return;
1820 		}
1821 	}
1822 
1823 	rt = (struct rt6_info *) dst;
1824 	if (rt == net->ipv6.ip6_null_entry) {
1825 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1826 		return;
1827 	}
1828 
1829 	/* Redirect received -> path was valid.
1830 	 * Look, redirects are sent only in response to data packets,
1831 	 * so this nexthop is apparently reachable. --ANK
1832 	 */
1833 	dst_confirm(&rt->dst);
1834 
1835 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1836 	if (!neigh)
1837 		return;
1838 
1839 	/*
1840 	 *	We have finally decided to accept it.
1841 	 */
1842 
1843 	neigh_update(neigh, lladdr, NUD_STALE,
1844 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1845 		     NEIGH_UPDATE_F_OVERRIDE|
1846 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1847 				     NEIGH_UPDATE_F_ISROUTER))
1848 		     );
1849 
1850 	nrt = ip6_rt_copy(rt, &msg->dest);
1851 	if (!nrt)
1852 		goto out;
1853 
1854 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1855 	if (on_link)
1856 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1857 
1858 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1859 
1860 	if (ip6_ins_rt(nrt))
1861 		goto out;
1862 
1863 	netevent.old = &rt->dst;
1864 	netevent.new = &nrt->dst;
1865 	netevent.daddr = &msg->dest;
1866 	netevent.neigh = neigh;
1867 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1868 
1869 	if (rt->rt6i_flags & RTF_CACHE) {
1870 		rt = (struct rt6_info *) dst_clone(&rt->dst);
1871 		ip6_del_rt(rt);
1872 	}
1873 
1874 out:
1875 	neigh_release(neigh);
1876 }
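
/*
 * Summary: a redirect is honoured only when it parses cleanly, the
 * receiving interface accepts redirects and is not forwarding, and the
 * current dst is not the null entry.  The neighbour entry for the new
 * first hop is updated to NUD_STALE, a /128 RTF_CACHE|RTF_DYNAMIC clone
 * pointing at that gateway is inserted, NETEVENT_REDIRECT notifiers are
 * called, and a previous cached entry for the destination is deleted.
 */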
1877 
1878 /*
1879  *	Misc support functions
1880  */
1881 
1882 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1883 				    const struct in6_addr *dest)
1884 {
1885 	struct net *net = dev_net(ort->dst.dev);
1886 	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1887 					    ort->rt6i_table);
1888 
1889 	if (rt) {
1890 		rt->dst.input = ort->dst.input;
1891 		rt->dst.output = ort->dst.output;
1892 		rt->dst.flags |= DST_HOST;
1893 
1894 		rt->rt6i_dst.addr = *dest;
1895 		rt->rt6i_dst.plen = 128;
1896 		dst_copy_metrics(&rt->dst, &ort->dst);
1897 		rt->dst.error = ort->dst.error;
1898 		rt->rt6i_idev = ort->rt6i_idev;
1899 		if (rt->rt6i_idev)
1900 			in6_dev_hold(rt->rt6i_idev);
1901 		rt->dst.lastuse = jiffies;
1902 
1903 		if (ort->rt6i_flags & RTF_GATEWAY)
1904 			rt->rt6i_gateway = ort->rt6i_gateway;
1905 		else
1906 			rt->rt6i_gateway = *dest;
1907 		rt->rt6i_flags = ort->rt6i_flags;
1908 		if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1909 		    (RTF_DEFAULT | RTF_ADDRCONF))
1910 			rt6_set_from(rt, ort);
1911 		rt->rt6i_metric = 0;
1912 
1913 #ifdef CONFIG_IPV6_SUBTREES
1914 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1915 #endif
1916 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1917 		rt->rt6i_table = ort->rt6i_table;
1918 	}
1919 	return rt;
1920 }
1921 
1922 #ifdef CONFIG_IPV6_ROUTE_INFO
1923 static struct rt6_info *rt6_get_route_info(struct net *net,
1924 					   const struct in6_addr *prefix, int prefixlen,
1925 					   const struct in6_addr *gwaddr, int ifindex)
1926 {
1927 	struct fib6_node *fn;
1928 	struct rt6_info *rt = NULL;
1929 	struct fib6_table *table;
1930 
1931 	table = fib6_get_table(net, RT6_TABLE_INFO);
1932 	if (!table)
1933 		return NULL;
1934 
1935 	read_lock_bh(&table->tb6_lock);
1936 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1937 	if (!fn)
1938 		goto out;
1939 
1940 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1941 		if (rt->dst.dev->ifindex != ifindex)
1942 			continue;
1943 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1944 			continue;
1945 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1946 			continue;
1947 		dst_hold(&rt->dst);
1948 		break;
1949 	}
1950 out:
1951 	read_unlock_bh(&table->tb6_lock);
1952 	return rt;
1953 }
1954 
1955 static struct rt6_info *rt6_add_route_info(struct net *net,
1956 					   const struct in6_addr *prefix, int prefixlen,
1957 					   const struct in6_addr *gwaddr, int ifindex,
1958 					   unsigned int pref)
1959 {
1960 	struct fib6_config cfg = {
1961 		.fc_table	= RT6_TABLE_INFO,
1962 		.fc_metric	= IP6_RT_PRIO_USER,
1963 		.fc_ifindex	= ifindex,
1964 		.fc_dst_len	= prefixlen,
1965 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1966 				  RTF_UP | RTF_PREF(pref),
1967 		.fc_nlinfo.portid = 0,
1968 		.fc_nlinfo.nlh = NULL,
1969 		.fc_nlinfo.nl_net = net,
1970 	};
1971 
1972 	cfg.fc_dst = *prefix;
1973 	cfg.fc_gateway = *gwaddr;
1974 
1975 	/* We should treat it as a default route if prefix length is 0. */
1976 	if (!prefixlen)
1977 		cfg.fc_flags |= RTF_DEFAULT;
1978 
1979 	ip6_route_add(&cfg);
1980 
1981 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1982 }
1983 #endif
1984 
1985 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1986 {
1987 	struct rt6_info *rt;
1988 	struct fib6_table *table;
1989 
1990 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1991 	if (!table)
1992 		return NULL;
1993 
1994 	read_lock_bh(&table->tb6_lock);
1995 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1996 		if (dev == rt->dst.dev &&
1997 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1998 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1999 			break;
2000 	}
2001 	if (rt)
2002 		dst_hold(&rt->dst);
2003 	read_unlock_bh(&table->tb6_lock);
2004 	return rt;
2005 }
2006 
2007 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2008 				     struct net_device *dev,
2009 				     unsigned int pref)
2010 {
2011 	struct fib6_config cfg = {
2012 		.fc_table	= RT6_TABLE_DFLT,
2013 		.fc_metric	= IP6_RT_PRIO_USER,
2014 		.fc_ifindex	= dev->ifindex,
2015 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2016 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2017 		.fc_nlinfo.portid = 0,
2018 		.fc_nlinfo.nlh = NULL,
2019 		.fc_nlinfo.nl_net = dev_net(dev),
2020 	};
2021 
2022 	cfg.fc_gateway = *gwaddr;
2023 
2024 	ip6_route_add(&cfg);
2025 
2026 	return rt6_get_dflt_router(gwaddr, dev);
2027 }
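
/* rt6_add_dflt_router() installs a default route learned from a Router
 * Advertisement: RTF_ADDRCONF | RTF_DEFAULT marks it as an RA default,
 * RTF_EXPIRES gives it a finite lifetime (set by the caller) and RTF_PREF()
 * records the advertised router preference.  rt6_get_dflt_router() then
 * looks the freshly inserted entry back up for the caller.
 */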
2028 
2029 void rt6_purge_dflt_routers(struct net *net)
2030 {
2031 	struct rt6_info *rt;
2032 	struct fib6_table *table;
2033 
2034 	/* NOTE: Keep consistent with rt6_get_dflt_router */
2035 	table = fib6_get_table(net, RT6_TABLE_DFLT);
2036 	if (!table)
2037 		return;
2038 
2039 restart:
2040 	read_lock_bh(&table->tb6_lock);
2041 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2042 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2043 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2044 			dst_hold(&rt->dst);
2045 			read_unlock_bh(&table->tb6_lock);
2046 			ip6_del_rt(rt);
2047 			goto restart;
2048 		}
2049 	}
2050 	read_unlock_bh(&table->tb6_lock);
2051 }
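
/* Note on the restart loop above: ip6_del_rt() takes the table write lock,
 * so the read lock is dropped (after grabbing a reference via dst_hold())
 * before each deletion.  Since the leaf list may have changed while the lock
 * was released, the walk restarts from the beginning after every removal.
 */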
2052 
2053 static void rtmsg_to_fib6_config(struct net *net,
2054 				 struct in6_rtmsg *rtmsg,
2055 				 struct fib6_config *cfg)
2056 {
2057 	memset(cfg, 0, sizeof(*cfg));
2058 
2059 	cfg->fc_table = RT6_TABLE_MAIN;
2060 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2061 	cfg->fc_metric = rtmsg->rtmsg_metric;
2062 	cfg->fc_expires = rtmsg->rtmsg_info;
2063 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2064 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2065 	cfg->fc_flags = rtmsg->rtmsg_flags;
2066 
2067 	cfg->fc_nlinfo.nl_net = net;
2068 
2069 	cfg->fc_dst = rtmsg->rtmsg_dst;
2070 	cfg->fc_src = rtmsg->rtmsg_src;
2071 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2072 }
2073 
2074 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2075 {
2076 	struct fib6_config cfg;
2077 	struct in6_rtmsg rtmsg;
2078 	int err;
2079 
2080 	switch (cmd) {
2081 	case SIOCADDRT:		/* Add a route */
2082 	case SIOCDELRT:		/* Delete a route */
2083 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2084 			return -EPERM;
2085 		err = copy_from_user(&rtmsg, arg,
2086 				     sizeof(struct in6_rtmsg));
2087 		if (err)
2088 			return -EFAULT;
2089 
2090 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2091 
2092 		rtnl_lock();
2093 		switch (cmd) {
2094 		case SIOCADDRT:
2095 			err = ip6_route_add(&cfg);
2096 			break;
2097 		case SIOCDELRT:
2098 			err = ip6_route_del(&cfg);
2099 			break;
2100 		default:
2101 			err = -EINVAL;
2102 		}
2103 		rtnl_unlock();
2104 
2105 		return err;
2106 	}
2107 
2108 	return -EINVAL;
2109 }
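
/* A minimal userspace sketch of how the ioctl path above is typically
 * reached (illustration only; the interface name and prefix are made up):
 *
 *	struct in6_rtmsg rtmsg = { 0 };
 *	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *
 *	inet_pton(AF_INET6, "2001:db8::", &rtmsg.rtmsg_dst);
 *	rtmsg.rtmsg_dst_len = 32;
 *	rtmsg.rtmsg_metric  = 1;
 *	rtmsg.rtmsg_flags   = RTF_UP;
 *	rtmsg.rtmsg_ifindex = if_nametoindex("eth0");
 *	ioctl(fd, SIOCADDRT, &rtmsg);
 *
 * The request is copied in with copy_from_user(), converted by
 * rtmsg_to_fib6_config() and passed to ip6_route_add()/ip6_route_del()
 * under the RTNL lock.
 */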
2110 
2111 /*
2112  *	Drop the packet on the floor
2113  */
2114 
2115 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2116 {
2117 	int type;
2118 	struct dst_entry *dst = skb_dst(skb);
2119 	switch (ipstats_mib_noroutes) {
2120 	case IPSTATS_MIB_INNOROUTES:
2121 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2122 		if (type == IPV6_ADDR_ANY) {
2123 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2124 				      IPSTATS_MIB_INADDRERRORS);
2125 			break;
2126 		}
2127 		/* FALLTHROUGH */
2128 	case IPSTATS_MIB_OUTNOROUTES:
2129 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2130 			      ipstats_mib_noroutes);
2131 		break;
2132 	}
2133 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2134 	kfree_skb(skb);
2135 	return 0;
2136 }
2137 
2138 static int ip6_pkt_discard(struct sk_buff *skb)
2139 {
2140 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2141 }
2142 
2143 static int ip6_pkt_discard_out(struct sk_buff *skb)
2144 {
2145 	skb->dev = skb_dst(skb)->dev;
2146 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2147 }
2148 
2149 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2150 
2151 static int ip6_pkt_prohibit(struct sk_buff *skb)
2152 {
2153 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2154 }
2155 
2156 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2157 {
2158 	skb->dev = skb_dst(skb)->dev;
2159 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2160 }
2161 
2162 #endif
2163 
2164 /*
2165  *	Allocate a dst for local (unicast / anycast) address.
2166  */
2167 
2168 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2169 				    const struct in6_addr *addr,
2170 				    bool anycast)
2171 {
2172 	struct net *net = dev_net(idev->dev);
2173 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2174 
2175 	if (!rt) {
2176 		net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2177 		return ERR_PTR(-ENOMEM);
2178 	}
2179 
2180 	in6_dev_hold(idev);
2181 
2182 	rt->dst.flags |= DST_HOST;
2183 	rt->dst.input = ip6_input;
2184 	rt->dst.output = ip6_output;
2185 	rt->rt6i_idev = idev;
2186 
2187 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2188 	if (anycast)
2189 		rt->rt6i_flags |= RTF_ANYCAST;
2190 	else
2191 		rt->rt6i_flags |= RTF_LOCAL;
2192 
2193 	rt->rt6i_gateway  = *addr;
2194 	rt->rt6i_dst.addr = *addr;
2195 	rt->rt6i_dst.plen = 128;
2196 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2197 
2198 	atomic_set(&rt->dst.__refcnt, 1);
2199 
2200 	return rt;
2201 }
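
/* The dst allocated above is a /128 host route on the loopback device,
 * flagged RTF_LOCAL or RTF_ANYCAST and pointed at RT6_TABLE_LOCAL; actual
 * insertion into the FIB is left to the caller.
 */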
2202 
2203 int ip6_route_get_saddr(struct net *net,
2204 			struct rt6_info *rt,
2205 			const struct in6_addr *daddr,
2206 			unsigned int prefs,
2207 			struct in6_addr *saddr)
2208 {
2209 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2210 	int err = 0;
2211 	if (rt->rt6i_prefsrc.plen)
2212 		*saddr = rt->rt6i_prefsrc.addr;
2213 	else
2214 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2215 					 daddr, prefs, saddr);
2216 	return err;
2217 }
2218 
2219 /* remove deleted ip from prefsrc entries */
2220 struct arg_dev_net_ip {
2221 	struct net_device *dev;
2222 	struct net *net;
2223 	struct in6_addr *addr;
2224 };
2225 
2226 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2227 {
2228 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2229 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2230 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2231 
2232 	if (((void *)rt->dst.dev == dev || !dev) &&
2233 	    rt != net->ipv6.ip6_null_entry &&
2234 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2235 		/* remove prefsrc entry */
2236 		rt->rt6i_prefsrc.plen = 0;
2237 	}
2238 	return 0;
2239 }
2240 
2241 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2242 {
2243 	struct net *net = dev_net(ifp->idev->dev);
2244 	struct arg_dev_net_ip adni = {
2245 		.dev = ifp->idev->dev,
2246 		.net = net,
2247 		.addr = &ifp->addr,
2248 	};
2249 	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2250 }
2251 
2252 struct arg_dev_net {
2253 	struct net_device *dev;
2254 	struct net *net;
2255 };
2256 
2257 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2258 {
2259 	const struct arg_dev_net *adn = arg;
2260 	const struct net_device *dev = adn->dev;
2261 
2262 	if ((rt->dst.dev == dev || !dev) &&
2263 	    rt != adn->net->ipv6.ip6_null_entry)
2264 		return -1;
2265 
2266 	return 0;
2267 }
2268 
2269 void rt6_ifdown(struct net *net, struct net_device *dev)
2270 {
2271 	struct arg_dev_net adn = {
2272 		.dev = dev,
2273 		.net = net,
2274 	};
2275 
2276 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2277 	icmp6_clean_all(fib6_ifdown, &adn);
2278 }
2279 
2280 struct rt6_mtu_change_arg {
2281 	struct net_device *dev;
2282 	unsigned int mtu;
2283 };
2284 
2285 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2286 {
2287 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2288 	struct inet6_dev *idev;
2289 
2290 	/* In IPv6 PMTU discovery is not optional,
2291 	   so the RTAX_MTU lock cannot disable it.
2292 	   We still use this lock to block changes
2293 	   caused by addrconf/ndisc.
2294 	*/
2295 
2296 	idev = __in6_dev_get(arg->dev);
2297 	if (!idev)
2298 		return 0;
2299 
2300 	/* For an administrative MTU increase there is no way to discover
2301 	   an IPv6 PMTU increase, so the PMTU must be updated here.
2302 	   Since RFC 1981 doesn't cover administrative MTU increases,
2303 	   updating the PMTU on an increase is a MUST (e.g. jumbo frames).
2304 	 */
2305 	/*
2306 	   If the new MTU is less than the route PMTU, the new MTU will be
2307 	   the lowest MTU in the path; update the route PMTU to reflect the
2308 	   decrease.  If the new MTU is greater than the route PMTU, and the
2309 	   old MTU was the lowest MTU in the path, update the route PMTU to
2310 	   reflect the increase.  In that case, if another node on the path
2311 	   still has the lowest MTU, a Packet Too Big message will trigger
2312 	   PMTU discovery again.
2313 	 */
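	/* Illustration of the condition below (numbers are only an example):
	 * when the device MTU is raised from 1500 to 9000, a route whose
	 * PMTU equals idev->cnf.mtu6 is raised to 9000, while a route whose
	 * PMTU was already lowered further (say to 1280 by a Packet Too Big
	 * message) is left untouched.  Any route whose PMTU is >= the new
	 * MTU is simply clamped down to it.
	 */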
2314 	if (rt->dst.dev == arg->dev &&
2315 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2316 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2317 	     (dst_mtu(&rt->dst) < arg->mtu &&
2318 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2319 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2320 	}
2321 	return 0;
2322 }
2323 
2324 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2325 {
2326 	struct rt6_mtu_change_arg arg = {
2327 		.dev = dev,
2328 		.mtu = mtu,
2329 	};
2330 
2331 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2332 }
2333 
2334 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2335 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2336 	[RTA_OIF]               = { .type = NLA_U32 },
2337 	[RTA_IIF]		= { .type = NLA_U32 },
2338 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2339 	[RTA_METRICS]           = { .type = NLA_NESTED },
2340 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2341 };
2342 
2343 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2344 			      struct fib6_config *cfg)
2345 {
2346 	struct rtmsg *rtm;
2347 	struct nlattr *tb[RTA_MAX+1];
2348 	int err;
2349 
2350 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2351 	if (err < 0)
2352 		goto errout;
2353 
2354 	err = -EINVAL;
2355 	rtm = nlmsg_data(nlh);
2356 	memset(cfg, 0, sizeof(*cfg));
2357 
2358 	cfg->fc_table = rtm->rtm_table;
2359 	cfg->fc_dst_len = rtm->rtm_dst_len;
2360 	cfg->fc_src_len = rtm->rtm_src_len;
2361 	cfg->fc_flags = RTF_UP;
2362 	cfg->fc_protocol = rtm->rtm_protocol;
2363 	cfg->fc_type = rtm->rtm_type;
2364 
2365 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2366 	    rtm->rtm_type == RTN_BLACKHOLE ||
2367 	    rtm->rtm_type == RTN_PROHIBIT ||
2368 	    rtm->rtm_type == RTN_THROW)
2369 		cfg->fc_flags |= RTF_REJECT;
2370 
2371 	if (rtm->rtm_type == RTN_LOCAL)
2372 		cfg->fc_flags |= RTF_LOCAL;
2373 
2374 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2375 	cfg->fc_nlinfo.nlh = nlh;
2376 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2377 
2378 	if (tb[RTA_GATEWAY]) {
2379 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2380 		cfg->fc_flags |= RTF_GATEWAY;
2381 	}
2382 
2383 	if (tb[RTA_DST]) {
2384 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2385 
2386 		if (nla_len(tb[RTA_DST]) < plen)
2387 			goto errout;
2388 
2389 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2390 	}
2391 
2392 	if (tb[RTA_SRC]) {
2393 		int plen = (rtm->rtm_src_len + 7) >> 3;
2394 
2395 		if (nla_len(tb[RTA_SRC]) < plen)
2396 			goto errout;
2397 
2398 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2399 	}
2400 
2401 	if (tb[RTA_PREFSRC])
2402 		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2403 
2404 	if (tb[RTA_OIF])
2405 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2406 
2407 	if (tb[RTA_PRIORITY])
2408 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2409 
2410 	if (tb[RTA_METRICS]) {
2411 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2412 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2413 	}
2414 
2415 	if (tb[RTA_TABLE])
2416 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2417 
2418 	if (tb[RTA_MULTIPATH]) {
2419 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2420 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2421 	}
2422 
2423 	err = 0;
2424 errout:
2425 	return err;
2426 }
2427 
2428 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2429 {
2430 	struct fib6_config r_cfg;
2431 	struct rtnexthop *rtnh;
2432 	int remaining;
2433 	int attrlen;
2434 	int err = 0, last_err = 0;
2435 
2436 beginning:
2437 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2438 	remaining = cfg->fc_mp_len;
2439 
2440 	/* Parse a Multipath Entry */
2441 	while (rtnh_ok(rtnh, remaining)) {
2442 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2443 		if (rtnh->rtnh_ifindex)
2444 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2445 
2446 		attrlen = rtnh_attrlen(rtnh);
2447 		if (attrlen > 0) {
2448 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2449 
2450 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2451 			if (nla) {
2452 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2453 				r_cfg.fc_flags |= RTF_GATEWAY;
2454 			}
2455 		}
2456 		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2457 		if (err) {
2458 			last_err = err;
2459 			/* If we are trying to remove a route, do not stop the
2460 			 * loop when ip6_route_del() fails (because the next hop
2461 			 * is already gone); we should try to remove all next hops.
2462 			 */
2463 			if (add) {
2464 				/* If add fails, we should try to delete all
2465 				 * next hops that have already been added.
2466 				 */
2467 				add = 0;
2468 				goto beginning;
2469 			}
2470 		}
2471 		/* Because each route is added as a single route, we remove
2472 		 * this flag after the first nexthop (if there is a collision,
2473 		 * we have already failed to add the first nexthop:
2474 		 * fib6_add_rt2node() has rejected it).
2475 		 */
2476 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2477 		rtnh = rtnh_next(rtnh, &remaining);
2478 	}
2479 
2480 	return last_err;
2481 }
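
/* A rough sketch of the RTA_MULTIPATH payload walked above (each nexthop is
 * a struct rtnexthop, optionally followed by its own nested attributes, of
 * which only RTA_GATEWAY is consumed here):
 *
 *	struct rtnexthop	(rtnh_len, rtnh_flags, rtnh_hops, rtnh_ifindex)
 *	    RTA_GATEWAY		(16-byte IPv6 address)
 *	struct rtnexthop
 *	    RTA_GATEWAY
 *	...
 *
 * Each hop becomes an independent ip6_route_add()/ip6_route_del() call; the
 * error handling in the loop rolls back the hops already added if a later
 * add fails.
 */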
2482 
2483 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2484 {
2485 	struct fib6_config cfg;
2486 	int err;
2487 
2488 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2489 	if (err < 0)
2490 		return err;
2491 
2492 	if (cfg.fc_mp)
2493 		return ip6_route_multipath(&cfg, 0);
2494 	else
2495 		return ip6_route_del(&cfg);
2496 }
2497 
2498 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2499 {
2500 	struct fib6_config cfg;
2501 	int err;
2502 
2503 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2504 	if (err < 0)
2505 		return err;
2506 
2507 	if (cfg.fc_mp)
2508 		return ip6_route_multipath(&cfg, 1);
2509 	else
2510 		return ip6_route_add(&cfg);
2511 }
2512 
2513 static inline size_t rt6_nlmsg_size(void)
2514 {
2515 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2516 	       + nla_total_size(16) /* RTA_SRC */
2517 	       + nla_total_size(16) /* RTA_DST */
2518 	       + nla_total_size(16) /* RTA_GATEWAY */
2519 	       + nla_total_size(16) /* RTA_PREFSRC */
2520 	       + nla_total_size(4) /* RTA_TABLE */
2521 	       + nla_total_size(4) /* RTA_IIF */
2522 	       + nla_total_size(4) /* RTA_OIF */
2523 	       + nla_total_size(4) /* RTA_PRIORITY */
2524 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2525 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2526 }
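
/* rt6_nlmsg_size() must account for every attribute rt6_fill_node() may
 * emit; if the two fall out of sync, inet6_rt_notify() below trips its
 * WARN_ON(err == -EMSGSIZE).
 */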
2527 
2528 static int rt6_fill_node(struct net *net,
2529 			 struct sk_buff *skb, struct rt6_info *rt,
2530 			 struct in6_addr *dst, struct in6_addr *src,
2531 			 int iif, int type, u32 portid, u32 seq,
2532 			 int prefix, int nowait, unsigned int flags)
2533 {
2534 	struct rtmsg *rtm;
2535 	struct nlmsghdr *nlh;
2536 	long expires;
2537 	u32 table;
2538 
2539 	if (prefix) {	/* user wants prefix routes only */
2540 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2541 			/* success since this is not a prefix route */
2542 			return 1;
2543 		}
2544 	}
2545 
2546 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2547 	if (!nlh)
2548 		return -EMSGSIZE;
2549 
2550 	rtm = nlmsg_data(nlh);
2551 	rtm->rtm_family = AF_INET6;
2552 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2553 	rtm->rtm_src_len = rt->rt6i_src.plen;
2554 	rtm->rtm_tos = 0;
2555 	if (rt->rt6i_table)
2556 		table = rt->rt6i_table->tb6_id;
2557 	else
2558 		table = RT6_TABLE_UNSPEC;
2559 	rtm->rtm_table = table;
2560 	if (nla_put_u32(skb, RTA_TABLE, table))
2561 		goto nla_put_failure;
2562 	if (rt->rt6i_flags & RTF_REJECT) {
2563 		switch (rt->dst.error) {
2564 		case -EINVAL:
2565 			rtm->rtm_type = RTN_BLACKHOLE;
2566 			break;
2567 		case -EACCES:
2568 			rtm->rtm_type = RTN_PROHIBIT;
2569 			break;
2570 		case -EAGAIN:
2571 			rtm->rtm_type = RTN_THROW;
2572 			break;
2573 		default:
2574 			rtm->rtm_type = RTN_UNREACHABLE;
2575 			break;
2576 		}
2577 	} else if (rt->rt6i_flags & RTF_LOCAL)
2579 		rtm->rtm_type = RTN_LOCAL;
2580 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2581 		rtm->rtm_type = RTN_LOCAL;
2582 	else
2583 		rtm->rtm_type = RTN_UNICAST;
2584 	rtm->rtm_flags = 0;
2585 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2586 	rtm->rtm_protocol = rt->rt6i_protocol;
2587 	if (rt->rt6i_flags & RTF_DYNAMIC)
2588 		rtm->rtm_protocol = RTPROT_REDIRECT;
2589 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
2590 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2591 			rtm->rtm_protocol = RTPROT_RA;
2592 		else
2593 			rtm->rtm_protocol = RTPROT_KERNEL;
2594 	}
2595 
2596 	if (rt->rt6i_flags & RTF_CACHE)
2597 		rtm->rtm_flags |= RTM_F_CLONED;
2598 
2599 	if (dst) {
2600 		if (nla_put(skb, RTA_DST, 16, dst))
2601 			goto nla_put_failure;
2602 		rtm->rtm_dst_len = 128;
2603 	} else if (rtm->rtm_dst_len)
2604 		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2605 			goto nla_put_failure;
2606 #ifdef CONFIG_IPV6_SUBTREES
2607 	if (src) {
2608 		if (nla_put(skb, RTA_SRC, 16, src))
2609 			goto nla_put_failure;
2610 		rtm->rtm_src_len = 128;
2611 	} else if (rtm->rtm_src_len &&
2612 		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2613 		goto nla_put_failure;
2614 #endif
2615 	if (iif) {
2616 #ifdef CONFIG_IPV6_MROUTE
2617 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2618 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2619 			if (err <= 0) {
2620 				if (!nowait) {
2621 					if (err == 0)
2622 						return 0;
2623 					goto nla_put_failure;
2624 				} else {
2625 					if (err == -EMSGSIZE)
2626 						goto nla_put_failure;
2627 				}
2628 			}
2629 		} else
2630 #endif
2631 			if (nla_put_u32(skb, RTA_IIF, iif))
2632 				goto nla_put_failure;
2633 	} else if (dst) {
2634 		struct in6_addr saddr_buf;
2635 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2636 		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2637 			goto nla_put_failure;
2638 	}
2639 
2640 	if (rt->rt6i_prefsrc.plen) {
2641 		struct in6_addr saddr_buf;
2642 		saddr_buf = rt->rt6i_prefsrc.addr;
2643 		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2644 			goto nla_put_failure;
2645 	}
2646 
2647 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2648 		goto nla_put_failure;
2649 
2650 	if (rt->rt6i_flags & RTF_GATEWAY) {
2651 		if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2652 			goto nla_put_failure;
2653 	}
2654 
2655 	if (rt->dst.dev &&
2656 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2657 		goto nla_put_failure;
2658 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2659 		goto nla_put_failure;
2660 
2661 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2662 
2663 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2664 		goto nla_put_failure;
2665 
2666 	return nlmsg_end(skb, nlh);
2667 
2668 nla_put_failure:
2669 	nlmsg_cancel(skb, nlh);
2670 	return -EMSGSIZE;
2671 }
2672 
2673 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2674 {
2675 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2676 	int prefix;
2677 
2678 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2679 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2680 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2681 	} else
2682 		prefix = 0;
2683 
2684 	return rt6_fill_node(arg->net,
2685 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2686 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2687 		     prefix, 0, NLM_F_MULTI);
2688 }
2689 
2690 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
2691 {
2692 	struct net *net = sock_net(in_skb->sk);
2693 	struct nlattr *tb[RTA_MAX+1];
2694 	struct rt6_info *rt;
2695 	struct sk_buff *skb;
2696 	struct rtmsg *rtm;
2697 	struct flowi6 fl6;
2698 	int err, iif = 0, oif = 0;
2699 
2700 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2701 	if (err < 0)
2702 		goto errout;
2703 
2704 	err = -EINVAL;
2705 	memset(&fl6, 0, sizeof(fl6));
2706 
2707 	if (tb[RTA_SRC]) {
2708 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2709 			goto errout;
2710 
2711 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2712 	}
2713 
2714 	if (tb[RTA_DST]) {
2715 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2716 			goto errout;
2717 
2718 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2719 	}
2720 
2721 	if (tb[RTA_IIF])
2722 		iif = nla_get_u32(tb[RTA_IIF]);
2723 
2724 	if (tb[RTA_OIF])
2725 		oif = nla_get_u32(tb[RTA_OIF]);
2726 
2727 	if (iif) {
2728 		struct net_device *dev;
2729 		int flags = 0;
2730 
2731 		dev = __dev_get_by_index(net, iif);
2732 		if (!dev) {
2733 			err = -ENODEV;
2734 			goto errout;
2735 		}
2736 
2737 		fl6.flowi6_iif = iif;
2738 
2739 		if (!ipv6_addr_any(&fl6.saddr))
2740 			flags |= RT6_LOOKUP_F_HAS_SADDR;
2741 
2742 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2743 							       flags);
2744 	} else {
2745 		fl6.flowi6_oif = oif;
2746 
2747 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2748 	}
2749 
2750 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2751 	if (!skb) {
2752 		ip6_rt_put(rt);
2753 		err = -ENOBUFS;
2754 		goto errout;
2755 	}
2756 
2757 	/* Reserve room for dummy headers; this skb can pass
2758 	   through a good chunk of the routing engine.
2759 	 */
2760 	skb_reset_mac_header(skb);
2761 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2762 
2763 	skb_dst_set(skb, &rt->dst);
2764 
2765 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2766 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2767 			    nlh->nlmsg_seq, 0, 0, 0);
2768 	if (err < 0) {
2769 		kfree_skb(skb);
2770 		goto errout;
2771 	}
2772 
2773 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2774 errout:
2775 	return err;
2776 }
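
/* inet6_rtm_getroute() services RTM_GETROUTE requests (what `ip -6 route
 * get` sends, for instance).  With RTA_IIF set, the lookup goes through
 * ip6_route_input_lookup() as if the packet had arrived on that device;
 * otherwise ip6_route_output() is used.  The chosen route is then reported
 * back to the requester via rt6_fill_node() and rtnl_unicast().
 */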
2777 
2778 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2779 {
2780 	struct sk_buff *skb;
2781 	struct net *net = info->nl_net;
2782 	u32 seq;
2783 	int err;
2784 
2785 	err = -ENOBUFS;
2786 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2787 
2788 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2789 	if (!skb)
2790 		goto errout;
2791 
2792 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2793 				event, info->portid, seq, 0, 0, 0);
2794 	if (err < 0) {
2795 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2796 		WARN_ON(err == -EMSGSIZE);
2797 		kfree_skb(skb);
2798 		goto errout;
2799 	}
2800 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2801 		    info->nlh, gfp_any());
2802 	return;
2803 errout:
2804 	if (err < 0)
2805 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2806 }
2807 
2808 static int ip6_route_dev_notify(struct notifier_block *this,
2809 				unsigned long event, void *ptr)
2810 {
2811 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2812 	struct net *net = dev_net(dev);
2813 
2814 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2815 		net->ipv6.ip6_null_entry->dst.dev = dev;
2816 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2817 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2818 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2819 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2820 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2821 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2822 #endif
2823 	}
2824 
2825 	return NOTIFY_OK;
2826 }
2827 
2828 /*
2829  *	/proc
2830  */
2831 
2832 #ifdef CONFIG_PROC_FS
2833 
2834 struct rt6_proc_arg {
2836 	char *buffer;
2837 	int offset;
2838 	int length;
2839 	int skip;
2840 	int len;
2841 };
2842 
2843 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2844 {
2845 	struct seq_file *m = p_arg;
2846 
2847 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2848 
2849 #ifdef CONFIG_IPV6_SUBTREES
2850 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2851 #else
2852 	seq_puts(m, "00000000000000000000000000000000 00 ");
2853 #endif
2854 	if (rt->rt6i_flags & RTF_GATEWAY) {
2855 		seq_printf(m, "%pi6", &rt->rt6i_gateway);
2856 	} else {
2857 		seq_puts(m, "00000000000000000000000000000000");
2858 	}
2859 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2860 		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2861 		   rt->dst.__use, rt->rt6i_flags,
2862 		   rt->dst.dev ? rt->dst.dev->name : "");
2863 	return 0;
2864 }
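
/* One /proc/net/ipv6_route line per route, as produced above:
 *
 *	dst_addr dst_plen src_addr src_plen next_hop metric refcnt use flags dev
 *
 * Addresses are printed as raw hex (%pi6, no colons), the numeric columns are
 * hex as well, and the source columns are all zeros unless
 * CONFIG_IPV6_SUBTREES is enabled.
 */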
2865 
2866 static int ipv6_route_show(struct seq_file *m, void *v)
2867 {
2868 	struct net *net = (struct net *)m->private;
2869 	fib6_clean_all_ro(net, rt6_info_route, 0, m);
2870 	return 0;
2871 }
2872 
2873 static int ipv6_route_open(struct inode *inode, struct file *file)
2874 {
2875 	return single_open_net(inode, file, ipv6_route_show);
2876 }
2877 
2878 static const struct file_operations ipv6_route_proc_fops = {
2879 	.owner		= THIS_MODULE,
2880 	.open		= ipv6_route_open,
2881 	.read		= seq_read,
2882 	.llseek		= seq_lseek,
2883 	.release	= single_release_net,
2884 };
2885 
2886 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2887 {
2888 	struct net *net = (struct net *)seq->private;
2889 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2890 		   net->ipv6.rt6_stats->fib_nodes,
2891 		   net->ipv6.rt6_stats->fib_route_nodes,
2892 		   net->ipv6.rt6_stats->fib_rt_alloc,
2893 		   net->ipv6.rt6_stats->fib_rt_entries,
2894 		   net->ipv6.rt6_stats->fib_rt_cache,
2895 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2896 		   net->ipv6.rt6_stats->fib_discarded_routes);
2897 
2898 	return 0;
2899 }
2900 
2901 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2902 {
2903 	return single_open_net(inode, file, rt6_stats_seq_show);
2904 }
2905 
2906 static const struct file_operations rt6_stats_seq_fops = {
2907 	.owner	 = THIS_MODULE,
2908 	.open	 = rt6_stats_seq_open,
2909 	.read	 = seq_read,
2910 	.llseek	 = seq_lseek,
2911 	.release = single_release_net,
2912 };
2913 #endif	/* CONFIG_PROC_FS */
2914 
2915 #ifdef CONFIG_SYSCTL
2916 
2917 static
2918 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2919 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2920 {
2921 	struct net *net;
2922 	int delay;
2923 	if (!write)
2924 		return -EINVAL;
2925 
2926 	net = (struct net *)ctl->extra1;
2927 	delay = net->ipv6.sysctl.flush_delay;
2928 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2929 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2930 	return 0;
2931 }
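
/* Writing to the "flush" sysctl declared below (conventionally exposed as
 * /proc/sys/net/ipv6/route/flush) forces a garbage-collection pass through
 * fib6_run_gc(); the handler rejects reads, matching the entry's
 * write-only 0200 mode.
 */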
2932 
2933 struct ctl_table ipv6_route_table_template[] = {
2934 	{
2935 		.procname	=	"flush",
2936 		.data		=	&init_net.ipv6.sysctl.flush_delay,
2937 		.maxlen		=	sizeof(int),
2938 		.mode		=	0200,
2939 		.proc_handler	=	ipv6_sysctl_rtcache_flush
2940 	},
2941 	{
2942 		.procname	=	"gc_thresh",
2943 		.data		=	&ip6_dst_ops_template.gc_thresh,
2944 		.maxlen		=	sizeof(int),
2945 		.mode		=	0644,
2946 		.proc_handler	=	proc_dointvec,
2947 	},
2948 	{
2949 		.procname	=	"max_size",
2950 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2951 		.maxlen		=	sizeof(int),
2952 		.mode		=	0644,
2953 		.proc_handler	=	proc_dointvec,
2954 	},
2955 	{
2956 		.procname	=	"gc_min_interval",
2957 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2958 		.maxlen		=	sizeof(int),
2959 		.mode		=	0644,
2960 		.proc_handler	=	proc_dointvec_jiffies,
2961 	},
2962 	{
2963 		.procname	=	"gc_timeout",
2964 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2965 		.maxlen		=	sizeof(int),
2966 		.mode		=	0644,
2967 		.proc_handler	=	proc_dointvec_jiffies,
2968 	},
2969 	{
2970 		.procname	=	"gc_interval",
2971 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2972 		.maxlen		=	sizeof(int),
2973 		.mode		=	0644,
2974 		.proc_handler	=	proc_dointvec_jiffies,
2975 	},
2976 	{
2977 		.procname	=	"gc_elasticity",
2978 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2979 		.maxlen		=	sizeof(int),
2980 		.mode		=	0644,
2981 		.proc_handler	=	proc_dointvec,
2982 	},
2983 	{
2984 		.procname	=	"mtu_expires",
2985 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2986 		.maxlen		=	sizeof(int),
2987 		.mode		=	0644,
2988 		.proc_handler	=	proc_dointvec_jiffies,
2989 	},
2990 	{
2991 		.procname	=	"min_adv_mss",
2992 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2993 		.maxlen		=	sizeof(int),
2994 		.mode		=	0644,
2995 		.proc_handler	=	proc_dointvec,
2996 	},
2997 	{
2998 		.procname	=	"gc_min_interval_ms",
2999 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3000 		.maxlen		=	sizeof(int),
3001 		.mode		=	0644,
3002 		.proc_handler	=	proc_dointvec_ms_jiffies,
3003 	},
3004 	{ }
3005 };
3006 
3007 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3008 {
3009 	struct ctl_table *table;
3010 
3011 	table = kmemdup(ipv6_route_table_template,
3012 			sizeof(ipv6_route_table_template),
3013 			GFP_KERNEL);
3014 
3015 	if (table) {
3016 		table[0].data = &net->ipv6.sysctl.flush_delay;
3017 		table[0].extra1 = net;
3018 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3019 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3020 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3021 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3022 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3023 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3024 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3025 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3026 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3027 
3028 		/* Don't export sysctls to unprivileged users */
3029 		if (net->user_ns != &init_user_ns)
3030 			table[0].procname = NULL;
3031 	}
3032 
3033 	return table;
3034 }
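
/* Note: the per-netns overrides above index the copied table by position,
 * so they must be kept in step with the ordering of
 * ipv6_route_table_template[].  Only the "flush" entry is hidden (its
 * procname cleared) for namespaces other than the initial user namespace.
 */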
3035 #endif
3036 
3037 static int __net_init ip6_route_net_init(struct net *net)
3038 {
3039 	int ret = -ENOMEM;
3040 
3041 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3042 	       sizeof(net->ipv6.ip6_dst_ops));
3043 
3044 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3045 		goto out_ip6_dst_ops;
3046 
3047 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3048 					   sizeof(*net->ipv6.ip6_null_entry),
3049 					   GFP_KERNEL);
3050 	if (!net->ipv6.ip6_null_entry)
3051 		goto out_ip6_dst_entries;
3052 	net->ipv6.ip6_null_entry->dst.path =
3053 		(struct dst_entry *)net->ipv6.ip6_null_entry;
3054 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3055 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3056 			 ip6_template_metrics, true);
3057 
3058 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3059 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3060 					       sizeof(*net->ipv6.ip6_prohibit_entry),
3061 					       GFP_KERNEL);
3062 	if (!net->ipv6.ip6_prohibit_entry)
3063 		goto out_ip6_null_entry;
3064 	net->ipv6.ip6_prohibit_entry->dst.path =
3065 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3066 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3067 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3068 			 ip6_template_metrics, true);
3069 
3070 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3071 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3072 					       GFP_KERNEL);
3073 	if (!net->ipv6.ip6_blk_hole_entry)
3074 		goto out_ip6_prohibit_entry;
3075 	net->ipv6.ip6_blk_hole_entry->dst.path =
3076 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3077 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3078 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3079 			 ip6_template_metrics, true);
3080 #endif
3081 
3082 	net->ipv6.sysctl.flush_delay = 0;
3083 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3084 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3085 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3086 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3087 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3088 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3089 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3090 
3091 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3092 
3093 	ret = 0;
3094 out:
3095 	return ret;
3096 
3097 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3098 out_ip6_prohibit_entry:
3099 	kfree(net->ipv6.ip6_prohibit_entry);
3100 out_ip6_null_entry:
3101 	kfree(net->ipv6.ip6_null_entry);
3102 #endif
3103 out_ip6_dst_entries:
3104 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3105 out_ip6_dst_ops:
3106 	goto out;
3107 }
3108 
3109 static void __net_exit ip6_route_net_exit(struct net *net)
3110 {
3111 	kfree(net->ipv6.ip6_null_entry);
3112 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3113 	kfree(net->ipv6.ip6_prohibit_entry);
3114 	kfree(net->ipv6.ip6_blk_hole_entry);
3115 #endif
3116 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3117 }
3118 
3119 static int __net_init ip6_route_net_init_late(struct net *net)
3120 {
3121 #ifdef CONFIG_PROC_FS
3122 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3123 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3124 #endif
3125 	return 0;
3126 }
3127 
3128 static void __net_exit ip6_route_net_exit_late(struct net *net)
3129 {
3130 #ifdef CONFIG_PROC_FS
3131 	remove_proc_entry("ipv6_route", net->proc_net);
3132 	remove_proc_entry("rt6_stats", net->proc_net);
3133 #endif
3134 }
3135 
3136 static struct pernet_operations ip6_route_net_ops = {
3137 	.init = ip6_route_net_init,
3138 	.exit = ip6_route_net_exit,
3139 };
3140 
3141 static int __net_init ipv6_inetpeer_init(struct net *net)
3142 {
3143 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3144 
3145 	if (!bp)
3146 		return -ENOMEM;
3147 	inet_peer_base_init(bp);
3148 	net->ipv6.peers = bp;
3149 	return 0;
3150 }
3151 
3152 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3153 {
3154 	struct inet_peer_base *bp = net->ipv6.peers;
3155 
3156 	net->ipv6.peers = NULL;
3157 	inetpeer_invalidate_tree(bp);
3158 	kfree(bp);
3159 }
3160 
3161 static struct pernet_operations ipv6_inetpeer_ops = {
3162 	.init	=	ipv6_inetpeer_init,
3163 	.exit	=	ipv6_inetpeer_exit,
3164 };
3165 
3166 static struct pernet_operations ip6_route_net_late_ops = {
3167 	.init = ip6_route_net_init_late,
3168 	.exit = ip6_route_net_exit_late,
3169 };
3170 
3171 static struct notifier_block ip6_route_dev_notifier = {
3172 	.notifier_call = ip6_route_dev_notify,
3173 	.priority = 0,
3174 };
3175 
3176 int __init ip6_route_init(void)
3177 {
3178 	int ret;
3179 
3180 	ret = -ENOMEM;
3181 	ip6_dst_ops_template.kmem_cachep =
3182 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3183 				  SLAB_HWCACHE_ALIGN, NULL);
3184 	if (!ip6_dst_ops_template.kmem_cachep)
3185 		goto out;
3186 
3187 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3188 	if (ret)
3189 		goto out_kmem_cache;
3190 
3191 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3192 	if (ret)
3193 		goto out_dst_entries;
3194 
3195 	ret = register_pernet_subsys(&ip6_route_net_ops);
3196 	if (ret)
3197 		goto out_register_inetpeer;
3198 
3199 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3200 
3201 	/* The loopback device is registered before this portion of code runs,
3202 	 * so the loopback reference in rt6_info is not taken there; take it
3203 	 * manually for init_net. */
3204 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3205 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3206 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3207 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3208 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3209 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3210 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3211 #endif
3212 	ret = fib6_init();
3213 	if (ret)
3214 		goto out_register_subsys;
3215 
3216 	ret = xfrm6_init();
3217 	if (ret)
3218 		goto out_fib6_init;
3219 
3220 	ret = fib6_rules_init();
3221 	if (ret)
3222 		goto xfrm6_init;
3223 
3224 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3225 	if (ret)
3226 		goto fib6_rules_init;
3227 
3228 	ret = -ENOBUFS;
3229 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3230 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3231 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3232 		goto out_register_late_subsys;
3233 
3234 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3235 	if (ret)
3236 		goto out_register_late_subsys;
3237 
3238 out:
3239 	return ret;
3240 
3241 out_register_late_subsys:
3242 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3243 fib6_rules_init:
3244 	fib6_rules_cleanup();
3245 xfrm6_init:
3246 	xfrm6_fini();
3247 out_fib6_init:
3248 	fib6_gc_cleanup();
3249 out_register_subsys:
3250 	unregister_pernet_subsys(&ip6_route_net_ops);
3251 out_register_inetpeer:
3252 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3253 out_dst_entries:
3254 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3255 out_kmem_cache:
3256 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3257 	goto out;
3258 }
3259 
3260 void ip6_route_cleanup(void)
3261 {
3262 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3263 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3264 	fib6_rules_cleanup();
3265 	xfrm6_fini();
3266 	fib6_gc_cleanup();
3267 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3268 	unregister_pernet_subsys(&ip6_route_net_ops);
3269 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3270 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3271 }
3272