xref: /openbmc/linux/net/ipv6/route.c (revision 034f90b3)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61 
62 #include <asm/uaccess.h>
63 
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67 
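/* Result codes used when scoring a candidate next hop's neighbour state
 * (see rt6_check_neigh() and rt6_score_route() below): the negative values
 * are failures of increasing severity, and RT6_NUD_FAIL_DO_RR asks the
 * caller to fall back to round-robin router selection.
 */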
68 enum rt6_nud_state {
69 	RT6_NUD_FAIL_HARD = -3,
70 	RT6_NUD_FAIL_PROBE = -2,
71 	RT6_NUD_FAIL_DO_RR = -1,
72 	RT6_NUD_SUCCEED = 1
73 };
74 
75 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
76 				    const struct in6_addr *dest);
77 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
79 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void		ip6_dst_destroy(struct dst_entry *);
82 static void		ip6_dst_ifdown(struct dst_entry *,
83 				       struct net_device *dev, int how);
84 static int		 ip6_dst_gc(struct dst_ops *ops);
85 
86 static int		ip6_pkt_discard(struct sk_buff *skb);
87 static int		ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
88 static int		ip6_pkt_prohibit(struct sk_buff *skb);
89 static int		ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
90 static void		ip6_link_failure(struct sk_buff *skb);
91 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92 					   struct sk_buff *skb, u32 mtu);
93 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94 					struct sk_buff *skb);
95 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
96 
97 #ifdef CONFIG_IPV6_ROUTE_INFO
98 static struct rt6_info *rt6_add_route_info(struct net *net,
99 					   const struct in6_addr *prefix, int prefixlen,
100 					   const struct in6_addr *gwaddr, int ifindex,
101 					   unsigned int pref);
102 static struct rt6_info *rt6_get_route_info(struct net *net,
103 					   const struct in6_addr *prefix, int prefixlen,
104 					   const struct in6_addr *gwaddr, int ifindex);
105 #endif
106 
107 static void rt6_bind_peer(struct rt6_info *rt, int create)
108 {
109 	struct inet_peer_base *base;
110 	struct inet_peer *peer;
111 
112 	base = inetpeer_base_ptr(rt->_rt6i_peer);
113 	if (!base)
114 		return;
115 
116 	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
117 	if (peer) {
118 		if (!rt6_set_peer(rt, peer))
119 			inet_putpeer(peer);
120 	}
121 }
122 
123 static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
124 {
125 	if (rt6_has_peer(rt))
126 		return rt6_peer_ptr(rt);
127 
128 	rt6_bind_peer(rt, create);
129 	return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
130 }
131 
132 static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
133 {
134 	return __rt6_get_peer(rt, 1);
135 }
136 
137 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
138 {
139 	struct rt6_info *rt = (struct rt6_info *) dst;
140 	struct inet_peer *peer;
141 	u32 *p = NULL;
142 
143 	if (!(rt->dst.flags & DST_HOST))
144 		return dst_cow_metrics_generic(dst, old);
145 
146 	peer = rt6_get_peer_create(rt);
147 	if (peer) {
148 		u32 *old_p = __DST_METRICS_PTR(old);
149 		unsigned long prev, new;
150 
151 		p = peer->metrics;
152 		if (inet_metrics_new(peer) ||
153 		    (old & DST_METRICS_FORCE_OVERWRITE))
154 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
155 
156 		new = (unsigned long) p;
157 		prev = cmpxchg(&dst->_metrics, old, new);
158 
159 		if (prev != old) {
160 			p = __DST_METRICS_PTR(prev);
161 			if (prev & DST_METRICS_READ_ONLY)
162 				p = NULL;
163 		}
164 	}
165 	return p;
166 }
167 
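/* Pick the address used for the neighbour lookup: prefer the route's
 * gateway if one is set, otherwise the packet's destination address,
 * otherwise the destination supplied by the caller.
 */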
168 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
169 					     struct sk_buff *skb,
170 					     const void *daddr)
171 {
172 	struct in6_addr *p = &rt->rt6i_gateway;
173 
174 	if (!ipv6_addr_any(p))
175 		return (const void *) p;
176 	else if (skb)
177 		return &ipv6_hdr(skb)->daddr;
178 	return daddr;
179 }
180 
181 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
182 					  struct sk_buff *skb,
183 					  const void *daddr)
184 {
185 	struct rt6_info *rt = (struct rt6_info *) dst;
186 	struct neighbour *n;
187 
188 	daddr = choose_neigh_daddr(rt, skb, daddr);
189 	n = __ipv6_neigh_lookup(dst->dev, daddr);
190 	if (n)
191 		return n;
192 	return neigh_create(&nd_tbl, daddr, dst->dev);
193 }
194 
195 static struct dst_ops ip6_dst_ops_template = {
196 	.family			=	AF_INET6,
197 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
198 	.gc			=	ip6_dst_gc,
199 	.gc_thresh		=	1024,
200 	.check			=	ip6_dst_check,
201 	.default_advmss		=	ip6_default_advmss,
202 	.mtu			=	ip6_mtu,
203 	.cow_metrics		=	ipv6_cow_metrics,
204 	.destroy		=	ip6_dst_destroy,
205 	.ifdown			=	ip6_dst_ifdown,
206 	.negative_advice	=	ip6_negative_advice,
207 	.link_failure		=	ip6_link_failure,
208 	.update_pmtu		=	ip6_rt_update_pmtu,
209 	.redirect		=	rt6_do_redirect,
210 	.local_out		=	__ip6_local_out,
211 	.neigh_lookup		=	ip6_neigh_lookup,
212 };
213 
214 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
215 {
216 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
217 
218 	return mtu ? : dst->dev->mtu;
219 }
220 
221 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
222 					 struct sk_buff *skb, u32 mtu)
223 {
224 }
225 
226 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
227 				      struct sk_buff *skb)
228 {
229 }
230 
231 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
232 					 unsigned long old)
233 {
234 	return NULL;
235 }
236 
237 static struct dst_ops ip6_dst_blackhole_ops = {
238 	.family			=	AF_INET6,
239 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
240 	.destroy		=	ip6_dst_destroy,
241 	.check			=	ip6_dst_check,
242 	.mtu			=	ip6_blackhole_mtu,
243 	.default_advmss		=	ip6_default_advmss,
244 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
245 	.redirect		=	ip6_rt_blackhole_redirect,
246 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
247 	.neigh_lookup		=	ip6_neigh_lookup,
248 };
249 
250 static const u32 ip6_template_metrics[RTAX_MAX] = {
251 	[RTAX_HOPLIMIT - 1] = 0,
252 };
253 
254 static const struct rt6_info ip6_null_entry_template = {
255 	.dst = {
256 		.__refcnt	= ATOMIC_INIT(1),
257 		.__use		= 1,
258 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
259 		.error		= -ENETUNREACH,
260 		.input		= ip6_pkt_discard,
261 		.output		= ip6_pkt_discard_out,
262 	},
263 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
264 	.rt6i_protocol  = RTPROT_KERNEL,
265 	.rt6i_metric	= ~(u32) 0,
266 	.rt6i_ref	= ATOMIC_INIT(1),
267 };
268 
269 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
270 
271 static const struct rt6_info ip6_prohibit_entry_template = {
272 	.dst = {
273 		.__refcnt	= ATOMIC_INIT(1),
274 		.__use		= 1,
275 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
276 		.error		= -EACCES,
277 		.input		= ip6_pkt_prohibit,
278 		.output		= ip6_pkt_prohibit_out,
279 	},
280 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
281 	.rt6i_protocol  = RTPROT_KERNEL,
282 	.rt6i_metric	= ~(u32) 0,
283 	.rt6i_ref	= ATOMIC_INIT(1),
284 };
285 
286 static const struct rt6_info ip6_blk_hole_entry_template = {
287 	.dst = {
288 		.__refcnt	= ATOMIC_INIT(1),
289 		.__use		= 1,
290 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
291 		.error		= -EINVAL,
292 		.input		= dst_discard,
293 		.output		= dst_discard_sk,
294 	},
295 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
296 	.rt6i_protocol  = RTPROT_KERNEL,
297 	.rt6i_metric	= ~(u32) 0,
298 	.rt6i_ref	= ATOMIC_INIT(1),
299 };
300 
301 #endif
302 
303 /* allocate dst with ip6_dst_ops */
304 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
305 					     struct net_device *dev,
306 					     int flags,
307 					     struct fib6_table *table)
308 {
309 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
310 					0, DST_OBSOLETE_FORCE_CHK, flags);
311 
312 	if (rt) {
313 		struct dst_entry *dst = &rt->dst;
314 
315 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
316 		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
317 		INIT_LIST_HEAD(&rt->rt6i_siblings);
318 	}
319 	return rt;
320 }
321 
322 static void ip6_dst_destroy(struct dst_entry *dst)
323 {
324 	struct rt6_info *rt = (struct rt6_info *)dst;
325 	struct inet6_dev *idev = rt->rt6i_idev;
326 	struct dst_entry *from = dst->from;
327 
328 	if (!(rt->dst.flags & DST_HOST))
329 		dst_destroy_metrics_generic(dst);
330 
331 	if (idev) {
332 		rt->rt6i_idev = NULL;
333 		in6_dev_put(idev);
334 	}
335 
336 	dst->from = NULL;
337 	dst_release(from);
338 
339 	if (rt6_has_peer(rt)) {
340 		struct inet_peer *peer = rt6_peer_ptr(rt);
341 		inet_putpeer(peer);
342 	}
343 }
344 
345 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
346 			   int how)
347 {
348 	struct rt6_info *rt = (struct rt6_info *)dst;
349 	struct inet6_dev *idev = rt->rt6i_idev;
350 	struct net_device *loopback_dev =
351 		dev_net(dev)->loopback_dev;
352 
353 	if (dev != loopback_dev) {
354 		if (idev && idev->dev == dev) {
355 			struct inet6_dev *loopback_idev =
356 				in6_dev_get(loopback_dev);
357 			if (loopback_idev) {
358 				rt->rt6i_idev = loopback_idev;
359 				in6_dev_put(idev);
360 			}
361 		}
362 	}
363 }
364 
365 static bool rt6_check_expired(const struct rt6_info *rt)
366 {
367 	if (rt->rt6i_flags & RTF_EXPIRES) {
368 		if (time_after(jiffies, rt->dst.expires))
369 			return true;
370 	} else if (rt->dst.from) {
371 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
372 	}
373 	return false;
374 }
375 
376 /* Multipath route selection:
377  *   Hash-based function using the packet header and flow label.
378  * Adapted from fib_info_hashfn().
379  */
380 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
381 			       const struct flowi6 *fl6)
382 {
383 	unsigned int val = fl6->flowi6_proto;
384 
385 	val ^= ipv6_addr_hash(&fl6->daddr);
386 	val ^= ipv6_addr_hash(&fl6->saddr);
387 
388 	/* Works only if this is not encapsulated */
389 	switch (fl6->flowi6_proto) {
390 	case IPPROTO_UDP:
391 	case IPPROTO_TCP:
392 	case IPPROTO_SCTP:
393 		val ^= (__force u16)fl6->fl6_sport;
394 		val ^= (__force u16)fl6->fl6_dport;
395 		break;
396 
397 	case IPPROTO_ICMPV6:
398 		val ^= (__force u16)fl6->fl6_icmp_type;
399 		val ^= (__force u16)fl6->fl6_icmp_code;
400 		break;
401 	}
402 	/* RFC 6438 recommends using the flow label */
403 	val ^= (__force u32)fl6->flowlabel;
404 
405 	/* Perhaps we need to tune this function? */
406 	val = val ^ (val >> 7) ^ (val >> 12);
407 	return val % candidate_count;
408 }
409 
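/* Walk the sibling list and replace @match with the sibling selected by
 * the flow hash, provided that sibling still scores as usable.
 */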
410 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
411 					     struct flowi6 *fl6, int oif,
412 					     int strict)
413 {
414 	struct rt6_info *sibling, *next_sibling;
415 	int route_choosen;
416 
417 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
418 	/* Don't change the route if route_choosen == 0
419 	 * (the sibling list does not include ourselves)
420 	 */
421 	if (route_choosen)
422 		list_for_each_entry_safe(sibling, next_sibling,
423 				&match->rt6i_siblings, rt6i_siblings) {
424 			route_choosen--;
425 			if (route_choosen == 0) {
426 				if (rt6_score_route(sibling, oif, strict) < 0)
427 					break;
428 				match = sibling;
429 				break;
430 			}
431 		}
432 	return match;
433 }
434 
435 /*
436  *	Route lookup. The relevant table->tb6_lock is assumed to be held.
437  */
438 
439 static inline struct rt6_info *rt6_device_match(struct net *net,
440 						    struct rt6_info *rt,
441 						    const struct in6_addr *saddr,
442 						    int oif,
443 						    int flags)
444 {
445 	struct rt6_info *local = NULL;
446 	struct rt6_info *sprt;
447 
448 	if (!oif && ipv6_addr_any(saddr))
449 		goto out;
450 
451 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
452 		struct net_device *dev = sprt->dst.dev;
453 
454 		if (oif) {
455 			if (dev->ifindex == oif)
456 				return sprt;
457 			if (dev->flags & IFF_LOOPBACK) {
458 				if (!sprt->rt6i_idev ||
459 				    sprt->rt6i_idev->dev->ifindex != oif) {
460 					if (flags & RT6_LOOKUP_F_IFACE && oif)
461 						continue;
462 					if (local && (!oif ||
463 						      local->rt6i_idev->dev->ifindex == oif))
464 						continue;
465 				}
466 				local = sprt;
467 			}
468 		} else {
469 			if (ipv6_chk_addr(net, saddr, dev,
470 					  flags & RT6_LOOKUP_F_IFACE))
471 				return sprt;
472 		}
473 	}
474 
475 	if (oif) {
476 		if (local)
477 			return local;
478 
479 		if (flags & RT6_LOOKUP_F_IFACE)
480 			return net->ipv6.ip6_null_entry;
481 	}
482 out:
483 	return rt;
484 }
485 
486 #ifdef CONFIG_IPV6_ROUTER_PREF
487 struct __rt6_probe_work {
488 	struct work_struct work;
489 	struct in6_addr target;
490 	struct net_device *dev;
491 };
492 
493 static void rt6_probe_deferred(struct work_struct *w)
494 {
495 	struct in6_addr mcaddr;
496 	struct __rt6_probe_work *work =
497 		container_of(w, struct __rt6_probe_work, work);
498 
499 	addrconf_addr_solict_mult(&work->target, &mcaddr);
500 	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
501 	dev_put(work->dev);
502 	kfree(work);
503 }
504 
505 static void rt6_probe(struct rt6_info *rt)
506 {
507 	struct neighbour *neigh;
508 	/*
509 	 * Okay, this does not seem to be appropriate
510 	 * for now; however, we need to check whether it
511 	 * really is so, aka Router Reachability Probing.
512 	 *
513 	 * A Router Reachability Probe MUST be rate-limited
514 	 * to no more than one per minute.
515 	 */
516 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
517 		return;
518 	rcu_read_lock_bh();
519 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
520 	if (neigh) {
521 		write_lock(&neigh->lock);
522 		if (neigh->nud_state & NUD_VALID)
523 			goto out;
524 	}
525 
526 	if (!neigh ||
527 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
528 		struct __rt6_probe_work *work;
529 
530 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
531 
532 		if (neigh && work)
533 			__neigh_set_probe_once(neigh);
534 
535 		if (neigh)
536 			write_unlock(&neigh->lock);
537 
538 		if (work) {
539 			INIT_WORK(&work->work, rt6_probe_deferred);
540 			work->target = rt->rt6i_gateway;
541 			dev_hold(rt->dst.dev);
542 			work->dev = rt->dst.dev;
543 			schedule_work(&work->work);
544 		}
545 	} else {
546 out:
547 		write_unlock(&neigh->lock);
548 	}
549 	rcu_read_unlock_bh();
550 }
551 #else
552 static inline void rt6_probe(struct rt6_info *rt)
553 {
554 }
555 #endif
556 
557 /*
558  * Default Router Selection (RFC 2461 6.3.6)
559  */
560 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
561 {
562 	struct net_device *dev = rt->dst.dev;
563 	if (!oif || dev->ifindex == oif)
564 		return 2;
565 	if ((dev->flags & IFF_LOOPBACK) &&
566 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
567 		return 1;
568 	return 0;
569 }
570 
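/* Score the neighbour state of the route's gateway: NUD_VALID (or a
 * non-gateway route) succeeds; otherwise the result depends on whether
 * router-preference support is compiled in.
 */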
571 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
572 {
573 	struct neighbour *neigh;
574 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
575 
576 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
577 	    !(rt->rt6i_flags & RTF_GATEWAY))
578 		return RT6_NUD_SUCCEED;
579 
580 	rcu_read_lock_bh();
581 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
582 	if (neigh) {
583 		read_lock(&neigh->lock);
584 		if (neigh->nud_state & NUD_VALID)
585 			ret = RT6_NUD_SUCCEED;
586 #ifdef CONFIG_IPV6_ROUTER_PREF
587 		else if (!(neigh->nud_state & NUD_FAILED))
588 			ret = RT6_NUD_SUCCEED;
589 		else
590 			ret = RT6_NUD_FAIL_PROBE;
591 #endif
592 		read_unlock(&neigh->lock);
593 	} else {
594 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
595 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
596 	}
597 	rcu_read_unlock_bh();
598 
599 	return ret;
600 }
601 
602 static int rt6_score_route(struct rt6_info *rt, int oif,
603 			   int strict)
604 {
605 	int m;
606 
607 	m = rt6_check_dev(rt, oif);
608 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
609 		return RT6_NUD_FAIL_HARD;
610 #ifdef CONFIG_IPV6_ROUTER_PREF
611 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
612 #endif
613 	if (strict & RT6_LOOKUP_F_REACHABLE) {
614 		int n = rt6_check_neigh(rt);
615 		if (n < 0)
616 			return n;
617 	}
618 	return m;
619 }
620 
621 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
622 				   int *mpri, struct rt6_info *match,
623 				   bool *do_rr)
624 {
625 	int m;
626 	bool match_do_rr = false;
627 
628 	if (rt6_check_expired(rt))
629 		goto out;
630 
631 	m = rt6_score_route(rt, oif, strict);
632 	if (m == RT6_NUD_FAIL_DO_RR) {
633 		match_do_rr = true;
634 		m = 0; /* lowest valid score */
635 	} else if (m == RT6_NUD_FAIL_HARD) {
636 		goto out;
637 	}
638 
639 	if (strict & RT6_LOOKUP_F_REACHABLE)
640 		rt6_probe(rt);
641 
642 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
643 	if (m > *mpri) {
644 		*do_rr = match_do_rr;
645 		*mpri = m;
646 		match = rt;
647 	}
648 out:
649 	return match;
650 }
651 
652 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
653 				     struct rt6_info *rr_head,
654 				     u32 metric, int oif, int strict,
655 				     bool *do_rr)
656 {
657 	struct rt6_info *rt, *match;
658 	int mpri = -1;
659 
660 	match = NULL;
661 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
662 	     rt = rt->dst.rt6_next)
663 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
664 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
665 	     rt = rt->dst.rt6_next)
666 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
667 
668 	return match;
669 }
670 
671 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
672 {
673 	struct rt6_info *match, *rt0;
674 	struct net *net;
675 	bool do_rr = false;
676 
677 	rt0 = fn->rr_ptr;
678 	if (!rt0)
679 		fn->rr_ptr = rt0 = fn->leaf;
680 
681 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
682 			     &do_rr);
683 
684 	if (do_rr) {
685 		struct rt6_info *next = rt0->dst.rt6_next;
686 
687 		/* no entries matched; do round-robin */
688 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
689 			next = fn->leaf;
690 
691 		if (next != rt0)
692 			fn->rr_ptr = next;
693 	}
694 
695 	net = dev_net(rt0->dst.dev);
696 	return match ? match : net->ipv6.ip6_null_entry;
697 }
698 
699 #ifdef CONFIG_IPV6_ROUTE_INFO
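/* Handle a Route Information option received in a Router Advertisement
 * (RFC 4191): validate it, then add, refresh or delete the corresponding
 * RTF_ROUTEINFO route.
 */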
700 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
701 		  const struct in6_addr *gwaddr)
702 {
703 	struct net *net = dev_net(dev);
704 	struct route_info *rinfo = (struct route_info *) opt;
705 	struct in6_addr prefix_buf, *prefix;
706 	unsigned int pref;
707 	unsigned long lifetime;
708 	struct rt6_info *rt;
709 
710 	if (len < sizeof(struct route_info)) {
711 		return -EINVAL;
712 	}
713 
714 	/* Sanity check for prefix_len and length */
715 	if (rinfo->length > 3) {
716 		return -EINVAL;
717 	} else if (rinfo->prefix_len > 128) {
718 		return -EINVAL;
719 	} else if (rinfo->prefix_len > 64) {
720 		if (rinfo->length < 2) {
721 			return -EINVAL;
722 		}
723 	} else if (rinfo->prefix_len > 0) {
724 		if (rinfo->length < 1) {
725 			return -EINVAL;
726 		}
727 	}
728 
729 	pref = rinfo->route_pref;
730 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
731 		return -EINVAL;
732 
733 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
734 
735 	if (rinfo->length == 3)
736 		prefix = (struct in6_addr *)rinfo->prefix;
737 	else {
738 		/* this function is safe */
739 		ipv6_addr_prefix(&prefix_buf,
740 				 (struct in6_addr *)rinfo->prefix,
741 				 rinfo->prefix_len);
742 		prefix = &prefix_buf;
743 	}
744 
745 	if (rinfo->prefix_len == 0)
746 		rt = rt6_get_dflt_router(gwaddr, dev);
747 	else
748 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
749 					gwaddr, dev->ifindex);
750 
751 	if (rt && !lifetime) {
752 		ip6_del_rt(rt);
753 		rt = NULL;
754 	}
755 
756 	if (!rt && lifetime)
757 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
758 					pref);
759 	else if (rt)
760 		rt->rt6i_flags = RTF_ROUTEINFO |
761 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
762 
763 	if (rt) {
764 		if (!addrconf_finite_timeout(lifetime))
765 			rt6_clean_expires(rt);
766 		else
767 			rt6_set_expires(rt, jiffies + HZ * lifetime);
768 
769 		ip6_rt_put(rt);
770 	}
771 	return 0;
772 }
773 #endif
774 
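/* Walk back up the fib6 tree (taking source-routing subtrees into
 * account) until a node carrying route information is found, or return
 * NULL once the tree root is reached.
 */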
775 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
776 					struct in6_addr *saddr)
777 {
778 	struct fib6_node *pn;
779 	while (1) {
780 		if (fn->fn_flags & RTN_TL_ROOT)
781 			return NULL;
782 		pn = fn->parent;
783 		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
784 			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
785 		else
786 			fn = pn;
787 		if (fn->fn_flags & RTN_RTINFO)
788 			return fn;
789 	}
790 }
791 
792 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
793 					     struct fib6_table *table,
794 					     struct flowi6 *fl6, int flags)
795 {
796 	struct fib6_node *fn;
797 	struct rt6_info *rt;
798 
799 	read_lock_bh(&table->tb6_lock);
800 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
801 restart:
802 	rt = fn->leaf;
803 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
804 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
805 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
806 	if (rt == net->ipv6.ip6_null_entry) {
807 		fn = fib6_backtrack(fn, &fl6->saddr);
808 		if (fn)
809 			goto restart;
810 	}
811 	dst_use(&rt->dst, jiffies);
812 	read_unlock_bh(&table->tb6_lock);
813 	return rt;
814 
815 }
816 
817 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
818 				    int flags)
819 {
820 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
821 }
822 EXPORT_SYMBOL_GPL(ip6_route_lookup);
823 
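/* Convenience wrapper around fib6_rule_lookup().  Illustrative only
 * (not taken from this file): a caller that just needs the route for a
 * destination might do
 *
 *	rt = rt6_lookup(net, &daddr, NULL, 0, 0);
 *	if (rt)
 *		ip6_rt_put(rt);
 *
 * The reference returned on success must be dropped with ip6_rt_put().
 */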
824 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
825 			    const struct in6_addr *saddr, int oif, int strict)
826 {
827 	struct flowi6 fl6 = {
828 		.flowi6_oif = oif,
829 		.daddr = *daddr,
830 	};
831 	struct dst_entry *dst;
832 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
833 
834 	if (saddr) {
835 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
836 		flags |= RT6_LOOKUP_F_HAS_SADDR;
837 	}
838 
839 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
840 	if (dst->error == 0)
841 		return (struct rt6_info *) dst;
842 
843 	dst_release(dst);
844 
845 	return NULL;
846 }
847 EXPORT_SYMBOL(rt6_lookup);
848 
849 /* ip6_ins_rt is called with table->tb6_lock NOT held (free).
850    It takes a new route entry; if the addition fails for any reason,
851    the route is freed. In any case, if the caller does not hold a
852    reference, it may be destroyed.
853  */
854 
855 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
856 			struct mx6_config *mxc)
857 {
858 	int err;
859 	struct fib6_table *table;
860 
861 	table = rt->rt6i_table;
862 	write_lock_bh(&table->tb6_lock);
863 	err = fib6_add(&table->tb6_root, rt, info, mxc);
864 	write_unlock_bh(&table->tb6_lock);
865 
866 	return err;
867 }
868 
869 int ip6_ins_rt(struct rt6_info *rt)
870 {
871 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
872 	struct mx6_config mxc = { .mx = NULL, };
873 
874 	return __ip6_ins_rt(rt, &info, &mxc);
875 }
876 
877 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
878 				      const struct in6_addr *daddr,
879 				      const struct in6_addr *saddr)
880 {
881 	struct rt6_info *rt;
882 
883 	/*
884 	 *	Clone the route.
885 	 */
886 
887 	rt = ip6_rt_copy(ort, daddr);
888 
889 	if (rt) {
890 		if (ort->rt6i_dst.plen != 128 &&
891 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
892 			rt->rt6i_flags |= RTF_ANYCAST;
893 
894 		rt->rt6i_flags |= RTF_CACHE;
895 
896 #ifdef CONFIG_IPV6_SUBTREES
897 		if (rt->rt6i_src.plen && saddr) {
898 			rt->rt6i_src.addr = *saddr;
899 			rt->rt6i_src.plen = 128;
900 		}
901 #endif
902 	}
903 
904 	return rt;
905 }
906 
907 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
908 					const struct in6_addr *daddr)
909 {
910 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
911 
912 	if (rt)
913 		rt->rt6i_flags |= RTF_CACHE;
914 	return rt;
915 }
916 
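/* Core policy-routing lookup: select the best route for the flow, and
 * for non-cached results clone it into an RTF_CACHE entry that is
 * inserted back into the table (retrying a few times if a concurrent
 * insert wins the race).
 */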
917 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
918 				      struct flowi6 *fl6, int flags)
919 {
920 	struct fib6_node *fn, *saved_fn;
921 	struct rt6_info *rt, *nrt;
922 	int strict = 0;
923 	int attempts = 3;
924 	int err;
925 
926 	strict |= flags & RT6_LOOKUP_F_IFACE;
927 	if (net->ipv6.devconf_all->forwarding == 0)
928 		strict |= RT6_LOOKUP_F_REACHABLE;
929 
930 redo_fib6_lookup_lock:
931 	read_lock_bh(&table->tb6_lock);
932 
933 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
934 	saved_fn = fn;
935 
936 redo_rt6_select:
937 	rt = rt6_select(fn, oif, strict);
938 	if (rt->rt6i_nsiblings)
939 		rt = rt6_multipath_select(rt, fl6, oif, strict);
940 	if (rt == net->ipv6.ip6_null_entry) {
941 		fn = fib6_backtrack(fn, &fl6->saddr);
942 		if (fn)
943 			goto redo_rt6_select;
944 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
945 			/* also consider unreachable route */
946 			strict &= ~RT6_LOOKUP_F_REACHABLE;
947 			fn = saved_fn;
948 			goto redo_rt6_select;
949 		} else {
950 			dst_hold(&rt->dst);
951 			read_unlock_bh(&table->tb6_lock);
952 			goto out2;
953 		}
954 	}
955 
956 	dst_hold(&rt->dst);
957 	read_unlock_bh(&table->tb6_lock);
958 
959 	if (rt->rt6i_flags & RTF_CACHE)
960 		goto out2;
961 
962 	if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
963 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
964 	else if (!(rt->dst.flags & DST_HOST))
965 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
966 	else
967 		goto out2;
968 
969 	ip6_rt_put(rt);
970 	rt = nrt ? : net->ipv6.ip6_null_entry;
971 
972 	dst_hold(&rt->dst);
973 	if (nrt) {
974 		err = ip6_ins_rt(nrt);
975 		if (!err)
976 			goto out2;
977 	}
978 
979 	if (--attempts <= 0)
980 		goto out2;
981 
982 	/*
983 	 * Race condition! In the gap while table->tb6_lock was
984 	 * released, someone else could have inserted this route. Relookup.
985 	 */
986 	ip6_rt_put(rt);
987 	goto redo_fib6_lookup_lock;
988 
989 out2:
990 	rt->dst.lastuse = jiffies;
991 	rt->dst.__use++;
992 
993 	return rt;
994 }
995 
996 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
997 					    struct flowi6 *fl6, int flags)
998 {
999 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1000 }
1001 
1002 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1003 						struct net_device *dev,
1004 						struct flowi6 *fl6, int flags)
1005 {
1006 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1007 		flags |= RT6_LOOKUP_F_IFACE;
1008 
1009 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1010 }
1011 
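/* Build a flowi6 from the incoming packet and attach the resulting dst
 * to the skb; the flow label is included so multipath hashing
 * (RFC 6438) can make use of it.
 */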
1012 void ip6_route_input(struct sk_buff *skb)
1013 {
1014 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1015 	struct net *net = dev_net(skb->dev);
1016 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1017 	struct flowi6 fl6 = {
1018 		.flowi6_iif = skb->dev->ifindex,
1019 		.daddr = iph->daddr,
1020 		.saddr = iph->saddr,
1021 		.flowlabel = ip6_flowinfo(iph),
1022 		.flowi6_mark = skb->mark,
1023 		.flowi6_proto = iph->nexthdr,
1024 	};
1025 
1026 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1027 }
1028 
1029 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1030 					     struct flowi6 *fl6, int flags)
1031 {
1032 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1033 }
1034 
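/* Output-path route lookup: strict interface matching is forced for
 * scope-restricted destinations or when the socket is bound to a
 * device, and the socket's source-address preferences are honoured
 * when no source address has been chosen yet.
 */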
1035 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1036 				    struct flowi6 *fl6)
1037 {
1038 	int flags = 0;
1039 
1040 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1041 
1042 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1043 		flags |= RT6_LOOKUP_F_IFACE;
1044 
1045 	if (!ipv6_addr_any(&fl6->saddr))
1046 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1047 	else if (sk)
1048 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1049 
1050 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1051 }
1052 EXPORT_SYMBOL(ip6_route_output);
1053 
1054 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1055 {
1056 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1057 	struct dst_entry *new = NULL;
1058 
1059 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1060 	if (rt) {
1061 		new = &rt->dst;
1062 
1063 		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1064 		rt6_init_peer(rt, net->ipv6.peers);
1065 
1066 		new->__use = 1;
1067 		new->input = dst_discard;
1068 		new->output = dst_discard_sk;
1069 
1070 		if (dst_metrics_read_only(&ort->dst))
1071 			new->_metrics = ort->dst._metrics;
1072 		else
1073 			dst_copy_metrics(new, &ort->dst);
1074 		rt->rt6i_idev = ort->rt6i_idev;
1075 		if (rt->rt6i_idev)
1076 			in6_dev_hold(rt->rt6i_idev);
1077 
1078 		rt->rt6i_gateway = ort->rt6i_gateway;
1079 		rt->rt6i_flags = ort->rt6i_flags;
1080 		rt->rt6i_metric = 0;
1081 
1082 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1083 #ifdef CONFIG_IPV6_SUBTREES
1084 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1085 #endif
1086 
1087 		dst_free(new);
1088 	}
1089 
1090 	dst_release(dst_orig);
1091 	return new ? new : ERR_PTR(-ENOMEM);
1092 }
1093 
1094 /*
1095  *	Destination cache support functions
1096  */
1097 
1098 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1099 {
1100 	struct rt6_info *rt;
1101 
1102 	rt = (struct rt6_info *) dst;
1103 
1104 	/* All IPv6 dsts are created with ->obsolete set to the value
1105 	 * DST_OBSOLETE_FORCE_CHK, which always forces validation calls
1106 	 * down into this function.
1107 	 */
1108 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1109 		return NULL;
1110 
1111 	if (rt6_check_expired(rt))
1112 		return NULL;
1113 
1114 	return dst;
1115 }
1116 
1117 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1118 {
1119 	struct rt6_info *rt = (struct rt6_info *) dst;
1120 
1121 	if (rt) {
1122 		if (rt->rt6i_flags & RTF_CACHE) {
1123 			if (rt6_check_expired(rt)) {
1124 				ip6_del_rt(rt);
1125 				dst = NULL;
1126 			}
1127 		} else {
1128 			dst_release(dst);
1129 			dst = NULL;
1130 		}
1131 	}
1132 	return dst;
1133 }
1134 
1135 static void ip6_link_failure(struct sk_buff *skb)
1136 {
1137 	struct rt6_info *rt;
1138 
1139 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1140 
1141 	rt = (struct rt6_info *) skb_dst(skb);
1142 	if (rt) {
1143 		if (rt->rt6i_flags & RTF_CACHE) {
1144 			dst_hold(&rt->dst);
1145 			if (ip6_del_rt(rt))
1146 				dst_free(&rt->dst);
1147 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1148 			rt->rt6i_node->fn_sernum = -1;
1149 		}
1150 	}
1151 }
1152 
1153 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1154 			       struct sk_buff *skb, u32 mtu)
1155 {
1156 	struct rt6_info *rt6 = (struct rt6_info *)dst;
1157 
1158 	dst_confirm(dst);
1159 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1160 		struct net *net = dev_net(dst->dev);
1161 
1162 		rt6->rt6i_flags |= RTF_MODIFIED;
1163 		if (mtu < IPV6_MIN_MTU)
1164 			mtu = IPV6_MIN_MTU;
1165 
1166 		dst_metric_set(dst, RTAX_MTU, mtu);
1167 		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1168 	}
1169 }
1170 
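/* Note: @mtu is expected in network byte order here; it is converted
 * with ntohl() before being applied to the cached route.
 */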
1171 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1172 		     int oif, u32 mark)
1173 {
1174 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1175 	struct dst_entry *dst;
1176 	struct flowi6 fl6;
1177 
1178 	memset(&fl6, 0, sizeof(fl6));
1179 	fl6.flowi6_oif = oif;
1180 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1181 	fl6.daddr = iph->daddr;
1182 	fl6.saddr = iph->saddr;
1183 	fl6.flowlabel = ip6_flowinfo(iph);
1184 
1185 	dst = ip6_route_output(net, NULL, &fl6);
1186 	if (!dst->error)
1187 		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1188 	dst_release(dst);
1189 }
1190 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1191 
1192 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1193 {
1194 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1195 			sk->sk_bound_dev_if, sk->sk_mark);
1196 }
1197 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1198 
1199 /* Handle redirects */
1200 struct ip6rd_flowi {
1201 	struct flowi6 fl6;
1202 	struct in6_addr gateway;
1203 };
1204 
1205 static struct rt6_info *__ip6_route_redirect(struct net *net,
1206 					     struct fib6_table *table,
1207 					     struct flowi6 *fl6,
1208 					     int flags)
1209 {
1210 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1211 	struct rt6_info *rt;
1212 	struct fib6_node *fn;
1213 
1214 	/* Get the "current" route for this destination and
1215 	 * check if the redirect has come from the appropriate router.
1216 	 *
1217 	 * RFC 4861 specifies that redirects should only be
1218 	 * accepted if they come from the nexthop to the target.
1219 	 * Due to the way the routes are chosen, this notion
1220 	 * is a bit fuzzy and one might need to check all possible
1221 	 * routes.
1222 	 */
1223 
1224 	read_lock_bh(&table->tb6_lock);
1225 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1226 restart:
1227 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1228 		if (rt6_check_expired(rt))
1229 			continue;
1230 		if (rt->dst.error)
1231 			break;
1232 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1233 			continue;
1234 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1235 			continue;
1236 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1237 			continue;
1238 		break;
1239 	}
1240 
1241 	if (!rt)
1242 		rt = net->ipv6.ip6_null_entry;
1243 	else if (rt->dst.error) {
1244 		rt = net->ipv6.ip6_null_entry;
1245 		goto out;
1246 	}
1247 
1248 	if (rt == net->ipv6.ip6_null_entry) {
1249 		fn = fib6_backtrack(fn, &fl6->saddr);
1250 		if (fn)
1251 			goto restart;
1252 	}
1253 
1254 out:
1255 	dst_hold(&rt->dst);
1256 
1257 	read_unlock_bh(&table->tb6_lock);
1258 
1259 	return rt;
1260 };
1261 
1262 static struct dst_entry *ip6_route_redirect(struct net *net,
1263 					const struct flowi6 *fl6,
1264 					const struct in6_addr *gateway)
1265 {
1266 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1267 	struct ip6rd_flowi rdfl;
1268 
1269 	rdfl.fl6 = *fl6;
1270 	rdfl.gateway = *gateway;
1271 
1272 	return fib6_rule_lookup(net, &rdfl.fl6,
1273 				flags, __ip6_route_redirect);
1274 }
1275 
1276 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1277 {
1278 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1279 	struct dst_entry *dst;
1280 	struct flowi6 fl6;
1281 
1282 	memset(&fl6, 0, sizeof(fl6));
1283 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1284 	fl6.flowi6_oif = oif;
1285 	fl6.flowi6_mark = mark;
1286 	fl6.daddr = iph->daddr;
1287 	fl6.saddr = iph->saddr;
1288 	fl6.flowlabel = ip6_flowinfo(iph);
1289 
1290 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1291 	rt6_do_redirect(dst, NULL, skb);
1292 	dst_release(dst);
1293 }
1294 EXPORT_SYMBOL_GPL(ip6_redirect);
1295 
1296 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1297 			    u32 mark)
1298 {
1299 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1300 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1301 	struct dst_entry *dst;
1302 	struct flowi6 fl6;
1303 
1304 	memset(&fl6, 0, sizeof(fl6));
1305 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1306 	fl6.flowi6_oif = oif;
1307 	fl6.flowi6_mark = mark;
1308 	fl6.daddr = msg->dest;
1309 	fl6.saddr = iph->daddr;
1310 
1311 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1312 	rt6_do_redirect(dst, NULL, skb);
1313 	dst_release(dst);
1314 }
1315 
1316 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1317 {
1318 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1319 }
1320 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1321 
1322 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1323 {
1324 	struct net_device *dev = dst->dev;
1325 	unsigned int mtu = dst_mtu(dst);
1326 	struct net *net = dev_net(dev);
1327 
1328 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1329 
1330 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1331 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1332 
1333 	/*
1334 	 * The maximal non-jumbo IPv6 payload is IPV6_MAXPLEN, and the
1335 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1336 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1337 	 * rely only on PMTU discovery".
1338 	 */
1339 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1340 		mtu = IPV6_MAXPLEN;
1341 	return mtu;
1342 }
1343 
1344 static unsigned int ip6_mtu(const struct dst_entry *dst)
1345 {
1346 	struct inet6_dev *idev;
1347 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1348 
1349 	if (mtu)
1350 		goto out;
1351 
1352 	mtu = IPV6_MIN_MTU;
1353 
1354 	rcu_read_lock();
1355 	idev = __in6_dev_get(dst->dev);
1356 	if (idev)
1357 		mtu = idev->cnf.mtu6;
1358 	rcu_read_unlock();
1359 
1360 out:
1361 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1362 }
1363 
1364 static struct dst_entry *icmp6_dst_gc_list;
1365 static DEFINE_SPINLOCK(icmp6_dst_lock);
1366 
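/* Allocate a dst used only for sending ICMPv6 packets; it is not hashed
 * into the fib tree but kept on icmp6_dst_gc_list and reclaimed by
 * icmp6_dst_gc() once its refcount drops.
 */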
1367 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1368 				  struct flowi6 *fl6)
1369 {
1370 	struct dst_entry *dst;
1371 	struct rt6_info *rt;
1372 	struct inet6_dev *idev = in6_dev_get(dev);
1373 	struct net *net = dev_net(dev);
1374 
1375 	if (unlikely(!idev))
1376 		return ERR_PTR(-ENODEV);
1377 
1378 	rt = ip6_dst_alloc(net, dev, 0, NULL);
1379 	if (unlikely(!rt)) {
1380 		in6_dev_put(idev);
1381 		dst = ERR_PTR(-ENOMEM);
1382 		goto out;
1383 	}
1384 
1385 	rt->dst.flags |= DST_HOST;
1386 	rt->dst.output  = ip6_output;
1387 	atomic_set(&rt->dst.__refcnt, 1);
1388 	rt->rt6i_gateway  = fl6->daddr;
1389 	rt->rt6i_dst.addr = fl6->daddr;
1390 	rt->rt6i_dst.plen = 128;
1391 	rt->rt6i_idev     = idev;
1392 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1393 
1394 	spin_lock_bh(&icmp6_dst_lock);
1395 	rt->dst.next = icmp6_dst_gc_list;
1396 	icmp6_dst_gc_list = &rt->dst;
1397 	spin_unlock_bh(&icmp6_dst_lock);
1398 
1399 	fib6_force_start_gc(net);
1400 
1401 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1402 
1403 out:
1404 	return dst;
1405 }
1406 
1407 int icmp6_dst_gc(void)
1408 {
1409 	struct dst_entry *dst, **pprev;
1410 	int more = 0;
1411 
1412 	spin_lock_bh(&icmp6_dst_lock);
1413 	pprev = &icmp6_dst_gc_list;
1414 
1415 	while ((dst = *pprev) != NULL) {
1416 		if (!atomic_read(&dst->__refcnt)) {
1417 			*pprev = dst->next;
1418 			dst_free(dst);
1419 		} else {
1420 			pprev = &dst->next;
1421 			++more;
1422 		}
1423 	}
1424 
1425 	spin_unlock_bh(&icmp6_dst_lock);
1426 
1427 	return more;
1428 }
1429 
1430 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1431 			    void *arg)
1432 {
1433 	struct dst_entry *dst, **pprev;
1434 
1435 	spin_lock_bh(&icmp6_dst_lock);
1436 	pprev = &icmp6_dst_gc_list;
1437 	while ((dst = *pprev) != NULL) {
1438 		struct rt6_info *rt = (struct rt6_info *) dst;
1439 		if (func(rt, arg)) {
1440 			*pprev = dst->next;
1441 			dst_free(dst);
1442 		} else {
1443 			pprev = &dst->next;
1444 		}
1445 	}
1446 	spin_unlock_bh(&icmp6_dst_lock);
1447 }
1448 
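/* dst_ops garbage collector: run fib6 GC when the cache has grown past
 * ip6_rt_max_size or enough time has passed since the last run, then
 * report whether the table is still over the limit.
 */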
1449 static int ip6_dst_gc(struct dst_ops *ops)
1450 {
1451 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1452 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1453 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1454 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1455 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1456 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1457 	int entries;
1458 
1459 	entries = dst_entries_get_fast(ops);
1460 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1461 	    entries <= rt_max_size)
1462 		goto out;
1463 
1464 	net->ipv6.ip6_rt_gc_expire++;
1465 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1466 	entries = dst_entries_get_slow(ops);
1467 	if (entries < ops->gc_thresh)
1468 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1469 out:
1470 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1471 	return entries > rt_max_size;
1472 }
1473 
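/* Translate the RTA_METRICS netlink attributes from @cfg into the flat
 * metrics array carried by @mxc; RTAX_CC_ALGO names are converted to
 * their congestion-control keys.
 */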
1474 static int ip6_convert_metrics(struct mx6_config *mxc,
1475 			       const struct fib6_config *cfg)
1476 {
1477 	struct nlattr *nla;
1478 	int remaining;
1479 	u32 *mp;
1480 
1481 	if (cfg->fc_mx == NULL)
1482 		return 0;
1483 
1484 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1485 	if (unlikely(!mp))
1486 		return -ENOMEM;
1487 
1488 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1489 		int type = nla_type(nla);
1490 
1491 		if (type) {
1492 			u32 val;
1493 
1494 			if (unlikely(type > RTAX_MAX))
1495 				goto err;
1496 			if (type == RTAX_CC_ALGO) {
1497 				char tmp[TCP_CA_NAME_MAX];
1498 
1499 				nla_strlcpy(tmp, nla, sizeof(tmp));
1500 				val = tcp_ca_get_key_by_name(tmp);
1501 				if (val == TCP_CA_UNSPEC)
1502 					goto err;
1503 			} else {
1504 				val = nla_get_u32(nla);
1505 			}
1506 
1507 			mp[type - 1] = val;
1508 			__set_bit(type - 1, mxc->mx_valid);
1509 		}
1510 	}
1511 
1512 	mxc->mx = mp;
1513 
1514 	return 0;
1515  err:
1516 	kfree(mp);
1517 	return -EINVAL;
1518 }
1519 
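/* Illustrative only: a minimal fib6_config for an "ip -6 route add"
 * style addition would set at least fc_table, fc_metric, fc_dst and
 * fc_dst_len, fc_ifindex and fc_flags; see rt6_add_route_info() below
 * for a concrete in-kernel example.
 */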
1520 int ip6_route_add(struct fib6_config *cfg)
1521 {
1522 	int err;
1523 	struct net *net = cfg->fc_nlinfo.nl_net;
1524 	struct rt6_info *rt = NULL;
1525 	struct net_device *dev = NULL;
1526 	struct inet6_dev *idev = NULL;
1527 	struct fib6_table *table;
1528 	struct mx6_config mxc = { .mx = NULL, };
1529 	int addr_type;
1530 
1531 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1532 		return -EINVAL;
1533 #ifndef CONFIG_IPV6_SUBTREES
1534 	if (cfg->fc_src_len)
1535 		return -EINVAL;
1536 #endif
1537 	if (cfg->fc_ifindex) {
1538 		err = -ENODEV;
1539 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1540 		if (!dev)
1541 			goto out;
1542 		idev = in6_dev_get(dev);
1543 		if (!idev)
1544 			goto out;
1545 	}
1546 
1547 	if (cfg->fc_metric == 0)
1548 		cfg->fc_metric = IP6_RT_PRIO_USER;
1549 
1550 	err = -ENOBUFS;
1551 	if (cfg->fc_nlinfo.nlh &&
1552 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1553 		table = fib6_get_table(net, cfg->fc_table);
1554 		if (!table) {
1555 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1556 			table = fib6_new_table(net, cfg->fc_table);
1557 		}
1558 	} else {
1559 		table = fib6_new_table(net, cfg->fc_table);
1560 	}
1561 
1562 	if (!table)
1563 		goto out;
1564 
1565 	rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1566 
1567 	if (!rt) {
1568 		err = -ENOMEM;
1569 		goto out;
1570 	}
1571 
1572 	if (cfg->fc_flags & RTF_EXPIRES)
1573 		rt6_set_expires(rt, jiffies +
1574 				clock_t_to_jiffies(cfg->fc_expires));
1575 	else
1576 		rt6_clean_expires(rt);
1577 
1578 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1579 		cfg->fc_protocol = RTPROT_BOOT;
1580 	rt->rt6i_protocol = cfg->fc_protocol;
1581 
1582 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1583 
1584 	if (addr_type & IPV6_ADDR_MULTICAST)
1585 		rt->dst.input = ip6_mc_input;
1586 	else if (cfg->fc_flags & RTF_LOCAL)
1587 		rt->dst.input = ip6_input;
1588 	else
1589 		rt->dst.input = ip6_forward;
1590 
1591 	rt->dst.output = ip6_output;
1592 
1593 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1594 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1595 	if (rt->rt6i_dst.plen == 128) {
1596 		rt->dst.flags |= DST_HOST;
1597 		dst_metrics_set_force_overwrite(&rt->dst);
1598 	}
1599 
1600 #ifdef CONFIG_IPV6_SUBTREES
1601 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1602 	rt->rt6i_src.plen = cfg->fc_src_len;
1603 #endif
1604 
1605 	rt->rt6i_metric = cfg->fc_metric;
1606 
1607 	/* We cannot add true routes via loopback here; they would result
1608 	   in kernel looping. Promote them to reject routes instead.
1609 	 */
1610 	if ((cfg->fc_flags & RTF_REJECT) ||
1611 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1612 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1613 	     !(cfg->fc_flags & RTF_LOCAL))) {
1614 		/* hold loopback dev/idev if we haven't done so. */
1615 		if (dev != net->loopback_dev) {
1616 			if (dev) {
1617 				dev_put(dev);
1618 				in6_dev_put(idev);
1619 			}
1620 			dev = net->loopback_dev;
1621 			dev_hold(dev);
1622 			idev = in6_dev_get(dev);
1623 			if (!idev) {
1624 				err = -ENODEV;
1625 				goto out;
1626 			}
1627 		}
1628 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1629 		switch (cfg->fc_type) {
1630 		case RTN_BLACKHOLE:
1631 			rt->dst.error = -EINVAL;
1632 			rt->dst.output = dst_discard_sk;
1633 			rt->dst.input = dst_discard;
1634 			break;
1635 		case RTN_PROHIBIT:
1636 			rt->dst.error = -EACCES;
1637 			rt->dst.output = ip6_pkt_prohibit_out;
1638 			rt->dst.input = ip6_pkt_prohibit;
1639 			break;
1640 		case RTN_THROW:
1641 		default:
1642 			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1643 					: -ENETUNREACH;
1644 			rt->dst.output = ip6_pkt_discard_out;
1645 			rt->dst.input = ip6_pkt_discard;
1646 			break;
1647 		}
1648 		goto install_route;
1649 	}
1650 
1651 	if (cfg->fc_flags & RTF_GATEWAY) {
1652 		const struct in6_addr *gw_addr;
1653 		int gwa_type;
1654 
1655 		gw_addr = &cfg->fc_gateway;
1656 		rt->rt6i_gateway = *gw_addr;
1657 		gwa_type = ipv6_addr_type(gw_addr);
1658 
1659 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1660 			struct rt6_info *grt;
1661 
1662 			/* IPv6 strictly prohibits using non-link-local
1663 			   addresses as the nexthop address.
1664 			   Otherwise, the router will not be able to send redirects.
1665 			   That is very good, but in some (rare!) circumstances
1666 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1667 			   some exceptions. --ANK
1668 			 */
1669 			err = -EINVAL;
1670 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1671 				goto out;
1672 
1673 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1674 
1675 			err = -EHOSTUNREACH;
1676 			if (!grt)
1677 				goto out;
1678 			if (dev) {
1679 				if (dev != grt->dst.dev) {
1680 					ip6_rt_put(grt);
1681 					goto out;
1682 				}
1683 			} else {
1684 				dev = grt->dst.dev;
1685 				idev = grt->rt6i_idev;
1686 				dev_hold(dev);
1687 				in6_dev_hold(grt->rt6i_idev);
1688 			}
1689 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1690 				err = 0;
1691 			ip6_rt_put(grt);
1692 
1693 			if (err)
1694 				goto out;
1695 		}
1696 		err = -EINVAL;
1697 		if (!dev || (dev->flags & IFF_LOOPBACK))
1698 			goto out;
1699 	}
1700 
1701 	err = -ENODEV;
1702 	if (!dev)
1703 		goto out;
1704 
1705 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1706 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1707 			err = -EINVAL;
1708 			goto out;
1709 		}
1710 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1711 		rt->rt6i_prefsrc.plen = 128;
1712 	} else
1713 		rt->rt6i_prefsrc.plen = 0;
1714 
1715 	rt->rt6i_flags = cfg->fc_flags;
1716 
1717 install_route:
1718 	rt->dst.dev = dev;
1719 	rt->rt6i_idev = idev;
1720 	rt->rt6i_table = table;
1721 
1722 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1723 
1724 	err = ip6_convert_metrics(&mxc, cfg);
1725 	if (err)
1726 		goto out;
1727 
1728 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1729 
1730 	kfree(mxc.mx);
1731 	return err;
1732 out:
1733 	if (dev)
1734 		dev_put(dev);
1735 	if (idev)
1736 		in6_dev_put(idev);
1737 	if (rt)
1738 		dst_free(&rt->dst);
1739 	return err;
1740 }
1741 
1742 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1743 {
1744 	int err;
1745 	struct fib6_table *table;
1746 	struct net *net = dev_net(rt->dst.dev);
1747 
1748 	if (rt == net->ipv6.ip6_null_entry) {
1749 		err = -ENOENT;
1750 		goto out;
1751 	}
1752 
1753 	table = rt->rt6i_table;
1754 	write_lock_bh(&table->tb6_lock);
1755 	err = fib6_del(rt, info);
1756 	write_unlock_bh(&table->tb6_lock);
1757 
1758 out:
1759 	ip6_rt_put(rt);
1760 	return err;
1761 }
1762 
1763 int ip6_del_rt(struct rt6_info *rt)
1764 {
1765 	struct nl_info info = {
1766 		.nl_net = dev_net(rt->dst.dev),
1767 	};
1768 	return __ip6_del_rt(rt, &info);
1769 }
1770 
1771 static int ip6_route_del(struct fib6_config *cfg)
1772 {
1773 	struct fib6_table *table;
1774 	struct fib6_node *fn;
1775 	struct rt6_info *rt;
1776 	int err = -ESRCH;
1777 
1778 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1779 	if (!table)
1780 		return err;
1781 
1782 	read_lock_bh(&table->tb6_lock);
1783 
1784 	fn = fib6_locate(&table->tb6_root,
1785 			 &cfg->fc_dst, cfg->fc_dst_len,
1786 			 &cfg->fc_src, cfg->fc_src_len);
1787 
1788 	if (fn) {
1789 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1790 			if (cfg->fc_ifindex &&
1791 			    (!rt->dst.dev ||
1792 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
1793 				continue;
1794 			if (cfg->fc_flags & RTF_GATEWAY &&
1795 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1796 				continue;
1797 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1798 				continue;
1799 			dst_hold(&rt->dst);
1800 			read_unlock_bh(&table->tb6_lock);
1801 
1802 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1803 		}
1804 	}
1805 	read_unlock_bh(&table->tb6_lock);
1806 
1807 	return err;
1808 }
1809 
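/* Process an ICMPv6 Redirect for an established dst: validate it per
 * RFC 4861, update the neighbour cache for the new first hop, then clone
 * the route with the new gateway and insert it as an RTF_CACHE entry.
 */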
1810 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1811 {
1812 	struct net *net = dev_net(skb->dev);
1813 	struct netevent_redirect netevent;
1814 	struct rt6_info *rt, *nrt = NULL;
1815 	struct ndisc_options ndopts;
1816 	struct inet6_dev *in6_dev;
1817 	struct neighbour *neigh;
1818 	struct rd_msg *msg;
1819 	int optlen, on_link;
1820 	u8 *lladdr;
1821 
1822 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1823 	optlen -= sizeof(*msg);
1824 
1825 	if (optlen < 0) {
1826 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1827 		return;
1828 	}
1829 
1830 	msg = (struct rd_msg *)icmp6_hdr(skb);
1831 
1832 	if (ipv6_addr_is_multicast(&msg->dest)) {
1833 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1834 		return;
1835 	}
1836 
1837 	on_link = 0;
1838 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1839 		on_link = 1;
1840 	} else if (ipv6_addr_type(&msg->target) !=
1841 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1842 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1843 		return;
1844 	}
1845 
1846 	in6_dev = __in6_dev_get(skb->dev);
1847 	if (!in6_dev)
1848 		return;
1849 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1850 		return;
1851 
1852 	/* RFC2461 8.1:
1853 	 *	The IP source address of the Redirect MUST be the same as the current
1854 	 *	first-hop router for the specified ICMP Destination Address.
1855 	 */
1856 
1857 	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1858 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1859 		return;
1860 	}
1861 
1862 	lladdr = NULL;
1863 	if (ndopts.nd_opts_tgt_lladdr) {
1864 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1865 					     skb->dev);
1866 		if (!lladdr) {
1867 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1868 			return;
1869 		}
1870 	}
1871 
1872 	rt = (struct rt6_info *) dst;
1873 	if (rt == net->ipv6.ip6_null_entry) {
1874 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1875 		return;
1876 	}
1877 
1878 	/* Redirect received -> path was valid.
1879 	 * Redirects are sent only in response to data packets,
1880 	 * so this nexthop apparently is reachable. --ANK
1881 	 */
1882 	dst_confirm(&rt->dst);
1883 
1884 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1885 	if (!neigh)
1886 		return;
1887 
1888 	/*
1889 	 *	We have finally decided to accept it.
1890 	 */
1891 
1892 	neigh_update(neigh, lladdr, NUD_STALE,
1893 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1894 		     NEIGH_UPDATE_F_OVERRIDE|
1895 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1896 				     NEIGH_UPDATE_F_ISROUTER))
1897 		     );
1898 
1899 	nrt = ip6_rt_copy(rt, &msg->dest);
1900 	if (!nrt)
1901 		goto out;
1902 
1903 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1904 	if (on_link)
1905 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1906 
1907 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1908 
1909 	if (ip6_ins_rt(nrt))
1910 		goto out;
1911 
1912 	netevent.old = &rt->dst;
1913 	netevent.new = &nrt->dst;
1914 	netevent.daddr = &msg->dest;
1915 	netevent.neigh = neigh;
1916 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1917 
1918 	if (rt->rt6i_flags & RTF_CACHE) {
1919 		rt = (struct rt6_info *) dst_clone(&rt->dst);
1920 		ip6_del_rt(rt);
1921 	}
1922 
1923 out:
1924 	neigh_release(neigh);
1925 }
1926 
1927 /*
1928  *	Misc support functions
1929  */
1930 
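/* Copy an existing route into a freshly allocated host (/128) entry for
 * @dest, preserving device, metrics and flags; used when creating clones
 * and redirect routes.
 */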
1931 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1932 				    const struct in6_addr *dest)
1933 {
1934 	struct net *net = dev_net(ort->dst.dev);
1935 	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1936 					    ort->rt6i_table);
1937 
1938 	if (rt) {
1939 		rt->dst.input = ort->dst.input;
1940 		rt->dst.output = ort->dst.output;
1941 		rt->dst.flags |= DST_HOST;
1942 
1943 		rt->rt6i_dst.addr = *dest;
1944 		rt->rt6i_dst.plen = 128;
1945 		dst_copy_metrics(&rt->dst, &ort->dst);
1946 		rt->dst.error = ort->dst.error;
1947 		rt->rt6i_idev = ort->rt6i_idev;
1948 		if (rt->rt6i_idev)
1949 			in6_dev_hold(rt->rt6i_idev);
1950 		rt->dst.lastuse = jiffies;
1951 
1952 		if (ort->rt6i_flags & RTF_GATEWAY)
1953 			rt->rt6i_gateway = ort->rt6i_gateway;
1954 		else
1955 			rt->rt6i_gateway = *dest;
1956 		rt->rt6i_flags = ort->rt6i_flags;
1957 		rt6_set_from(rt, ort);
1958 		rt->rt6i_metric = 0;
1959 
1960 #ifdef CONFIG_IPV6_SUBTREES
1961 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1962 #endif
1963 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1964 		rt->rt6i_table = ort->rt6i_table;
1965 	}
1966 	return rt;
1967 }
1968 
1969 #ifdef CONFIG_IPV6_ROUTE_INFO
1970 static struct rt6_info *rt6_get_route_info(struct net *net,
1971 					   const struct in6_addr *prefix, int prefixlen,
1972 					   const struct in6_addr *gwaddr, int ifindex)
1973 {
1974 	struct fib6_node *fn;
1975 	struct rt6_info *rt = NULL;
1976 	struct fib6_table *table;
1977 
1978 	table = fib6_get_table(net, RT6_TABLE_INFO);
1979 	if (!table)
1980 		return NULL;
1981 
1982 	read_lock_bh(&table->tb6_lock);
1983 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1984 	if (!fn)
1985 		goto out;
1986 
1987 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1988 		if (rt->dst.dev->ifindex != ifindex)
1989 			continue;
1990 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1991 			continue;
1992 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1993 			continue;
1994 		dst_hold(&rt->dst);
1995 		break;
1996 	}
1997 out:
1998 	read_unlock_bh(&table->tb6_lock);
1999 	return rt;
2000 }
2001 
2002 static struct rt6_info *rt6_add_route_info(struct net *net,
2003 					   const struct in6_addr *prefix, int prefixlen,
2004 					   const struct in6_addr *gwaddr, int ifindex,
2005 					   unsigned int pref)
2006 {
2007 	struct fib6_config cfg = {
2008 		.fc_table	= RT6_TABLE_INFO,
2009 		.fc_metric	= IP6_RT_PRIO_USER,
2010 		.fc_ifindex	= ifindex,
2011 		.fc_dst_len	= prefixlen,
2012 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2013 				  RTF_UP | RTF_PREF(pref),
2014 		.fc_nlinfo.portid = 0,
2015 		.fc_nlinfo.nlh = NULL,
2016 		.fc_nlinfo.nl_net = net,
2017 	};
2018 
2019 	cfg.fc_dst = *prefix;
2020 	cfg.fc_gateway = *gwaddr;
2021 
2022 	/* We should treat it as a default route if prefix length is 0. */
2023 	if (!prefixlen)
2024 		cfg.fc_flags |= RTF_DEFAULT;
2025 
2026 	ip6_route_add(&cfg);
2027 
2028 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2029 }
2030 #endif
2031 
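/* Find the RA-learned default route via gateway @addr on @dev in
 * RT6_TABLE_DFLT.  Returns the route with a reference held, or NULL.
 */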
2032 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2033 {
2034 	struct rt6_info *rt;
2035 	struct fib6_table *table;
2036 
2037 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2038 	if (!table)
2039 		return NULL;
2040 
2041 	read_lock_bh(&table->tb6_lock);
2042 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2043 		if (dev == rt->dst.dev &&
2044 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2045 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2046 			break;
2047 	}
2048 	if (rt)
2049 		dst_hold(&rt->dst);
2050 	read_unlock_bh(&table->tb6_lock);
2051 	return rt;
2052 }
2053 
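/* Add a default route through @gwaddr on @dev, as learned from a
 * Router Advertisement, and return the new table entry.
 */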
2054 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2055 				     struct net_device *dev,
2056 				     unsigned int pref)
2057 {
2058 	struct fib6_config cfg = {
2059 		.fc_table	= RT6_TABLE_DFLT,
2060 		.fc_metric	= IP6_RT_PRIO_USER,
2061 		.fc_ifindex	= dev->ifindex,
2062 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2063 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2064 		.fc_nlinfo.portid = 0,
2065 		.fc_nlinfo.nlh = NULL,
2066 		.fc_nlinfo.nl_net = dev_net(dev),
2067 	};
2068 
2069 	cfg.fc_gateway = *gwaddr;
2070 
2071 	ip6_route_add(&cfg);
2072 
2073 	return rt6_get_dflt_router(gwaddr, dev);
2074 }
2075 
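/* Flush default routes learned via Router Advertisements, skipping
 * interfaces configured with accept_ra == 2.
 */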
2076 void rt6_purge_dflt_routers(struct net *net)
2077 {
2078 	struct rt6_info *rt;
2079 	struct fib6_table *table;
2080 
2081 	/* NOTE: Keep consistent with rt6_get_dflt_router */
2082 	table = fib6_get_table(net, RT6_TABLE_DFLT);
2083 	if (!table)
2084 		return;
2085 
2086 restart:
2087 	read_lock_bh(&table->tb6_lock);
2088 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2089 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2090 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2091 			dst_hold(&rt->dst);
2092 			read_unlock_bh(&table->tb6_lock);
2093 			ip6_del_rt(rt);
2094 			goto restart;
2095 		}
2096 	}
2097 	read_unlock_bh(&table->tb6_lock);
2098 }
2099 
2100 static void rtmsg_to_fib6_config(struct net *net,
2101 				 struct in6_rtmsg *rtmsg,
2102 				 struct fib6_config *cfg)
2103 {
2104 	memset(cfg, 0, sizeof(*cfg));
2105 
2106 	cfg->fc_table = RT6_TABLE_MAIN;
2107 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2108 	cfg->fc_metric = rtmsg->rtmsg_metric;
2109 	cfg->fc_expires = rtmsg->rtmsg_info;
2110 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2111 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2112 	cfg->fc_flags = rtmsg->rtmsg_flags;
2113 
2114 	cfg->fc_nlinfo.nl_net = net;
2115 
2116 	cfg->fc_dst = rtmsg->rtmsg_dst;
2117 	cfg->fc_src = rtmsg->rtmsg_src;
2118 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2119 }
2120 
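/* Handle the SIOCADDRT/SIOCDELRT ioctls: copy the in6_rtmsg from
 * userspace, convert it to a fib6_config and add or delete the route
 * under the RTNL lock.
 */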
2121 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2122 {
2123 	struct fib6_config cfg;
2124 	struct in6_rtmsg rtmsg;
2125 	int err;
2126 
2127 	switch (cmd) {
2128 	case SIOCADDRT:		/* Add a route */
2129 	case SIOCDELRT:		/* Delete a route */
2130 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2131 			return -EPERM;
2132 		err = copy_from_user(&rtmsg, arg,
2133 				     sizeof(struct in6_rtmsg));
2134 		if (err)
2135 			return -EFAULT;
2136 
2137 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2138 
2139 		rtnl_lock();
2140 		switch (cmd) {
2141 		case SIOCADDRT:
2142 			err = ip6_route_add(&cfg);
2143 			break;
2144 		case SIOCDELRT:
2145 			err = ip6_route_del(&cfg);
2146 			break;
2147 		default:
2148 			err = -EINVAL;
2149 		}
2150 		rtnl_unlock();
2151 
2152 		return err;
2153 	}
2154 
2155 	return -EINVAL;
2156 }
2157 
2158 /*
2159  *	Drop the packet on the floor
2160  */
2161 
2162 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2163 {
2164 	int type;
2165 	struct dst_entry *dst = skb_dst(skb);
2166 	switch (ipstats_mib_noroutes) {
2167 	case IPSTATS_MIB_INNOROUTES:
2168 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2169 		if (type == IPV6_ADDR_ANY) {
2170 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2171 				      IPSTATS_MIB_INADDRERRORS);
2172 			break;
2173 		}
2174 		/* FALLTHROUGH */
2175 	case IPSTATS_MIB_OUTNOROUTES:
2176 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2177 			      ipstats_mib_noroutes);
2178 		break;
2179 	}
2180 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2181 	kfree_skb(skb);
2182 	return 0;
2183 }
2184 
2185 static int ip6_pkt_discard(struct sk_buff *skb)
2186 {
2187 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2188 }
2189 
2190 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2191 {
2192 	skb->dev = skb_dst(skb)->dev;
2193 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2194 }
2195 
2196 static int ip6_pkt_prohibit(struct sk_buff *skb)
2197 {
2198 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2199 }
2200 
2201 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2202 {
2203 	skb->dev = skb_dst(skb)->dev;
2204 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2205 }
2206 
2207 /*
2208  *	Allocate a dst for a local (unicast / anycast) address.
2209  */
2210 
2211 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2212 				    const struct in6_addr *addr,
2213 				    bool anycast)
2214 {
2215 	struct net *net = dev_net(idev->dev);
2216 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2217 					    DST_NOCOUNT, NULL);
2218 	if (!rt)
2219 		return ERR_PTR(-ENOMEM);
2220 
2221 	in6_dev_hold(idev);
2222 
2223 	rt->dst.flags |= DST_HOST;
2224 	rt->dst.input = ip6_input;
2225 	rt->dst.output = ip6_output;
2226 	rt->rt6i_idev = idev;
2227 
2228 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2229 	if (anycast)
2230 		rt->rt6i_flags |= RTF_ANYCAST;
2231 	else
2232 		rt->rt6i_flags |= RTF_LOCAL;
2233 
2234 	rt->rt6i_gateway  = *addr;
2235 	rt->rt6i_dst.addr = *addr;
2236 	rt->rt6i_dst.plen = 128;
2237 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2238 
2239 	atomic_set(&rt->dst.__refcnt, 1);
2240 
2241 	return rt;
2242 }
2243 
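/* Select a source address for @daddr: use the route's preferred source
 * address if one is set, otherwise fall back to ipv6_dev_get_saddr().
 */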
2244 int ip6_route_get_saddr(struct net *net,
2245 			struct rt6_info *rt,
2246 			const struct in6_addr *daddr,
2247 			unsigned int prefs,
2248 			struct in6_addr *saddr)
2249 {
2250 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
2251 	int err = 0;
2252 	if (rt->rt6i_prefsrc.plen)
2253 		*saddr = rt->rt6i_prefsrc.addr;
2254 	else
2255 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2256 					 daddr, prefs, saddr);
2257 	return err;
2258 }
2259 
2260 /* Remove a deleted IP address from prefsrc entries. */
2261 struct arg_dev_net_ip {
2262 	struct net_device *dev;
2263 	struct net *net;
2264 	struct in6_addr *addr;
2265 };
2266 
2267 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2268 {
2269 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2270 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2271 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2272 
2273 	if (((void *)rt->dst.dev == dev || !dev) &&
2274 	    rt != net->ipv6.ip6_null_entry &&
2275 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2276 		/* remove prefsrc entry */
2277 		rt->rt6i_prefsrc.plen = 0;
2278 	}
2279 	return 0;
2280 }
2281 
2282 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2283 {
2284 	struct net *net = dev_net(ifp->idev->dev);
2285 	struct arg_dev_net_ip adni = {
2286 		.dev = ifp->idev->dev,
2287 		.net = net,
2288 		.addr = &ifp->addr,
2289 	};
2290 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2291 }
2292 
2293 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2294 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2295 
2296 /* Remove routers and update dst entries when a gateway turns into a host. */
2297 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2298 {
2299 	struct in6_addr *gateway = (struct in6_addr *)arg;
2300 
2301 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2302 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2303 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2304 		return -1;
2305 	}
2306 	return 0;
2307 }
2308 
2309 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2310 {
2311 	fib6_clean_all(net, fib6_clean_tohost, gateway);
2312 }
2313 
2314 struct arg_dev_net {
2315 	struct net_device *dev;
2316 	struct net *net;
2317 };
2318 
2319 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2320 {
2321 	const struct arg_dev_net *adn = arg;
2322 	const struct net_device *dev = adn->dev;
2323 
2324 	if ((rt->dst.dev == dev || !dev) &&
2325 	    rt != adn->net->ipv6.ip6_null_entry)
2326 		return -1;
2327 
2328 	return 0;
2329 }
2330 
2331 void rt6_ifdown(struct net *net, struct net_device *dev)
2332 {
2333 	struct arg_dev_net adn = {
2334 		.dev = dev,
2335 		.net = net,
2336 	};
2337 
2338 	fib6_clean_all(net, fib6_ifdown, &adn);
2339 	icmp6_clean_all(fib6_ifdown, &adn);
2340 }
2341 
2342 struct rt6_mtu_change_arg {
2343 	struct net_device *dev;
2344 	unsigned int mtu;
2345 };
2346 
2347 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2348 {
2349 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2350 	struct inet6_dev *idev;
2351 
2352 	/* In IPv6, PMTU discovery is not optional,
2353 	   so an RTAX_MTU lock cannot disable it.
2354 	   We still use this lock to block changes
2355 	   caused by addrconf/ndisc.
2356 	*/
2357 
2358 	idev = __in6_dev_get(arg->dev);
2359 	if (!idev)
2360 		return 0;
2361 
2362 	/* After an administrative MTU increase there is no way to discover
2363 	   an IPv6 PMTU increase, so the PMTU must be updated here.
2364 	   Since RFC 1981 doesn't cover administrative MTU increases,
2365 	   updating the PMTU on increase is a MUST (e.g. jumbo frames).
2366 	 */
2367 	/*
2368 	   If the new MTU is less than the route PMTU, this new MTU will be
2369 	   the lowest MTU in the path; update the route PMTU to reflect the
2370 	   decrease.  If the new MTU is greater than the route PMTU, and the
2371 	   old MTU was the lowest MTU in the path, update the route PMTU
2372 	   to reflect the increase.  In this case, if another node on the
2373 	   path still has the lowest MTU, a Packet Too Big message will
2374 	   trigger PMTU discovery again.
2375 	 */
2376 	if (rt->dst.dev == arg->dev &&
2377 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2378 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2379 	     (dst_mtu(&rt->dst) < arg->mtu &&
2380 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2381 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2382 	}
2383 	return 0;
2384 }
2385 
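/* Propagate a device MTU change to all routes using @dev; the
 * per-route update is done by rt6_mtu_change_route() above.
 */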
2386 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2387 {
2388 	struct rt6_mtu_change_arg arg = {
2389 		.dev = dev,
2390 		.mtu = mtu,
2391 	};
2392 
2393 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2394 }
2395 
2396 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2397 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2398 	[RTA_OIF]               = { .type = NLA_U32 },
2399 	[RTA_IIF]		= { .type = NLA_U32 },
2400 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2401 	[RTA_METRICS]           = { .type = NLA_NESTED },
2402 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2403 };
2404 
2405 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2406 			      struct fib6_config *cfg)
2407 {
2408 	struct rtmsg *rtm;
2409 	struct nlattr *tb[RTA_MAX+1];
2410 	int err;
2411 
2412 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2413 	if (err < 0)
2414 		goto errout;
2415 
2416 	err = -EINVAL;
2417 	rtm = nlmsg_data(nlh);
2418 	memset(cfg, 0, sizeof(*cfg));
2419 
2420 	cfg->fc_table = rtm->rtm_table;
2421 	cfg->fc_dst_len = rtm->rtm_dst_len;
2422 	cfg->fc_src_len = rtm->rtm_src_len;
2423 	cfg->fc_flags = RTF_UP;
2424 	cfg->fc_protocol = rtm->rtm_protocol;
2425 	cfg->fc_type = rtm->rtm_type;
2426 
2427 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2428 	    rtm->rtm_type == RTN_BLACKHOLE ||
2429 	    rtm->rtm_type == RTN_PROHIBIT ||
2430 	    rtm->rtm_type == RTN_THROW)
2431 		cfg->fc_flags |= RTF_REJECT;
2432 
2433 	if (rtm->rtm_type == RTN_LOCAL)
2434 		cfg->fc_flags |= RTF_LOCAL;
2435 
2436 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2437 	cfg->fc_nlinfo.nlh = nlh;
2438 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2439 
2440 	if (tb[RTA_GATEWAY]) {
2441 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2442 		cfg->fc_flags |= RTF_GATEWAY;
2443 	}
2444 
2445 	if (tb[RTA_DST]) {
2446 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2447 
2448 		if (nla_len(tb[RTA_DST]) < plen)
2449 			goto errout;
2450 
2451 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2452 	}
2453 
2454 	if (tb[RTA_SRC]) {
2455 		int plen = (rtm->rtm_src_len + 7) >> 3;
2456 
2457 		if (nla_len(tb[RTA_SRC]) < plen)
2458 			goto errout;
2459 
2460 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2461 	}
2462 
2463 	if (tb[RTA_PREFSRC])
2464 		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2465 
2466 	if (tb[RTA_OIF])
2467 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2468 
2469 	if (tb[RTA_PRIORITY])
2470 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2471 
2472 	if (tb[RTA_METRICS]) {
2473 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2474 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2475 	}
2476 
2477 	if (tb[RTA_TABLE])
2478 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2479 
2480 	if (tb[RTA_MULTIPATH]) {
2481 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2482 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2483 	}
2484 
2485 	err = 0;
2486 errout:
2487 	return err;
2488 }
2489 
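/* Walk the RTA_MULTIPATH nexthop list and add (@add != 0) or delete
 * each nexthop as an individual route.  If an add fails, the nexthops
 * already installed are rolled back by restarting in delete mode.
 */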
2490 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2491 {
2492 	struct fib6_config r_cfg;
2493 	struct rtnexthop *rtnh;
2494 	int remaining;
2495 	int attrlen;
2496 	int err = 0, last_err = 0;
2497 
2498 beginning:
2499 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2500 	remaining = cfg->fc_mp_len;
2501 
2502 	/* Parse a Multipath Entry */
2503 	while (rtnh_ok(rtnh, remaining)) {
2504 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2505 		if (rtnh->rtnh_ifindex)
2506 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2507 
2508 		attrlen = rtnh_attrlen(rtnh);
2509 		if (attrlen > 0) {
2510 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2511 
2512 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2513 			if (nla) {
2514 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2515 				r_cfg.fc_flags |= RTF_GATEWAY;
2516 			}
2517 		}
2518 		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2519 		if (err) {
2520 			last_err = err;
2521 			/* If we are trying to remove a route, do not stop the
2522 			 * loop when ip6_route_del() fails (the next hop may
2523 			 * already be gone); we should try to remove all next hops.
2524 			 */
2525 			if (add) {
2526 				/* If add fails, we should try to delete all
2527 				 * next hops that have been already added.
2528 				 */
2529 				add = 0;
2530 				goto beginning;
2531 			}
2532 		}
2533 		/* Because each route is added as a single route, we remove
2534 		 * this flag after the first nexthop (if there is a collision,
2535 		 * we have already failed to add the first nexthop:
2536 		 * fib6_add_rt2node() has rejected it).
2537 		 */
2538 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2539 		rtnh = rtnh_next(rtnh, &remaining);
2540 	}
2541 
2542 	return last_err;
2543 }
2544 
2545 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2546 {
2547 	struct fib6_config cfg;
2548 	int err;
2549 
2550 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2551 	if (err < 0)
2552 		return err;
2553 
2554 	if (cfg.fc_mp)
2555 		return ip6_route_multipath(&cfg, 0);
2556 	else
2557 		return ip6_route_del(&cfg);
2558 }
2559 
2560 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2561 {
2562 	struct fib6_config cfg;
2563 	int err;
2564 
2565 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2566 	if (err < 0)
2567 		return err;
2568 
2569 	if (cfg.fc_mp)
2570 		return ip6_route_multipath(&cfg, 1);
2571 	else
2572 		return ip6_route_add(&cfg);
2573 }
2574 
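/* Worst-case netlink message size for a single IPv6 route, used to
 * size the skb allocated in inet6_rt_notify().
 */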
2575 static inline size_t rt6_nlmsg_size(void)
2576 {
2577 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2578 	       + nla_total_size(16) /* RTA_SRC */
2579 	       + nla_total_size(16) /* RTA_DST */
2580 	       + nla_total_size(16) /* RTA_GATEWAY */
2581 	       + nla_total_size(16) /* RTA_PREFSRC */
2582 	       + nla_total_size(4) /* RTA_TABLE */
2583 	       + nla_total_size(4) /* RTA_IIF */
2584 	       + nla_total_size(4) /* RTA_OIF */
2585 	       + nla_total_size(4) /* RTA_PRIORITY */
2586 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2587 	       + nla_total_size(sizeof(struct rta_cacheinfo))
2588 	       + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
2589 }
2590 
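/* Fill one route message for @rt into @skb.  Returns 0 on success,
 * 1 when the route is skipped (prefix-only dumps), or -EMSGSIZE when
 * the message does not fit.
 */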
2591 static int rt6_fill_node(struct net *net,
2592 			 struct sk_buff *skb, struct rt6_info *rt,
2593 			 struct in6_addr *dst, struct in6_addr *src,
2594 			 int iif, int type, u32 portid, u32 seq,
2595 			 int prefix, int nowait, unsigned int flags)
2596 {
2597 	struct rtmsg *rtm;
2598 	struct nlmsghdr *nlh;
2599 	long expires;
2600 	u32 table;
2601 
2602 	if (prefix) {	/* user wants prefix routes only */
2603 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2604 			/* success since this is not a prefix route */
2605 			return 1;
2606 		}
2607 	}
2608 
2609 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2610 	if (!nlh)
2611 		return -EMSGSIZE;
2612 
2613 	rtm = nlmsg_data(nlh);
2614 	rtm->rtm_family = AF_INET6;
2615 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2616 	rtm->rtm_src_len = rt->rt6i_src.plen;
2617 	rtm->rtm_tos = 0;
2618 	if (rt->rt6i_table)
2619 		table = rt->rt6i_table->tb6_id;
2620 	else
2621 		table = RT6_TABLE_UNSPEC;
2622 	rtm->rtm_table = table;
2623 	if (nla_put_u32(skb, RTA_TABLE, table))
2624 		goto nla_put_failure;
2625 	if (rt->rt6i_flags & RTF_REJECT) {
2626 		switch (rt->dst.error) {
2627 		case -EINVAL:
2628 			rtm->rtm_type = RTN_BLACKHOLE;
2629 			break;
2630 		case -EACCES:
2631 			rtm->rtm_type = RTN_PROHIBIT;
2632 			break;
2633 		case -EAGAIN:
2634 			rtm->rtm_type = RTN_THROW;
2635 			break;
2636 		default:
2637 			rtm->rtm_type = RTN_UNREACHABLE;
2638 			break;
2639 		}
2640 	}
2641 	else if (rt->rt6i_flags & RTF_LOCAL)
2642 		rtm->rtm_type = RTN_LOCAL;
2643 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2644 		rtm->rtm_type = RTN_LOCAL;
2645 	else
2646 		rtm->rtm_type = RTN_UNICAST;
2647 	rtm->rtm_flags = 0;
2648 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2649 	rtm->rtm_protocol = rt->rt6i_protocol;
2650 	if (rt->rt6i_flags & RTF_DYNAMIC)
2651 		rtm->rtm_protocol = RTPROT_REDIRECT;
2652 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
2653 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2654 			rtm->rtm_protocol = RTPROT_RA;
2655 		else
2656 			rtm->rtm_protocol = RTPROT_KERNEL;
2657 	}
2658 
2659 	if (rt->rt6i_flags & RTF_CACHE)
2660 		rtm->rtm_flags |= RTM_F_CLONED;
2661 
2662 	if (dst) {
2663 		if (nla_put(skb, RTA_DST, 16, dst))
2664 			goto nla_put_failure;
2665 		rtm->rtm_dst_len = 128;
2666 	} else if (rtm->rtm_dst_len)
2667 		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2668 			goto nla_put_failure;
2669 #ifdef CONFIG_IPV6_SUBTREES
2670 	if (src) {
2671 		if (nla_put(skb, RTA_SRC, 16, src))
2672 			goto nla_put_failure;
2673 		rtm->rtm_src_len = 128;
2674 	} else if (rtm->rtm_src_len &&
2675 		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2676 		goto nla_put_failure;
2677 #endif
2678 	if (iif) {
2679 #ifdef CONFIG_IPV6_MROUTE
2680 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2681 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2682 			if (err <= 0) {
2683 				if (!nowait) {
2684 					if (err == 0)
2685 						return 0;
2686 					goto nla_put_failure;
2687 				} else {
2688 					if (err == -EMSGSIZE)
2689 						goto nla_put_failure;
2690 				}
2691 			}
2692 		} else
2693 #endif
2694 			if (nla_put_u32(skb, RTA_IIF, iif))
2695 				goto nla_put_failure;
2696 	} else if (dst) {
2697 		struct in6_addr saddr_buf;
2698 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2699 		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2700 			goto nla_put_failure;
2701 	}
2702 
2703 	if (rt->rt6i_prefsrc.plen) {
2704 		struct in6_addr saddr_buf;
2705 		saddr_buf = rt->rt6i_prefsrc.addr;
2706 		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2707 			goto nla_put_failure;
2708 	}
2709 
2710 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2711 		goto nla_put_failure;
2712 
2713 	if (rt->rt6i_flags & RTF_GATEWAY) {
2714 		if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2715 			goto nla_put_failure;
2716 	}
2717 
2718 	if (rt->dst.dev &&
2719 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2720 		goto nla_put_failure;
2721 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2722 		goto nla_put_failure;
2723 
2724 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2725 
2726 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2727 		goto nla_put_failure;
2728 
2729 	nlmsg_end(skb, nlh);
2730 	return 0;
2731 
2732 nla_put_failure:
2733 	nlmsg_cancel(skb, nlh);
2734 	return -EMSGSIZE;
2735 }
2736 
2737 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2738 {
2739 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2740 	int prefix;
2741 
2742 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2743 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2744 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2745 	} else
2746 		prefix = 0;
2747 
2748 	return rt6_fill_node(arg->net,
2749 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2750 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2751 		     prefix, 0, NLM_F_MULTI);
2752 }
2753 
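/* RTM_GETROUTE handler: perform a route lookup for the requested
 * source/destination and return the result as a unicast netlink reply.
 */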
2754 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
2755 {
2756 	struct net *net = sock_net(in_skb->sk);
2757 	struct nlattr *tb[RTA_MAX+1];
2758 	struct rt6_info *rt;
2759 	struct sk_buff *skb;
2760 	struct rtmsg *rtm;
2761 	struct flowi6 fl6;
2762 	int err, iif = 0, oif = 0;
2763 
2764 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2765 	if (err < 0)
2766 		goto errout;
2767 
2768 	err = -EINVAL;
2769 	memset(&fl6, 0, sizeof(fl6));
2770 
2771 	if (tb[RTA_SRC]) {
2772 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2773 			goto errout;
2774 
2775 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2776 	}
2777 
2778 	if (tb[RTA_DST]) {
2779 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2780 			goto errout;
2781 
2782 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2783 	}
2784 
2785 	if (tb[RTA_IIF])
2786 		iif = nla_get_u32(tb[RTA_IIF]);
2787 
2788 	if (tb[RTA_OIF])
2789 		oif = nla_get_u32(tb[RTA_OIF]);
2790 
2791 	if (tb[RTA_MARK])
2792 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
2793 
2794 	if (iif) {
2795 		struct net_device *dev;
2796 		int flags = 0;
2797 
2798 		dev = __dev_get_by_index(net, iif);
2799 		if (!dev) {
2800 			err = -ENODEV;
2801 			goto errout;
2802 		}
2803 
2804 		fl6.flowi6_iif = iif;
2805 
2806 		if (!ipv6_addr_any(&fl6.saddr))
2807 			flags |= RT6_LOOKUP_F_HAS_SADDR;
2808 
2809 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2810 							       flags);
2811 	} else {
2812 		fl6.flowi6_oif = oif;
2813 
2814 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2815 	}
2816 
2817 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2818 	if (!skb) {
2819 		ip6_rt_put(rt);
2820 		err = -ENOBUFS;
2821 		goto errout;
2822 	}
2823 
2824 	/* Reserve room for dummy headers; this skb can pass
2825 	   through a good chunk of the routing engine.
2826 	 */
2827 	skb_reset_mac_header(skb);
2828 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2829 
2830 	skb_dst_set(skb, &rt->dst);
2831 
2832 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2833 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2834 			    nlh->nlmsg_seq, 0, 0, 0);
2835 	if (err < 0) {
2836 		kfree_skb(skb);
2837 		goto errout;
2838 	}
2839 
2840 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2841 errout:
2842 	return err;
2843 }
2844 
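/* Notify RTNLGRP_IPV6_ROUTE listeners of a route change (@event is
 * e.g. RTM_NEWROUTE or RTM_DELROUTE).
 */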
2845 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2846 {
2847 	struct sk_buff *skb;
2848 	struct net *net = info->nl_net;
2849 	u32 seq;
2850 	int err;
2851 
2852 	err = -ENOBUFS;
2853 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2854 
2855 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2856 	if (!skb)
2857 		goto errout;
2858 
2859 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2860 				event, info->portid, seq, 0, 0, 0);
2861 	if (err < 0) {
2862 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2863 		WARN_ON(err == -EMSGSIZE);
2864 		kfree_skb(skb);
2865 		goto errout;
2866 	}
2867 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2868 		    info->nlh, gfp_any());
2869 	return;
2870 errout:
2871 	if (err < 0)
2872 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2873 }
2874 
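/* When the loopback device is registered, point the per-namespace
 * null (and, with multiple tables, prohibit/blackhole) entries at it.
 */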
2875 static int ip6_route_dev_notify(struct notifier_block *this,
2876 				unsigned long event, void *ptr)
2877 {
2878 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2879 	struct net *net = dev_net(dev);
2880 
2881 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2882 		net->ipv6.ip6_null_entry->dst.dev = dev;
2883 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2884 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2885 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2886 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2887 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2888 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2889 #endif
2890 	}
2891 
2892 	return NOTIFY_OK;
2893 }
2894 
2895 /*
2896  *	/proc
2897  */
2898 
2899 #ifdef CONFIG_PROC_FS
2900 
2901 static const struct file_operations ipv6_route_proc_fops = {
2902 	.owner		= THIS_MODULE,
2903 	.open		= ipv6_route_open,
2904 	.read		= seq_read,
2905 	.llseek		= seq_lseek,
2906 	.release	= seq_release_net,
2907 };
2908 
2909 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2910 {
2911 	struct net *net = (struct net *)seq->private;
2912 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2913 		   net->ipv6.rt6_stats->fib_nodes,
2914 		   net->ipv6.rt6_stats->fib_route_nodes,
2915 		   net->ipv6.rt6_stats->fib_rt_alloc,
2916 		   net->ipv6.rt6_stats->fib_rt_entries,
2917 		   net->ipv6.rt6_stats->fib_rt_cache,
2918 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2919 		   net->ipv6.rt6_stats->fib_discarded_routes);
2920 
2921 	return 0;
2922 }
2923 
2924 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2925 {
2926 	return single_open_net(inode, file, rt6_stats_seq_show);
2927 }
2928 
2929 static const struct file_operations rt6_stats_seq_fops = {
2930 	.owner	 = THIS_MODULE,
2931 	.open	 = rt6_stats_seq_open,
2932 	.read	 = seq_read,
2933 	.llseek	 = seq_lseek,
2934 	.release = single_release_net,
2935 };
2936 #endif	/* CONFIG_PROC_FS */
2937 
2938 #ifdef CONFIG_SYSCTL
2939 
2940 static
2941 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2942 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2943 {
2944 	struct net *net;
2945 	int delay;
2946 	if (!write)
2947 		return -EINVAL;
2948 
2949 	net = (struct net *)ctl->extra1;
2950 	delay = net->ipv6.sysctl.flush_delay;
2951 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2952 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2953 	return 0;
2954 }
2955 
2956 struct ctl_table ipv6_route_table_template[] = {
2957 	{
2958 		.procname	=	"flush",
2959 		.data		=	&init_net.ipv6.sysctl.flush_delay,
2960 		.maxlen		=	sizeof(int),
2961 		.mode		=	0200,
2962 		.proc_handler	=	ipv6_sysctl_rtcache_flush
2963 	},
2964 	{
2965 		.procname	=	"gc_thresh",
2966 		.data		=	&ip6_dst_ops_template.gc_thresh,
2967 		.maxlen		=	sizeof(int),
2968 		.mode		=	0644,
2969 		.proc_handler	=	proc_dointvec,
2970 	},
2971 	{
2972 		.procname	=	"max_size",
2973 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2974 		.maxlen		=	sizeof(int),
2975 		.mode		=	0644,
2976 		.proc_handler	=	proc_dointvec,
2977 	},
2978 	{
2979 		.procname	=	"gc_min_interval",
2980 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2981 		.maxlen		=	sizeof(int),
2982 		.mode		=	0644,
2983 		.proc_handler	=	proc_dointvec_jiffies,
2984 	},
2985 	{
2986 		.procname	=	"gc_timeout",
2987 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2988 		.maxlen		=	sizeof(int),
2989 		.mode		=	0644,
2990 		.proc_handler	=	proc_dointvec_jiffies,
2991 	},
2992 	{
2993 		.procname	=	"gc_interval",
2994 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2995 		.maxlen		=	sizeof(int),
2996 		.mode		=	0644,
2997 		.proc_handler	=	proc_dointvec_jiffies,
2998 	},
2999 	{
3000 		.procname	=	"gc_elasticity",
3001 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3002 		.maxlen		=	sizeof(int),
3003 		.mode		=	0644,
3004 		.proc_handler	=	proc_dointvec,
3005 	},
3006 	{
3007 		.procname	=	"mtu_expires",
3008 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3009 		.maxlen		=	sizeof(int),
3010 		.mode		=	0644,
3011 		.proc_handler	=	proc_dointvec_jiffies,
3012 	},
3013 	{
3014 		.procname	=	"min_adv_mss",
3015 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
3016 		.maxlen		=	sizeof(int),
3017 		.mode		=	0644,
3018 		.proc_handler	=	proc_dointvec,
3019 	},
3020 	{
3021 		.procname	=	"gc_min_interval_ms",
3022 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3023 		.maxlen		=	sizeof(int),
3024 		.mode		=	0644,
3025 		.proc_handler	=	proc_dointvec_ms_jiffies,
3026 	},
3027 	{ }
3028 };
3029 
3030 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3031 {
3032 	struct ctl_table *table;
3033 
3034 	table = kmemdup(ipv6_route_table_template,
3035 			sizeof(ipv6_route_table_template),
3036 			GFP_KERNEL);
3037 
3038 	if (table) {
3039 		table[0].data = &net->ipv6.sysctl.flush_delay;
3040 		table[0].extra1 = net;
3041 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3042 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3043 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3044 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3045 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3046 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3047 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3048 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3049 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3050 
3051 		/* Don't export sysctls to unprivileged users */
3052 		if (net->user_ns != &init_user_ns)
3053 			table[0].procname = NULL;
3054 	}
3055 
3056 	return table;
3057 }
3058 #endif
3059 
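/* Per-namespace initialization: copy the dst_ops template, allocate
 * the null/prohibit/blackhole template routes and set sysctl defaults.
 */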
3060 static int __net_init ip6_route_net_init(struct net *net)
3061 {
3062 	int ret = -ENOMEM;
3063 
3064 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3065 	       sizeof(net->ipv6.ip6_dst_ops));
3066 
3067 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3068 		goto out_ip6_dst_ops;
3069 
3070 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3071 					   sizeof(*net->ipv6.ip6_null_entry),
3072 					   GFP_KERNEL);
3073 	if (!net->ipv6.ip6_null_entry)
3074 		goto out_ip6_dst_entries;
3075 	net->ipv6.ip6_null_entry->dst.path =
3076 		(struct dst_entry *)net->ipv6.ip6_null_entry;
3077 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3078 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3079 			 ip6_template_metrics, true);
3080 
3081 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3082 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3083 					       sizeof(*net->ipv6.ip6_prohibit_entry),
3084 					       GFP_KERNEL);
3085 	if (!net->ipv6.ip6_prohibit_entry)
3086 		goto out_ip6_null_entry;
3087 	net->ipv6.ip6_prohibit_entry->dst.path =
3088 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3089 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3090 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3091 			 ip6_template_metrics, true);
3092 
3093 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3094 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3095 					       GFP_KERNEL);
3096 	if (!net->ipv6.ip6_blk_hole_entry)
3097 		goto out_ip6_prohibit_entry;
3098 	net->ipv6.ip6_blk_hole_entry->dst.path =
3099 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3100 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3101 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3102 			 ip6_template_metrics, true);
3103 #endif
3104 
3105 	net->ipv6.sysctl.flush_delay = 0;
3106 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3107 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3108 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3109 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3110 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3111 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3112 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3113 
3114 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3115 
3116 	ret = 0;
3117 out:
3118 	return ret;
3119 
3120 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3121 out_ip6_prohibit_entry:
3122 	kfree(net->ipv6.ip6_prohibit_entry);
3123 out_ip6_null_entry:
3124 	kfree(net->ipv6.ip6_null_entry);
3125 #endif
3126 out_ip6_dst_entries:
3127 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3128 out_ip6_dst_ops:
3129 	goto out;
3130 }
3131 
3132 static void __net_exit ip6_route_net_exit(struct net *net)
3133 {
3134 	kfree(net->ipv6.ip6_null_entry);
3135 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3136 	kfree(net->ipv6.ip6_prohibit_entry);
3137 	kfree(net->ipv6.ip6_blk_hole_entry);
3138 #endif
3139 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3140 }
3141 
3142 static int __net_init ip6_route_net_init_late(struct net *net)
3143 {
3144 #ifdef CONFIG_PROC_FS
3145 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3146 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3147 #endif
3148 	return 0;
3149 }
3150 
3151 static void __net_exit ip6_route_net_exit_late(struct net *net)
3152 {
3153 #ifdef CONFIG_PROC_FS
3154 	remove_proc_entry("ipv6_route", net->proc_net);
3155 	remove_proc_entry("rt6_stats", net->proc_net);
3156 #endif
3157 }
3158 
3159 static struct pernet_operations ip6_route_net_ops = {
3160 	.init = ip6_route_net_init,
3161 	.exit = ip6_route_net_exit,
3162 };
3163 
3164 static int __net_init ipv6_inetpeer_init(struct net *net)
3165 {
3166 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3167 
3168 	if (!bp)
3169 		return -ENOMEM;
3170 	inet_peer_base_init(bp);
3171 	net->ipv6.peers = bp;
3172 	return 0;
3173 }
3174 
3175 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3176 {
3177 	struct inet_peer_base *bp = net->ipv6.peers;
3178 
3179 	net->ipv6.peers = NULL;
3180 	inetpeer_invalidate_tree(bp);
3181 	kfree(bp);
3182 }
3183 
3184 static struct pernet_operations ipv6_inetpeer_ops = {
3185 	.init	=	ipv6_inetpeer_init,
3186 	.exit	=	ipv6_inetpeer_exit,
3187 };
3188 
3189 static struct pernet_operations ip6_route_net_late_ops = {
3190 	.init = ip6_route_net_init_late,
3191 	.exit = ip6_route_net_exit_late,
3192 };
3193 
3194 static struct notifier_block ip6_route_dev_notifier = {
3195 	.notifier_call = ip6_route_dev_notify,
3196 	.priority = 0,
3197 };
3198 
3199 int __init ip6_route_init(void)
3200 {
3201 	int ret;
3202 
3203 	ret = -ENOMEM;
3204 	ip6_dst_ops_template.kmem_cachep =
3205 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3206 				  SLAB_HWCACHE_ALIGN, NULL);
3207 	if (!ip6_dst_ops_template.kmem_cachep)
3208 		goto out;
3209 
3210 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3211 	if (ret)
3212 		goto out_kmem_cache;
3213 
3214 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3215 	if (ret)
3216 		goto out_dst_entries;
3217 
3218 	ret = register_pernet_subsys(&ip6_route_net_ops);
3219 	if (ret)
3220 		goto out_register_inetpeer;
3221 
3222 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3223 
3224 	/* The loopback device is registered before this code runs, so the
3225 	 * loopback reference in rt6_info is not taken automatically; do it
3226 	 * manually for init_net */
3227 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3228 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3229 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3230 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3231 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3232 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3233 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3234 #endif
3235 	ret = fib6_init();
3236 	if (ret)
3237 		goto out_register_subsys;
3238 
3239 	ret = xfrm6_init();
3240 	if (ret)
3241 		goto out_fib6_init;
3242 
3243 	ret = fib6_rules_init();
3244 	if (ret)
3245 		goto xfrm6_init;
3246 
3247 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3248 	if (ret)
3249 		goto fib6_rules_init;
3250 
3251 	ret = -ENOBUFS;
3252 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3253 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3254 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3255 		goto out_register_late_subsys;
3256 
3257 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3258 	if (ret)
3259 		goto out_register_late_subsys;
3260 
3261 out:
3262 	return ret;
3263 
3264 out_register_late_subsys:
3265 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3266 fib6_rules_init:
3267 	fib6_rules_cleanup();
3268 xfrm6_init:
3269 	xfrm6_fini();
3270 out_fib6_init:
3271 	fib6_gc_cleanup();
3272 out_register_subsys:
3273 	unregister_pernet_subsys(&ip6_route_net_ops);
3274 out_register_inetpeer:
3275 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3276 out_dst_entries:
3277 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3278 out_kmem_cache:
3279 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3280 	goto out;
3281 }
3282 
3283 void ip6_route_cleanup(void)
3284 {
3285 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3286 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3287 	fib6_rules_cleanup();
3288 	xfrm6_fini();
3289 	fib6_gc_cleanup();
3290 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3291 	unregister_pernet_subsys(&ip6_route_net_ops);
3292 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3293 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3294 }
3295