xref: /openbmc/linux/net/ipv6/route.c (revision e1f7c9ee)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61 
62 #include <asm/uaccess.h>
63 
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67 
/*
 * Outcome codes of the neighbour check performed while scoring a route.
 * All failure codes are negative so that rt6_score_route() can return them
 * in place of a (non-negative) device/preference score.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* find_match() skips the route entirely */
	RT6_NUD_FAIL_PROBE = -2,	/* neighbour is NUD_FAILED (CONFIG_IPV6_ROUTER_PREF) */
	RT6_NUD_FAIL_DO_RR = -1,	/* find_match() scores it 0 and requests round-robin */
	RT6_NUD_SUCCEED = 1
};
74 
/*
 * Forward declarations: these functions are referenced by the dst_ops
 * tables, the route entry templates and the lookup helpers below before
 * their definitions appear.
 */
static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
				    const struct in6_addr *dest);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
static int		ip6_pkt_prohibit(struct sk_buff *skb);
static int		ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
96 
/*
 * Helpers used by rt6_route_rcv() to find or create the route described by
 * a received route information option; only built with
 * CONFIG_IPV6_ROUTE_INFO.
 */
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex);
#endif
106 
107 static void rt6_bind_peer(struct rt6_info *rt, int create)
108 {
109 	struct inet_peer_base *base;
110 	struct inet_peer *peer;
111 
112 	base = inetpeer_base_ptr(rt->_rt6i_peer);
113 	if (!base)
114 		return;
115 
116 	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
117 	if (peer) {
118 		if (!rt6_set_peer(rt, peer))
119 			inet_putpeer(peer);
120 	}
121 }
122 
123 static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
124 {
125 	if (rt6_has_peer(rt))
126 		return rt6_peer_ptr(rt);
127 
128 	rt6_bind_peer(rt, create);
129 	return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
130 }
131 
/* Convenience wrapper: fetch the route's peer with the create flag set. */
static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
{
	const int create = 1;

	return __rt6_get_peer(rt, create);
}
136 
137 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
138 {
139 	struct rt6_info *rt = (struct rt6_info *) dst;
140 	struct inet_peer *peer;
141 	u32 *p = NULL;
142 
143 	if (!(rt->dst.flags & DST_HOST))
144 		return NULL;
145 
146 	peer = rt6_get_peer_create(rt);
147 	if (peer) {
148 		u32 *old_p = __DST_METRICS_PTR(old);
149 		unsigned long prev, new;
150 
151 		p = peer->metrics;
152 		if (inet_metrics_new(peer) ||
153 		    (old & DST_METRICS_FORCE_OVERWRITE))
154 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
155 
156 		new = (unsigned long) p;
157 		prev = cmpxchg(&dst->_metrics, old, new);
158 
159 		if (prev != old) {
160 			p = __DST_METRICS_PTR(prev);
161 			if (prev & DST_METRICS_READ_ONLY)
162 				p = NULL;
163 		}
164 	}
165 	return p;
166 }
167 
168 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
169 					     struct sk_buff *skb,
170 					     const void *daddr)
171 {
172 	struct in6_addr *p = &rt->rt6i_gateway;
173 
174 	if (!ipv6_addr_any(p))
175 		return (const void *) p;
176 	else if (skb)
177 		return &ipv6_hdr(skb)->daddr;
178 	return daddr;
179 }
180 
181 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
182 					  struct sk_buff *skb,
183 					  const void *daddr)
184 {
185 	struct rt6_info *rt = (struct rt6_info *) dst;
186 	struct neighbour *n;
187 
188 	daddr = choose_neigh_daddr(rt, skb, daddr);
189 	n = __ipv6_neigh_lookup(dst->dev, daddr);
190 	if (n)
191 		return n;
192 	return neigh_create(&nd_tbl, daddr, dst->dev);
193 }
194 
/*
 * dst_ops for regular IPv6 routes, wiring the callbacks declared above.
 * NOTE(review): ip6_dst_alloc() allocates from net->ipv6.ip6_dst_ops, which
 * is presumably a per-namespace copy of this template; the copy is made
 * outside this part of the file - confirm.
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};
213 
214 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
215 {
216 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
217 
218 	return mtu ? : dst->dev->mtu;
219 }
220 
/* dst_ops->update_pmtu handler for blackhole routes: intentionally a no-op. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}
225 
/* dst_ops->redirect handler for blackhole routes: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
230 
/*
 * dst_ops->cow_metrics handler for blackhole routes: always returns NULL,
 * i.e. never hands out a writable metrics array.
 */
static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
					 unsigned long old)
{
	return NULL;
}
236 
/*
 * dst_ops used by ip6_blackhole_route(): shares destroy/check/advmss/
 * neigh_lookup with regular routes, but PMTU updates, redirects and metric
 * copy-on-write are stubbed out.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
	.neigh_lookup		=	ip6_neigh_lookup,
};
249 
/*
 * Constant metrics array with every entry zero; the hop limit slot is
 * spelled out explicitly, the rest are zero by default initialisation.
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
253 
/*
 * Template for the "no route" entry: a reject route with error
 * -ENETUNREACH whose input/output handlers discard the packet.  The lookup
 * code in this file returns net->ipv6.ip6_null_entry when nothing matches.
 * NOTE(review): that per-namespace entry is presumably built from this
 * template elsewhere in the file - confirm.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
268 
269 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
270 
/*
 * Template for a "prohibit" reject route (CONFIG_IPV6_MULTIPLE_TABLES
 * only): error -EACCES, packets handled by the ip6_pkt_prohibit handlers.
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
285 
/*
 * Template for a "blackhole" reject route (CONFIG_IPV6_MULTIPLE_TABLES
 * only): error -EINVAL, packets dropped by the generic dst_discard
 * handlers.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_sk,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
300 
301 #endif
302 
303 /* allocate dst with ip6_dst_ops */
304 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
305 					     struct net_device *dev,
306 					     int flags,
307 					     struct fib6_table *table)
308 {
309 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
310 					0, DST_OBSOLETE_FORCE_CHK, flags);
311 
312 	if (rt) {
313 		struct dst_entry *dst = &rt->dst;
314 
315 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
316 		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
317 		INIT_LIST_HEAD(&rt->rt6i_siblings);
318 	}
319 	return rt;
320 }
321 
322 static void ip6_dst_destroy(struct dst_entry *dst)
323 {
324 	struct rt6_info *rt = (struct rt6_info *)dst;
325 	struct inet6_dev *idev = rt->rt6i_idev;
326 	struct dst_entry *from = dst->from;
327 
328 	if (!(rt->dst.flags & DST_HOST))
329 		dst_destroy_metrics_generic(dst);
330 
331 	if (idev) {
332 		rt->rt6i_idev = NULL;
333 		in6_dev_put(idev);
334 	}
335 
336 	dst->from = NULL;
337 	dst_release(from);
338 
339 	if (rt6_has_peer(rt)) {
340 		struct inet_peer *peer = rt6_peer_ptr(rt);
341 		inet_putpeer(peer);
342 	}
343 }
344 
345 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
346 			   int how)
347 {
348 	struct rt6_info *rt = (struct rt6_info *)dst;
349 	struct inet6_dev *idev = rt->rt6i_idev;
350 	struct net_device *loopback_dev =
351 		dev_net(dev)->loopback_dev;
352 
353 	if (dev != loopback_dev) {
354 		if (idev && idev->dev == dev) {
355 			struct inet6_dev *loopback_idev =
356 				in6_dev_get(loopback_dev);
357 			if (loopback_idev) {
358 				rt->rt6i_idev = loopback_idev;
359 				in6_dev_put(idev);
360 			}
361 		}
362 	}
363 }
364 
365 static bool rt6_check_expired(const struct rt6_info *rt)
366 {
367 	if (rt->rt6i_flags & RTF_EXPIRES) {
368 		if (time_after(jiffies, rt->dst.expires))
369 			return true;
370 	} else if (rt->dst.from) {
371 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
372 	}
373 	return false;
374 }
375 
376 /* Multipath route selection:
377  *   Hash based function using packet header and flowlabel.
378  * Adapted from fib_info_hashfn()
379  */
380 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
381 			       const struct flowi6 *fl6)
382 {
383 	unsigned int val = fl6->flowi6_proto;
384 
385 	val ^= ipv6_addr_hash(&fl6->daddr);
386 	val ^= ipv6_addr_hash(&fl6->saddr);
387 
388 	/* Work only if this not encapsulated */
389 	switch (fl6->flowi6_proto) {
390 	case IPPROTO_UDP:
391 	case IPPROTO_TCP:
392 	case IPPROTO_SCTP:
393 		val ^= (__force u16)fl6->fl6_sport;
394 		val ^= (__force u16)fl6->fl6_dport;
395 		break;
396 
397 	case IPPROTO_ICMPV6:
398 		val ^= (__force u16)fl6->fl6_icmp_type;
399 		val ^= (__force u16)fl6->fl6_icmp_code;
400 		break;
401 	}
402 	/* RFC6438 recommands to use flowlabel */
403 	val ^= (__force u32)fl6->flowlabel;
404 
405 	/* Perhaps, we need to tune, this function? */
406 	val = val ^ (val >> 7) ^ (val >> 12);
407 	return val % candidate_count;
408 }
409 
410 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
411 					     struct flowi6 *fl6, int oif,
412 					     int strict)
413 {
414 	struct rt6_info *sibling, *next_sibling;
415 	int route_choosen;
416 
417 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
418 	/* Don't change the route, if route_choosen == 0
419 	 * (siblings does not include ourself)
420 	 */
421 	if (route_choosen)
422 		list_for_each_entry_safe(sibling, next_sibling,
423 				&match->rt6i_siblings, rt6i_siblings) {
424 			route_choosen--;
425 			if (route_choosen == 0) {
426 				if (rt6_score_route(sibling, oif, strict) < 0)
427 					break;
428 				match = sibling;
429 				break;
430 			}
431 		}
432 	return match;
433 }
434 
435 /*
436  *	Route lookup. Any table->tb6_lock is implied.
437  */
438 
439 static inline struct rt6_info *rt6_device_match(struct net *net,
440 						    struct rt6_info *rt,
441 						    const struct in6_addr *saddr,
442 						    int oif,
443 						    int flags)
444 {
445 	struct rt6_info *local = NULL;
446 	struct rt6_info *sprt;
447 
448 	if (!oif && ipv6_addr_any(saddr))
449 		goto out;
450 
451 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
452 		struct net_device *dev = sprt->dst.dev;
453 
454 		if (oif) {
455 			if (dev->ifindex == oif)
456 				return sprt;
457 			if (dev->flags & IFF_LOOPBACK) {
458 				if (!sprt->rt6i_idev ||
459 				    sprt->rt6i_idev->dev->ifindex != oif) {
460 					if (flags & RT6_LOOKUP_F_IFACE && oif)
461 						continue;
462 					if (local && (!oif ||
463 						      local->rt6i_idev->dev->ifindex == oif))
464 						continue;
465 				}
466 				local = sprt;
467 			}
468 		} else {
469 			if (ipv6_chk_addr(net, saddr, dev,
470 					  flags & RT6_LOOKUP_F_IFACE))
471 				return sprt;
472 		}
473 	}
474 
475 	if (oif) {
476 		if (local)
477 			return local;
478 
479 		if (flags & RT6_LOOKUP_F_IFACE)
480 			return net->ipv6.ip6_null_entry;
481 	}
482 out:
483 	return rt;
484 }
485 
486 #ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Work item queued by rt6_probe() and consumed by rt6_probe_deferred():
 * carries the gateway address to solicit and the device to send on
 * (rt6_probe() takes a device reference, the handler drops it).
 */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};
492 
493 static void rt6_probe_deferred(struct work_struct *w)
494 {
495 	struct in6_addr mcaddr;
496 	struct __rt6_probe_work *work =
497 		container_of(w, struct __rt6_probe_work, work);
498 
499 	addrconf_addr_solict_mult(&work->target, &mcaddr);
500 	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
501 	dev_put(work->dev);
502 	kfree(w);
503 }
504 
/*
 * Schedule a reachability probe (neighbour solicitation) for the gateway
 * of @rt.  Does nothing for a NULL route or a route without RTF_GATEWAY.
 *
 * The probe is queued when there is no neighbour entry for the gateway, or
 * when the entry is not NUD_VALID and was last updated more than
 * rtr_probe_interval ago.  The solicitation itself is sent from
 * rt6_probe_deferred() via the workqueue.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		write_lock(&neigh->lock);
		/* Already known to be reachable: just unlock and leave. */
		if (neigh->nud_state & NUD_VALID)
			goto out;
	}

	if (!neigh ||
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct __rt6_probe_work *work;

		/* GFP_ATOMIC: we may be holding neigh->lock here. */
		work = kmalloc(sizeof(*work), GFP_ATOMIC);

		/* Only mark the neighbour when the probe will really be queued. */
		if (neigh && work)
			__neigh_set_probe_once(neigh);

		if (neigh)
			write_unlock(&neigh->lock);

		if (work) {
			INIT_WORK(&work->work, rt6_probe_deferred);
			work->target = rt->rt6i_gateway;
			/* Reference is dropped by rt6_probe_deferred(). */
			dev_hold(rt->dst.dev);
			work->dev = rt->dst.dev;
			schedule_work(&work->work);
		}
	} else {
		/* Reached with neigh != NULL and neigh->lock held (also via goto). */
out:
		write_unlock(&neigh->lock);
	}
	rcu_read_unlock_bh();
}
551 #else
/* Without CONFIG_IPV6_ROUTER_PREF, router probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
555 #endif
556 
557 /*
558  * Default Router Selection (RFC 2461 6.3.6)
559  */
560 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
561 {
562 	struct net_device *dev = rt->dst.dev;
563 	if (!oif || dev->ifindex == oif)
564 		return 2;
565 	if ((dev->flags & IFF_LOOPBACK) &&
566 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
567 		return 1;
568 	return 0;
569 }
570 
571 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
572 {
573 	struct neighbour *neigh;
574 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
575 
576 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
577 	    !(rt->rt6i_flags & RTF_GATEWAY))
578 		return RT6_NUD_SUCCEED;
579 
580 	rcu_read_lock_bh();
581 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
582 	if (neigh) {
583 		read_lock(&neigh->lock);
584 		if (neigh->nud_state & NUD_VALID)
585 			ret = RT6_NUD_SUCCEED;
586 #ifdef CONFIG_IPV6_ROUTER_PREF
587 		else if (!(neigh->nud_state & NUD_FAILED))
588 			ret = RT6_NUD_SUCCEED;
589 		else
590 			ret = RT6_NUD_FAIL_PROBE;
591 #endif
592 		read_unlock(&neigh->lock);
593 	} else {
594 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
595 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
596 	}
597 	rcu_read_unlock_bh();
598 
599 	return ret;
600 }
601 
602 static int rt6_score_route(struct rt6_info *rt, int oif,
603 			   int strict)
604 {
605 	int m;
606 
607 	m = rt6_check_dev(rt, oif);
608 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
609 		return RT6_NUD_FAIL_HARD;
610 #ifdef CONFIG_IPV6_ROUTER_PREF
611 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
612 #endif
613 	if (strict & RT6_LOOKUP_F_REACHABLE) {
614 		int n = rt6_check_neigh(rt);
615 		if (n < 0)
616 			return n;
617 	}
618 	return m;
619 }
620 
621 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
622 				   int *mpri, struct rt6_info *match,
623 				   bool *do_rr)
624 {
625 	int m;
626 	bool match_do_rr = false;
627 
628 	if (rt6_check_expired(rt))
629 		goto out;
630 
631 	m = rt6_score_route(rt, oif, strict);
632 	if (m == RT6_NUD_FAIL_DO_RR) {
633 		match_do_rr = true;
634 		m = 0; /* lowest valid score */
635 	} else if (m == RT6_NUD_FAIL_HARD) {
636 		goto out;
637 	}
638 
639 	if (strict & RT6_LOOKUP_F_REACHABLE)
640 		rt6_probe(rt);
641 
642 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
643 	if (m > *mpri) {
644 		*do_rr = match_do_rr;
645 		*mpri = m;
646 		match = rt;
647 	}
648 out:
649 	return match;
650 }
651 
652 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
653 				     struct rt6_info *rr_head,
654 				     u32 metric, int oif, int strict,
655 				     bool *do_rr)
656 {
657 	struct rt6_info *rt, *match;
658 	int mpri = -1;
659 
660 	match = NULL;
661 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
662 	     rt = rt->dst.rt6_next)
663 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
664 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
665 	     rt = rt->dst.rt6_next)
666 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
667 
668 	return match;
669 }
670 
671 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
672 {
673 	struct rt6_info *match, *rt0;
674 	struct net *net;
675 	bool do_rr = false;
676 
677 	rt0 = fn->rr_ptr;
678 	if (!rt0)
679 		fn->rr_ptr = rt0 = fn->leaf;
680 
681 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
682 			     &do_rr);
683 
684 	if (do_rr) {
685 		struct rt6_info *next = rt0->dst.rt6_next;
686 
687 		/* no entries matched; do round-robin */
688 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
689 			next = fn->leaf;
690 
691 		if (next != rt0)
692 			fn->rr_ptr = next;
693 	}
694 
695 	net = dev_net(rt0->dst.dev);
696 	return match ? match : net->ipv6.ip6_null_entry;
697 }
698 
699 #ifdef CONFIG_IPV6_ROUTE_INFO
700 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
701 		  const struct in6_addr *gwaddr)
702 {
703 	struct net *net = dev_net(dev);
704 	struct route_info *rinfo = (struct route_info *) opt;
705 	struct in6_addr prefix_buf, *prefix;
706 	unsigned int pref;
707 	unsigned long lifetime;
708 	struct rt6_info *rt;
709 
710 	if (len < sizeof(struct route_info)) {
711 		return -EINVAL;
712 	}
713 
714 	/* Sanity check for prefix_len and length */
715 	if (rinfo->length > 3) {
716 		return -EINVAL;
717 	} else if (rinfo->prefix_len > 128) {
718 		return -EINVAL;
719 	} else if (rinfo->prefix_len > 64) {
720 		if (rinfo->length < 2) {
721 			return -EINVAL;
722 		}
723 	} else if (rinfo->prefix_len > 0) {
724 		if (rinfo->length < 1) {
725 			return -EINVAL;
726 		}
727 	}
728 
729 	pref = rinfo->route_pref;
730 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
731 		return -EINVAL;
732 
733 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
734 
735 	if (rinfo->length == 3)
736 		prefix = (struct in6_addr *)rinfo->prefix;
737 	else {
738 		/* this function is safe */
739 		ipv6_addr_prefix(&prefix_buf,
740 				 (struct in6_addr *)rinfo->prefix,
741 				 rinfo->prefix_len);
742 		prefix = &prefix_buf;
743 	}
744 
745 	if (rinfo->prefix_len == 0)
746 		rt = rt6_get_dflt_router(gwaddr, dev);
747 	else
748 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
749 					gwaddr, dev->ifindex);
750 
751 	if (rt && !lifetime) {
752 		ip6_del_rt(rt);
753 		rt = NULL;
754 	}
755 
756 	if (!rt && lifetime)
757 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
758 					pref);
759 	else if (rt)
760 		rt->rt6i_flags = RTF_ROUTEINFO |
761 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
762 
763 	if (rt) {
764 		if (!addrconf_finite_timeout(lifetime))
765 			rt6_clean_expires(rt);
766 		else
767 			rt6_set_expires(rt, jiffies + HZ * lifetime);
768 
769 		ip6_rt_put(rt);
770 	}
771 	return 0;
772 }
773 #endif
774 
/*
 * BACKTRACK - retry the lookup higher up the tree when nothing matched.
 *
 * Must be expanded inside a function that has local variables 'rt' and 'fn'
 * and labels 'out' and 'restart'.  If 'rt' is the namespace null entry, the
 * macro walks from 'fn' towards the root: it jumps to 'out' once a node
 * flagged RTN_TL_ROOT is reached; when the parent has a subtree other than
 * 'fn', the walk continues at the result of looking 'saddr' up in that
 * subtree, otherwise at the parent.  As soon as the node reached carries
 * RTN_RTINFO, it jumps to 'restart' with 'fn' updated.
 */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == __net->ipv6.ip6_null_entry) {	\
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
792 
/*
 * Plain FIB lookup in @table for flow @fl6, done under the table read lock.
 * Picks the leaf entry matching the flow's output interface / source
 * address, applies multipath selection when the entry has siblings and no
 * output interface was requested, and backtracks up the tree when only the
 * null entry was found.  The result is returned after dst_use().
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:	/* re-entered by BACKTRACK() with fn moved up the tree */
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	BACKTRACK(net, &fl6->saddr);
out:		/* also reached from BACKTRACK() at the top-level root */
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}
814 
815 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
816 				    int flags)
817 {
818 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
819 }
820 EXPORT_SYMBOL_GPL(ip6_route_lookup);
821 
822 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
823 			    const struct in6_addr *saddr, int oif, int strict)
824 {
825 	struct flowi6 fl6 = {
826 		.flowi6_oif = oif,
827 		.daddr = *daddr,
828 	};
829 	struct dst_entry *dst;
830 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
831 
832 	if (saddr) {
833 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
834 		flags |= RT6_LOOKUP_F_HAS_SADDR;
835 	}
836 
837 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
838 	if (dst->error == 0)
839 		return (struct rt6_info *) dst;
840 
841 	dst_release(dst);
842 
843 	return NULL;
844 }
845 EXPORT_SYMBOL(rt6_lookup);
846 
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to it, the route may be destroyed.
 */
852 
853 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
854 			struct nlattr *mx, int mx_len)
855 {
856 	int err;
857 	struct fib6_table *table;
858 
859 	table = rt->rt6i_table;
860 	write_lock_bh(&table->tb6_lock);
861 	err = fib6_add(&table->tb6_root, rt, info, mx, mx_len);
862 	write_unlock_bh(&table->tb6_lock);
863 
864 	return err;
865 }
866 
867 int ip6_ins_rt(struct rt6_info *rt)
868 {
869 	struct nl_info info = {
870 		.nl_net = dev_net(rt->dst.dev),
871 	};
872 	return __ip6_ins_rt(rt, &info, NULL, 0);
873 }
874 
875 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
876 				      const struct in6_addr *daddr,
877 				      const struct in6_addr *saddr)
878 {
879 	struct rt6_info *rt;
880 
881 	/*
882 	 *	Clone the route.
883 	 */
884 
885 	rt = ip6_rt_copy(ort, daddr);
886 
887 	if (rt) {
888 		if (ort->rt6i_dst.plen != 128 &&
889 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
890 			rt->rt6i_flags |= RTF_ANYCAST;
891 
892 		rt->rt6i_flags |= RTF_CACHE;
893 
894 #ifdef CONFIG_IPV6_SUBTREES
895 		if (rt->rt6i_src.plen && saddr) {
896 			rt->rt6i_src.addr = *saddr;
897 			rt->rt6i_src.plen = 128;
898 		}
899 #endif
900 	}
901 
902 	return rt;
903 }
904 
905 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
906 					const struct in6_addr *daddr)
907 {
908 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
909 
910 	if (rt)
911 		rt->rt6i_flags |= RTF_CACHE;
912 	return rt;
913 }
914 
/*
 * Core per-table route resolution shared by the input and output paths.
 *
 * Selects the best route for @fl6 on interface @oif and, unless the result
 * is the null entry, already a cache entry, or a gateway/nexthop-less host
 * route, creates a per-destination RTF_CACHE copy and inserts it into the
 * table.  The returned route carries a reference taken on behalf of the
 * caller, and its lastuse/__use counters are updated.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;
	int err;
	/* Reachability is only required while forwarding is disabled. */
	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

	strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
	read_lock_bh(&table->tb6_lock);

restart_2:
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);

restart:	/* re-entered by BACKTRACK() with fn moved up the tree */
	rt = rt6_select(fn, oif, strict | reachable);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
	BACKTRACK(net, &fl6->saddr);
	if (rt == net->ipv6.ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	/* Keep rt alive while the table lock is dropped for the copy. */
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);

	if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
	else if (!(rt->dst.flags & DST_HOST))
		nrt = rt6_alloc_clone(rt, &fl6->daddr);
	else
		goto out2;	/* host route: returned as is, reference held */

	ip6_rt_put(rt);
	/* A failed copy degrades to the null entry. */
	rt = nrt ? : net->ipv6.ip6_null_entry;

	dst_hold(&rt->dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	/* Give up after three tries and return whatever rt is now. */
	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	ip6_rt_put(rt);
	goto relookup;

out:
	/*
	 * Any exit through here while the reachability requirement is still
	 * set triggers one more lookup without it (table lock still held).
	 */
	if (reachable) {
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
out2:
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;

	return rt;
}
985 
986 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
987 					    struct flowi6 *fl6, int flags)
988 {
989 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
990 }
991 
992 static struct dst_entry *ip6_route_input_lookup(struct net *net,
993 						struct net_device *dev,
994 						struct flowi6 *fl6, int flags)
995 {
996 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
997 		flags |= RT6_LOOKUP_F_IFACE;
998 
999 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1000 }
1001 
1002 void ip6_route_input(struct sk_buff *skb)
1003 {
1004 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1005 	struct net *net = dev_net(skb->dev);
1006 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1007 	struct flowi6 fl6 = {
1008 		.flowi6_iif = skb->dev->ifindex,
1009 		.daddr = iph->daddr,
1010 		.saddr = iph->saddr,
1011 		.flowlabel = ip6_flowinfo(iph),
1012 		.flowi6_mark = skb->mark,
1013 		.flowi6_proto = iph->nexthdr,
1014 	};
1015 
1016 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1017 }
1018 
1019 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1020 					     struct flowi6 *fl6, int flags)
1021 {
1022 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1023 }
1024 
1025 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1026 				    struct flowi6 *fl6)
1027 {
1028 	int flags = 0;
1029 
1030 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1031 
1032 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1033 		flags |= RT6_LOOKUP_F_IFACE;
1034 
1035 	if (!ipv6_addr_any(&fl6->saddr))
1036 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1037 	else if (sk)
1038 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1039 
1040 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1041 }
1042 EXPORT_SYMBOL(ip6_route_output);
1043 
1044 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1045 {
1046 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1047 	struct dst_entry *new = NULL;
1048 
1049 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1050 	if (rt) {
1051 		new = &rt->dst;
1052 
1053 		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1054 		rt6_init_peer(rt, net->ipv6.peers);
1055 
1056 		new->__use = 1;
1057 		new->input = dst_discard;
1058 		new->output = dst_discard_sk;
1059 
1060 		if (dst_metrics_read_only(&ort->dst))
1061 			new->_metrics = ort->dst._metrics;
1062 		else
1063 			dst_copy_metrics(new, &ort->dst);
1064 		rt->rt6i_idev = ort->rt6i_idev;
1065 		if (rt->rt6i_idev)
1066 			in6_dev_hold(rt->rt6i_idev);
1067 
1068 		rt->rt6i_gateway = ort->rt6i_gateway;
1069 		rt->rt6i_flags = ort->rt6i_flags;
1070 		rt->rt6i_metric = 0;
1071 
1072 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1073 #ifdef CONFIG_IPV6_SUBTREES
1074 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1075 #endif
1076 
1077 		dst_free(new);
1078 	}
1079 
1080 	dst_release(dst_orig);
1081 	return new ? new : ERR_PTR(-ENOMEM);
1082 }
1083 
1084 /*
1085  *	Destination cache support functions
1086  */
1087 
1088 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1089 {
1090 	struct rt6_info *rt;
1091 
1092 	rt = (struct rt6_info *) dst;
1093 
1094 	/* All IPV6 dsts are created with ->obsolete set to the value
1095 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1096 	 * into this function always.
1097 	 */
1098 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1099 		return NULL;
1100 
1101 	if (rt6_check_expired(rt))
1102 		return NULL;
1103 
1104 	return dst;
1105 }
1106 
1107 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1108 {
1109 	struct rt6_info *rt = (struct rt6_info *) dst;
1110 
1111 	if (rt) {
1112 		if (rt->rt6i_flags & RTF_CACHE) {
1113 			if (rt6_check_expired(rt)) {
1114 				ip6_del_rt(rt);
1115 				dst = NULL;
1116 			}
1117 		} else {
1118 			dst_release(dst);
1119 			dst = NULL;
1120 		}
1121 	}
1122 	return dst;
1123 }
1124 
1125 static void ip6_link_failure(struct sk_buff *skb)
1126 {
1127 	struct rt6_info *rt;
1128 
1129 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1130 
1131 	rt = (struct rt6_info *) skb_dst(skb);
1132 	if (rt) {
1133 		if (rt->rt6i_flags & RTF_CACHE) {
1134 			dst_hold(&rt->dst);
1135 			if (ip6_del_rt(rt))
1136 				dst_free(&rt->dst);
1137 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1138 			rt->rt6i_node->fn_sernum = -1;
1139 		}
1140 	}
1141 }
1142 
1143 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1144 			       struct sk_buff *skb, u32 mtu)
1145 {
1146 	struct rt6_info *rt6 = (struct rt6_info *)dst;
1147 
1148 	dst_confirm(dst);
1149 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1150 		struct net *net = dev_net(dst->dev);
1151 
1152 		rt6->rt6i_flags |= RTF_MODIFIED;
1153 		if (mtu < IPV6_MIN_MTU) {
1154 			u32 features = dst_metric(dst, RTAX_FEATURES);
1155 			mtu = IPV6_MIN_MTU;
1156 			features |= RTAX_FEATURE_ALLFRAG;
1157 			dst_metric_set(dst, RTAX_FEATURES, features);
1158 		}
1159 		dst_metric_set(dst, RTAX_MTU, mtu);
1160 		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1161 	}
1162 }
1163 
1164 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1165 		     int oif, u32 mark)
1166 {
1167 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1168 	struct dst_entry *dst;
1169 	struct flowi6 fl6;
1170 
1171 	memset(&fl6, 0, sizeof(fl6));
1172 	fl6.flowi6_oif = oif;
1173 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1174 	fl6.daddr = iph->daddr;
1175 	fl6.saddr = iph->saddr;
1176 	fl6.flowlabel = ip6_flowinfo(iph);
1177 
1178 	dst = ip6_route_output(net, NULL, &fl6);
1179 	if (!dst->error)
1180 		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1181 	dst_release(dst);
1182 }
1183 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1184 
1185 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1186 {
1187 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1188 			sk->sk_bound_dev_if, sk->sk_mark);
1189 }
1190 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1191 
1192 /* Handle redirects */
1193 struct ip6rd_flowi {
1194 	struct flowi6 fl6;
1195 	struct in6_addr gateway;
1196 };
1197 
1198 static struct rt6_info *__ip6_route_redirect(struct net *net,
1199 					     struct fib6_table *table,
1200 					     struct flowi6 *fl6,
1201 					     int flags)
1202 {
1203 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1204 	struct rt6_info *rt;
1205 	struct fib6_node *fn;
1206 
1207 	/* Get the "current" route for this destination and
1208 	 * check if the redirect has come from approriate router.
1209 	 *
1210 	 * RFC 4861 specifies that redirects should only be
1211 	 * accepted if they come from the nexthop to the target.
1212 	 * Due to the way the routes are chosen, this notion
1213 	 * is a bit fuzzy and one might need to check all possible
1214 	 * routes.
1215 	 */
1216 
1217 	read_lock_bh(&table->tb6_lock);
1218 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1219 restart:
1220 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1221 		if (rt6_check_expired(rt))
1222 			continue;
1223 		if (rt->dst.error)
1224 			break;
1225 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1226 			continue;
1227 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1228 			continue;
1229 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1230 			continue;
1231 		break;
1232 	}
1233 
1234 	if (!rt)
1235 		rt = net->ipv6.ip6_null_entry;
1236 	else if (rt->dst.error) {
1237 		rt = net->ipv6.ip6_null_entry;
1238 		goto out;
1239 	}
1240 	BACKTRACK(net, &fl6->saddr);
1241 out:
1242 	dst_hold(&rt->dst);
1243 
1244 	read_unlock_bh(&table->tb6_lock);
1245 
1246 	return rt;
1247 };
1248 
1249 static struct dst_entry *ip6_route_redirect(struct net *net,
1250 					const struct flowi6 *fl6,
1251 					const struct in6_addr *gateway)
1252 {
1253 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1254 	struct ip6rd_flowi rdfl;
1255 
1256 	rdfl.fl6 = *fl6;
1257 	rdfl.gateway = *gateway;
1258 
1259 	return fib6_rule_lookup(net, &rdfl.fl6,
1260 				flags, __ip6_route_redirect);
1261 }
1262 
1263 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1264 {
1265 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1266 	struct dst_entry *dst;
1267 	struct flowi6 fl6;
1268 
1269 	memset(&fl6, 0, sizeof(fl6));
1270 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1271 	fl6.flowi6_oif = oif;
1272 	fl6.flowi6_mark = mark;
1273 	fl6.daddr = iph->daddr;
1274 	fl6.saddr = iph->saddr;
1275 	fl6.flowlabel = ip6_flowinfo(iph);
1276 
1277 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1278 	rt6_do_redirect(dst, NULL, skb);
1279 	dst_release(dst);
1280 }
1281 EXPORT_SYMBOL_GPL(ip6_redirect);
1282 
1283 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1284 			    u32 mark)
1285 {
1286 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1287 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1288 	struct dst_entry *dst;
1289 	struct flowi6 fl6;
1290 
1291 	memset(&fl6, 0, sizeof(fl6));
1292 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1293 	fl6.flowi6_oif = oif;
1294 	fl6.flowi6_mark = mark;
1295 	fl6.daddr = msg->dest;
1296 	fl6.saddr = iph->daddr;
1297 
1298 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1299 	rt6_do_redirect(dst, NULL, skb);
1300 	dst_release(dst);
1301 }
1302 
1303 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1304 {
1305 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1306 }
1307 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1308 
1309 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1310 {
1311 	struct net_device *dev = dst->dev;
1312 	unsigned int mtu = dst_mtu(dst);
1313 	struct net *net = dev_net(dev);
1314 
1315 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1316 
1317 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1318 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1319 
1320 	/*
1321 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1322 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1323 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1324 	 * rely only on pmtu discovery"
1325 	 */
1326 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1327 		mtu = IPV6_MAXPLEN;
1328 	return mtu;
1329 }
1330 
1331 static unsigned int ip6_mtu(const struct dst_entry *dst)
1332 {
1333 	struct inet6_dev *idev;
1334 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1335 
1336 	if (mtu)
1337 		goto out;
1338 
1339 	mtu = IPV6_MIN_MTU;
1340 
1341 	rcu_read_lock();
1342 	idev = __in6_dev_get(dst->dev);
1343 	if (idev)
1344 		mtu = idev->cnf.mtu6;
1345 	rcu_read_unlock();
1346 
1347 out:
1348 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1349 }
1350 
1351 static struct dst_entry *icmp6_dst_gc_list;
1352 static DEFINE_SPINLOCK(icmp6_dst_lock);
1353 
1354 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1355 				  struct flowi6 *fl6)
1356 {
1357 	struct dst_entry *dst;
1358 	struct rt6_info *rt;
1359 	struct inet6_dev *idev = in6_dev_get(dev);
1360 	struct net *net = dev_net(dev);
1361 
1362 	if (unlikely(!idev))
1363 		return ERR_PTR(-ENODEV);
1364 
1365 	rt = ip6_dst_alloc(net, dev, 0, NULL);
1366 	if (unlikely(!rt)) {
1367 		in6_dev_put(idev);
1368 		dst = ERR_PTR(-ENOMEM);
1369 		goto out;
1370 	}
1371 
1372 	rt->dst.flags |= DST_HOST;
1373 	rt->dst.output  = ip6_output;
1374 	atomic_set(&rt->dst.__refcnt, 1);
1375 	rt->rt6i_gateway  = fl6->daddr;
1376 	rt->rt6i_dst.addr = fl6->daddr;
1377 	rt->rt6i_dst.plen = 128;
1378 	rt->rt6i_idev     = idev;
1379 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1380 
1381 	spin_lock_bh(&icmp6_dst_lock);
1382 	rt->dst.next = icmp6_dst_gc_list;
1383 	icmp6_dst_gc_list = &rt->dst;
1384 	spin_unlock_bh(&icmp6_dst_lock);
1385 
1386 	fib6_force_start_gc(net);
1387 
1388 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1389 
1390 out:
1391 	return dst;
1392 }
1393 
1394 int icmp6_dst_gc(void)
1395 {
1396 	struct dst_entry *dst, **pprev;
1397 	int more = 0;
1398 
1399 	spin_lock_bh(&icmp6_dst_lock);
1400 	pprev = &icmp6_dst_gc_list;
1401 
1402 	while ((dst = *pprev) != NULL) {
1403 		if (!atomic_read(&dst->__refcnt)) {
1404 			*pprev = dst->next;
1405 			dst_free(dst);
1406 		} else {
1407 			pprev = &dst->next;
1408 			++more;
1409 		}
1410 	}
1411 
1412 	spin_unlock_bh(&icmp6_dst_lock);
1413 
1414 	return more;
1415 }
1416 
1417 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1418 			    void *arg)
1419 {
1420 	struct dst_entry *dst, **pprev;
1421 
1422 	spin_lock_bh(&icmp6_dst_lock);
1423 	pprev = &icmp6_dst_gc_list;
1424 	while ((dst = *pprev) != NULL) {
1425 		struct rt6_info *rt = (struct rt6_info *) dst;
1426 		if (func(rt, arg)) {
1427 			*pprev = dst->next;
1428 			dst_free(dst);
1429 		} else {
1430 			pprev = &dst->next;
1431 		}
1432 	}
1433 	spin_unlock_bh(&icmp6_dst_lock);
1434 }
1435 
1436 static int ip6_dst_gc(struct dst_ops *ops)
1437 {
1438 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1439 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1440 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1441 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1442 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1443 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1444 	int entries;
1445 
1446 	entries = dst_entries_get_fast(ops);
1447 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1448 	    entries <= rt_max_size)
1449 		goto out;
1450 
1451 	net->ipv6.ip6_rt_gc_expire++;
1452 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1453 	entries = dst_entries_get_slow(ops);
1454 	if (entries < ops->gc_thresh)
1455 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1456 out:
1457 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1458 	return entries > rt_max_size;
1459 }
1460 
1461 /*
1462  *
1463  */
1464 
1465 int ip6_route_add(struct fib6_config *cfg)
1466 {
1467 	int err;
1468 	struct net *net = cfg->fc_nlinfo.nl_net;
1469 	struct rt6_info *rt = NULL;
1470 	struct net_device *dev = NULL;
1471 	struct inet6_dev *idev = NULL;
1472 	struct fib6_table *table;
1473 	int addr_type;
1474 
1475 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1476 		return -EINVAL;
1477 #ifndef CONFIG_IPV6_SUBTREES
1478 	if (cfg->fc_src_len)
1479 		return -EINVAL;
1480 #endif
1481 	if (cfg->fc_ifindex) {
1482 		err = -ENODEV;
1483 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1484 		if (!dev)
1485 			goto out;
1486 		idev = in6_dev_get(dev);
1487 		if (!idev)
1488 			goto out;
1489 	}
1490 
1491 	if (cfg->fc_metric == 0)
1492 		cfg->fc_metric = IP6_RT_PRIO_USER;
1493 
1494 	err = -ENOBUFS;
1495 	if (cfg->fc_nlinfo.nlh &&
1496 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1497 		table = fib6_get_table(net, cfg->fc_table);
1498 		if (!table) {
1499 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1500 			table = fib6_new_table(net, cfg->fc_table);
1501 		}
1502 	} else {
1503 		table = fib6_new_table(net, cfg->fc_table);
1504 	}
1505 
1506 	if (!table)
1507 		goto out;
1508 
1509 	rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1510 
1511 	if (!rt) {
1512 		err = -ENOMEM;
1513 		goto out;
1514 	}
1515 
1516 	if (cfg->fc_flags & RTF_EXPIRES)
1517 		rt6_set_expires(rt, jiffies +
1518 				clock_t_to_jiffies(cfg->fc_expires));
1519 	else
1520 		rt6_clean_expires(rt);
1521 
1522 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1523 		cfg->fc_protocol = RTPROT_BOOT;
1524 	rt->rt6i_protocol = cfg->fc_protocol;
1525 
1526 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1527 
1528 	if (addr_type & IPV6_ADDR_MULTICAST)
1529 		rt->dst.input = ip6_mc_input;
1530 	else if (cfg->fc_flags & RTF_LOCAL)
1531 		rt->dst.input = ip6_input;
1532 	else
1533 		rt->dst.input = ip6_forward;
1534 
1535 	rt->dst.output = ip6_output;
1536 
1537 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1538 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1539 	if (rt->rt6i_dst.plen == 128) {
1540 		rt->dst.flags |= DST_HOST;
1541 		dst_metrics_set_force_overwrite(&rt->dst);
1542 	}
1543 
1544 #ifdef CONFIG_IPV6_SUBTREES
1545 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1546 	rt->rt6i_src.plen = cfg->fc_src_len;
1547 #endif
1548 
1549 	rt->rt6i_metric = cfg->fc_metric;
1550 
1551 	/* We cannot add true routes via loopback here,
1552 	   they would result in kernel looping; promote them to reject routes
1553 	 */
1554 	if ((cfg->fc_flags & RTF_REJECT) ||
1555 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1556 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1557 	     !(cfg->fc_flags & RTF_LOCAL))) {
1558 		/* hold loopback dev/idev if we haven't done so. */
1559 		if (dev != net->loopback_dev) {
1560 			if (dev) {
1561 				dev_put(dev);
1562 				in6_dev_put(idev);
1563 			}
1564 			dev = net->loopback_dev;
1565 			dev_hold(dev);
1566 			idev = in6_dev_get(dev);
1567 			if (!idev) {
1568 				err = -ENODEV;
1569 				goto out;
1570 			}
1571 		}
1572 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1573 		switch (cfg->fc_type) {
1574 		case RTN_BLACKHOLE:
1575 			rt->dst.error = -EINVAL;
1576 			rt->dst.output = dst_discard_sk;
1577 			rt->dst.input = dst_discard;
1578 			break;
1579 		case RTN_PROHIBIT:
1580 			rt->dst.error = -EACCES;
1581 			rt->dst.output = ip6_pkt_prohibit_out;
1582 			rt->dst.input = ip6_pkt_prohibit;
1583 			break;
1584 		case RTN_THROW:
1585 		default:
1586 			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1587 					: -ENETUNREACH;
1588 			rt->dst.output = ip6_pkt_discard_out;
1589 			rt->dst.input = ip6_pkt_discard;
1590 			break;
1591 		}
1592 		goto install_route;
1593 	}
1594 
1595 	if (cfg->fc_flags & RTF_GATEWAY) {
1596 		const struct in6_addr *gw_addr;
1597 		int gwa_type;
1598 
1599 		gw_addr = &cfg->fc_gateway;
1600 		rt->rt6i_gateway = *gw_addr;
1601 		gwa_type = ipv6_addr_type(gw_addr);
1602 
1603 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1604 			struct rt6_info *grt;
1605 
1606 			/* IPv6 strictly inhibits using not link-local
1607 			   addresses as nexthop address.
1608 			   Otherwise, router will not able to send redirects.
1609 			   It is very good, but in some (rare!) circumstances
1610 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1611 			   some exceptions. --ANK
1612 			 */
1613 			err = -EINVAL;
1614 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1615 				goto out;
1616 
1617 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1618 
1619 			err = -EHOSTUNREACH;
1620 			if (!grt)
1621 				goto out;
1622 			if (dev) {
1623 				if (dev != grt->dst.dev) {
1624 					ip6_rt_put(grt);
1625 					goto out;
1626 				}
1627 			} else {
1628 				dev = grt->dst.dev;
1629 				idev = grt->rt6i_idev;
1630 				dev_hold(dev);
1631 				in6_dev_hold(grt->rt6i_idev);
1632 			}
1633 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1634 				err = 0;
1635 			ip6_rt_put(grt);
1636 
1637 			if (err)
1638 				goto out;
1639 		}
1640 		err = -EINVAL;
1641 		if (!dev || (dev->flags & IFF_LOOPBACK))
1642 			goto out;
1643 	}
1644 
1645 	err = -ENODEV;
1646 	if (!dev)
1647 		goto out;
1648 
1649 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1650 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1651 			err = -EINVAL;
1652 			goto out;
1653 		}
1654 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1655 		rt->rt6i_prefsrc.plen = 128;
1656 	} else
1657 		rt->rt6i_prefsrc.plen = 0;
1658 
1659 	rt->rt6i_flags = cfg->fc_flags;
1660 
1661 install_route:
1662 	rt->dst.dev = dev;
1663 	rt->rt6i_idev = idev;
1664 	rt->rt6i_table = table;
1665 
1666 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1667 
1668 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len);
1669 
1670 out:
1671 	if (dev)
1672 		dev_put(dev);
1673 	if (idev)
1674 		in6_dev_put(idev);
1675 	if (rt)
1676 		dst_free(&rt->dst);
1677 	return err;
1678 }
1679 
1680 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1681 {
1682 	int err;
1683 	struct fib6_table *table;
1684 	struct net *net = dev_net(rt->dst.dev);
1685 
1686 	if (rt == net->ipv6.ip6_null_entry) {
1687 		err = -ENOENT;
1688 		goto out;
1689 	}
1690 
1691 	table = rt->rt6i_table;
1692 	write_lock_bh(&table->tb6_lock);
1693 	err = fib6_del(rt, info);
1694 	write_unlock_bh(&table->tb6_lock);
1695 
1696 out:
1697 	ip6_rt_put(rt);
1698 	return err;
1699 }
1700 
1701 int ip6_del_rt(struct rt6_info *rt)
1702 {
1703 	struct nl_info info = {
1704 		.nl_net = dev_net(rt->dst.dev),
1705 	};
1706 	return __ip6_del_rt(rt, &info);
1707 }
1708 
1709 static int ip6_route_del(struct fib6_config *cfg)
1710 {
1711 	struct fib6_table *table;
1712 	struct fib6_node *fn;
1713 	struct rt6_info *rt;
1714 	int err = -ESRCH;
1715 
1716 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1717 	if (!table)
1718 		return err;
1719 
1720 	read_lock_bh(&table->tb6_lock);
1721 
1722 	fn = fib6_locate(&table->tb6_root,
1723 			 &cfg->fc_dst, cfg->fc_dst_len,
1724 			 &cfg->fc_src, cfg->fc_src_len);
1725 
1726 	if (fn) {
1727 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1728 			if (cfg->fc_ifindex &&
1729 			    (!rt->dst.dev ||
1730 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
1731 				continue;
1732 			if (cfg->fc_flags & RTF_GATEWAY &&
1733 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1734 				continue;
1735 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1736 				continue;
1737 			dst_hold(&rt->dst);
1738 			read_unlock_bh(&table->tb6_lock);
1739 
1740 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1741 		}
1742 	}
1743 	read_unlock_bh(&table->tb6_lock);
1744 
1745 	return err;
1746 }
1747 
1748 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1749 {
1750 	struct net *net = dev_net(skb->dev);
1751 	struct netevent_redirect netevent;
1752 	struct rt6_info *rt, *nrt = NULL;
1753 	struct ndisc_options ndopts;
1754 	struct inet6_dev *in6_dev;
1755 	struct neighbour *neigh;
1756 	struct rd_msg *msg;
1757 	int optlen, on_link;
1758 	u8 *lladdr;
1759 
1760 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1761 	optlen -= sizeof(*msg);
1762 
1763 	if (optlen < 0) {
1764 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1765 		return;
1766 	}
1767 
1768 	msg = (struct rd_msg *)icmp6_hdr(skb);
1769 
1770 	if (ipv6_addr_is_multicast(&msg->dest)) {
1771 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1772 		return;
1773 	}
1774 
1775 	on_link = 0;
1776 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1777 		on_link = 1;
1778 	} else if (ipv6_addr_type(&msg->target) !=
1779 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1780 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1781 		return;
1782 	}
1783 
1784 	in6_dev = __in6_dev_get(skb->dev);
1785 	if (!in6_dev)
1786 		return;
1787 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1788 		return;
1789 
1790 	/* RFC2461 8.1:
1791 	 *	The IP source address of the Redirect MUST be the same as the current
1792 	 *	first-hop router for the specified ICMP Destination Address.
1793 	 */
1794 
1795 	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1796 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1797 		return;
1798 	}
1799 
1800 	lladdr = NULL;
1801 	if (ndopts.nd_opts_tgt_lladdr) {
1802 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1803 					     skb->dev);
1804 		if (!lladdr) {
1805 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1806 			return;
1807 		}
1808 	}
1809 
1810 	rt = (struct rt6_info *) dst;
1811 	if (rt == net->ipv6.ip6_null_entry) {
1812 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1813 		return;
1814 	}
1815 
1816 	/* Redirect received -> path was valid.
1817 	 * Look, redirects are sent only in response to data packets,
1818 	 * so that this nexthop apparently is reachable. --ANK
1819 	 */
1820 	dst_confirm(&rt->dst);
1821 
1822 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1823 	if (!neigh)
1824 		return;
1825 
1826 	/*
1827 	 *	We have finally decided to accept it.
1828 	 */
1829 
1830 	neigh_update(neigh, lladdr, NUD_STALE,
1831 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1832 		     NEIGH_UPDATE_F_OVERRIDE|
1833 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1834 				     NEIGH_UPDATE_F_ISROUTER))
1835 		     );
1836 
1837 	nrt = ip6_rt_copy(rt, &msg->dest);
1838 	if (!nrt)
1839 		goto out;
1840 
1841 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1842 	if (on_link)
1843 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1844 
1845 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1846 
1847 	if (ip6_ins_rt(nrt))
1848 		goto out;
1849 
1850 	netevent.old = &rt->dst;
1851 	netevent.new = &nrt->dst;
1852 	netevent.daddr = &msg->dest;
1853 	netevent.neigh = neigh;
1854 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1855 
1856 	if (rt->rt6i_flags & RTF_CACHE) {
1857 		rt = (struct rt6_info *) dst_clone(&rt->dst);
1858 		ip6_del_rt(rt);
1859 	}
1860 
1861 out:
1862 	neigh_release(neigh);
1863 }
1864 
1865 /*
1866  *	Misc support functions
1867  */
1868 
1869 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1870 				    const struct in6_addr *dest)
1871 {
1872 	struct net *net = dev_net(ort->dst.dev);
1873 	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1874 					    ort->rt6i_table);
1875 
1876 	if (rt) {
1877 		rt->dst.input = ort->dst.input;
1878 		rt->dst.output = ort->dst.output;
1879 		rt->dst.flags |= DST_HOST;
1880 
1881 		rt->rt6i_dst.addr = *dest;
1882 		rt->rt6i_dst.plen = 128;
1883 		dst_copy_metrics(&rt->dst, &ort->dst);
1884 		rt->dst.error = ort->dst.error;
1885 		rt->rt6i_idev = ort->rt6i_idev;
1886 		if (rt->rt6i_idev)
1887 			in6_dev_hold(rt->rt6i_idev);
1888 		rt->dst.lastuse = jiffies;
1889 
1890 		if (ort->rt6i_flags & RTF_GATEWAY)
1891 			rt->rt6i_gateway = ort->rt6i_gateway;
1892 		else
1893 			rt->rt6i_gateway = *dest;
1894 		rt->rt6i_flags = ort->rt6i_flags;
1895 		rt6_set_from(rt, ort);
1896 		rt->rt6i_metric = 0;
1897 
1898 #ifdef CONFIG_IPV6_SUBTREES
1899 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1900 #endif
1901 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1902 		rt->rt6i_table = ort->rt6i_table;
1903 	}
1904 	return rt;
1905 }
1906 
1907 #ifdef CONFIG_IPV6_ROUTE_INFO
1908 static struct rt6_info *rt6_get_route_info(struct net *net,
1909 					   const struct in6_addr *prefix, int prefixlen,
1910 					   const struct in6_addr *gwaddr, int ifindex)
1911 {
1912 	struct fib6_node *fn;
1913 	struct rt6_info *rt = NULL;
1914 	struct fib6_table *table;
1915 
1916 	table = fib6_get_table(net, RT6_TABLE_INFO);
1917 	if (!table)
1918 		return NULL;
1919 
1920 	read_lock_bh(&table->tb6_lock);
1921 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1922 	if (!fn)
1923 		goto out;
1924 
1925 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1926 		if (rt->dst.dev->ifindex != ifindex)
1927 			continue;
1928 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1929 			continue;
1930 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1931 			continue;
1932 		dst_hold(&rt->dst);
1933 		break;
1934 	}
1935 out:
1936 	read_unlock_bh(&table->tb6_lock);
1937 	return rt;
1938 }
1939 
1940 static struct rt6_info *rt6_add_route_info(struct net *net,
1941 					   const struct in6_addr *prefix, int prefixlen,
1942 					   const struct in6_addr *gwaddr, int ifindex,
1943 					   unsigned int pref)
1944 {
1945 	struct fib6_config cfg = {
1946 		.fc_table	= RT6_TABLE_INFO,
1947 		.fc_metric	= IP6_RT_PRIO_USER,
1948 		.fc_ifindex	= ifindex,
1949 		.fc_dst_len	= prefixlen,
1950 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1951 				  RTF_UP | RTF_PREF(pref),
1952 		.fc_nlinfo.portid = 0,
1953 		.fc_nlinfo.nlh = NULL,
1954 		.fc_nlinfo.nl_net = net,
1955 	};
1956 
1957 	cfg.fc_dst = *prefix;
1958 	cfg.fc_gateway = *gwaddr;
1959 
1960 	/* We should treat it as a default route if prefix length is 0. */
1961 	if (!prefixlen)
1962 		cfg.fc_flags |= RTF_DEFAULT;
1963 
1964 	ip6_route_add(&cfg);
1965 
1966 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1967 }
1968 #endif
1969 
1970 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1971 {
1972 	struct rt6_info *rt;
1973 	struct fib6_table *table;
1974 
1975 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1976 	if (!table)
1977 		return NULL;
1978 
1979 	read_lock_bh(&table->tb6_lock);
1980 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1981 		if (dev == rt->dst.dev &&
1982 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1983 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1984 			break;
1985 	}
1986 	if (rt)
1987 		dst_hold(&rt->dst);
1988 	read_unlock_bh(&table->tb6_lock);
1989 	return rt;
1990 }
1991 
1992 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1993 				     struct net_device *dev,
1994 				     unsigned int pref)
1995 {
1996 	struct fib6_config cfg = {
1997 		.fc_table	= RT6_TABLE_DFLT,
1998 		.fc_metric	= IP6_RT_PRIO_USER,
1999 		.fc_ifindex	= dev->ifindex,
2000 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2001 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2002 		.fc_nlinfo.portid = 0,
2003 		.fc_nlinfo.nlh = NULL,
2004 		.fc_nlinfo.nl_net = dev_net(dev),
2005 	};
2006 
2007 	cfg.fc_gateway = *gwaddr;
2008 
2009 	ip6_route_add(&cfg);
2010 
2011 	return rt6_get_dflt_router(gwaddr, dev);
2012 }
2013 
2014 void rt6_purge_dflt_routers(struct net *net)
2015 {
2016 	struct rt6_info *rt;
2017 	struct fib6_table *table;
2018 
2019 	/* NOTE: Keep consistent with rt6_get_dflt_router */
2020 	table = fib6_get_table(net, RT6_TABLE_DFLT);
2021 	if (!table)
2022 		return;
2023 
2024 restart:
2025 	read_lock_bh(&table->tb6_lock);
2026 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2027 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2028 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2029 			dst_hold(&rt->dst);
2030 			read_unlock_bh(&table->tb6_lock);
2031 			ip6_del_rt(rt);
2032 			goto restart;
2033 		}
2034 	}
2035 	read_unlock_bh(&table->tb6_lock);
2036 }
2037 
2038 static void rtmsg_to_fib6_config(struct net *net,
2039 				 struct in6_rtmsg *rtmsg,
2040 				 struct fib6_config *cfg)
2041 {
2042 	memset(cfg, 0, sizeof(*cfg));
2043 
2044 	cfg->fc_table = RT6_TABLE_MAIN;
2045 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2046 	cfg->fc_metric = rtmsg->rtmsg_metric;
2047 	cfg->fc_expires = rtmsg->rtmsg_info;
2048 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2049 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2050 	cfg->fc_flags = rtmsg->rtmsg_flags;
2051 
2052 	cfg->fc_nlinfo.nl_net = net;
2053 
2054 	cfg->fc_dst = rtmsg->rtmsg_dst;
2055 	cfg->fc_src = rtmsg->rtmsg_src;
2056 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2057 }
2058 
2059 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2060 {
2061 	struct fib6_config cfg;
2062 	struct in6_rtmsg rtmsg;
2063 	int err;
2064 
2065 	switch (cmd) {
2066 	case SIOCADDRT:		/* Add a route */
2067 	case SIOCDELRT:		/* Delete a route */
2068 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2069 			return -EPERM;
2070 		err = copy_from_user(&rtmsg, arg,
2071 				     sizeof(struct in6_rtmsg));
2072 		if (err)
2073 			return -EFAULT;
2074 
2075 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2076 
2077 		rtnl_lock();
2078 		switch (cmd) {
2079 		case SIOCADDRT:
2080 			err = ip6_route_add(&cfg);
2081 			break;
2082 		case SIOCDELRT:
2083 			err = ip6_route_del(&cfg);
2084 			break;
2085 		default:
2086 			err = -EINVAL;
2087 		}
2088 		rtnl_unlock();
2089 
2090 		return err;
2091 	}
2092 
2093 	return -EINVAL;
2094 }
2095 
2096 /*
2097  *	Drop the packet on the floor
2098  */
2099 
2100 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2101 {
2102 	int type;
2103 	struct dst_entry *dst = skb_dst(skb);
2104 	switch (ipstats_mib_noroutes) {
2105 	case IPSTATS_MIB_INNOROUTES:
2106 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2107 		if (type == IPV6_ADDR_ANY) {
2108 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2109 				      IPSTATS_MIB_INADDRERRORS);
2110 			break;
2111 		}
2112 		/* FALLTHROUGH */
2113 	case IPSTATS_MIB_OUTNOROUTES:
2114 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2115 			      ipstats_mib_noroutes);
2116 		break;
2117 	}
2118 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2119 	kfree_skb(skb);
2120 	return 0;
2121 }
2122 
2123 static int ip6_pkt_discard(struct sk_buff *skb)
2124 {
2125 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2126 }
2127 
2128 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2129 {
2130 	skb->dev = skb_dst(skb)->dev;
2131 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2132 }
2133 
2134 static int ip6_pkt_prohibit(struct sk_buff *skb)
2135 {
2136 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2137 }
2138 
2139 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2140 {
2141 	skb->dev = skb_dst(skb)->dev;
2142 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2143 }
2144 
2145 /*
2146  *	Allocate a dst for local (unicast / anycast) address.
2147  */
2148 
2149 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2150 				    const struct in6_addr *addr,
2151 				    bool anycast)
2152 {
2153 	struct net *net = dev_net(idev->dev);
2154 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2155 					    DST_NOCOUNT, NULL);
2156 	if (!rt)
2157 		return ERR_PTR(-ENOMEM);
2158 
2159 	in6_dev_hold(idev);
2160 
2161 	rt->dst.flags |= DST_HOST;
2162 	rt->dst.input = ip6_input;
2163 	rt->dst.output = ip6_output;
2164 	rt->rt6i_idev = idev;
2165 
2166 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2167 	if (anycast)
2168 		rt->rt6i_flags |= RTF_ANYCAST;
2169 	else
2170 		rt->rt6i_flags |= RTF_LOCAL;
2171 
2172 	rt->rt6i_gateway  = *addr;
2173 	rt->rt6i_dst.addr = *addr;
2174 	rt->rt6i_dst.plen = 128;
2175 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2176 
2177 	atomic_set(&rt->dst.__refcnt, 1);
2178 
2179 	return rt;
2180 }
2181 
2182 int ip6_route_get_saddr(struct net *net,
2183 			struct rt6_info *rt,
2184 			const struct in6_addr *daddr,
2185 			unsigned int prefs,
2186 			struct in6_addr *saddr)
2187 {
2188 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
2189 	int err = 0;
2190 	if (rt->rt6i_prefsrc.plen)
2191 		*saddr = rt->rt6i_prefsrc.addr;
2192 	else
2193 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2194 					 daddr, prefs, saddr);
2195 	return err;
2196 }
2197 
2198 /* remove deleted ip from prefsrc entries */
2199 struct arg_dev_net_ip {
2200 	struct net_device *dev;
2201 	struct net *net;
2202 	struct in6_addr *addr;
2203 };
2204 
2205 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2206 {
2207 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2208 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2209 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2210 
2211 	if (((void *)rt->dst.dev == dev || !dev) &&
2212 	    rt != net->ipv6.ip6_null_entry &&
2213 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2214 		/* remove prefsrc entry */
2215 		rt->rt6i_prefsrc.plen = 0;
2216 	}
2217 	return 0;
2218 }
2219 
2220 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2221 {
2222 	struct net *net = dev_net(ifp->idev->dev);
2223 	struct arg_dev_net_ip adni = {
2224 		.dev = ifp->idev->dev,
2225 		.net = net,
2226 		.addr = &ifp->addr,
2227 	};
2228 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2229 }
2230 
2231 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2232 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2233 
2234 /* Remove routers and update dst entries when gateway turn into host. */
2235 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2236 {
2237 	struct in6_addr *gateway = (struct in6_addr *)arg;
2238 
2239 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2240 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2241 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2242 		return -1;
2243 	}
2244 	return 0;
2245 }
2246 
2247 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2248 {
2249 	fib6_clean_all(net, fib6_clean_tohost, gateway);
2250 }
2251 
2252 struct arg_dev_net {
2253 	struct net_device *dev;
2254 	struct net *net;
2255 };
2256 
2257 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2258 {
2259 	const struct arg_dev_net *adn = arg;
2260 	const struct net_device *dev = adn->dev;
2261 
2262 	if ((rt->dst.dev == dev || !dev) &&
2263 	    rt != adn->net->ipv6.ip6_null_entry)
2264 		return -1;
2265 
2266 	return 0;
2267 }
2268 
2269 void rt6_ifdown(struct net *net, struct net_device *dev)
2270 {
2271 	struct arg_dev_net adn = {
2272 		.dev = dev,
2273 		.net = net,
2274 	};
2275 
2276 	fib6_clean_all(net, fib6_ifdown, &adn);
2277 	icmp6_clean_all(fib6_ifdown, &adn);
2278 }
2279 
2280 struct rt6_mtu_change_arg {
2281 	struct net_device *dev;
2282 	unsigned int mtu;
2283 };
2284 
2285 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2286 {
2287 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2288 	struct inet6_dev *idev;
2289 
2290 	/* In IPv6 pmtu discovery is not optional,
2291 	   so that RTAX_MTU lock cannot disable it.
2292 	   We still use this lock to block changes
2293 	   caused by addrconf/ndisc.
2294 	*/
2295 
2296 	idev = __in6_dev_get(arg->dev);
2297 	if (!idev)
2298 		return 0;
2299 
2300 	/* For administrative MTU increase, there is no way to discover
2301 	   IPv6 PMTU increase, so PMTU increase should be updated here.
2302 	   Since RFC 1981 doesn't include administrative MTU increase
2303 	   update PMTU increase is a MUST. (i.e. jumbo frame)
2304 	 */
2305 	/*
2306 	   If new MTU is less than route PMTU, this new MTU will be the
2307 	   lowest MTU in the path, update the route PMTU to reflect PMTU
2308 	   decreases; if new MTU is greater than route PMTU, and the
2309 	   old MTU is the lowest MTU in the path, update the route PMTU
2310 	   to reflect the increase. In this case if the other nodes' MTU
2311 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2312 	   PMTU discouvery.
2313 	 */
2314 	if (rt->dst.dev == arg->dev &&
2315 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2316 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2317 	     (dst_mtu(&rt->dst) < arg->mtu &&
2318 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2319 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2320 	}
2321 	return 0;
2322 }
2323 
2324 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2325 {
2326 	struct rt6_mtu_change_arg arg = {
2327 		.dev = dev,
2328 		.mtu = mtu,
2329 	};
2330 
2331 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2332 }
2333 
2334 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2335 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2336 	[RTA_OIF]               = { .type = NLA_U32 },
2337 	[RTA_IIF]		= { .type = NLA_U32 },
2338 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2339 	[RTA_METRICS]           = { .type = NLA_NESTED },
2340 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2341 };
2342 
2343 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2344 			      struct fib6_config *cfg)
2345 {
2346 	struct rtmsg *rtm;
2347 	struct nlattr *tb[RTA_MAX+1];
2348 	int err;
2349 
2350 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2351 	if (err < 0)
2352 		goto errout;
2353 
2354 	err = -EINVAL;
2355 	rtm = nlmsg_data(nlh);
2356 	memset(cfg, 0, sizeof(*cfg));
2357 
2358 	cfg->fc_table = rtm->rtm_table;
2359 	cfg->fc_dst_len = rtm->rtm_dst_len;
2360 	cfg->fc_src_len = rtm->rtm_src_len;
2361 	cfg->fc_flags = RTF_UP;
2362 	cfg->fc_protocol = rtm->rtm_protocol;
2363 	cfg->fc_type = rtm->rtm_type;
2364 
2365 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2366 	    rtm->rtm_type == RTN_BLACKHOLE ||
2367 	    rtm->rtm_type == RTN_PROHIBIT ||
2368 	    rtm->rtm_type == RTN_THROW)
2369 		cfg->fc_flags |= RTF_REJECT;
2370 
2371 	if (rtm->rtm_type == RTN_LOCAL)
2372 		cfg->fc_flags |= RTF_LOCAL;
2373 
2374 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2375 	cfg->fc_nlinfo.nlh = nlh;
2376 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2377 
2378 	if (tb[RTA_GATEWAY]) {
2379 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2380 		cfg->fc_flags |= RTF_GATEWAY;
2381 	}
2382 
2383 	if (tb[RTA_DST]) {
2384 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2385 
2386 		if (nla_len(tb[RTA_DST]) < plen)
2387 			goto errout;
2388 
2389 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2390 	}
2391 
2392 	if (tb[RTA_SRC]) {
2393 		int plen = (rtm->rtm_src_len + 7) >> 3;
2394 
2395 		if (nla_len(tb[RTA_SRC]) < plen)
2396 			goto errout;
2397 
2398 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2399 	}
2400 
2401 	if (tb[RTA_PREFSRC])
2402 		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2403 
2404 	if (tb[RTA_OIF])
2405 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2406 
2407 	if (tb[RTA_PRIORITY])
2408 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2409 
2410 	if (tb[RTA_METRICS]) {
2411 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2412 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2413 	}
2414 
2415 	if (tb[RTA_TABLE])
2416 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2417 
2418 	if (tb[RTA_MULTIPATH]) {
2419 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2420 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2421 	}
2422 
2423 	err = 0;
2424 errout:
2425 	return err;
2426 }
2427 
2428 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2429 {
2430 	struct fib6_config r_cfg;
2431 	struct rtnexthop *rtnh;
2432 	int remaining;
2433 	int attrlen;
2434 	int err = 0, last_err = 0;
2435 
2436 beginning:
2437 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2438 	remaining = cfg->fc_mp_len;
2439 
2440 	/* Parse a Multipath Entry */
2441 	while (rtnh_ok(rtnh, remaining)) {
2442 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2443 		if (rtnh->rtnh_ifindex)
2444 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2445 
2446 		attrlen = rtnh_attrlen(rtnh);
2447 		if (attrlen > 0) {
2448 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2449 
2450 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2451 			if (nla) {
2452 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2453 				r_cfg.fc_flags |= RTF_GATEWAY;
2454 			}
2455 		}
2456 		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2457 		if (err) {
2458 			last_err = err;
2459 			/* If we are trying to remove a route, do not stop the
2460 			 * loop when ip6_route_del() fails (because next hop is
2461 			 * already gone), we should try to remove all next hops.
2462 			 */
2463 			if (add) {
2464 				/* If add fails, we should try to delete all
2465 				 * next hops that have been already added.
2466 				 */
2467 				add = 0;
2468 				goto beginning;
2469 			}
2470 		}
2471 		/* Because each route is added like a single route we remove
2472 		 * this flag after the first nexthop (if there is a collision,
2473 		 * we have already fail to add the first nexthop:
2474 		 * fib6_add_rt2node() has reject it).
2475 		 */
2476 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2477 		rtnh = rtnh_next(rtnh, &remaining);
2478 	}
2479 
2480 	return last_err;
2481 }
2482 
2483 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2484 {
2485 	struct fib6_config cfg;
2486 	int err;
2487 
2488 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2489 	if (err < 0)
2490 		return err;
2491 
2492 	if (cfg.fc_mp)
2493 		return ip6_route_multipath(&cfg, 0);
2494 	else
2495 		return ip6_route_del(&cfg);
2496 }
2497 
2498 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2499 {
2500 	struct fib6_config cfg;
2501 	int err;
2502 
2503 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2504 	if (err < 0)
2505 		return err;
2506 
2507 	if (cfg.fc_mp)
2508 		return ip6_route_multipath(&cfg, 1);
2509 	else
2510 		return ip6_route_add(&cfg);
2511 }
2512 
2513 static inline size_t rt6_nlmsg_size(void)
2514 {
2515 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2516 	       + nla_total_size(16) /* RTA_SRC */
2517 	       + nla_total_size(16) /* RTA_DST */
2518 	       + nla_total_size(16) /* RTA_GATEWAY */
2519 	       + nla_total_size(16) /* RTA_PREFSRC */
2520 	       + nla_total_size(4) /* RTA_TABLE */
2521 	       + nla_total_size(4) /* RTA_IIF */
2522 	       + nla_total_size(4) /* RTA_OIF */
2523 	       + nla_total_size(4) /* RTA_PRIORITY */
2524 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2525 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2526 }
2527 
2528 static int rt6_fill_node(struct net *net,
2529 			 struct sk_buff *skb, struct rt6_info *rt,
2530 			 struct in6_addr *dst, struct in6_addr *src,
2531 			 int iif, int type, u32 portid, u32 seq,
2532 			 int prefix, int nowait, unsigned int flags)
2533 {
2534 	struct rtmsg *rtm;
2535 	struct nlmsghdr *nlh;
2536 	long expires;
2537 	u32 table;
2538 
2539 	if (prefix) {	/* user wants prefix routes only */
2540 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2541 			/* success since this is not a prefix route */
2542 			return 1;
2543 		}
2544 	}
2545 
2546 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2547 	if (!nlh)
2548 		return -EMSGSIZE;
2549 
2550 	rtm = nlmsg_data(nlh);
2551 	rtm->rtm_family = AF_INET6;
2552 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2553 	rtm->rtm_src_len = rt->rt6i_src.plen;
2554 	rtm->rtm_tos = 0;
2555 	if (rt->rt6i_table)
2556 		table = rt->rt6i_table->tb6_id;
2557 	else
2558 		table = RT6_TABLE_UNSPEC;
2559 	rtm->rtm_table = table;
2560 	if (nla_put_u32(skb, RTA_TABLE, table))
2561 		goto nla_put_failure;
2562 	if (rt->rt6i_flags & RTF_REJECT) {
2563 		switch (rt->dst.error) {
2564 		case -EINVAL:
2565 			rtm->rtm_type = RTN_BLACKHOLE;
2566 			break;
2567 		case -EACCES:
2568 			rtm->rtm_type = RTN_PROHIBIT;
2569 			break;
2570 		case -EAGAIN:
2571 			rtm->rtm_type = RTN_THROW;
2572 			break;
2573 		default:
2574 			rtm->rtm_type = RTN_UNREACHABLE;
2575 			break;
2576 		}
2577 	}
2578 	else if (rt->rt6i_flags & RTF_LOCAL)
2579 		rtm->rtm_type = RTN_LOCAL;
2580 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2581 		rtm->rtm_type = RTN_LOCAL;
2582 	else
2583 		rtm->rtm_type = RTN_UNICAST;
2584 	rtm->rtm_flags = 0;
2585 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2586 	rtm->rtm_protocol = rt->rt6i_protocol;
2587 	if (rt->rt6i_flags & RTF_DYNAMIC)
2588 		rtm->rtm_protocol = RTPROT_REDIRECT;
2589 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
2590 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2591 			rtm->rtm_protocol = RTPROT_RA;
2592 		else
2593 			rtm->rtm_protocol = RTPROT_KERNEL;
2594 	}
2595 
2596 	if (rt->rt6i_flags & RTF_CACHE)
2597 		rtm->rtm_flags |= RTM_F_CLONED;
2598 
2599 	if (dst) {
2600 		if (nla_put(skb, RTA_DST, 16, dst))
2601 			goto nla_put_failure;
2602 		rtm->rtm_dst_len = 128;
2603 	} else if (rtm->rtm_dst_len)
2604 		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2605 			goto nla_put_failure;
2606 #ifdef CONFIG_IPV6_SUBTREES
2607 	if (src) {
2608 		if (nla_put(skb, RTA_SRC, 16, src))
2609 			goto nla_put_failure;
2610 		rtm->rtm_src_len = 128;
2611 	} else if (rtm->rtm_src_len &&
2612 		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2613 		goto nla_put_failure;
2614 #endif
2615 	if (iif) {
2616 #ifdef CONFIG_IPV6_MROUTE
2617 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2618 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2619 			if (err <= 0) {
2620 				if (!nowait) {
2621 					if (err == 0)
2622 						return 0;
2623 					goto nla_put_failure;
2624 				} else {
2625 					if (err == -EMSGSIZE)
2626 						goto nla_put_failure;
2627 				}
2628 			}
2629 		} else
2630 #endif
2631 			if (nla_put_u32(skb, RTA_IIF, iif))
2632 				goto nla_put_failure;
2633 	} else if (dst) {
2634 		struct in6_addr saddr_buf;
2635 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2636 		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2637 			goto nla_put_failure;
2638 	}
2639 
2640 	if (rt->rt6i_prefsrc.plen) {
2641 		struct in6_addr saddr_buf;
2642 		saddr_buf = rt->rt6i_prefsrc.addr;
2643 		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2644 			goto nla_put_failure;
2645 	}
2646 
2647 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2648 		goto nla_put_failure;
2649 
2650 	if (rt->rt6i_flags & RTF_GATEWAY) {
2651 		if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2652 			goto nla_put_failure;
2653 	}
2654 
2655 	if (rt->dst.dev &&
2656 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2657 		goto nla_put_failure;
2658 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2659 		goto nla_put_failure;
2660 
2661 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2662 
2663 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2664 		goto nla_put_failure;
2665 
2666 	return nlmsg_end(skb, nlh);
2667 
2668 nla_put_failure:
2669 	nlmsg_cancel(skb, nlh);
2670 	return -EMSGSIZE;
2671 }
2672 
2673 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2674 {
2675 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2676 	int prefix;
2677 
2678 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2679 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2680 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2681 	} else
2682 		prefix = 0;
2683 
2684 	return rt6_fill_node(arg->net,
2685 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2686 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2687 		     prefix, 0, NLM_F_MULTI);
2688 }
2689 
2690 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
2691 {
2692 	struct net *net = sock_net(in_skb->sk);
2693 	struct nlattr *tb[RTA_MAX+1];
2694 	struct rt6_info *rt;
2695 	struct sk_buff *skb;
2696 	struct rtmsg *rtm;
2697 	struct flowi6 fl6;
2698 	int err, iif = 0, oif = 0;
2699 
2700 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2701 	if (err < 0)
2702 		goto errout;
2703 
2704 	err = -EINVAL;
2705 	memset(&fl6, 0, sizeof(fl6));
2706 
2707 	if (tb[RTA_SRC]) {
2708 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2709 			goto errout;
2710 
2711 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2712 	}
2713 
2714 	if (tb[RTA_DST]) {
2715 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2716 			goto errout;
2717 
2718 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2719 	}
2720 
2721 	if (tb[RTA_IIF])
2722 		iif = nla_get_u32(tb[RTA_IIF]);
2723 
2724 	if (tb[RTA_OIF])
2725 		oif = nla_get_u32(tb[RTA_OIF]);
2726 
2727 	if (tb[RTA_MARK])
2728 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
2729 
2730 	if (iif) {
2731 		struct net_device *dev;
2732 		int flags = 0;
2733 
2734 		dev = __dev_get_by_index(net, iif);
2735 		if (!dev) {
2736 			err = -ENODEV;
2737 			goto errout;
2738 		}
2739 
2740 		fl6.flowi6_iif = iif;
2741 
2742 		if (!ipv6_addr_any(&fl6.saddr))
2743 			flags |= RT6_LOOKUP_F_HAS_SADDR;
2744 
2745 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2746 							       flags);
2747 	} else {
2748 		fl6.flowi6_oif = oif;
2749 
2750 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2751 	}
2752 
2753 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2754 	if (!skb) {
2755 		ip6_rt_put(rt);
2756 		err = -ENOBUFS;
2757 		goto errout;
2758 	}
2759 
2760 	/* Reserve room for dummy headers, this skb can pass
2761 	   through good chunk of routing engine.
2762 	 */
2763 	skb_reset_mac_header(skb);
2764 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2765 
2766 	skb_dst_set(skb, &rt->dst);
2767 
2768 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2769 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2770 			    nlh->nlmsg_seq, 0, 0, 0);
2771 	if (err < 0) {
2772 		kfree_skb(skb);
2773 		goto errout;
2774 	}
2775 
2776 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2777 errout:
2778 	return err;
2779 }
2780 
2781 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2782 {
2783 	struct sk_buff *skb;
2784 	struct net *net = info->nl_net;
2785 	u32 seq;
2786 	int err;
2787 
2788 	err = -ENOBUFS;
2789 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2790 
2791 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2792 	if (!skb)
2793 		goto errout;
2794 
2795 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2796 				event, info->portid, seq, 0, 0, 0);
2797 	if (err < 0) {
2798 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2799 		WARN_ON(err == -EMSGSIZE);
2800 		kfree_skb(skb);
2801 		goto errout;
2802 	}
2803 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2804 		    info->nlh, gfp_any());
2805 	return;
2806 errout:
2807 	if (err < 0)
2808 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2809 }
2810 
2811 static int ip6_route_dev_notify(struct notifier_block *this,
2812 				unsigned long event, void *ptr)
2813 {
2814 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2815 	struct net *net = dev_net(dev);
2816 
2817 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2818 		net->ipv6.ip6_null_entry->dst.dev = dev;
2819 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2820 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2821 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2822 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2823 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2824 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2825 #endif
2826 	}
2827 
2828 	return NOTIFY_OK;
2829 }
2830 
2831 /*
2832  *	/proc
2833  */
2834 
2835 #ifdef CONFIG_PROC_FS
2836 
2837 static const struct file_operations ipv6_route_proc_fops = {
2838 	.owner		= THIS_MODULE,
2839 	.open		= ipv6_route_open,
2840 	.read		= seq_read,
2841 	.llseek		= seq_lseek,
2842 	.release	= seq_release_net,
2843 };
2844 
2845 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2846 {
2847 	struct net *net = (struct net *)seq->private;
2848 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2849 		   net->ipv6.rt6_stats->fib_nodes,
2850 		   net->ipv6.rt6_stats->fib_route_nodes,
2851 		   net->ipv6.rt6_stats->fib_rt_alloc,
2852 		   net->ipv6.rt6_stats->fib_rt_entries,
2853 		   net->ipv6.rt6_stats->fib_rt_cache,
2854 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2855 		   net->ipv6.rt6_stats->fib_discarded_routes);
2856 
2857 	return 0;
2858 }
2859 
2860 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2861 {
2862 	return single_open_net(inode, file, rt6_stats_seq_show);
2863 }
2864 
2865 static const struct file_operations rt6_stats_seq_fops = {
2866 	.owner	 = THIS_MODULE,
2867 	.open	 = rt6_stats_seq_open,
2868 	.read	 = seq_read,
2869 	.llseek	 = seq_lseek,
2870 	.release = single_release_net,
2871 };
2872 #endif	/* CONFIG_PROC_FS */
2873 
2874 #ifdef CONFIG_SYSCTL
2875 
2876 static
2877 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2878 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2879 {
2880 	struct net *net;
2881 	int delay;
2882 	if (!write)
2883 		return -EINVAL;
2884 
2885 	net = (struct net *)ctl->extra1;
2886 	delay = net->ipv6.sysctl.flush_delay;
2887 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2888 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2889 	return 0;
2890 }
2891 
2892 struct ctl_table ipv6_route_table_template[] = {
2893 	{
2894 		.procname	=	"flush",
2895 		.data		=	&init_net.ipv6.sysctl.flush_delay,
2896 		.maxlen		=	sizeof(int),
2897 		.mode		=	0200,
2898 		.proc_handler	=	ipv6_sysctl_rtcache_flush
2899 	},
2900 	{
2901 		.procname	=	"gc_thresh",
2902 		.data		=	&ip6_dst_ops_template.gc_thresh,
2903 		.maxlen		=	sizeof(int),
2904 		.mode		=	0644,
2905 		.proc_handler	=	proc_dointvec,
2906 	},
2907 	{
2908 		.procname	=	"max_size",
2909 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2910 		.maxlen		=	sizeof(int),
2911 		.mode		=	0644,
2912 		.proc_handler	=	proc_dointvec,
2913 	},
2914 	{
2915 		.procname	=	"gc_min_interval",
2916 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2917 		.maxlen		=	sizeof(int),
2918 		.mode		=	0644,
2919 		.proc_handler	=	proc_dointvec_jiffies,
2920 	},
2921 	{
2922 		.procname	=	"gc_timeout",
2923 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2924 		.maxlen		=	sizeof(int),
2925 		.mode		=	0644,
2926 		.proc_handler	=	proc_dointvec_jiffies,
2927 	},
2928 	{
2929 		.procname	=	"gc_interval",
2930 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2931 		.maxlen		=	sizeof(int),
2932 		.mode		=	0644,
2933 		.proc_handler	=	proc_dointvec_jiffies,
2934 	},
2935 	{
2936 		.procname	=	"gc_elasticity",
2937 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2938 		.maxlen		=	sizeof(int),
2939 		.mode		=	0644,
2940 		.proc_handler	=	proc_dointvec,
2941 	},
2942 	{
2943 		.procname	=	"mtu_expires",
2944 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2945 		.maxlen		=	sizeof(int),
2946 		.mode		=	0644,
2947 		.proc_handler	=	proc_dointvec_jiffies,
2948 	},
2949 	{
2950 		.procname	=	"min_adv_mss",
2951 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2952 		.maxlen		=	sizeof(int),
2953 		.mode		=	0644,
2954 		.proc_handler	=	proc_dointvec,
2955 	},
2956 	{
2957 		.procname	=	"gc_min_interval_ms",
2958 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2959 		.maxlen		=	sizeof(int),
2960 		.mode		=	0644,
2961 		.proc_handler	=	proc_dointvec_ms_jiffies,
2962 	},
2963 	{ }
2964 };
2965 
2966 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2967 {
2968 	struct ctl_table *table;
2969 
2970 	table = kmemdup(ipv6_route_table_template,
2971 			sizeof(ipv6_route_table_template),
2972 			GFP_KERNEL);
2973 
2974 	if (table) {
2975 		table[0].data = &net->ipv6.sysctl.flush_delay;
2976 		table[0].extra1 = net;
2977 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2978 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2979 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2980 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2981 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2982 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2983 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2984 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2985 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2986 
2987 		/* Don't export sysctls to unprivileged users */
2988 		if (net->user_ns != &init_user_ns)
2989 			table[0].procname = NULL;
2990 	}
2991 
2992 	return table;
2993 }
2994 #endif
2995 
2996 static int __net_init ip6_route_net_init(struct net *net)
2997 {
2998 	int ret = -ENOMEM;
2999 
3000 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3001 	       sizeof(net->ipv6.ip6_dst_ops));
3002 
3003 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3004 		goto out_ip6_dst_ops;
3005 
3006 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3007 					   sizeof(*net->ipv6.ip6_null_entry),
3008 					   GFP_KERNEL);
3009 	if (!net->ipv6.ip6_null_entry)
3010 		goto out_ip6_dst_entries;
3011 	net->ipv6.ip6_null_entry->dst.path =
3012 		(struct dst_entry *)net->ipv6.ip6_null_entry;
3013 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3014 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3015 			 ip6_template_metrics, true);
3016 
3017 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3018 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3019 					       sizeof(*net->ipv6.ip6_prohibit_entry),
3020 					       GFP_KERNEL);
3021 	if (!net->ipv6.ip6_prohibit_entry)
3022 		goto out_ip6_null_entry;
3023 	net->ipv6.ip6_prohibit_entry->dst.path =
3024 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3025 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3026 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3027 			 ip6_template_metrics, true);
3028 
3029 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3030 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3031 					       GFP_KERNEL);
3032 	if (!net->ipv6.ip6_blk_hole_entry)
3033 		goto out_ip6_prohibit_entry;
3034 	net->ipv6.ip6_blk_hole_entry->dst.path =
3035 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3036 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3037 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3038 			 ip6_template_metrics, true);
3039 #endif
3040 
3041 	net->ipv6.sysctl.flush_delay = 0;
3042 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3043 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3044 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3045 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3046 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3047 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3048 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3049 
3050 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3051 
3052 	ret = 0;
3053 out:
3054 	return ret;
3055 
3056 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3057 out_ip6_prohibit_entry:
3058 	kfree(net->ipv6.ip6_prohibit_entry);
3059 out_ip6_null_entry:
3060 	kfree(net->ipv6.ip6_null_entry);
3061 #endif
3062 out_ip6_dst_entries:
3063 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3064 out_ip6_dst_ops:
3065 	goto out;
3066 }
3067 
3068 static void __net_exit ip6_route_net_exit(struct net *net)
3069 {
3070 	kfree(net->ipv6.ip6_null_entry);
3071 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3072 	kfree(net->ipv6.ip6_prohibit_entry);
3073 	kfree(net->ipv6.ip6_blk_hole_entry);
3074 #endif
3075 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3076 }
3077 
3078 static int __net_init ip6_route_net_init_late(struct net *net)
3079 {
3080 #ifdef CONFIG_PROC_FS
3081 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3082 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3083 #endif
3084 	return 0;
3085 }
3086 
3087 static void __net_exit ip6_route_net_exit_late(struct net *net)
3088 {
3089 #ifdef CONFIG_PROC_FS
3090 	remove_proc_entry("ipv6_route", net->proc_net);
3091 	remove_proc_entry("rt6_stats", net->proc_net);
3092 #endif
3093 }
3094 
3095 static struct pernet_operations ip6_route_net_ops = {
3096 	.init = ip6_route_net_init,
3097 	.exit = ip6_route_net_exit,
3098 };
3099 
3100 static int __net_init ipv6_inetpeer_init(struct net *net)
3101 {
3102 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3103 
3104 	if (!bp)
3105 		return -ENOMEM;
3106 	inet_peer_base_init(bp);
3107 	net->ipv6.peers = bp;
3108 	return 0;
3109 }
3110 
3111 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3112 {
3113 	struct inet_peer_base *bp = net->ipv6.peers;
3114 
3115 	net->ipv6.peers = NULL;
3116 	inetpeer_invalidate_tree(bp);
3117 	kfree(bp);
3118 }
3119 
3120 static struct pernet_operations ipv6_inetpeer_ops = {
3121 	.init	=	ipv6_inetpeer_init,
3122 	.exit	=	ipv6_inetpeer_exit,
3123 };
3124 
3125 static struct pernet_operations ip6_route_net_late_ops = {
3126 	.init = ip6_route_net_init_late,
3127 	.exit = ip6_route_net_exit_late,
3128 };
3129 
3130 static struct notifier_block ip6_route_dev_notifier = {
3131 	.notifier_call = ip6_route_dev_notify,
3132 	.priority = 0,
3133 };
3134 
3135 int __init ip6_route_init(void)
3136 {
3137 	int ret;
3138 
3139 	ret = -ENOMEM;
3140 	ip6_dst_ops_template.kmem_cachep =
3141 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3142 				  SLAB_HWCACHE_ALIGN, NULL);
3143 	if (!ip6_dst_ops_template.kmem_cachep)
3144 		goto out;
3145 
3146 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3147 	if (ret)
3148 		goto out_kmem_cache;
3149 
3150 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3151 	if (ret)
3152 		goto out_dst_entries;
3153 
3154 	ret = register_pernet_subsys(&ip6_route_net_ops);
3155 	if (ret)
3156 		goto out_register_inetpeer;
3157 
3158 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3159 
3160 	/* Registering of the loopback is done before this portion of code,
3161 	 * the loopback reference in rt6_info will not be taken, do it
3162 	 * manually for init_net */
3163 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3164 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3165   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3166 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3167 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3168 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3169 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3170   #endif
3171 	ret = fib6_init();
3172 	if (ret)
3173 		goto out_register_subsys;
3174 
3175 	ret = xfrm6_init();
3176 	if (ret)
3177 		goto out_fib6_init;
3178 
3179 	ret = fib6_rules_init();
3180 	if (ret)
3181 		goto xfrm6_init;
3182 
3183 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3184 	if (ret)
3185 		goto fib6_rules_init;
3186 
3187 	ret = -ENOBUFS;
3188 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3189 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3190 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3191 		goto out_register_late_subsys;
3192 
3193 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3194 	if (ret)
3195 		goto out_register_late_subsys;
3196 
3197 out:
3198 	return ret;
3199 
3200 out_register_late_subsys:
3201 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3202 fib6_rules_init:
3203 	fib6_rules_cleanup();
3204 xfrm6_init:
3205 	xfrm6_fini();
3206 out_fib6_init:
3207 	fib6_gc_cleanup();
3208 out_register_subsys:
3209 	unregister_pernet_subsys(&ip6_route_net_ops);
3210 out_register_inetpeer:
3211 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3212 out_dst_entries:
3213 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3214 out_kmem_cache:
3215 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3216 	goto out;
3217 }
3218 
3219 void ip6_route_cleanup(void)
3220 {
3221 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3222 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3223 	fib6_rules_cleanup();
3224 	xfrm6_fini();
3225 	fib6_gc_cleanup();
3226 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3227 	unregister_pernet_subsys(&ip6_route_net_ops);
3228 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3229 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3230 }
3231