xref: /openbmc/linux/net/ipv6/route.c (revision eb3fcf00)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 
65 #include <asm/uaccess.h>
66 
67 #ifdef CONFIG_SYSCTL
68 #include <linux/sysctl.h>
69 #endif
70 
71 enum rt6_nud_state {
72 	RT6_NUD_FAIL_HARD = -3,
73 	RT6_NUD_FAIL_PROBE = -2,
74 	RT6_NUD_FAIL_DO_RR = -1,
75 	RT6_NUD_SUCCEED = 1
76 };
77 
78 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
79 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
80 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
81 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
82 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
83 static void		ip6_dst_destroy(struct dst_entry *);
84 static void		ip6_dst_ifdown(struct dst_entry *,
85 				       struct net_device *dev, int how);
86 static int		 ip6_dst_gc(struct dst_ops *ops);
87 
88 static int		ip6_pkt_discard(struct sk_buff *skb);
89 static int		ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
90 static int		ip6_pkt_prohibit(struct sk_buff *skb);
91 static int		ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
92 static void		ip6_link_failure(struct sk_buff *skb);
93 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
94 					   struct sk_buff *skb, u32 mtu);
95 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
96 					struct sk_buff *skb);
97 static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
98 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
99 
100 #ifdef CONFIG_IPV6_ROUTE_INFO
101 static struct rt6_info *rt6_add_route_info(struct net *net,
102 					   const struct in6_addr *prefix, int prefixlen,
103 					   const struct in6_addr *gwaddr, int ifindex,
104 					   unsigned int pref);
105 static struct rt6_info *rt6_get_route_info(struct net *net,
106 					   const struct in6_addr *prefix, int prefixlen,
107 					   const struct in6_addr *gwaddr, int ifindex);
108 #endif
109 
110 struct uncached_list {
111 	spinlock_t		lock;
112 	struct list_head	head;
113 };
114 
115 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
116 
117 static void rt6_uncached_list_add(struct rt6_info *rt)
118 {
119 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
120 
121 	rt->dst.flags |= DST_NOCACHE;
122 	rt->rt6i_uncached_list = ul;
123 
124 	spin_lock_bh(&ul->lock);
125 	list_add_tail(&rt->rt6i_uncached, &ul->head);
126 	spin_unlock_bh(&ul->lock);
127 }
128 
129 static void rt6_uncached_list_del(struct rt6_info *rt)
130 {
131 	if (!list_empty(&rt->rt6i_uncached)) {
132 		struct uncached_list *ul = rt->rt6i_uncached_list;
133 
134 		spin_lock_bh(&ul->lock);
135 		list_del(&rt->rt6i_uncached);
136 		spin_unlock_bh(&ul->lock);
137 	}
138 }
139 
/* Re-parent every uncached route referencing @dev (or any device when
 * @dev is NULL) onto the per-netns loopback device, so @dev can be
 * unregistered.  Walks every CPU's uncached list under its lock.
 */
static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			/* move the inet6_dev reference over to loopback */
			if (rt_idev && (rt_idev->dev == dev || !dev) &&
			    rt_idev->dev != loopback_dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			/* likewise for the netdev reference held by the dst */
			if (rt_dev && (rt_dev == dev || !dev) &&
			    rt_dev != loopback_dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}
170 
171 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
172 {
173 	return dst_metrics_write_ptr(rt->dst.from);
174 }
175 
176 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
177 {
178 	struct rt6_info *rt = (struct rt6_info *)dst;
179 
180 	if (rt->rt6i_flags & RTF_PCPU)
181 		return rt6_pcpu_cow_metrics(rt);
182 	else if (rt->rt6i_flags & RTF_CACHE)
183 		return NULL;
184 	else
185 		return dst_cow_metrics_generic(dst, old);
186 }
187 
188 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
189 					     struct sk_buff *skb,
190 					     const void *daddr)
191 {
192 	struct in6_addr *p = &rt->rt6i_gateway;
193 
194 	if (!ipv6_addr_any(p))
195 		return (const void *) p;
196 	else if (skb)
197 		return &ipv6_hdr(skb)->daddr;
198 	return daddr;
199 }
200 
201 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
202 					  struct sk_buff *skb,
203 					  const void *daddr)
204 {
205 	struct rt6_info *rt = (struct rt6_info *) dst;
206 	struct neighbour *n;
207 
208 	daddr = choose_neigh_daddr(rt, skb, daddr);
209 	n = __ipv6_neigh_lookup(dst->dev, daddr);
210 	if (n)
211 		return n;
212 	return neigh_create(&nd_tbl, daddr, dst->dev);
213 }
214 
/* dst_ops for regular IPv6 routes; copied per-netns into
 * net->ipv6.ip6_dst_ops.
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};
232 
233 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
234 {
235 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
236 
237 	return mtu ? : dst->dev->mtu;
238 }
239 
/* Blackhole dsts ignore PMTU updates. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}
244 
/* Blackhole dsts ignore redirects. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
249 
/* Blackhole dsts never provide writable metrics. */
static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
					 unsigned long old)
{
	return NULL;
}
255 
/* dst_ops for blackhole dst entries: PMTU updates, redirects and
 * metric writes are all no-ops (see the stubs above).
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
	.neigh_lookup		=	ip6_neigh_lookup,
};
267 
/* Metrics template for the reject/blackhole entries below;
 * only RTAX_HOPLIMIT is listed, explicitly zero.
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
271 
/* Template for the per-netns "null" route: discards packets and
 * reports -ENETUNREACH.  Highest possible metric, permanent refs.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
286 
287 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
288 
/* Template for the "prohibit" route (multiple-tables only): discards
 * packets via the prohibit handlers and reports -EACCES.
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
303 
/* Template for the "blackhole" route (multiple-tables only):
 * silently discards packets (generic dst_discard) with error -EINVAL.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_sk,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
318 
319 #endif
320 
321 /* allocate dst with ip6_dst_ops */
322 static struct rt6_info *__ip6_dst_alloc(struct net *net,
323 					struct net_device *dev,
324 					int flags)
325 {
326 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
327 					0, DST_OBSOLETE_FORCE_CHK, flags);
328 
329 	if (rt) {
330 		struct dst_entry *dst = &rt->dst;
331 
332 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
333 		INIT_LIST_HEAD(&rt->rt6i_siblings);
334 		INIT_LIST_HEAD(&rt->rt6i_uncached);
335 	}
336 	return rt;
337 }
338 
339 static struct rt6_info *ip6_dst_alloc(struct net *net,
340 				      struct net_device *dev,
341 				      int flags)
342 {
343 	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
344 
345 	if (rt) {
346 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
347 		if (rt->rt6i_pcpu) {
348 			int cpu;
349 
350 			for_each_possible_cpu(cpu) {
351 				struct rt6_info **p;
352 
353 				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
354 				/* no one shares rt */
355 				*p =  NULL;
356 			}
357 		} else {
358 			dst_destroy((struct dst_entry *)rt);
359 			return NULL;
360 		}
361 	}
362 
363 	return rt;
364 }
365 
/* dst_ops->destroy: release everything a rt6_info holds - generic
 * metrics, the per-cpu clone array, uncached-list membership, the
 * inet6_dev reference and the "from" route it may be cloned off.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	/* clear the pointer before dropping the parent's reference */
	dst->from = NULL;
	dst_release(from);
}
385 
386 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
387 			   int how)
388 {
389 	struct rt6_info *rt = (struct rt6_info *)dst;
390 	struct inet6_dev *idev = rt->rt6i_idev;
391 	struct net_device *loopback_dev =
392 		dev_net(dev)->loopback_dev;
393 
394 	if (dev != loopback_dev) {
395 		if (idev && idev->dev == dev) {
396 			struct inet6_dev *loopback_idev =
397 				in6_dev_get(loopback_dev);
398 			if (loopback_idev) {
399 				rt->rt6i_idev = loopback_idev;
400 				in6_dev_put(idev);
401 			}
402 		}
403 	}
404 }
405 
406 static bool rt6_check_expired(const struct rt6_info *rt)
407 {
408 	if (rt->rt6i_flags & RTF_EXPIRES) {
409 		if (time_after(jiffies, rt->dst.expires))
410 			return true;
411 	} else if (rt->dst.from) {
412 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
413 	}
414 	return false;
415 }
416 
417 /* Multipath route selection:
418  *   Hash based function using packet header and flowlabel.
419  * Adapted from fib_info_hashfn()
420  */
421 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
422 			       const struct flowi6 *fl6)
423 {
424 	unsigned int val = fl6->flowi6_proto;
425 
426 	val ^= ipv6_addr_hash(&fl6->daddr);
427 	val ^= ipv6_addr_hash(&fl6->saddr);
428 
429 	/* Work only if this not encapsulated */
430 	switch (fl6->flowi6_proto) {
431 	case IPPROTO_UDP:
432 	case IPPROTO_TCP:
433 	case IPPROTO_SCTP:
434 		val ^= (__force u16)fl6->fl6_sport;
435 		val ^= (__force u16)fl6->fl6_dport;
436 		break;
437 
438 	case IPPROTO_ICMPV6:
439 		val ^= (__force u16)fl6->fl6_icmp_type;
440 		val ^= (__force u16)fl6->fl6_icmp_code;
441 		break;
442 	}
443 	/* RFC6438 recommands to use flowlabel */
444 	val ^= (__force u32)fl6->flowlabel;
445 
446 	/* Perhaps, we need to tune, this function? */
447 	val = val ^ (val >> 7) ^ (val >> 12);
448 	return val % candidate_count;
449 }
450 
451 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
452 					     struct flowi6 *fl6, int oif,
453 					     int strict)
454 {
455 	struct rt6_info *sibling, *next_sibling;
456 	int route_choosen;
457 
458 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
459 	/* Don't change the route, if route_choosen == 0
460 	 * (siblings does not include ourself)
461 	 */
462 	if (route_choosen)
463 		list_for_each_entry_safe(sibling, next_sibling,
464 				&match->rt6i_siblings, rt6i_siblings) {
465 			route_choosen--;
466 			if (route_choosen == 0) {
467 				if (rt6_score_route(sibling, oif, strict) < 0)
468 					break;
469 				match = sibling;
470 				break;
471 			}
472 		}
473 	return match;
474 }
475 
476 /*
477  *	Route lookup. Any table->tb6_lock is implied.
478  */
479 
/* Pick from the route list starting at @rt the entry matching the
 * requested output device (@oif) and/or source address (@saddr).
 * Loopback routes are remembered as a fallback ("local"); a strict
 * interface lookup that matches nothing yields the null entry.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* nothing to constrain on - keep the head of the list */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE && oif)
						continue;
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		} else {
			/* no oif: match on source address ownership */
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
526 
527 #ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred neighbour-solicitation probe (see rt6_probe()); holds a
 * reference on @dev until rt6_probe_deferred() runs and drops it.
 */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};
533 
534 static void rt6_probe_deferred(struct work_struct *w)
535 {
536 	struct in6_addr mcaddr;
537 	struct __rt6_probe_work *work =
538 		container_of(w, struct __rt6_probe_work, work);
539 
540 	addrconf_addr_solict_mult(&work->target, &mcaddr);
541 	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL);
542 	dev_put(work->dev);
543 	kfree(work);
544 }
545 
/* Schedule a reachability probe for @rt's gateway unless its
 * neighbour entry is already valid or was probed too recently.
 * The actual NS transmit happens from rt6_probe_deferred().
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		/* rate-limit: only probe if the last update is older than
		 * the configured router probe interval
		 */
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		/* no neighbour entry yet - always worth probing */
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);	/* dropped in rt6_probe_deferred() */
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
592 #else
/* CONFIG_IPV6_ROUTER_PREF disabled: router probing is a no-op */
static inline void rt6_probe(struct rt6_info *rt)
{
}
596 #endif
597 
598 /*
599  * Default Router Selection (RFC 2461 6.3.6)
600  */
601 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
602 {
603 	struct net_device *dev = rt->dst.dev;
604 	if (!oif || dev->ifindex == oif)
605 		return 2;
606 	if ((dev->flags & IFF_LOOPBACK) &&
607 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
608 		return 1;
609 	return 0;
610 }
611 
/* Neighbour-reachability component of the route score.  Routes without
 * a next hop always succeed; gateway routes are judged by the NUD
 * state of the gateway's neighbour entry (under RCU + neigh lock).
 */
static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		/* with router preference: tolerate anything but FAILED,
		 * and ask for a probe when the entry has failed
		 */
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		/* no neighbour entry: succeed if prefs are enabled,
		 * otherwise fall back to round-robin selection
		 */
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
642 
643 static int rt6_score_route(struct rt6_info *rt, int oif,
644 			   int strict)
645 {
646 	int m;
647 
648 	m = rt6_check_dev(rt, oif);
649 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
650 		return RT6_NUD_FAIL_HARD;
651 #ifdef CONFIG_IPV6_ROUTER_PREF
652 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
653 #endif
654 	if (strict & RT6_LOOKUP_F_REACHABLE) {
655 		int n = rt6_check_neigh(rt);
656 		if (n < 0)
657 			return n;
658 	}
659 	return m;
660 }
661 
/* Compare @rt against the best candidate so far (@match, score *mpri)
 * and return the better of the two.  Sets *do_rr when the winning
 * candidate asked for round-robin rotation (RT6_NUD_FAIL_DO_RR).
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *dev = rt->dst.dev;

	/* NOTE(review): idev is dereferenced without a NULL check here;
	 * assumes every candidate route has rt6i_idev set - confirm.
	 */
	if (dev && !netif_carrier_ok(dev) &&
	    idev->cnf.ignore_routes_with_linkdown)
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}
698 
/* Scan the routes of @fn sharing @metric for the best candidate,
 * starting at the round-robin pointer @rr_head and wrapping around to
 * the leaf.  If nothing at @metric matched, continue into the
 * higher-metric tail (@cont) as a last resort.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	/* first pass: from rr_head to the end of the metric group */
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	/* second pass: wrap around from the leaf up to rr_head */
	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	/* nothing at @metric matched: consider the higher-metric tail */
	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}
735 
/* Select the best route of fib6 node @fn for (@oif, @strict),
 * maintaining the node's round-robin pointer (fn->rr_ptr) so equal
 * routers are rotated.  Returns the null entry when nothing matches.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
	bool do_rr = false;

	rt0 = fn->rr_ptr;
	/* NOTE(review): rt0 is dereferenced below; assumes fn->leaf is
	 * never NULL for nodes reaching this path - confirm.
	 */
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
763 
764 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
765 {
766 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
767 }
768 
769 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Handle a Route Information Option received in a Router
 * Advertisement (RFC 4191): validate the option, then add, refresh or
 * delete the advertised route according to its lifetime/preference.
 * Returns 0 on success or -EINVAL for malformed options.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		/* prefixes longer than 64 bits need at least 2 option units */
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	/* a zero-length prefix means "default route via this router" */
	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex);

	/* zero lifetime withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
843 #endif
844 
845 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
846 					struct in6_addr *saddr)
847 {
848 	struct fib6_node *pn;
849 	while (1) {
850 		if (fn->fn_flags & RTN_TL_ROOT)
851 			return NULL;
852 		pn = fn->parent;
853 		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
854 			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
855 		else
856 			fn = pn;
857 		if (fn->fn_flags & RTN_RTINFO)
858 			return fn;
859 	}
860 }
861 
/* Simple (non-caching) route lookup in one table.  Holds tb6_lock for
 * reading, backtracks up the tree if the match is the null entry, and
 * returns the result with its use count bumped via dst_use().
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	/* only spread over ECMP siblings when no oif was requested */
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}
886 
/* Policy-rule aware wrapper around ip6_pol_route_lookup(); the
 * returned dst carries the reference taken by that helper.
 */
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				    int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
893 
894 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
895 			    const struct in6_addr *saddr, int oif, int strict)
896 {
897 	struct flowi6 fl6 = {
898 		.flowi6_oif = oif,
899 		.daddr = *daddr,
900 	};
901 	struct dst_entry *dst;
902 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
903 
904 	if (saddr) {
905 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
906 		flags |= RT6_LOOKUP_F_HAS_SADDR;
907 	}
908 
909 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
910 	if (dst->error == 0)
911 		return (struct rt6_info *) dst;
912 
913 	dst_release(dst);
914 
915 	return NULL;
916 }
917 EXPORT_SYMBOL(rt6_lookup);
918 
919 /* ip6_ins_rt is called with FREE table->tb6_lock.
920    It takes new route entry, the addition fails by any reason the
921    route is freed. In any case, if caller does not hold it, it may
922    be destroyed.
923  */
924 
925 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
926 			struct mx6_config *mxc)
927 {
928 	int err;
929 	struct fib6_table *table;
930 
931 	table = rt->rt6i_table;
932 	write_lock_bh(&table->tb6_lock);
933 	err = fib6_add(&table->tb6_root, rt, info, mxc);
934 	write_unlock_bh(&table->tb6_lock);
935 
936 	return err;
937 }
938 
939 int ip6_ins_rt(struct rt6_info *rt)
940 {
941 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
942 	struct mx6_config mxc = { .mx = NULL, };
943 
944 	return __ip6_ins_rt(rt, &info, &mxc);
945 }
946 
/* Create an RTF_CACHE host-route clone of @ort for (@daddr, @saddr).
 * Clones of clones are redirected to the original route via dst.from.
 * Returns NULL on allocation failure.
 */
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	/* never clone a clone - go back to the tree-owned parent */
	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;

	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);

	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	/* the clone is a /128 host route for @daddr */
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}
986 
987 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
988 {
989 	struct rt6_info *pcpu_rt;
990 
991 	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
992 				  rt->dst.dev, rt->dst.flags);
993 
994 	if (!pcpu_rt)
995 		return NULL;
996 	ip6_rt_copy_init(pcpu_rt, rt);
997 	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
998 	pcpu_rt->rt6i_flags |= RTF_PCPU;
999 	return pcpu_rt;
1000 }
1001 
1002 /* It should be called with read_lock_bh(&tb6_lock) acquired */
1003 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1004 {
1005 	struct rt6_info *pcpu_rt, **p;
1006 
1007 	p = this_cpu_ptr(rt->rt6i_pcpu);
1008 	pcpu_rt = *p;
1009 
1010 	if (pcpu_rt) {
1011 		dst_hold(&pcpu_rt->dst);
1012 		rt6_dst_from_metrics_check(pcpu_rt);
1013 	}
1014 	return pcpu_rt;
1015 }
1016 
/* Create and install this CPU's clone of @rt.  Called without
 * tb6_lock (allocation may trigger GC which takes the write lock);
 * the cmpxchg below resolves races with a concurrent installer.
 * Always returns a dst with a reference held.
 */
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct fib6_table *table = rt->rt6i_table;
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		/* allocation failed: hand back the null entry */
		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	read_lock_bh(&table->tb6_lock);
	if (rt->rt6i_pcpu) {
		p = this_cpu_ptr(rt->rt6i_pcpu);
		prev = cmpxchg(p, NULL, pcpu_rt);
		if (prev) {
			/* If someone did it before us, return prev instead */
			dst_destroy(&pcpu_rt->dst);
			pcpu_rt = prev;
		}
	} else {
		/* rt has been removed from the fib6 tree
		 * before we have a chance to acquire the read_lock.
		 * In this case, don't brother to create a pcpu rt
		 * since rt is going away anyway.  The next
		 * dst_check() will trigger a re-lookup.
		 */
		dst_destroy(&pcpu_rt->dst);
		pcpu_rt = rt;
	}
	dst_hold(&pcpu_rt->dst);
	rt6_dst_from_metrics_check(pcpu_rt);
	read_unlock_bh(&table->tb6_lock);
	return pcpu_rt;
}
1054 
/* Core policy lookup shared by the input and output paths.  Returns a
 * dst with a reference held: either a cache/null entry taken directly
 * from the tree, a one-off uncached clone (FLOWI_FLAG_KNOWN_NH on a
 * non-gateway route), or a per-cpu copy of the matched fib6 entry.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	/* hosts (not forwarding) prefer (probably) reachable routers */
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}


	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt)
			rt6_uncached_list_add(uncached_rt);
		else
			uncached_rt = net->ipv6.ip6_null_entry;

		dst_hold(&uncached_rt->dst);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		return pcpu_rt;

	}
}
1144 
1145 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1146 					    struct flowi6 *fl6, int flags)
1147 {
1148 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1149 }
1150 
1151 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1152 						struct net_device *dev,
1153 						struct flowi6 *fl6, int flags)
1154 {
1155 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1156 		flags |= RT6_LOOKUP_F_IFACE;
1157 
1158 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1159 }
1160 
/* Route an inbound packet: build a flow key from the IPv6 header and the
 * ingress device, look the route up, and attach the result as skb's dst.
 */
void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip_tunnel_info *tun_info;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	/* Only RX-side tunnel metadata contributes its tunnel id to the key. */
	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
	/* Drop any stale dst before installing the fresh lookup result. */
	skb_dst_drop(skb);
	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
1182 
1183 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1184 					     struct flowi6 *fl6, int flags)
1185 {
1186 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1187 }
1188 
1189 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1190 				    struct flowi6 *fl6)
1191 {
1192 	int flags = 0;
1193 
1194 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1195 
1196 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1197 		flags |= RT6_LOOKUP_F_IFACE;
1198 
1199 	if (!ipv6_addr_any(&fl6->saddr))
1200 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1201 	else if (sk)
1202 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1203 
1204 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1205 }
1206 EXPORT_SYMBOL(ip6_route_output);
1207 
/* Replace @dst_orig with a blackhole dst that silently discards every
 * packet while preserving the original route's metrics and identity
 * fields.  Consumes the caller's reference on @dst_orig.  Returns the
 * new dst, or ERR_PTR(-ENOMEM) on allocation failure.
 */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		new = &rt->dst;

		/* Zero everything beyond the embedded dst_entry. */
		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));

		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_sk;

		if (dst_metrics_read_only(&ort->dst))
			new->_metrics = ort->dst._metrics;
		else
			dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		/* NOTE(review): dst_free() on a dst still holding the
		 * initial reference marks it for destruction once that
		 * reference is dropped — confirm against dst_alloc()'s
		 * initial_ref=1 semantics.
		 */
		dst_free(new);
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
1246 
1247 /*
1248  *	Destination cache support functions
1249  */
1250 
1251 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1252 {
1253 	if (rt->dst.from &&
1254 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1255 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1256 }
1257 
1258 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1259 {
1260 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1261 		return NULL;
1262 
1263 	if (rt6_check_expired(rt))
1264 		return NULL;
1265 
1266 	return &rt->dst;
1267 }
1268 
1269 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1270 {
1271 	if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1272 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1273 		return &rt->dst;
1274 	else
1275 		return NULL;
1276 }
1277 
/* dst_ops->check: revalidate a cached dst against the fib cookie.
 * Returns @dst if still usable, NULL to make the caller re-route.
 */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */

	rt6_dst_from_metrics_check(rt);

	/* Per-cpu and uncached clones are validated via their parent
	 * route; fib-owned routes are checked directly.
	 */
	if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
		return rt6_dst_from_check(rt, cookie);
	else
		return rt6_check(rt, cookie);
}
1296 
/* dst_ops->negative_advice: the caller suspects this dst is stale.
 * Expired RTF_CACHE clones are deleted from the fib; non-cache dsts
 * are released.  Returns the dst if it is still worth keeping, else
 * NULL.
 */
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				/* ip6_del_rt() consumes the caller's ref. */
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}
1314 
/* dst_ops->link_failure: the next hop is unreachable.  Report it via
 * ICMPv6, then either drop the cached clone that led here or invalidate
 * the default route's fib node.
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* Hold a reference for ip6_del_rt() to consume. */
			dst_hold(&rt->dst);
			ip6_del_rt(rt);
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
			/* Poison the node's serial number so cached dsts
			 * fail their next rt6_check() and get re-looked-up.
			 */
			rt->rt6i_node->fn_sernum = -1;
		}
	}
}
1331 
1332 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1333 {
1334 	struct net *net = dev_net(rt->dst.dev);
1335 
1336 	rt->rt6i_flags |= RTF_MODIFIED;
1337 	rt->rt6i_pmtu = mtu;
1338 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1339 }
1340 
/* Core PMTU update.  RTF_CACHE routes are updated in place; for other
 * routes an RTF_CACHE clone carrying the new pmtu is created and
 * inserted, keyed by the triggering packet's (or socket's) addresses.
 */
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	if (rt6->rt6i_flags & RTF_LOCAL)
		return;

	dst_confirm(dst);
	/* Clamp to the IPv6 minimum MTU, and only ever shrink the path MTU. */
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	if (mtu >= dst_mtu(dst))
		return;

	if (rt6->rt6i_flags & RTF_CACHE) {
		rt6_do_update_pmtu(rt6, mtu);
	} else {
		const struct in6_addr *daddr, *saddr;
		struct rt6_info *nrt6;

		/* Key the clone on the packet's addresses if we have a
		 * header, else on the socket's.
		 */
		if (iph) {
			daddr = &iph->daddr;
			saddr = &iph->saddr;
		} else if (sk) {
			daddr = &sk->sk_v6_daddr;
			saddr = &inet6_sk(sk)->saddr;
		} else {
			return;
		}
		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);

			/* ip6_ins_rt(nrt6) will bump the
			 * rt6->rt6i_node->fn_sernum
			 * which will fail the next rt6_check() and
			 * invalidate the sk->sk_dst_cache.
			 */
			ip6_ins_rt(nrt6);
		}
	}
}
1382 
1383 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1384 			       struct sk_buff *skb, u32 mtu)
1385 {
1386 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1387 }
1388 
1389 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1390 		     int oif, u32 mark)
1391 {
1392 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1393 	struct dst_entry *dst;
1394 	struct flowi6 fl6;
1395 
1396 	memset(&fl6, 0, sizeof(fl6));
1397 	fl6.flowi6_oif = oif;
1398 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1399 	fl6.daddr = iph->daddr;
1400 	fl6.saddr = iph->saddr;
1401 	fl6.flowlabel = ip6_flowinfo(iph);
1402 
1403 	dst = ip6_route_output(net, NULL, &fl6);
1404 	if (!dst->error)
1405 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1406 	dst_release(dst);
1407 }
1408 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1409 
1410 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1411 {
1412 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1413 			sk->sk_bound_dev_if, sk->sk_mark);
1414 }
1415 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1416 
/* Handle redirects */

/* Flow key extended with the redirecting router's address.  Passed to
 * fib6_rule_lookup() as a flowi6 via the embedded member; the callback
 * (__ip6_route_redirect) casts it back to recover the gateway.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
1422 
1423 static struct rt6_info *__ip6_route_redirect(struct net *net,
1424 					     struct fib6_table *table,
1425 					     struct flowi6 *fl6,
1426 					     int flags)
1427 {
1428 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1429 	struct rt6_info *rt;
1430 	struct fib6_node *fn;
1431 
1432 	/* Get the "current" route for this destination and
1433 	 * check if the redirect has come from approriate router.
1434 	 *
1435 	 * RFC 4861 specifies that redirects should only be
1436 	 * accepted if they come from the nexthop to the target.
1437 	 * Due to the way the routes are chosen, this notion
1438 	 * is a bit fuzzy and one might need to check all possible
1439 	 * routes.
1440 	 */
1441 
1442 	read_lock_bh(&table->tb6_lock);
1443 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1444 restart:
1445 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1446 		if (rt6_check_expired(rt))
1447 			continue;
1448 		if (rt->dst.error)
1449 			break;
1450 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1451 			continue;
1452 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1453 			continue;
1454 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1455 			continue;
1456 		break;
1457 	}
1458 
1459 	if (!rt)
1460 		rt = net->ipv6.ip6_null_entry;
1461 	else if (rt->dst.error) {
1462 		rt = net->ipv6.ip6_null_entry;
1463 		goto out;
1464 	}
1465 
1466 	if (rt == net->ipv6.ip6_null_entry) {
1467 		fn = fib6_backtrack(fn, &fl6->saddr);
1468 		if (fn)
1469 			goto restart;
1470 	}
1471 
1472 out:
1473 	dst_hold(&rt->dst);
1474 
1475 	read_unlock_bh(&table->tb6_lock);
1476 
1477 	return rt;
1478 };
1479 
1480 static struct dst_entry *ip6_route_redirect(struct net *net,
1481 					const struct flowi6 *fl6,
1482 					const struct in6_addr *gateway)
1483 {
1484 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1485 	struct ip6rd_flowi rdfl;
1486 
1487 	rdfl.fl6 = *fl6;
1488 	rdfl.gateway = *gateway;
1489 
1490 	return fib6_rule_lookup(net, &rdfl.fl6,
1491 				flags, __ip6_route_redirect);
1492 }
1493 
1494 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1495 {
1496 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1497 	struct dst_entry *dst;
1498 	struct flowi6 fl6;
1499 
1500 	memset(&fl6, 0, sizeof(fl6));
1501 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1502 	fl6.flowi6_oif = oif;
1503 	fl6.flowi6_mark = mark;
1504 	fl6.daddr = iph->daddr;
1505 	fl6.saddr = iph->saddr;
1506 	fl6.flowlabel = ip6_flowinfo(iph);
1507 
1508 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1509 	rt6_do_redirect(dst, NULL, skb);
1510 	dst_release(dst);
1511 }
1512 EXPORT_SYMBOL_GPL(ip6_redirect);
1513 
1514 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1515 			    u32 mark)
1516 {
1517 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1518 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1519 	struct dst_entry *dst;
1520 	struct flowi6 fl6;
1521 
1522 	memset(&fl6, 0, sizeof(fl6));
1523 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1524 	fl6.flowi6_oif = oif;
1525 	fl6.flowi6_mark = mark;
1526 	fl6.daddr = msg->dest;
1527 	fl6.saddr = iph->daddr;
1528 
1529 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1530 	rt6_do_redirect(dst, NULL, skb);
1531 	dst_release(dst);
1532 }
1533 
1534 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1535 {
1536 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1537 }
1538 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1539 
1540 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1541 {
1542 	struct net_device *dev = dst->dev;
1543 	unsigned int mtu = dst_mtu(dst);
1544 	struct net *net = dev_net(dev);
1545 
1546 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1547 
1548 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1549 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1550 
1551 	/*
1552 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1553 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1554 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1555 	 * rely only on pmtu discovery"
1556 	 */
1557 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1558 		mtu = IPV6_MAXPLEN;
1559 	return mtu;
1560 }
1561 
/* dst_ops->mtu: effective MTU for this route.  Priority: learned path
 * MTU (rt6i_pmtu), then the RTAX_MTU metric, then the device's IPv6
 * MTU; the result is capped at IP6_MAX_MTU.
 */
static unsigned int ip6_mtu(const struct dst_entry *dst)
{
	const struct rt6_info *rt = (const struct rt6_info *)dst;
	unsigned int mtu = rt->rt6i_pmtu;
	struct inet6_dev *idev;

	if (mtu)
		goto out;

	mtu = dst_metric_raw(dst, RTAX_MTU);
	if (mtu)
		goto out;

	mtu = IPV6_MIN_MTU;

	/* Fall back to the device's IPv6 MTU; idev is RCU-protected. */
	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

out:
	return min_t(unsigned int, mtu, IP6_MAX_MTU);
}
1586 
/* ICMPv6 dsts are not kept in the fib: icmp6_dst_alloc() chains them on
 * this list, protected by icmp6_dst_lock, and icmp6_dst_gc() /
 * icmp6_clean_all() reap them.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1589 
/* Allocate a standalone host dst for sending an ICMPv6 packet.  The dst
 * is never inserted into the fib; it is chained onto icmp6_dst_gc_list
 * and reclaimed by icmp6_dst_gc() once unreferenced.  Returns the
 * (possibly xfrm-transformed) dst, or an ERR_PTR on failure.
 */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output  = ip6_output;
	atomic_set(&rt->dst.__refcnt, 1);
	rt->rt6i_gateway  = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev     = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	/* Thread the new dst onto the ad-hoc gc list. */
	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	/* Ensure fib6 gc runs so the entry is eventually reaped. */
	fib6_force_start_gc(net);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}
1629 
/* Free unreferenced entries on the icmp6 dst list.  Returns the number
 * of entries still referenced (nonzero: more gc work remains).
 */
int icmp6_dst_gc(void)
{
	struct dst_entry *dst, **pprev;
	int more = 0;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;

	/* Unlink-or-advance via a pointer-to-pointer walk. */
	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
			++more;
		}
	}

	spin_unlock_bh(&icmp6_dst_lock);

	return more;
}
1652 
/* Walk the icmp6 dst list and unlink-and-free every entry for which
 * @func returns nonzero.
 */
static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
			    void *arg)
{
	struct dst_entry *dst, **pprev;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;
	while ((dst = *pprev) != NULL) {
		struct rt6_info *rt = (struct rt6_info *) dst;
		if (func(rt, arg)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
		}
	}
	spin_unlock_bh(&icmp6_dst_lock);
}
1671 
/* dst_ops->gc: shrink the IPv6 dst population.  Garbage collection is
 * rate-limited by ip6_rt_gc_min_interval unless the entry count exceeds
 * ip6_rt_max_size.  Returns nonzero while still over that limit.
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	/* ip6_rt_gc_expire grows with each pass and decays below. */
	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}
1696 
1697 static int ip6_convert_metrics(struct mx6_config *mxc,
1698 			       const struct fib6_config *cfg)
1699 {
1700 	bool ecn_ca = false;
1701 	struct nlattr *nla;
1702 	int remaining;
1703 	u32 *mp;
1704 
1705 	if (!cfg->fc_mx)
1706 		return 0;
1707 
1708 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1709 	if (unlikely(!mp))
1710 		return -ENOMEM;
1711 
1712 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1713 		int type = nla_type(nla);
1714 		u32 val;
1715 
1716 		if (!type)
1717 			continue;
1718 		if (unlikely(type > RTAX_MAX))
1719 			goto err;
1720 
1721 		if (type == RTAX_CC_ALGO) {
1722 			char tmp[TCP_CA_NAME_MAX];
1723 
1724 			nla_strlcpy(tmp, nla, sizeof(tmp));
1725 			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1726 			if (val == TCP_CA_UNSPEC)
1727 				goto err;
1728 		} else {
1729 			val = nla_get_u32(nla);
1730 		}
1731 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1732 			goto err;
1733 
1734 		mp[type - 1] = val;
1735 		__set_bit(type - 1, mxc->mx_valid);
1736 	}
1737 
1738 	if (ecn_ca) {
1739 		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1740 		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1741 	}
1742 
1743 	mxc->mx = mp;
1744 	return 0;
1745  err:
1746 	kfree(mp);
1747 	return -EINVAL;
1748 }
1749 
/* Allocate and initialise an rt6_info from a netlink/ioctl route
 * configuration.  On success *rt_ret holds the new (not yet inserted)
 * route and 0 is returned; on failure *rt_ret is NULL and a negative
 * errno is returned.  Insertion into the fib is the caller's job
 * (see ip6_route_add()).
 */
int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	/* Prefix lengths cannot exceed 128 bits; source-prefix routing
	 * requires CONFIG_IPV6_SUBTREES.
	 */
	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the destination type. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	/* Optional lightweight-tunnel encapsulation state, possibly
	 * redirecting input/output through the lwtunnel handlers.
	 */
	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* The route type selects the error reported for packets
		 * matching this reject route.
		 */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_sk;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0))
			goto out;

		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* Adopt the device/idev the gateway is
				 * reached through.
				 */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	/* An explicit preferred source must be an address on @dev. */
	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	*rt_ret = rt;

	return 0;
out:
	/* Error path: release everything acquired above. */
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);

	*rt_ret = NULL;

	return err;
}
1999 
/* Build a route from @cfg and insert it into the fib.  On success the
 * fib owns the route; on failure everything allocated here is freed.
 */
int ip6_route_add(struct fib6_config *cfg)
{
	struct mx6_config mxc = { .mx = NULL, };
	struct rt6_info *rt = NULL;
	int err;

	err = ip6_route_info_create(cfg, &rt);
	if (err)
		goto out;

	err = ip6_convert_metrics(&mxc, cfg);
	if (err)
		goto out;

	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);

	/* Free the metrics scratch array allocated by ip6_convert_metrics(). */
	kfree(mxc.mx);

	return err;
out:
	if (rt)
		dst_free(&rt->dst);

	return err;
}
2025 
/* Unlink @rt from its fib table under the table write lock, then drop
 * the caller's reference.  The null entry and uncached (DST_NOCACHE)
 * routes are not in any table: deleting them yields -ENOENT.
 */
static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->dst.dev);

	if (rt == net->ipv6.ip6_null_entry ||
	    rt->dst.flags & DST_NOCACHE) {
		err = -ENOENT;
		goto out;
	}

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_del(rt, info);
	write_unlock_bh(&table->tb6_lock);

out:
	/* Consume the reference the caller passed in. */
	ip6_rt_put(rt);
	return err;
}
2047 
2048 int ip6_del_rt(struct rt6_info *rt)
2049 {
2050 	struct nl_info info = {
2051 		.nl_net = dev_net(rt->dst.dev),
2052 	};
2053 	return __ip6_del_rt(rt, &info);
2054 }
2055 
/* Delete the first route matching @cfg's table, prefix, and optional
 * ifindex/gateway/metric qualifiers.  RTF_CACHE clones are matched only
 * when RTF_CACHE was requested explicitly.  Returns 0, the deletion
 * error, or -ESRCH when nothing matched.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			if ((rt->rt6i_flags & RTF_CACHE) &&
			    !(cfg->fc_flags & RTF_CACHE))
				continue;
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			/* Hold the route across the unlock; __ip6_del_rt()
			 * takes the write lock and drops this reference.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
2097 
/* Process an ICMPv6 Redirect received for route @dst: validate the
 * message per RFC 4861, update the neighbour cache with the new first
 * hop, and install an RTF_CACHE clone routed via it.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* target == dest means the destination itself is on-link. */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* Routers and interfaces configured to ignore redirects bail out. */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt == net->ipv6.ip6_null_entry) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/* Install a cached clone pointing at the new first hop. */
	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* The old cached entry is superseded; remove it. */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}
2214 
2215 /*
2216  *	Misc support functions
2217  */
2218 
/* Link @rt to its parent @from: take a reference on the parent and
 * share its metrics.  @from must not itself be a clone.
 */
static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
	BUG_ON(from->dst.from);

	/* NOTE(review): RTF_EXPIRES is cleared before dst.from is set —
	 * presumably because the expiry and "from" fields share storage;
	 * confirm against rt6_set_expires()/rt6_clean_expires().
	 */
	rt->rt6i_flags &= ~RTF_EXPIRES;
	dst_hold(&from->dst);
	rt->dst.from = &from->dst;
	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}
2228 
/* Initialise clone @rt from origin @ort: copy identity and handler
 * fields, take the required references, and link @rt as a child of
 * @ort via rt6_set_from().
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
{
	rt->dst.input = ort->dst.input;
	rt->dst.output = ort->dst.output;
	rt->rt6i_dst = ort->rt6i_dst;
	rt->dst.error = ort->dst.error;
	rt->rt6i_idev = ort->rt6i_idev;
	if (rt->rt6i_idev)
		in6_dev_hold(rt->rt6i_idev);
	rt->dst.lastuse = jiffies;
	rt->rt6i_gateway = ort->rt6i_gateway;
	rt->rt6i_flags = ort->rt6i_flags;
	rt6_set_from(rt, ort);
	rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->rt6i_src;
#endif
	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
	rt->rt6i_table = ort->rt6i_table;
	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}
2250 
2251 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Look up an RA route-information route (RTF_ROUTEINFO|RTF_GATEWAY) for
 * @prefix/@prefixlen learned from @gwaddr on @ifindex in the INFO table.
 * Returns a held rt6_info, or NULL if not found.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);
		break;
	}
out:
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
2283 
/* Install an RA-learned (RFC 4191 Route Information) route for
 * @prefix/@prefixlen via @gwaddr on @ifindex with router preference
 * @pref into RT6_TABLE_INFO.
 *
 * Returns the newly inserted route (looked back up with a reference
 * held), or NULL if the insert failed.
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_INFO,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= ifindex,
		.fc_dst_len	= prefixlen,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
				  RTF_UP | RTF_PREF(pref),
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	cfg.fc_dst = *prefix;
	cfg.fc_gateway = *gwaddr;

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	/* Failure is detected by the lookup below returning NULL. */
	ip6_route_add(&cfg);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
}
2312 #endif
2313 
/* Find the RA-learned default route via router @addr on device @dev in
 * the RT6_TABLE_DFLT table.
 *
 * Returns the route with an extra dst reference held (caller must
 * release it), or NULL if not present.
 */
struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	/* Default routes all live on the root node's leaf list. */
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (dev == rt->dst.dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
2335 
/* Install an RA-learned default route via router @gwaddr on @dev with
 * router preference @pref into RT6_TABLE_DFLT.  The route is created
 * with RTF_EXPIRES; the caller updates the lifetime from the RA.
 *
 * Returns the freshly inserted route (looked back up with a reference
 * held), or NULL on failure.
 */
struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_DFLT,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= dev->ifindex,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	cfg.fc_gateway = *gwaddr;

	/* Failure is detected by the lookup below returning NULL. */
	ip6_route_add(&cfg);

	return rt6_get_dflt_router(gwaddr, dev);
}
2357 
/* Remove all RA-learned default/addrconf routes from RT6_TABLE_DFLT,
 * except on interfaces with accept_ra == 2 (which accept RAs even when
 * forwarding is enabled, so their defaults must be kept).
 *
 * The table lock must be dropped before ip6_del_rt(), which invalidates
 * our walk position, hence the restart-from-scratch loop.
 */
void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (!table)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
			/* Hold a ref so rt survives the unlock, then delete. */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}
2381 
2382 static void rtmsg_to_fib6_config(struct net *net,
2383 				 struct in6_rtmsg *rtmsg,
2384 				 struct fib6_config *cfg)
2385 {
2386 	memset(cfg, 0, sizeof(*cfg));
2387 
2388 	cfg->fc_table = RT6_TABLE_MAIN;
2389 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2390 	cfg->fc_metric = rtmsg->rtmsg_metric;
2391 	cfg->fc_expires = rtmsg->rtmsg_info;
2392 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2393 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2394 	cfg->fc_flags = rtmsg->rtmsg_flags;
2395 
2396 	cfg->fc_nlinfo.nl_net = net;
2397 
2398 	cfg->fc_dst = rtmsg->rtmsg_dst;
2399 	cfg->fc_src = rtmsg->rtmsg_src;
2400 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2401 }
2402 
/* Handle the legacy route ioctls SIOCADDRT / SIOCDELRT.
 *
 * @arg points to a userspace struct in6_rtmsg.  Requires CAP_NET_ADMIN
 * in the netns.  Returns 0 on success, -EPERM / -EFAULT / -EINVAL or
 * the error from ip6_route_add()/ip6_route_del().
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	switch (cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		/* copy_from_user() returns bytes NOT copied, not an errno. */
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return -EFAULT;

		rtmsg_to_fib6_config(net, &rtmsg, &cfg);

		/* Route table modifications are serialized under RTNL. */
		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&cfg);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&cfg);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}
2439 
2440 /*
2441  *	Drop the packet on the floor
2442  */
2443 
/* Common handler for reject routes: bump the appropriate SNMP counter,
 * send an ICMPv6 Destination Unreachable with @code, and free the skb.
 *
 * @ipstats_mib_noroutes selects between the input (IPSTATS_MIB_INNOROUTES)
 * and output (IPSTATS_MIB_OUTNOROUTES) no-route counters.  Always
 * returns 0 (packet consumed).
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY) {
			/* Unspecified destination counts as an address error,
			 * not a routing failure.
			 */
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
2466 
/* dst input handler for blackhole/no-route entries on the input path. */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2471 
/* dst output handler for no-route entries on the output path. */
static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
{
	/* Point skb->dev at the route's device so ICMP replies work. */
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
2477 
/* dst input handler for prohibit routes (admin-prohibited ICMP). */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2482 
/* dst output handler for prohibit routes (admin-prohibited ICMP). */
static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
{
	/* Point skb->dev at the route's device so ICMP replies work. */
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
2488 
2489 /*
2490  *	Allocate a dst for local (unicast / anycast) address.
2491  */
2492 
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    bool anycast)
{
	struct net *net = dev_net(idev->dev);
	/* Local/anycast dsts always sit on the loopback device and are
	 * excluded from the dst gc accounting (DST_NOCOUNT).
	 */
	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
					    DST_NOCOUNT);
	if (!rt)
		return ERR_PTR(-ENOMEM);

	/* Reference for rt->rt6i_idev, set below. */
	in6_dev_hold(idev);

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_idev = idev;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;

	/* Host route: gateway == destination == the configured address. */
	rt->rt6i_gateway  = *addr;
	rt->rt6i_dst.addr = *addr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
	rt->dst.flags |= DST_NOCACHE;

	/* Return with one reference owned by the caller. */
	atomic_set(&rt->dst.__refcnt, 1);

	return rt;
}
2526 
2527 int ip6_route_get_saddr(struct net *net,
2528 			struct rt6_info *rt,
2529 			const struct in6_addr *daddr,
2530 			unsigned int prefs,
2531 			struct in6_addr *saddr)
2532 {
2533 	struct inet6_dev *idev =
2534 		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2535 	int err = 0;
2536 	if (rt && rt->rt6i_prefsrc.plen)
2537 		*saddr = rt->rt6i_prefsrc.addr;
2538 	else
2539 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2540 					 daddr, prefs, saddr);
2541 	return err;
2542 }
2543 
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict to this device; NULL = any */
	struct net *net;	/* netns being cleaned */
	struct in6_addr *addr;	/* address that is going away */
};
2550 
2551 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2552 {
2553 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2554 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2555 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2556 
2557 	if (((void *)rt->dst.dev == dev || !dev) &&
2558 	    rt != net->ipv6.ip6_null_entry &&
2559 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2560 		/* remove prefsrc entry */
2561 		rt->rt6i_prefsrc.plen = 0;
2562 	}
2563 	return 0;
2564 }
2565 
2566 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2567 {
2568 	struct net *net = dev_net(ifp->idev->dev);
2569 	struct arg_dev_net_ip adni = {
2570 		.dev = ifp->idev->dev,
2571 		.net = net,
2572 		.addr = &ifp->addr,
2573 	};
2574 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2575 }
2576 
/* Flag combinations identifying routes learned from a router's RA and
 * cached routes that go through a gateway.
 */
#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)

/* Remove routers and update dst entries when gateway turn into host. */
static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
{
	struct in6_addr *gateway = (struct in6_addr *)arg;

	/* Returning -1 asks the fib6 walker to delete this route. */
	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
		return -1;
	}
	return 0;
}
2592 
/* @gateway stopped advertising itself as a router: purge all RA and
 * cached gateway routes through it (see fib6_clean_tohost()).
 */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
2597 
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches all */
	struct net *net;	/* netns being cleaned */
};
2602 
2603 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2604 {
2605 	const struct arg_dev_net *adn = arg;
2606 	const struct net_device *dev = adn->dev;
2607 
2608 	if ((rt->dst.dev == dev || !dev) &&
2609 	    rt != adn->net->ipv6.ip6_null_entry)
2610 		return -1;
2611 
2612 	return 0;
2613 }
2614 
/* Device @dev is going down: remove every route using it from the fib
 * trees and the ICMP rate-limit dst cache, then flush the uncached
 * route list for the device.
 */
void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, &adn);
	icmp6_clean_all(fib6_ifdown, &adn);
	rt6_uncached_list_flush_dev(net, dev);
}
2626 
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new device MTU */
};
2631 
/* fib6_clean_all() callback: propagate a device MTU change to every
 * route on that device.  Always returns 0 (never deletes routes).
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discouvery.
	 */
	if (rt->dst.dev == arg->dev &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* For RTF_CACHE with rt6i_pmtu == 0
			 * (i.e. a redirected route),
			 * the metrics of its rt->dst.from has already
			 * been updated.
			 */
			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
				rt->rt6i_pmtu = arg->mtu;
		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
			   (dst_mtu(&rt->dst) < arg->mtu &&
			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		}
	}
	return 0;
}
2679 
2680 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2681 {
2682 	struct rt6_mtu_change_arg arg = {
2683 		.dev = dev,
2684 		.mtu = mtu,
2685 	};
2686 
2687 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2688 }
2689 
/* Netlink attribute validation policy for RTM_{NEW,DEL,GET}ROUTE.
 * Attributes without an entry here are accepted without validation.
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]              = { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
};
2701 
/* Parse an RTM_NEWROUTE/RTM_DELROUTE netlink request into @cfg.
 *
 * Validates attributes against rtm_ipv6_policy, maps rtm_type to the
 * corresponding RTF_* flags, and copies all supplied attributes.
 * Nested attribute payloads (metrics, multipath, encap) are referenced
 * in place, not copied - @cfg borrows from @nlh's lifetime.
 *
 * Returns 0 on success or a negative errno.
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	unsigned int pref;
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_type = rtm->rtm_type;

	/* Special route types all become reject routes; the specific
	 * type is recovered from fc_type when the route is installed.
	 */
	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	if (rtm->rtm_flags & RTM_F_CLONED)
		cfg->fc_flags |= RTF_CACHE;

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		/* Only prefix-length bytes need to be present. */
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
	}

	if (tb[RTA_PREF]) {
		/* Out-of-range preferences degrade to MEDIUM (RFC 4191). */
		pref = nla_get_u8(tb[RTA_PREF]);
		if (pref != ICMPV6_ROUTER_PREF_LOW &&
		    pref != ICMPV6_ROUTER_PREF_HIGH)
			pref = ICMPV6_ROUTER_PREF_MEDIUM;
		cfg->fc_flags |= RTF_PREF(pref);
	}

	if (tb[RTA_ENCAP])
		cfg->fc_encap = tb[RTA_ENCAP];

	if (tb[RTA_ENCAP_TYPE])
		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);

	err = 0;
errout:
	return err;
}
2804 
/* One parsed nexthop of a multipath route request. */
struct rt6_nh {
	struct rt6_info *rt6_info;	/* route built for this nexthop */
	struct fib6_config r_cfg;	/* per-nexthop config (for rollback) */
	struct mx6_config mxc;		/* converted metrics for insertion */
	struct list_head next;		/* link in rt6_nh_list */
};
2811 
2812 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2813 {
2814 	struct rt6_nh *nh;
2815 
2816 	list_for_each_entry(nh, rt6_nh_list, next) {
2817 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2818 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2819 		        nh->r_cfg.fc_ifindex);
2820 	}
2821 }
2822 
/* Append route @rt (built from @r_cfg) to @rt6_nh_list, rejecting
 * duplicates (same device, same inet6_dev, same gateway) with -EEXIST.
 *
 * On success the list node takes over tracking of @rt; on failure the
 * caller still owns @rt and must free it.  Returns 0, -EEXIST, or
 * -ENOMEM / the error from ip6_convert_metrics().
 */
static int ip6_route_info_append(struct list_head *rt6_nh_list,
				 struct rt6_info *rt, struct fib6_config *r_cfg)
{
	struct rt6_nh *nh;
	struct rt6_info *rtnh;
	int err = -EEXIST;

	list_for_each_entry(nh, rt6_nh_list, next) {
		/* check if rt6_info already exists */
		rtnh = nh->rt6_info;

		if (rtnh->dst.dev == rt->dst.dev &&
		    rtnh->rt6i_idev == rt->rt6i_idev &&
		    ipv6_addr_equal(&rtnh->rt6i_gateway,
				    &rt->rt6i_gateway))
			return err;
	}

	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
	if (!nh)
		return -ENOMEM;
	nh->rt6_info = rt;
	err = ip6_convert_metrics(&nh->mxc, r_cfg);
	if (err) {
		kfree(nh);
		return err;
	}
	/* Keep a copy of the config for rollback via ip6_route_del(). */
	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
	list_add_tail(&nh->next, rt6_nh_list);

	return 0;
}
2855 
/* Install a multipath route: parse each RTA_MULTIPATH nexthop from
 * @cfg into a private list, then insert the resulting routes one by
 * one.  On any insertion failure, roll back the nexthops that were
 * already added.  Returns 0 on success or a negative errno.
 */
static int ip6_route_multipath_add(struct fib6_config *cfg)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	struct rt6_info *rt;
	struct rt6_nh *err_nh;
	struct rt6_nh *nh, *nh_safe;
	int remaining;
	int attrlen;
	int err = 1;
	int nhn = 0;
	int replace = (cfg->fc_nlinfo.nlh &&
		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
	LIST_HEAD(rt6_nh_list);

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
	 * rt6_info structs per nexthop
	 */
	while (rtnh_ok(rtnh, remaining)) {
		/* Each nexthop starts from the shared config, overridden
		 * by its own ifindex/gateway/encap attributes.
		 */
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				r_cfg.fc_gateway = nla_get_in6_addr(nla);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
			if (nla)
				r_cfg.fc_encap_type = nla_get_u16(nla);
		}

		err = ip6_route_info_create(&r_cfg, &rt);
		if (err)
			goto cleanup;

		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
		if (err) {
			/* The list did not take ownership; free rt here. */
			dst_free(&rt->dst);
			goto cleanup;
		}

		rtnh = rtnh_next(rtnh, &remaining);
	}

	err_nh = NULL;
	list_for_each_entry(nh, &rt6_nh_list, next) {
		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
		/* nh->rt6_info is used or freed at this point, reset to NULL*/
		nh->rt6_info = NULL;
		if (err) {
			if (replace && nhn)
				ip6_print_replace_route_err(&rt6_nh_list);
			err_nh = nh;
			goto add_errout;
		}

		/* Because each route is added like a single route we remove
		 * these flags after the first nexthop: if there is a collision,
		 * we have already failed to add the first nexthop:
		 * fib6_add_rt2node() has rejected it; when replacing, old
		 * nexthops have been replaced by first new, the rest should
		 * be added to it.
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
						     NLM_F_REPLACE);
		nhn++;
	}

	goto cleanup;

add_errout:
	/* Delete routes that were already added */
	list_for_each_entry(nh, &rt6_nh_list, next) {
		if (err_nh == nh)
			break;
		ip6_route_del(&nh->r_cfg);
	}

cleanup:
	/* Free list nodes plus any routes never handed to __ip6_ins_rt(). */
	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
		if (nh->rt6_info)
			dst_free(&nh->rt6_info->dst);
		kfree(nh->mxc.mx);
		list_del(&nh->next);
		kfree(nh);
	}

	return err;
}
2955 
/* Delete a multipath route: run ip6_route_del() for each nexthop in
 * the RTA_MULTIPATH payload.  Deletion continues past individual
 * failures; the last error encountered (or 0) is returned.
 */
static int ip6_route_multipath_del(struct fib6_config *cfg)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	int remaining;
	int attrlen;
	int err = 1, last_err = 0;

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry */
	while (rtnh_ok(rtnh, remaining)) {
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
		}
		err = ip6_route_del(&r_cfg);
		if (err)
			last_err = err;

		rtnh = rtnh_next(rtnh, &remaining);
	}

	return last_err;
}
2992 
2993 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2994 {
2995 	struct fib6_config cfg;
2996 	int err;
2997 
2998 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2999 	if (err < 0)
3000 		return err;
3001 
3002 	if (cfg.fc_mp)
3003 		return ip6_route_multipath_del(&cfg);
3004 	else
3005 		return ip6_route_del(&cfg);
3006 }
3007 
3008 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3009 {
3010 	struct fib6_config cfg;
3011 	int err;
3012 
3013 	err = rtm_to_fib6_config(skb, nlh, &cfg);
3014 	if (err < 0)
3015 		return err;
3016 
3017 	if (cfg.fc_mp)
3018 		return ip6_route_multipath_add(&cfg);
3019 	else
3020 		return ip6_route_add(&cfg);
3021 }
3022 
/* Worst-case skb space needed for one RTM_NEWROUTE message about @rt,
 * used to size notification skbs.  Must stay in sync with the
 * attributes emitted by rt6_fill_node(); -EMSGSIZE there implies a bug
 * here.
 */
static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
{
	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
	       + nla_total_size(sizeof(struct rta_cacheinfo))
	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
	       + nla_total_size(1) /* RTA_PREF */
	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
}
3040 
/* Fill one RTM_* netlink message describing route @rt into @skb.
 *
 * @dst/@src: non-NULL for RTM_GETROUTE replies, where the queried
 *	addresses (not the route's prefix) are reported with /128.
 * @iif: input ifindex for input-path queries; 0 otherwise.
 * @prefix: if set, skip (return 1 for) routes without RTF_PREFIX_RT.
 * @nowait: passed through to ip6mr_get_route() for multicast routes.
 *
 * Returns 0 on success, 1 when the route was deliberately skipped,
 * -EMSGSIZE when @skb is too small (message is cancelled).
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	u32 metrics[RTAX_MAX];
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	if (rt->rt6i_flags & RTF_REJECT) {
		/* Map the dst error back to the specific reject type the
		 * route was created with (see rtm_to_fib6_config()).
		 */
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	if (!netif_carrier_ok(rt->dst.dev)) {
		rtm->rtm_flags |= RTNH_F_LINKDOWN;
		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
			rtm->rtm_flags |= RTNH_F_DEAD;
	}
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		if (nla_put_in6_addr(skb, RTA_DST, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		/* Output-path query: report the source address that would
		 * be selected for this destination.
		 */
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* A per-route PMTU overrides the inherited MTU metric. */
	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt6i_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
		goto nla_put_failure;

	lwtunnel_fill_encap(skb, rt->dst.lwtstate);

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
3200 
3201 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3202 {
3203 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3204 	int prefix;
3205 
3206 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3207 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3208 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3209 	} else
3210 		prefix = 0;
3211 
3212 	return rt6_fill_node(arg->net,
3213 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3214 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3215 		     prefix, 0, NLM_F_MULTI);
3216 }
3217 
/* RTM_GETROUTE handler: perform a route lookup for the addresses in
 * the request and unicast the resulting route back to the requester.
 * An RTA_IIF attribute selects an input-path lookup on that device;
 * otherwise an output lookup (optionally constrained by RTA_OIF) is
 * done.  Returns 0 on success or a negative errno.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	int err, iif = 0, oif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));

	/* Full 16-byte addresses are required here, unlike the
	 * prefix-length-bounded copies in rtm_to_fib6_config().
	 */
	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	/* NOTE(review): RTA_MARK has no entry in rtm_ipv6_policy, so its
	 * length is not validated before nla_get_u32() - confirm against
	 * current upstream policy table.
	 */
	if (tb[RTA_MARK])
		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);

	if (iif) {
		struct net_device *dev;
		int flags = 0;

		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
							       flags);
	} else {
		fl6.flowi6_oif = oif;

		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
	}

	/* Lookups return the null/error entry rather than NULL, so rt is
	 * always valid here; any dst.error is reported via rt6_fill_node().
	 */
	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		ip6_rt_put(rt);
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	/* The skb takes over our reference on rt. */
	skb_dst_set(skb, &rt->dst);

	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
3308 
/* Broadcast an RTM_NEWROUTE/RTM_DELROUTE notification for @rt to the
 * RTNLGRP_IPV6_ROUTE multicast group.  @info carries the originating
 * netlink request (if any) for echo/portid/seq handling.  On failure
 * the error is recorded on the group via rtnl_set_sk_err().
 */
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
		     unsigned int nlm_flags)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	seq = info->nlh ? info->nlh->nlmsg_seq : 0;

	/* May run in atomic context, hence gfp_any(). */
	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
	if (!skb)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
				event, info->portid, seq, 0, 0, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
3339 
3340 static int ip6_route_dev_notify(struct notifier_block *this,
3341 				unsigned long event, void *ptr)
3342 {
3343 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3344 	struct net *net = dev_net(dev);
3345 
3346 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3347 		net->ipv6.ip6_null_entry->dst.dev = dev;
3348 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3349 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3350 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3351 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3352 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3353 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3354 #endif
3355 	}
3356 
3357 	return NOTIFY_OK;
3358 }
3359 
3360 /*
3361  *	/proc
3362  */
3363 
3364 #ifdef CONFIG_PROC_FS
3365 
/* File operations for /proc/net/ipv6_route (open routine defined
 * earlier in this file; read side is the generic seq_file machinery).
 */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
3373 
3374 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3375 {
3376 	struct net *net = (struct net *)seq->private;
3377 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3378 		   net->ipv6.rt6_stats->fib_nodes,
3379 		   net->ipv6.rt6_stats->fib_route_nodes,
3380 		   net->ipv6.rt6_stats->fib_rt_alloc,
3381 		   net->ipv6.rt6_stats->fib_rt_entries,
3382 		   net->ipv6.rt6_stats->fib_rt_cache,
3383 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3384 		   net->ipv6.rt6_stats->fib_discarded_routes);
3385 
3386 	return 0;
3387 }
3388 
/* open() for /proc/net/rt6_stats: single-shot, netns-aware seq file. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
3393 
/* File operations for /proc/net/rt6_stats. */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
3401 #endif	/* CONFIG_PROC_FS */
3402 
3403 #ifdef CONFIG_SYSCTL
3404 
3405 static
3406 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3407 			      void __user *buffer, size_t *lenp, loff_t *ppos)
3408 {
3409 	struct net *net;
3410 	int delay;
3411 	if (!write)
3412 		return -EINVAL;
3413 
3414 	net = (struct net *)ctl->extra1;
3415 	delay = net->ipv6.sysctl.flush_delay;
3416 	proc_dointvec(ctl, write, buffer, lenp, ppos);
3417 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3418 	return 0;
3419 }
3420 
/* Template for the per-netns net.ipv6.route sysctl directory.
 *
 * NOTE: entry order is load-bearing — ipv6_route_sysctl_init() patches
 * the .data pointers by index (table[0] .. table[9]).  Add new entries
 * only at the end, before the terminating empty entry, and keep that
 * function in sync.
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* same variable as gc_min_interval, exposed in milliseconds */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
3494 
/* Duplicate ipv6_route_table_template for a new network namespace and
 * redirect every .data pointer at that namespace's own variables.
 * Returns the kmemdup'd table (owned by the caller) or NULL on
 * allocation failure.
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		/* Indices must match the order of entries in
		 * ipv6_route_table_template above.
		 */
		table[0].data = &net->ipv6.sysctl.flush_delay;
		table[0].extra1 = net;	/* flush handler derives the netns from extra1 */
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			table[0].procname = NULL;
	}

	return table;
}
3523 #endif
3524 
/* Per-netns setup of the IPv6 routing engine: copy the dst_ops
 * template, instantiate the special sentinel routes and set default
 * GC/sysctl tunables.  Uses goto-based unwind so each failure point
 * releases exactly what was set up before it.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	/* ip6_null_entry: the "no route" sentinel.  Each namespace gets
	 * its own copy of the template, with dst.path pointing back at
	 * itself and dst.ops at this namespace's dst_ops.
	 */
	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	/* With policy routing, also instantiate the prohibit and
	 * blackhole sentinels, set up the same way as ip6_null_entry.
	 */
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* Default values for the net.ipv6.route.* sysctls */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
3596 
/* Per-netns teardown: free the sentinel routes allocated by
 * ip6_route_net_init() and release the dst entry counter.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
3606 
3607 static int __net_init ip6_route_net_init_late(struct net *net)
3608 {
3609 #ifdef CONFIG_PROC_FS
3610 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3611 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3612 #endif
3613 	return 0;
3614 }
3615 
/* Late per-netns teardown: remove the /proc entries created by
 * ip6_route_net_init_late().
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
3623 
/* Core per-netns routing setup/teardown. */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
3628 
3629 static int __net_init ipv6_inetpeer_init(struct net *net)
3630 {
3631 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3632 
3633 	if (!bp)
3634 		return -ENOMEM;
3635 	inet_peer_base_init(bp);
3636 	net->ipv6.peers = bp;
3637 	return 0;
3638 }
3639 
3640 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3641 {
3642 	struct inet_peer_base *bp = net->ipv6.peers;
3643 
3644 	net->ipv6.peers = NULL;
3645 	inetpeer_invalidate_tree(bp);
3646 	kfree(bp);
3647 }
3648 
/* Per-netns inet_peer base setup/teardown. */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
3653 
/* Late per-netns /proc entry setup/teardown. */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
3658 
/* Netdevice notifier that binds sentinel routes to the loopback
 * device on registration (see ip6_route_dev_notify()).
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
3663 
/* Boot-time initialization of the IPv6 routing subsystem: dst cache,
 * pernet operations, fib6 core, xfrm/fib rules, rtnetlink handlers and
 * the device notifier.  On any failure, everything already registered
 * is unwound in reverse order via the goto chain at the bottom.
 */
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	/* Slab cache for struct rt6_info; shared with the blackhole ops */
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	/* rtnetlink route message handlers for PF_INET6 */
	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

	/* Per-cpu lists tracking dsts that are not attached to a fib node */
	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

	/* Error unwind: reverse order of the registrations above */
out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
3755 
/* Module teardown: undo ip6_route_init() in strict reverse order of
 * registration.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
3768