xref: /openbmc/linux/net/ipv6/route.c (revision a2cce7a9)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 
65 #include <asm/uaccess.h>
66 
67 #ifdef CONFIG_SYSCTL
68 #include <linux/sysctl.h>
69 #endif
70 
/*
 * Result of the next-hop neighbour check done by rt6_check_neigh().
 * rt6_score_route() passes the negative values straight through, so they
 * can never be confused with a valid (non-negative) route score.
 */
enum rt6_nud_state {
	/* find_match() skips the route entirely */
	RT6_NUD_FAIL_HARD	= -3,
	/* neighbour is NUD_FAILED (only with CONFIG_IPV6_ROUTER_PREF) */
	RT6_NUD_FAIL_PROBE	= -2,
	/* find_match() scores the route 0 and asks for round-robin */
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1,
};
77 
78 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
79 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
80 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
81 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
82 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
83 static void		ip6_dst_destroy(struct dst_entry *);
84 static void		ip6_dst_ifdown(struct dst_entry *,
85 				       struct net_device *dev, int how);
86 static int		 ip6_dst_gc(struct dst_ops *ops);
87 
88 static int		ip6_pkt_discard(struct sk_buff *skb);
89 static int		ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
90 static int		ip6_pkt_prohibit(struct sk_buff *skb);
91 static int		ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
92 static void		ip6_link_failure(struct sk_buff *skb);
93 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
94 					   struct sk_buff *skb, u32 mtu);
95 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
96 					struct sk_buff *skb);
97 static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
98 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
99 
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Route-information helpers used by rt6_route_rcv(); defined elsewhere. */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
#endif
109 
110 struct uncached_list {
111 	spinlock_t		lock;
112 	struct list_head	head;
113 };
114 
115 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
116 
117 static void rt6_uncached_list_add(struct rt6_info *rt)
118 {
119 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
120 
121 	rt->dst.flags |= DST_NOCACHE;
122 	rt->rt6i_uncached_list = ul;
123 
124 	spin_lock_bh(&ul->lock);
125 	list_add_tail(&rt->rt6i_uncached, &ul->head);
126 	spin_unlock_bh(&ul->lock);
127 }
128 
129 static void rt6_uncached_list_del(struct rt6_info *rt)
130 {
131 	if (!list_empty(&rt->rt6i_uncached)) {
132 		struct uncached_list *ul = rt->rt6i_uncached_list;
133 
134 		spin_lock_bh(&ul->lock);
135 		list_del(&rt->rt6i_uncached);
136 		spin_unlock_bh(&ul->lock);
137 	}
138 }
139 
140 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
141 {
142 	struct net_device *loopback_dev = net->loopback_dev;
143 	int cpu;
144 
145 	for_each_possible_cpu(cpu) {
146 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
147 		struct rt6_info *rt;
148 
149 		spin_lock_bh(&ul->lock);
150 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
151 			struct inet6_dev *rt_idev = rt->rt6i_idev;
152 			struct net_device *rt_dev = rt->dst.dev;
153 
154 			if (rt_idev && (rt_idev->dev == dev || !dev) &&
155 			    rt_idev->dev != loopback_dev) {
156 				rt->rt6i_idev = in6_dev_get(loopback_dev);
157 				in6_dev_put(rt_idev);
158 			}
159 
160 			if (rt_dev && (rt_dev == dev || !dev) &&
161 			    rt_dev != loopback_dev) {
162 				rt->dst.dev = loopback_dev;
163 				dev_hold(rt->dst.dev);
164 				dev_put(rt_dev);
165 			}
166 		}
167 		spin_unlock_bh(&ul->lock);
168 	}
169 }
170 
171 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
172 {
173 	return dst_metrics_write_ptr(rt->dst.from);
174 }
175 
176 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
177 {
178 	struct rt6_info *rt = (struct rt6_info *)dst;
179 
180 	if (rt->rt6i_flags & RTF_PCPU)
181 		return rt6_pcpu_cow_metrics(rt);
182 	else if (rt->rt6i_flags & RTF_CACHE)
183 		return NULL;
184 	else
185 		return dst_cow_metrics_generic(dst, old);
186 }
187 
188 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
189 					     struct sk_buff *skb,
190 					     const void *daddr)
191 {
192 	struct in6_addr *p = &rt->rt6i_gateway;
193 
194 	if (!ipv6_addr_any(p))
195 		return (const void *) p;
196 	else if (skb)
197 		return &ipv6_hdr(skb)->daddr;
198 	return daddr;
199 }
200 
201 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
202 					  struct sk_buff *skb,
203 					  const void *daddr)
204 {
205 	struct rt6_info *rt = (struct rt6_info *) dst;
206 	struct neighbour *n;
207 
208 	daddr = choose_neigh_daddr(rt, skb, daddr);
209 	n = __ipv6_neigh_lookup(dst->dev, daddr);
210 	if (n)
211 		return n;
212 	return neigh_create(&nd_tbl, daddr, dst->dev);
213 }
214 
215 static struct dst_ops ip6_dst_ops_template = {
216 	.family			=	AF_INET6,
217 	.gc			=	ip6_dst_gc,
218 	.gc_thresh		=	1024,
219 	.check			=	ip6_dst_check,
220 	.default_advmss		=	ip6_default_advmss,
221 	.mtu			=	ip6_mtu,
222 	.cow_metrics		=	ipv6_cow_metrics,
223 	.destroy		=	ip6_dst_destroy,
224 	.ifdown			=	ip6_dst_ifdown,
225 	.negative_advice	=	ip6_negative_advice,
226 	.link_failure		=	ip6_link_failure,
227 	.update_pmtu		=	ip6_rt_update_pmtu,
228 	.redirect		=	rt6_do_redirect,
229 	.local_out		=	__ip6_local_out,
230 	.neigh_lookup		=	ip6_neigh_lookup,
231 };
232 
233 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
234 {
235 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
236 
237 	return mtu ? : dst->dev->mtu;
238 }
239 
240 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
241 					 struct sk_buff *skb, u32 mtu)
242 {
243 }
244 
/* Blackhole routes ignore redirects. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
249 
250 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
251 					 unsigned long old)
252 {
253 	return NULL;
254 }
255 
256 static struct dst_ops ip6_dst_blackhole_ops = {
257 	.family			=	AF_INET6,
258 	.destroy		=	ip6_dst_destroy,
259 	.check			=	ip6_dst_check,
260 	.mtu			=	ip6_blackhole_mtu,
261 	.default_advmss		=	ip6_default_advmss,
262 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
263 	.redirect		=	ip6_rt_blackhole_redirect,
264 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
265 	.neigh_lookup		=	ip6_neigh_lookup,
266 };
267 
268 static const u32 ip6_template_metrics[RTAX_MAX] = {
269 	[RTAX_HOPLIMIT - 1] = 0,
270 };
271 
272 static const struct rt6_info ip6_null_entry_template = {
273 	.dst = {
274 		.__refcnt	= ATOMIC_INIT(1),
275 		.__use		= 1,
276 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
277 		.error		= -ENETUNREACH,
278 		.input		= ip6_pkt_discard,
279 		.output		= ip6_pkt_discard_out,
280 	},
281 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
282 	.rt6i_protocol  = RTPROT_KERNEL,
283 	.rt6i_metric	= ~(u32) 0,
284 	.rt6i_ref	= ATOMIC_INIT(1),
285 };
286 
287 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
288 
289 static const struct rt6_info ip6_prohibit_entry_template = {
290 	.dst = {
291 		.__refcnt	= ATOMIC_INIT(1),
292 		.__use		= 1,
293 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
294 		.error		= -EACCES,
295 		.input		= ip6_pkt_prohibit,
296 		.output		= ip6_pkt_prohibit_out,
297 	},
298 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
299 	.rt6i_protocol  = RTPROT_KERNEL,
300 	.rt6i_metric	= ~(u32) 0,
301 	.rt6i_ref	= ATOMIC_INIT(1),
302 };
303 
304 static const struct rt6_info ip6_blk_hole_entry_template = {
305 	.dst = {
306 		.__refcnt	= ATOMIC_INIT(1),
307 		.__use		= 1,
308 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
309 		.error		= -EINVAL,
310 		.input		= dst_discard,
311 		.output		= dst_discard_sk,
312 	},
313 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
314 	.rt6i_protocol  = RTPROT_KERNEL,
315 	.rt6i_metric	= ~(u32) 0,
316 	.rt6i_ref	= ATOMIC_INIT(1),
317 };
318 
319 #endif
320 
321 /* allocate dst with ip6_dst_ops */
322 static struct rt6_info *__ip6_dst_alloc(struct net *net,
323 					struct net_device *dev,
324 					int flags)
325 {
326 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
327 					0, DST_OBSOLETE_FORCE_CHK, flags);
328 
329 	if (rt) {
330 		struct dst_entry *dst = &rt->dst;
331 
332 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
333 		INIT_LIST_HEAD(&rt->rt6i_siblings);
334 		INIT_LIST_HEAD(&rt->rt6i_uncached);
335 	}
336 	return rt;
337 }
338 
339 static struct rt6_info *ip6_dst_alloc(struct net *net,
340 				      struct net_device *dev,
341 				      int flags)
342 {
343 	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
344 
345 	if (rt) {
346 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
347 		if (rt->rt6i_pcpu) {
348 			int cpu;
349 
350 			for_each_possible_cpu(cpu) {
351 				struct rt6_info **p;
352 
353 				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
354 				/* no one shares rt */
355 				*p =  NULL;
356 			}
357 		} else {
358 			dst_destroy((struct dst_entry *)rt);
359 			return NULL;
360 		}
361 	}
362 
363 	return rt;
364 }
365 
366 static void ip6_dst_destroy(struct dst_entry *dst)
367 {
368 	struct rt6_info *rt = (struct rt6_info *)dst;
369 	struct dst_entry *from = dst->from;
370 	struct inet6_dev *idev;
371 
372 	dst_destroy_metrics_generic(dst);
373 	free_percpu(rt->rt6i_pcpu);
374 	rt6_uncached_list_del(rt);
375 
376 	idev = rt->rt6i_idev;
377 	if (idev) {
378 		rt->rt6i_idev = NULL;
379 		in6_dev_put(idev);
380 	}
381 
382 	dst->from = NULL;
383 	dst_release(from);
384 }
385 
386 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
387 			   int how)
388 {
389 	struct rt6_info *rt = (struct rt6_info *)dst;
390 	struct inet6_dev *idev = rt->rt6i_idev;
391 	struct net_device *loopback_dev =
392 		dev_net(dev)->loopback_dev;
393 
394 	if (dev != loopback_dev) {
395 		if (idev && idev->dev == dev) {
396 			struct inet6_dev *loopback_idev =
397 				in6_dev_get(loopback_dev);
398 			if (loopback_idev) {
399 				rt->rt6i_idev = loopback_idev;
400 				in6_dev_put(idev);
401 			}
402 		}
403 	}
404 }
405 
406 static bool rt6_check_expired(const struct rt6_info *rt)
407 {
408 	if (rt->rt6i_flags & RTF_EXPIRES) {
409 		if (time_after(jiffies, rt->dst.expires))
410 			return true;
411 	} else if (rt->dst.from) {
412 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
413 	}
414 	return false;
415 }
416 
417 /* Multipath route selection:
418  *   Hash based function using packet header and flowlabel.
419  * Adapted from fib_info_hashfn()
420  */
421 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
422 			       const struct flowi6 *fl6)
423 {
424 	unsigned int val = fl6->flowi6_proto;
425 
426 	val ^= ipv6_addr_hash(&fl6->daddr);
427 	val ^= ipv6_addr_hash(&fl6->saddr);
428 
429 	/* Work only if this not encapsulated */
430 	switch (fl6->flowi6_proto) {
431 	case IPPROTO_UDP:
432 	case IPPROTO_TCP:
433 	case IPPROTO_SCTP:
434 		val ^= (__force u16)fl6->fl6_sport;
435 		val ^= (__force u16)fl6->fl6_dport;
436 		break;
437 
438 	case IPPROTO_ICMPV6:
439 		val ^= (__force u16)fl6->fl6_icmp_type;
440 		val ^= (__force u16)fl6->fl6_icmp_code;
441 		break;
442 	}
443 	/* RFC6438 recommands to use flowlabel */
444 	val ^= (__force u32)fl6->flowlabel;
445 
446 	/* Perhaps, we need to tune, this function? */
447 	val = val ^ (val >> 7) ^ (val >> 12);
448 	return val % candidate_count;
449 }
450 
451 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
452 					     struct flowi6 *fl6, int oif,
453 					     int strict)
454 {
455 	struct rt6_info *sibling, *next_sibling;
456 	int route_choosen;
457 
458 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
459 	/* Don't change the route, if route_choosen == 0
460 	 * (siblings does not include ourself)
461 	 */
462 	if (route_choosen)
463 		list_for_each_entry_safe(sibling, next_sibling,
464 				&match->rt6i_siblings, rt6i_siblings) {
465 			route_choosen--;
466 			if (route_choosen == 0) {
467 				if (rt6_score_route(sibling, oif, strict) < 0)
468 					break;
469 				match = sibling;
470 				break;
471 			}
472 		}
473 	return match;
474 }
475 
476 /*
477  *	Route lookup. Any table->tb6_lock is implied.
478  */
479 
480 static inline struct rt6_info *rt6_device_match(struct net *net,
481 						    struct rt6_info *rt,
482 						    const struct in6_addr *saddr,
483 						    int oif,
484 						    int flags)
485 {
486 	struct rt6_info *local = NULL;
487 	struct rt6_info *sprt;
488 
489 	if (!oif && ipv6_addr_any(saddr))
490 		goto out;
491 
492 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
493 		struct net_device *dev = sprt->dst.dev;
494 
495 		if (oif) {
496 			if (dev->ifindex == oif)
497 				return sprt;
498 			if (dev->flags & IFF_LOOPBACK) {
499 				if (!sprt->rt6i_idev ||
500 				    sprt->rt6i_idev->dev->ifindex != oif) {
501 					if (flags & RT6_LOOKUP_F_IFACE && oif)
502 						continue;
503 					if (local && (!oif ||
504 						      local->rt6i_idev->dev->ifindex == oif))
505 						continue;
506 				}
507 				local = sprt;
508 			}
509 		} else {
510 			if (ipv6_chk_addr(net, saddr, dev,
511 					  flags & RT6_LOOKUP_F_IFACE))
512 				return sprt;
513 		}
514 	}
515 
516 	if (oif) {
517 		if (local)
518 			return local;
519 
520 		if (flags & RT6_LOOKUP_F_IFACE)
521 			return net->ipv6.ip6_null_entry;
522 	}
523 out:
524 	return rt;
525 }
526 
527 #ifdef CONFIG_IPV6_ROUTER_PREF
528 struct __rt6_probe_work {
529 	struct work_struct work;
530 	struct in6_addr target;
531 	struct net_device *dev;
532 };
533 
534 static void rt6_probe_deferred(struct work_struct *w)
535 {
536 	struct in6_addr mcaddr;
537 	struct __rt6_probe_work *work =
538 		container_of(w, struct __rt6_probe_work, work);
539 
540 	addrconf_addr_solict_mult(&work->target, &mcaddr);
541 	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL);
542 	dev_put(work->dev);
543 	kfree(work);
544 }
545 
546 static void rt6_probe(struct rt6_info *rt)
547 {
548 	struct __rt6_probe_work *work;
549 	struct neighbour *neigh;
550 	/*
551 	 * Okay, this does not seem to be appropriate
552 	 * for now, however, we need to check if it
553 	 * is really so; aka Router Reachability Probing.
554 	 *
555 	 * Router Reachability Probe MUST be rate-limited
556 	 * to no more than one per minute.
557 	 */
558 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
559 		return;
560 	rcu_read_lock_bh();
561 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
562 	if (neigh) {
563 		if (neigh->nud_state & NUD_VALID)
564 			goto out;
565 
566 		work = NULL;
567 		write_lock(&neigh->lock);
568 		if (!(neigh->nud_state & NUD_VALID) &&
569 		    time_after(jiffies,
570 			       neigh->updated +
571 			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
572 			work = kmalloc(sizeof(*work), GFP_ATOMIC);
573 			if (work)
574 				__neigh_set_probe_once(neigh);
575 		}
576 		write_unlock(&neigh->lock);
577 	} else {
578 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
579 	}
580 
581 	if (work) {
582 		INIT_WORK(&work->work, rt6_probe_deferred);
583 		work->target = rt->rt6i_gateway;
584 		dev_hold(rt->dst.dev);
585 		work->dev = rt->dst.dev;
586 		schedule_work(&work->work);
587 	}
588 
589 out:
590 	rcu_read_unlock_bh();
591 }
592 #else
/* Stub: nothing to probe when CONFIG_IPV6_ROUTER_PREF is not set. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
596 #endif
597 
598 /*
599  * Default Router Selection (RFC 2461 6.3.6)
600  */
601 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
602 {
603 	struct net_device *dev = rt->dst.dev;
604 	if (!oif || dev->ifindex == oif)
605 		return 2;
606 	if ((dev->flags & IFF_LOOPBACK) &&
607 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
608 		return 1;
609 	return 0;
610 }
611 
612 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
613 {
614 	struct neighbour *neigh;
615 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
616 
617 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
618 	    !(rt->rt6i_flags & RTF_GATEWAY))
619 		return RT6_NUD_SUCCEED;
620 
621 	rcu_read_lock_bh();
622 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
623 	if (neigh) {
624 		read_lock(&neigh->lock);
625 		if (neigh->nud_state & NUD_VALID)
626 			ret = RT6_NUD_SUCCEED;
627 #ifdef CONFIG_IPV6_ROUTER_PREF
628 		else if (!(neigh->nud_state & NUD_FAILED))
629 			ret = RT6_NUD_SUCCEED;
630 		else
631 			ret = RT6_NUD_FAIL_PROBE;
632 #endif
633 		read_unlock(&neigh->lock);
634 	} else {
635 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
636 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
637 	}
638 	rcu_read_unlock_bh();
639 
640 	return ret;
641 }
642 
643 static int rt6_score_route(struct rt6_info *rt, int oif,
644 			   int strict)
645 {
646 	int m;
647 
648 	m = rt6_check_dev(rt, oif);
649 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
650 		return RT6_NUD_FAIL_HARD;
651 #ifdef CONFIG_IPV6_ROUTER_PREF
652 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
653 #endif
654 	if (strict & RT6_LOOKUP_F_REACHABLE) {
655 		int n = rt6_check_neigh(rt);
656 		if (n < 0)
657 			return n;
658 	}
659 	return m;
660 }
661 
662 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
663 				   int *mpri, struct rt6_info *match,
664 				   bool *do_rr)
665 {
666 	int m;
667 	bool match_do_rr = false;
668 	struct inet6_dev *idev = rt->rt6i_idev;
669 	struct net_device *dev = rt->dst.dev;
670 
671 	if (dev && !netif_carrier_ok(dev) &&
672 	    idev->cnf.ignore_routes_with_linkdown)
673 		goto out;
674 
675 	if (rt6_check_expired(rt))
676 		goto out;
677 
678 	m = rt6_score_route(rt, oif, strict);
679 	if (m == RT6_NUD_FAIL_DO_RR) {
680 		match_do_rr = true;
681 		m = 0; /* lowest valid score */
682 	} else if (m == RT6_NUD_FAIL_HARD) {
683 		goto out;
684 	}
685 
686 	if (strict & RT6_LOOKUP_F_REACHABLE)
687 		rt6_probe(rt);
688 
689 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
690 	if (m > *mpri) {
691 		*do_rr = match_do_rr;
692 		*mpri = m;
693 		match = rt;
694 	}
695 out:
696 	return match;
697 }
698 
699 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
700 				     struct rt6_info *rr_head,
701 				     u32 metric, int oif, int strict,
702 				     bool *do_rr)
703 {
704 	struct rt6_info *rt, *match, *cont;
705 	int mpri = -1;
706 
707 	match = NULL;
708 	cont = NULL;
709 	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
710 		if (rt->rt6i_metric != metric) {
711 			cont = rt;
712 			break;
713 		}
714 
715 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
716 	}
717 
718 	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
719 		if (rt->rt6i_metric != metric) {
720 			cont = rt;
721 			break;
722 		}
723 
724 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
725 	}
726 
727 	if (match || !cont)
728 		return match;
729 
730 	for (rt = cont; rt; rt = rt->dst.rt6_next)
731 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
732 
733 	return match;
734 }
735 
736 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
737 {
738 	struct rt6_info *match, *rt0;
739 	struct net *net;
740 	bool do_rr = false;
741 
742 	rt0 = fn->rr_ptr;
743 	if (!rt0)
744 		fn->rr_ptr = rt0 = fn->leaf;
745 
746 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
747 			     &do_rr);
748 
749 	if (do_rr) {
750 		struct rt6_info *next = rt0->dst.rt6_next;
751 
752 		/* no entries matched; do round-robin */
753 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
754 			next = fn->leaf;
755 
756 		if (next != rt0)
757 			fn->rr_ptr = next;
758 	}
759 
760 	net = dev_net(rt0->dst.dev);
761 	return match ? match : net->ipv6.ip6_null_entry;
762 }
763 
764 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
765 {
766 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
767 }
768 
769 #ifdef CONFIG_IPV6_ROUTE_INFO
770 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
771 		  const struct in6_addr *gwaddr)
772 {
773 	struct net *net = dev_net(dev);
774 	struct route_info *rinfo = (struct route_info *) opt;
775 	struct in6_addr prefix_buf, *prefix;
776 	unsigned int pref;
777 	unsigned long lifetime;
778 	struct rt6_info *rt;
779 
780 	if (len < sizeof(struct route_info)) {
781 		return -EINVAL;
782 	}
783 
784 	/* Sanity check for prefix_len and length */
785 	if (rinfo->length > 3) {
786 		return -EINVAL;
787 	} else if (rinfo->prefix_len > 128) {
788 		return -EINVAL;
789 	} else if (rinfo->prefix_len > 64) {
790 		if (rinfo->length < 2) {
791 			return -EINVAL;
792 		}
793 	} else if (rinfo->prefix_len > 0) {
794 		if (rinfo->length < 1) {
795 			return -EINVAL;
796 		}
797 	}
798 
799 	pref = rinfo->route_pref;
800 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
801 		return -EINVAL;
802 
803 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
804 
805 	if (rinfo->length == 3)
806 		prefix = (struct in6_addr *)rinfo->prefix;
807 	else {
808 		/* this function is safe */
809 		ipv6_addr_prefix(&prefix_buf,
810 				 (struct in6_addr *)rinfo->prefix,
811 				 rinfo->prefix_len);
812 		prefix = &prefix_buf;
813 	}
814 
815 	if (rinfo->prefix_len == 0)
816 		rt = rt6_get_dflt_router(gwaddr, dev);
817 	else
818 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
819 					gwaddr, dev->ifindex);
820 
821 	if (rt && !lifetime) {
822 		ip6_del_rt(rt);
823 		rt = NULL;
824 	}
825 
826 	if (!rt && lifetime)
827 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
828 					pref);
829 	else if (rt)
830 		rt->rt6i_flags = RTF_ROUTEINFO |
831 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
832 
833 	if (rt) {
834 		if (!addrconf_finite_timeout(lifetime))
835 			rt6_clean_expires(rt);
836 		else
837 			rt6_set_expires(rt, jiffies + HZ * lifetime);
838 
839 		ip6_rt_put(rt);
840 	}
841 	return 0;
842 }
843 #endif
844 
845 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
846 					struct in6_addr *saddr)
847 {
848 	struct fib6_node *pn;
849 	while (1) {
850 		if (fn->fn_flags & RTN_TL_ROOT)
851 			return NULL;
852 		pn = fn->parent;
853 		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
854 			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
855 		else
856 			fn = pn;
857 		if (fn->fn_flags & RTN_RTINFO)
858 			return fn;
859 	}
860 }
861 
862 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
863 					     struct fib6_table *table,
864 					     struct flowi6 *fl6, int flags)
865 {
866 	struct fib6_node *fn;
867 	struct rt6_info *rt;
868 
869 	read_lock_bh(&table->tb6_lock);
870 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
871 restart:
872 	rt = fn->leaf;
873 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
874 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
875 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
876 	if (rt == net->ipv6.ip6_null_entry) {
877 		fn = fib6_backtrack(fn, &fl6->saddr);
878 		if (fn)
879 			goto restart;
880 	}
881 	dst_use(&rt->dst, jiffies);
882 	read_unlock_bh(&table->tb6_lock);
883 	return rt;
884 
885 }
886 
887 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
888 				    int flags)
889 {
890 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
891 }
892 EXPORT_SYMBOL_GPL(ip6_route_lookup);
893 
894 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
895 			    const struct in6_addr *saddr, int oif, int strict)
896 {
897 	struct flowi6 fl6 = {
898 		.flowi6_oif = oif,
899 		.daddr = *daddr,
900 	};
901 	struct dst_entry *dst;
902 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
903 
904 	if (saddr) {
905 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
906 		flags |= RT6_LOOKUP_F_HAS_SADDR;
907 	}
908 
909 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
910 	if (dst->error == 0)
911 		return (struct rt6_info *) dst;
912 
913 	dst_release(dst);
914 
915 	return NULL;
916 }
917 EXPORT_SYMBOL(rt6_lookup);
918 
919 /* ip6_ins_rt is called with FREE table->tb6_lock.
920    It takes new route entry, the addition fails by any reason the
921    route is freed. In any case, if caller does not hold it, it may
922    be destroyed.
923  */
924 
925 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
926 			struct mx6_config *mxc)
927 {
928 	int err;
929 	struct fib6_table *table;
930 
931 	table = rt->rt6i_table;
932 	write_lock_bh(&table->tb6_lock);
933 	err = fib6_add(&table->tb6_root, rt, info, mxc);
934 	write_unlock_bh(&table->tb6_lock);
935 
936 	return err;
937 }
938 
939 int ip6_ins_rt(struct rt6_info *rt)
940 {
941 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
942 	struct mx6_config mxc = { .mx = NULL, };
943 
944 	return __ip6_ins_rt(rt, &info, &mxc);
945 }
946 
947 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
948 					   const struct in6_addr *daddr,
949 					   const struct in6_addr *saddr)
950 {
951 	struct rt6_info *rt;
952 
953 	/*
954 	 *	Clone the route.
955 	 */
956 
957 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
958 		ort = (struct rt6_info *)ort->dst.from;
959 
960 	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
961 
962 	if (!rt)
963 		return NULL;
964 
965 	ip6_rt_copy_init(rt, ort);
966 	rt->rt6i_flags |= RTF_CACHE;
967 	rt->rt6i_metric = 0;
968 	rt->dst.flags |= DST_HOST;
969 	rt->rt6i_dst.addr = *daddr;
970 	rt->rt6i_dst.plen = 128;
971 
972 	if (!rt6_is_gw_or_nonexthop(ort)) {
973 		if (ort->rt6i_dst.plen != 128 &&
974 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
975 			rt->rt6i_flags |= RTF_ANYCAST;
976 #ifdef CONFIG_IPV6_SUBTREES
977 		if (rt->rt6i_src.plen && saddr) {
978 			rt->rt6i_src.addr = *saddr;
979 			rt->rt6i_src.plen = 128;
980 		}
981 #endif
982 	}
983 
984 	return rt;
985 }
986 
987 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
988 {
989 	struct rt6_info *pcpu_rt;
990 
991 	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
992 				  rt->dst.dev, rt->dst.flags);
993 
994 	if (!pcpu_rt)
995 		return NULL;
996 	ip6_rt_copy_init(pcpu_rt, rt);
997 	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
998 	pcpu_rt->rt6i_flags |= RTF_PCPU;
999 	return pcpu_rt;
1000 }
1001 
1002 /* It should be called with read_lock_bh(&tb6_lock) acquired */
1003 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1004 {
1005 	struct rt6_info *pcpu_rt, **p;
1006 
1007 	p = this_cpu_ptr(rt->rt6i_pcpu);
1008 	pcpu_rt = *p;
1009 
1010 	if (pcpu_rt) {
1011 		dst_hold(&pcpu_rt->dst);
1012 		rt6_dst_from_metrics_check(pcpu_rt);
1013 	}
1014 	return pcpu_rt;
1015 }
1016 
1017 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1018 {
1019 	struct fib6_table *table = rt->rt6i_table;
1020 	struct rt6_info *pcpu_rt, *prev, **p;
1021 
1022 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1023 	if (!pcpu_rt) {
1024 		struct net *net = dev_net(rt->dst.dev);
1025 
1026 		dst_hold(&net->ipv6.ip6_null_entry->dst);
1027 		return net->ipv6.ip6_null_entry;
1028 	}
1029 
1030 	read_lock_bh(&table->tb6_lock);
1031 	if (rt->rt6i_pcpu) {
1032 		p = this_cpu_ptr(rt->rt6i_pcpu);
1033 		prev = cmpxchg(p, NULL, pcpu_rt);
1034 		if (prev) {
1035 			/* If someone did it before us, return prev instead */
1036 			dst_destroy(&pcpu_rt->dst);
1037 			pcpu_rt = prev;
1038 		}
1039 	} else {
1040 		/* rt has been removed from the fib6 tree
1041 		 * before we have a chance to acquire the read_lock.
1042 		 * In this case, don't brother to create a pcpu rt
1043 		 * since rt is going away anyway.  The next
1044 		 * dst_check() will trigger a re-lookup.
1045 		 */
1046 		dst_destroy(&pcpu_rt->dst);
1047 		pcpu_rt = rt;
1048 	}
1049 	dst_hold(&pcpu_rt->dst);
1050 	rt6_dst_from_metrics_check(pcpu_rt);
1051 	read_unlock_bh(&table->tb6_lock);
1052 	return pcpu_rt;
1053 }
1054 
1055 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
1056 				      struct flowi6 *fl6, int flags)
1057 {
1058 	struct fib6_node *fn, *saved_fn;
1059 	struct rt6_info *rt;
1060 	int strict = 0;
1061 
1062 	strict |= flags & RT6_LOOKUP_F_IFACE;
1063 	if (net->ipv6.devconf_all->forwarding == 0)
1064 		strict |= RT6_LOOKUP_F_REACHABLE;
1065 
1066 	read_lock_bh(&table->tb6_lock);
1067 
1068 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1069 	saved_fn = fn;
1070 
1071 redo_rt6_select:
1072 	rt = rt6_select(fn, oif, strict);
1073 	if (rt->rt6i_nsiblings)
1074 		rt = rt6_multipath_select(rt, fl6, oif, strict);
1075 	if (rt == net->ipv6.ip6_null_entry) {
1076 		fn = fib6_backtrack(fn, &fl6->saddr);
1077 		if (fn)
1078 			goto redo_rt6_select;
1079 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1080 			/* also consider unreachable route */
1081 			strict &= ~RT6_LOOKUP_F_REACHABLE;
1082 			fn = saved_fn;
1083 			goto redo_rt6_select;
1084 		}
1085 	}
1086 
1087 
1088 	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1089 		dst_use(&rt->dst, jiffies);
1090 		read_unlock_bh(&table->tb6_lock);
1091 
1092 		rt6_dst_from_metrics_check(rt);
1093 		return rt;
1094 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1095 			    !(rt->rt6i_flags & RTF_GATEWAY))) {
1096 		/* Create a RTF_CACHE clone which will not be
1097 		 * owned by the fib6 tree.  It is for the special case where
1098 		 * the daddr in the skb during the neighbor look-up is different
1099 		 * from the fl6->daddr used to look-up route here.
1100 		 */
1101 
1102 		struct rt6_info *uncached_rt;
1103 
1104 		dst_use(&rt->dst, jiffies);
1105 		read_unlock_bh(&table->tb6_lock);
1106 
1107 		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1108 		dst_release(&rt->dst);
1109 
1110 		if (uncached_rt)
1111 			rt6_uncached_list_add(uncached_rt);
1112 		else
1113 			uncached_rt = net->ipv6.ip6_null_entry;
1114 
1115 		dst_hold(&uncached_rt->dst);
1116 		return uncached_rt;
1117 
1118 	} else {
1119 		/* Get a percpu copy */
1120 
1121 		struct rt6_info *pcpu_rt;
1122 
1123 		rt->dst.lastuse = jiffies;
1124 		rt->dst.__use++;
1125 		pcpu_rt = rt6_get_pcpu_route(rt);
1126 
1127 		if (pcpu_rt) {
1128 			read_unlock_bh(&table->tb6_lock);
1129 		} else {
1130 			/* We have to do the read_unlock first
1131 			 * because rt6_make_pcpu_route() may trigger
1132 			 * ip6_dst_gc() which will take the write_lock.
1133 			 */
1134 			dst_hold(&rt->dst);
1135 			read_unlock_bh(&table->tb6_lock);
1136 			pcpu_rt = rt6_make_pcpu_route(rt);
1137 			dst_release(&rt->dst);
1138 		}
1139 
1140 		return pcpu_rt;
1141 
1142 	}
1143 }
1144 
1145 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1146 					    struct flowi6 *fl6, int flags)
1147 {
1148 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1149 }
1150 
1151 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1152 						struct net_device *dev,
1153 						struct flowi6 *fl6, int flags)
1154 {
1155 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1156 		flags |= RT6_LOOKUP_F_IFACE;
1157 
1158 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1159 }
1160 
1161 void ip6_route_input(struct sk_buff *skb)
1162 {
1163 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1164 	struct net *net = dev_net(skb->dev);
1165 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1166 	struct ip_tunnel_info *tun_info;
1167 	struct flowi6 fl6 = {
1168 		.flowi6_iif = skb->dev->ifindex,
1169 		.daddr = iph->daddr,
1170 		.saddr = iph->saddr,
1171 		.flowlabel = ip6_flowinfo(iph),
1172 		.flowi6_mark = skb->mark,
1173 		.flowi6_proto = iph->nexthdr,
1174 	};
1175 
1176 	tun_info = skb_tunnel_info(skb);
1177 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1178 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1179 	skb_dst_drop(skb);
1180 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1181 }
1182 
1183 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1184 					     struct flowi6 *fl6, int flags)
1185 {
1186 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1187 }
1188 
1189 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1190 				    struct flowi6 *fl6)
1191 {
1192 	int flags = 0;
1193 
1194 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1195 
1196 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1197 	    fl6->flowi6_oif)
1198 		flags |= RT6_LOOKUP_F_IFACE;
1199 
1200 	if (!ipv6_addr_any(&fl6->saddr))
1201 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1202 	else if (sk)
1203 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1204 
1205 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1206 }
1207 EXPORT_SYMBOL(ip6_route_output);
1208 
1209 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1210 {
1211 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1212 	struct dst_entry *new = NULL;
1213 
1214 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1215 	if (rt) {
1216 		new = &rt->dst;
1217 
1218 		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1219 
1220 		new->__use = 1;
1221 		new->input = dst_discard;
1222 		new->output = dst_discard_sk;
1223 
1224 		if (dst_metrics_read_only(&ort->dst))
1225 			new->_metrics = ort->dst._metrics;
1226 		else
1227 			dst_copy_metrics(new, &ort->dst);
1228 		rt->rt6i_idev = ort->rt6i_idev;
1229 		if (rt->rt6i_idev)
1230 			in6_dev_hold(rt->rt6i_idev);
1231 
1232 		rt->rt6i_gateway = ort->rt6i_gateway;
1233 		rt->rt6i_flags = ort->rt6i_flags;
1234 		rt->rt6i_metric = 0;
1235 
1236 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1237 #ifdef CONFIG_IPV6_SUBTREES
1238 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1239 #endif
1240 
1241 		dst_free(new);
1242 	}
1243 
1244 	dst_release(dst_orig);
1245 	return new ? new : ERR_PTR(-ENOMEM);
1246 }
1247 
1248 /*
1249  *	Destination cache support functions
1250  */
1251 
1252 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1253 {
1254 	if (rt->dst.from &&
1255 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1256 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1257 }
1258 
1259 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1260 {
1261 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1262 		return NULL;
1263 
1264 	if (rt6_check_expired(rt))
1265 		return NULL;
1266 
1267 	return &rt->dst;
1268 }
1269 
1270 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1271 {
1272 	if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1273 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1274 		return &rt->dst;
1275 	else
1276 		return NULL;
1277 }
1278 
1279 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1280 {
1281 	struct rt6_info *rt;
1282 
1283 	rt = (struct rt6_info *) dst;
1284 
1285 	/* All IPV6 dsts are created with ->obsolete set to the value
1286 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1287 	 * into this function always.
1288 	 */
1289 
1290 	rt6_dst_from_metrics_check(rt);
1291 
1292 	if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
1293 		return rt6_dst_from_check(rt, cookie);
1294 	else
1295 		return rt6_check(rt, cookie);
1296 }
1297 
1298 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1299 {
1300 	struct rt6_info *rt = (struct rt6_info *) dst;
1301 
1302 	if (rt) {
1303 		if (rt->rt6i_flags & RTF_CACHE) {
1304 			if (rt6_check_expired(rt)) {
1305 				ip6_del_rt(rt);
1306 				dst = NULL;
1307 			}
1308 		} else {
1309 			dst_release(dst);
1310 			dst = NULL;
1311 		}
1312 	}
1313 	return dst;
1314 }
1315 
1316 static void ip6_link_failure(struct sk_buff *skb)
1317 {
1318 	struct rt6_info *rt;
1319 
1320 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1321 
1322 	rt = (struct rt6_info *) skb_dst(skb);
1323 	if (rt) {
1324 		if (rt->rt6i_flags & RTF_CACHE) {
1325 			dst_hold(&rt->dst);
1326 			ip6_del_rt(rt);
1327 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1328 			rt->rt6i_node->fn_sernum = -1;
1329 		}
1330 	}
1331 }
1332 
1333 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1334 {
1335 	struct net *net = dev_net(rt->dst.dev);
1336 
1337 	rt->rt6i_flags |= RTF_MODIFIED;
1338 	rt->rt6i_pmtu = mtu;
1339 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1340 }
1341 
1342 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1343 				 const struct ipv6hdr *iph, u32 mtu)
1344 {
1345 	struct rt6_info *rt6 = (struct rt6_info *)dst;
1346 
1347 	if (rt6->rt6i_flags & RTF_LOCAL)
1348 		return;
1349 
1350 	dst_confirm(dst);
1351 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1352 	if (mtu >= dst_mtu(dst))
1353 		return;
1354 
1355 	if (rt6->rt6i_flags & RTF_CACHE) {
1356 		rt6_do_update_pmtu(rt6, mtu);
1357 	} else {
1358 		const struct in6_addr *daddr, *saddr;
1359 		struct rt6_info *nrt6;
1360 
1361 		if (iph) {
1362 			daddr = &iph->daddr;
1363 			saddr = &iph->saddr;
1364 		} else if (sk) {
1365 			daddr = &sk->sk_v6_daddr;
1366 			saddr = &inet6_sk(sk)->saddr;
1367 		} else {
1368 			return;
1369 		}
1370 		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1371 		if (nrt6) {
1372 			rt6_do_update_pmtu(nrt6, mtu);
1373 
1374 			/* ip6_ins_rt(nrt6) will bump the
1375 			 * rt6->rt6i_node->fn_sernum
1376 			 * which will fail the next rt6_check() and
1377 			 * invalidate the sk->sk_dst_cache.
1378 			 */
1379 			ip6_ins_rt(nrt6);
1380 		}
1381 	}
1382 }
1383 
1384 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1385 			       struct sk_buff *skb, u32 mtu)
1386 {
1387 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1388 }
1389 
1390 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1391 		     int oif, u32 mark)
1392 {
1393 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1394 	struct dst_entry *dst;
1395 	struct flowi6 fl6;
1396 
1397 	memset(&fl6, 0, sizeof(fl6));
1398 	fl6.flowi6_oif = oif;
1399 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1400 	fl6.daddr = iph->daddr;
1401 	fl6.saddr = iph->saddr;
1402 	fl6.flowlabel = ip6_flowinfo(iph);
1403 
1404 	dst = ip6_route_output(net, NULL, &fl6);
1405 	if (!dst->error)
1406 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1407 	dst_release(dst);
1408 }
1409 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1410 
1411 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1412 {
1413 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1414 			sk->sk_bound_dev_if, sk->sk_mark);
1415 }
1416 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1417 
/* Handle redirects */

/* Lookup key for redirect handling: an ordinary flow plus the address of
 * the router the redirect came from.
 *
 * __ip6_route_redirect() receives a pointer to @fl6 and casts it back to
 * this structure, so @fl6 must remain the first member.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
1423 
1424 static struct rt6_info *__ip6_route_redirect(struct net *net,
1425 					     struct fib6_table *table,
1426 					     struct flowi6 *fl6,
1427 					     int flags)
1428 {
1429 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1430 	struct rt6_info *rt;
1431 	struct fib6_node *fn;
1432 
1433 	/* Get the "current" route for this destination and
1434 	 * check if the redirect has come from approriate router.
1435 	 *
1436 	 * RFC 4861 specifies that redirects should only be
1437 	 * accepted if they come from the nexthop to the target.
1438 	 * Due to the way the routes are chosen, this notion
1439 	 * is a bit fuzzy and one might need to check all possible
1440 	 * routes.
1441 	 */
1442 
1443 	read_lock_bh(&table->tb6_lock);
1444 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1445 restart:
1446 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1447 		if (rt6_check_expired(rt))
1448 			continue;
1449 		if (rt->dst.error)
1450 			break;
1451 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1452 			continue;
1453 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1454 			continue;
1455 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1456 			continue;
1457 		break;
1458 	}
1459 
1460 	if (!rt)
1461 		rt = net->ipv6.ip6_null_entry;
1462 	else if (rt->dst.error) {
1463 		rt = net->ipv6.ip6_null_entry;
1464 		goto out;
1465 	}
1466 
1467 	if (rt == net->ipv6.ip6_null_entry) {
1468 		fn = fib6_backtrack(fn, &fl6->saddr);
1469 		if (fn)
1470 			goto restart;
1471 	}
1472 
1473 out:
1474 	dst_hold(&rt->dst);
1475 
1476 	read_unlock_bh(&table->tb6_lock);
1477 
1478 	return rt;
1479 };
1480 
1481 static struct dst_entry *ip6_route_redirect(struct net *net,
1482 					const struct flowi6 *fl6,
1483 					const struct in6_addr *gateway)
1484 {
1485 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1486 	struct ip6rd_flowi rdfl;
1487 
1488 	rdfl.fl6 = *fl6;
1489 	rdfl.gateway = *gateway;
1490 
1491 	return fib6_rule_lookup(net, &rdfl.fl6,
1492 				flags, __ip6_route_redirect);
1493 }
1494 
1495 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1496 {
1497 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1498 	struct dst_entry *dst;
1499 	struct flowi6 fl6;
1500 
1501 	memset(&fl6, 0, sizeof(fl6));
1502 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1503 	fl6.flowi6_oif = oif;
1504 	fl6.flowi6_mark = mark;
1505 	fl6.daddr = iph->daddr;
1506 	fl6.saddr = iph->saddr;
1507 	fl6.flowlabel = ip6_flowinfo(iph);
1508 
1509 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1510 	rt6_do_redirect(dst, NULL, skb);
1511 	dst_release(dst);
1512 }
1513 EXPORT_SYMBOL_GPL(ip6_redirect);
1514 
1515 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1516 			    u32 mark)
1517 {
1518 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1519 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1520 	struct dst_entry *dst;
1521 	struct flowi6 fl6;
1522 
1523 	memset(&fl6, 0, sizeof(fl6));
1524 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1525 	fl6.flowi6_oif = oif;
1526 	fl6.flowi6_mark = mark;
1527 	fl6.daddr = msg->dest;
1528 	fl6.saddr = iph->daddr;
1529 
1530 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1531 	rt6_do_redirect(dst, NULL, skb);
1532 	dst_release(dst);
1533 }
1534 
1535 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1536 {
1537 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1538 }
1539 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1540 
1541 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1542 {
1543 	struct net_device *dev = dst->dev;
1544 	unsigned int mtu = dst_mtu(dst);
1545 	struct net *net = dev_net(dev);
1546 
1547 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1548 
1549 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1550 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1551 
1552 	/*
1553 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1554 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1555 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1556 	 * rely only on pmtu discovery"
1557 	 */
1558 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1559 		mtu = IPV6_MAXPLEN;
1560 	return mtu;
1561 }
1562 
1563 static unsigned int ip6_mtu(const struct dst_entry *dst)
1564 {
1565 	const struct rt6_info *rt = (const struct rt6_info *)dst;
1566 	unsigned int mtu = rt->rt6i_pmtu;
1567 	struct inet6_dev *idev;
1568 
1569 	if (mtu)
1570 		goto out;
1571 
1572 	mtu = dst_metric_raw(dst, RTAX_MTU);
1573 	if (mtu)
1574 		goto out;
1575 
1576 	mtu = IPV6_MIN_MTU;
1577 
1578 	rcu_read_lock();
1579 	idev = __in6_dev_get(dst->dev);
1580 	if (idev)
1581 		mtu = idev->cnf.mtu6;
1582 	rcu_read_unlock();
1583 
1584 out:
1585 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1586 }
1587 
/* Singly linked list (through dst->next) of the dsts handed out by
 * icmp6_dst_alloc().  Entries are unlinked and freed by icmp6_dst_gc()
 * and icmp6_clean_all().  All list accesses are made under
 * icmp6_dst_lock.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1590 
1591 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1592 				  struct flowi6 *fl6)
1593 {
1594 	struct dst_entry *dst;
1595 	struct rt6_info *rt;
1596 	struct inet6_dev *idev = in6_dev_get(dev);
1597 	struct net *net = dev_net(dev);
1598 
1599 	if (unlikely(!idev))
1600 		return ERR_PTR(-ENODEV);
1601 
1602 	rt = ip6_dst_alloc(net, dev, 0);
1603 	if (unlikely(!rt)) {
1604 		in6_dev_put(idev);
1605 		dst = ERR_PTR(-ENOMEM);
1606 		goto out;
1607 	}
1608 
1609 	rt->dst.flags |= DST_HOST;
1610 	rt->dst.output  = ip6_output;
1611 	atomic_set(&rt->dst.__refcnt, 1);
1612 	rt->rt6i_gateway  = fl6->daddr;
1613 	rt->rt6i_dst.addr = fl6->daddr;
1614 	rt->rt6i_dst.plen = 128;
1615 	rt->rt6i_idev     = idev;
1616 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1617 
1618 	spin_lock_bh(&icmp6_dst_lock);
1619 	rt->dst.next = icmp6_dst_gc_list;
1620 	icmp6_dst_gc_list = &rt->dst;
1621 	spin_unlock_bh(&icmp6_dst_lock);
1622 
1623 	fib6_force_start_gc(net);
1624 
1625 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1626 
1627 out:
1628 	return dst;
1629 }
1630 
1631 int icmp6_dst_gc(void)
1632 {
1633 	struct dst_entry *dst, **pprev;
1634 	int more = 0;
1635 
1636 	spin_lock_bh(&icmp6_dst_lock);
1637 	pprev = &icmp6_dst_gc_list;
1638 
1639 	while ((dst = *pprev) != NULL) {
1640 		if (!atomic_read(&dst->__refcnt)) {
1641 			*pprev = dst->next;
1642 			dst_free(dst);
1643 		} else {
1644 			pprev = &dst->next;
1645 			++more;
1646 		}
1647 	}
1648 
1649 	spin_unlock_bh(&icmp6_dst_lock);
1650 
1651 	return more;
1652 }
1653 
1654 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1655 			    void *arg)
1656 {
1657 	struct dst_entry *dst, **pprev;
1658 
1659 	spin_lock_bh(&icmp6_dst_lock);
1660 	pprev = &icmp6_dst_gc_list;
1661 	while ((dst = *pprev) != NULL) {
1662 		struct rt6_info *rt = (struct rt6_info *) dst;
1663 		if (func(rt, arg)) {
1664 			*pprev = dst->next;
1665 			dst_free(dst);
1666 		} else {
1667 			pprev = &dst->next;
1668 		}
1669 	}
1670 	spin_unlock_bh(&icmp6_dst_lock);
1671 }
1672 
1673 static int ip6_dst_gc(struct dst_ops *ops)
1674 {
1675 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1676 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1677 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1678 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1679 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1680 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1681 	int entries;
1682 
1683 	entries = dst_entries_get_fast(ops);
1684 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1685 	    entries <= rt_max_size)
1686 		goto out;
1687 
1688 	net->ipv6.ip6_rt_gc_expire++;
1689 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1690 	entries = dst_entries_get_slow(ops);
1691 	if (entries < ops->gc_thresh)
1692 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1693 out:
1694 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1695 	return entries > rt_max_size;
1696 }
1697 
1698 static int ip6_convert_metrics(struct mx6_config *mxc,
1699 			       const struct fib6_config *cfg)
1700 {
1701 	bool ecn_ca = false;
1702 	struct nlattr *nla;
1703 	int remaining;
1704 	u32 *mp;
1705 
1706 	if (!cfg->fc_mx)
1707 		return 0;
1708 
1709 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1710 	if (unlikely(!mp))
1711 		return -ENOMEM;
1712 
1713 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1714 		int type = nla_type(nla);
1715 		u32 val;
1716 
1717 		if (!type)
1718 			continue;
1719 		if (unlikely(type > RTAX_MAX))
1720 			goto err;
1721 
1722 		if (type == RTAX_CC_ALGO) {
1723 			char tmp[TCP_CA_NAME_MAX];
1724 
1725 			nla_strlcpy(tmp, nla, sizeof(tmp));
1726 			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1727 			if (val == TCP_CA_UNSPEC)
1728 				goto err;
1729 		} else {
1730 			val = nla_get_u32(nla);
1731 		}
1732 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1733 			goto err;
1734 
1735 		mp[type - 1] = val;
1736 		__set_bit(type - 1, mxc->mx_valid);
1737 	}
1738 
1739 	if (ecn_ca) {
1740 		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1741 		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1742 	}
1743 
1744 	mxc->mx = mp;
1745 	return 0;
1746  err:
1747 	kfree(mp);
1748 	return -EINVAL;
1749 }
1750 
/* Build (but do not insert) a route from the configuration in @cfg.
 *
 * @cfg:    route description.  Note that it is modified: a zero
 *          fc_metric becomes IP6_RT_PRIO_USER, an unspecified
 *          fc_protocol becomes RTPROT_BOOT, and on success
 *          fc_nlinfo.nl_net is set to the namespace of the chosen device.
 * @rt_ret: receives the new route on success, NULL on failure.
 *
 * On success the route owns the references taken on its device and
 * inet6_dev.  On failure every reference taken here is dropped and the
 * partially built route is freed.
 *
 * Returns 0 on success or a negative errno.
 */
int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-specific routes need subtree support. */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	/* If an interface was named, it and its IPv6 state must exist. */
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	/* A netlink request without NLM_F_CREATE first looks for an
	 * existing table; if there is none, the table is created anyway
	 * after a warning.  All other requests create the table as needed.
	 */
	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	/* DST_NOCOUNT is passed for everything except RTF_ADDRCONF routes. */
	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the kind of destination. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	/* Optional lightweight-tunnel encapsulation: build its state and,
	 * where the tunnel asks for it, interpose the lwtunnel handlers in
	 * front of the ones chosen above (which are saved in the state).
	 */
	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here, as they would
	 * result in kernel looping; promote them to reject routes.
	 * Explicit RTF_REJECT routes take the same path.
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* The route type selects the error code and the handlers
		 * that dispose of packets hitting this route.
		 */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_sk;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0))
			goto out;

		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using non-link-local
			   addresses as nexthop address.
			   Otherwise, the router will not be able to send
			   redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			/* The gateway itself must be reachable through an
			 * existing route.
			 */
			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				/* The requested device must match the one
				 * the gateway is reached through.
				 */
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* No device given: adopt the device (and
				 * idev) of the route to the gateway.
				 */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* The route to the gateway must not itself go
			 * through a gateway; err stays -EHOSTUNREACH
			 * otherwise.
			 */
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	/* A preferred source address must pass ipv6_chk_addr() for dev. */
	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* Hand the dev/idev references over to the route. */
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	*rt_ret = rt;

	return 0;
out:
	/* Failure: undo whatever was acquired so far. */
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);

	*rt_ret = NULL;

	return err;
}
2000 
2001 int ip6_route_add(struct fib6_config *cfg)
2002 {
2003 	struct mx6_config mxc = { .mx = NULL, };
2004 	struct rt6_info *rt = NULL;
2005 	int err;
2006 
2007 	err = ip6_route_info_create(cfg, &rt);
2008 	if (err)
2009 		goto out;
2010 
2011 	err = ip6_convert_metrics(&mxc, cfg);
2012 	if (err)
2013 		goto out;
2014 
2015 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2016 
2017 	kfree(mxc.mx);
2018 
2019 	return err;
2020 out:
2021 	if (rt)
2022 		dst_free(&rt->dst);
2023 
2024 	return err;
2025 }
2026 
2027 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2028 {
2029 	int err;
2030 	struct fib6_table *table;
2031 	struct net *net = dev_net(rt->dst.dev);
2032 
2033 	if (rt == net->ipv6.ip6_null_entry ||
2034 	    rt->dst.flags & DST_NOCACHE) {
2035 		err = -ENOENT;
2036 		goto out;
2037 	}
2038 
2039 	table = rt->rt6i_table;
2040 	write_lock_bh(&table->tb6_lock);
2041 	err = fib6_del(rt, info);
2042 	write_unlock_bh(&table->tb6_lock);
2043 
2044 out:
2045 	ip6_rt_put(rt);
2046 	return err;
2047 }
2048 
2049 int ip6_del_rt(struct rt6_info *rt)
2050 {
2051 	struct nl_info info = {
2052 		.nl_net = dev_net(rt->dst.dev),
2053 	};
2054 	return __ip6_del_rt(rt, &info);
2055 }
2056 
2057 static int ip6_route_del(struct fib6_config *cfg)
2058 {
2059 	struct fib6_table *table;
2060 	struct fib6_node *fn;
2061 	struct rt6_info *rt;
2062 	int err = -ESRCH;
2063 
2064 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2065 	if (!table)
2066 		return err;
2067 
2068 	read_lock_bh(&table->tb6_lock);
2069 
2070 	fn = fib6_locate(&table->tb6_root,
2071 			 &cfg->fc_dst, cfg->fc_dst_len,
2072 			 &cfg->fc_src, cfg->fc_src_len);
2073 
2074 	if (fn) {
2075 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2076 			if ((rt->rt6i_flags & RTF_CACHE) &&
2077 			    !(cfg->fc_flags & RTF_CACHE))
2078 				continue;
2079 			if (cfg->fc_ifindex &&
2080 			    (!rt->dst.dev ||
2081 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
2082 				continue;
2083 			if (cfg->fc_flags & RTF_GATEWAY &&
2084 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2085 				continue;
2086 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2087 				continue;
2088 			dst_hold(&rt->dst);
2089 			read_unlock_bh(&table->tb6_lock);
2090 
2091 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2092 		}
2093 	}
2094 	read_unlock_bh(&table->tb6_lock);
2095 
2096 	return err;
2097 }
2098 
/* Validate and apply the ICMPv6 redirect carried in @skb.
 *
 * @dst: route currently used for the redirected destination; the
 *       namespace's null entry means the sender was not a valid nexthop
 *       and the redirect is ignored
 * @sk:  not used by this function
 * @skb: the redirect message; the transport header must point at it
 *
 * Once all checks pass, the neighbour entry for the redirect target is
 * updated, an RTF_CACHE clone of @dst pointing at the new nexthop is
 * inserted, NETEVENT_REDIRECT is raised, and @dst itself is deleted if
 * it was an RTF_CACHE route.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	/* Bytes left for ND options after the fixed redirect header. */
	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* target == dest means the destination itself is on-link;
	 * otherwise the target is a router and must be link-local unicast.
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	/* Redirects are ignored when the interface forwards or has
	 * accept_redirects switched off.
	 */
	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	/* The target link-layer address option is optional. */
	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt == net->ipv6.ip6_null_entry) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	/* The router flags are only passed when the target is a router,
	 * i.e. not for on-link redirects.
	 */
	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* An old cached route is now superseded; take a reference for
	 * ip6_del_rt(), which drops one.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}
2215 
2216 /*
2217  *	Misc support functions
2218  */
2219 
2220 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2221 {
2222 	BUG_ON(from->dst.from);
2223 
2224 	rt->rt6i_flags &= ~RTF_EXPIRES;
2225 	dst_hold(&from->dst);
2226 	rt->dst.from = &from->dst;
2227 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2228 }
2229 
2230 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2231 {
2232 	rt->dst.input = ort->dst.input;
2233 	rt->dst.output = ort->dst.output;
2234 	rt->rt6i_dst = ort->rt6i_dst;
2235 	rt->dst.error = ort->dst.error;
2236 	rt->rt6i_idev = ort->rt6i_idev;
2237 	if (rt->rt6i_idev)
2238 		in6_dev_hold(rt->rt6i_idev);
2239 	rt->dst.lastuse = jiffies;
2240 	rt->rt6i_gateway = ort->rt6i_gateway;
2241 	rt->rt6i_flags = ort->rt6i_flags;
2242 	rt6_set_from(rt, ort);
2243 	rt->rt6i_metric = ort->rt6i_metric;
2244 #ifdef CONFIG_IPV6_SUBTREES
2245 	rt->rt6i_src = ort->rt6i_src;
2246 #endif
2247 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2248 	rt->rt6i_table = ort->rt6i_table;
2249 	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2250 }
2251 
2252 #ifdef CONFIG_IPV6_ROUTE_INFO
2253 static struct rt6_info *rt6_get_route_info(struct net *net,
2254 					   const struct in6_addr *prefix, int prefixlen,
2255 					   const struct in6_addr *gwaddr, int ifindex)
2256 {
2257 	struct fib6_node *fn;
2258 	struct rt6_info *rt = NULL;
2259 	struct fib6_table *table;
2260 
2261 	table = fib6_get_table(net, RT6_TABLE_INFO);
2262 	if (!table)
2263 		return NULL;
2264 
2265 	read_lock_bh(&table->tb6_lock);
2266 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2267 	if (!fn)
2268 		goto out;
2269 
2270 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2271 		if (rt->dst.dev->ifindex != ifindex)
2272 			continue;
2273 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2274 			continue;
2275 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2276 			continue;
2277 		dst_hold(&rt->dst);
2278 		break;
2279 	}
2280 out:
2281 	read_unlock_bh(&table->tb6_lock);
2282 	return rt;
2283 }
2284 
2285 static struct rt6_info *rt6_add_route_info(struct net *net,
2286 					   const struct in6_addr *prefix, int prefixlen,
2287 					   const struct in6_addr *gwaddr, int ifindex,
2288 					   unsigned int pref)
2289 {
2290 	struct fib6_config cfg = {
2291 		.fc_table	= RT6_TABLE_INFO,
2292 		.fc_metric	= IP6_RT_PRIO_USER,
2293 		.fc_ifindex	= ifindex,
2294 		.fc_dst_len	= prefixlen,
2295 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2296 				  RTF_UP | RTF_PREF(pref),
2297 		.fc_nlinfo.portid = 0,
2298 		.fc_nlinfo.nlh = NULL,
2299 		.fc_nlinfo.nl_net = net,
2300 	};
2301 
2302 	cfg.fc_dst = *prefix;
2303 	cfg.fc_gateway = *gwaddr;
2304 
2305 	/* We should treat it as a default route if prefix length is 0. */
2306 	if (!prefixlen)
2307 		cfg.fc_flags |= RTF_DEFAULT;
2308 
2309 	ip6_route_add(&cfg);
2310 
2311 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2312 }
2313 #endif
2314 
2315 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2316 {
2317 	struct rt6_info *rt;
2318 	struct fib6_table *table;
2319 
2320 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2321 	if (!table)
2322 		return NULL;
2323 
2324 	read_lock_bh(&table->tb6_lock);
2325 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2326 		if (dev == rt->dst.dev &&
2327 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2328 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2329 			break;
2330 	}
2331 	if (rt)
2332 		dst_hold(&rt->dst);
2333 	read_unlock_bh(&table->tb6_lock);
2334 	return rt;
2335 }
2336 
2337 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2338 				     struct net_device *dev,
2339 				     unsigned int pref)
2340 {
2341 	struct fib6_config cfg = {
2342 		.fc_table	= RT6_TABLE_DFLT,
2343 		.fc_metric	= IP6_RT_PRIO_USER,
2344 		.fc_ifindex	= dev->ifindex,
2345 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2346 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2347 		.fc_nlinfo.portid = 0,
2348 		.fc_nlinfo.nlh = NULL,
2349 		.fc_nlinfo.nl_net = dev_net(dev),
2350 	};
2351 
2352 	cfg.fc_gateway = *gwaddr;
2353 
2354 	ip6_route_add(&cfg);
2355 
2356 	return rt6_get_dflt_router(gwaddr, dev);
2357 }
2358 
2359 void rt6_purge_dflt_routers(struct net *net)
2360 {
2361 	struct rt6_info *rt;
2362 	struct fib6_table *table;
2363 
2364 	/* NOTE: Keep consistent with rt6_get_dflt_router */
2365 	table = fib6_get_table(net, RT6_TABLE_DFLT);
2366 	if (!table)
2367 		return;
2368 
2369 restart:
2370 	read_lock_bh(&table->tb6_lock);
2371 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2372 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2373 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2374 			dst_hold(&rt->dst);
2375 			read_unlock_bh(&table->tb6_lock);
2376 			ip6_del_rt(rt);
2377 			goto restart;
2378 		}
2379 	}
2380 	read_unlock_bh(&table->tb6_lock);
2381 }
2382 
2383 static void rtmsg_to_fib6_config(struct net *net,
2384 				 struct in6_rtmsg *rtmsg,
2385 				 struct fib6_config *cfg)
2386 {
2387 	memset(cfg, 0, sizeof(*cfg));
2388 
2389 	cfg->fc_table = RT6_TABLE_MAIN;
2390 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2391 	cfg->fc_metric = rtmsg->rtmsg_metric;
2392 	cfg->fc_expires = rtmsg->rtmsg_info;
2393 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2394 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2395 	cfg->fc_flags = rtmsg->rtmsg_flags;
2396 
2397 	cfg->fc_nlinfo.nl_net = net;
2398 
2399 	cfg->fc_dst = rtmsg->rtmsg_dst;
2400 	cfg->fc_src = rtmsg->rtmsg_src;
2401 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2402 }
2403 
2404 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2405 {
2406 	struct fib6_config cfg;
2407 	struct in6_rtmsg rtmsg;
2408 	int err;
2409 
2410 	switch (cmd) {
2411 	case SIOCADDRT:		/* Add a route */
2412 	case SIOCDELRT:		/* Delete a route */
2413 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2414 			return -EPERM;
2415 		err = copy_from_user(&rtmsg, arg,
2416 				     sizeof(struct in6_rtmsg));
2417 		if (err)
2418 			return -EFAULT;
2419 
2420 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2421 
2422 		rtnl_lock();
2423 		switch (cmd) {
2424 		case SIOCADDRT:
2425 			err = ip6_route_add(&cfg);
2426 			break;
2427 		case SIOCDELRT:
2428 			err = ip6_route_del(&cfg);
2429 			break;
2430 		default:
2431 			err = -EINVAL;
2432 		}
2433 		rtnl_unlock();
2434 
2435 		return err;
2436 	}
2437 
2438 	return -EINVAL;
2439 }
2440 
2441 /*
2442  *	Drop the packet on the floor
2443  */
2444 
2445 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2446 {
2447 	int type;
2448 	struct dst_entry *dst = skb_dst(skb);
2449 	switch (ipstats_mib_noroutes) {
2450 	case IPSTATS_MIB_INNOROUTES:
2451 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2452 		if (type == IPV6_ADDR_ANY) {
2453 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2454 				      IPSTATS_MIB_INADDRERRORS);
2455 			break;
2456 		}
2457 		/* FALLTHROUGH */
2458 	case IPSTATS_MIB_OUTNOROUTES:
2459 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2460 			      ipstats_mib_noroutes);
2461 		break;
2462 	}
2463 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2464 	kfree_skb(skb);
2465 	return 0;
2466 }
2467 
2468 static int ip6_pkt_discard(struct sk_buff *skb)
2469 {
2470 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2471 }
2472 
2473 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2474 {
2475 	skb->dev = skb_dst(skb)->dev;
2476 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2477 }
2478 
2479 static int ip6_pkt_prohibit(struct sk_buff *skb)
2480 {
2481 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2482 }
2483 
2484 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2485 {
2486 	skb->dev = skb_dst(skb)->dev;
2487 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2488 }
2489 
2490 /*
2491  *	Allocate a dst for local (unicast / anycast) address.
2492  */
2493 
2494 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2495 				    const struct in6_addr *addr,
2496 				    bool anycast)
2497 {
2498 	struct net *net = dev_net(idev->dev);
2499 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2500 					    DST_NOCOUNT);
2501 	if (!rt)
2502 		return ERR_PTR(-ENOMEM);
2503 
2504 	in6_dev_hold(idev);
2505 
2506 	rt->dst.flags |= DST_HOST;
2507 	rt->dst.input = ip6_input;
2508 	rt->dst.output = ip6_output;
2509 	rt->rt6i_idev = idev;
2510 
2511 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2512 	if (anycast)
2513 		rt->rt6i_flags |= RTF_ANYCAST;
2514 	else
2515 		rt->rt6i_flags |= RTF_LOCAL;
2516 
2517 	rt->rt6i_gateway  = *addr;
2518 	rt->rt6i_dst.addr = *addr;
2519 	rt->rt6i_dst.plen = 128;
2520 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2521 	rt->dst.flags |= DST_NOCACHE;
2522 
2523 	atomic_set(&rt->dst.__refcnt, 1);
2524 
2525 	return rt;
2526 }
2527 
2528 int ip6_route_get_saddr(struct net *net,
2529 			struct rt6_info *rt,
2530 			const struct in6_addr *daddr,
2531 			unsigned int prefs,
2532 			struct in6_addr *saddr)
2533 {
2534 	struct inet6_dev *idev =
2535 		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2536 	int err = 0;
2537 	if (rt && rt->rt6i_prefsrc.plen)
2538 		*saddr = rt->rt6i_prefsrc.addr;
2539 	else
2540 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2541 					 daddr, prefs, saddr);
2542 	return err;
2543 }
2544 
2545 /* remove deleted ip from prefsrc entries */
2546 struct arg_dev_net_ip {
2547 	struct net_device *dev;
2548 	struct net *net;
2549 	struct in6_addr *addr;
2550 };
2551 
2552 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2553 {
2554 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2555 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2556 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2557 
2558 	if (((void *)rt->dst.dev == dev || !dev) &&
2559 	    rt != net->ipv6.ip6_null_entry &&
2560 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2561 		/* remove prefsrc entry */
2562 		rt->rt6i_prefsrc.plen = 0;
2563 	}
2564 	return 0;
2565 }
2566 
2567 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2568 {
2569 	struct net *net = dev_net(ifp->idev->dev);
2570 	struct arg_dev_net_ip adni = {
2571 		.dev = ifp->idev->dev,
2572 		.net = net,
2573 		.addr = &ifp->addr,
2574 	};
2575 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2576 }
2577 
2578 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2579 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2580 
2581 /* Remove routers and update dst entries when gateway turn into host. */
2582 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2583 {
2584 	struct in6_addr *gateway = (struct in6_addr *)arg;
2585 
2586 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2587 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2588 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2589 		return -1;
2590 	}
2591 	return 0;
2592 }
2593 
2594 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2595 {
2596 	fib6_clean_all(net, fib6_clean_tohost, gateway);
2597 }
2598 
2599 struct arg_dev_net {
2600 	struct net_device *dev;
2601 	struct net *net;
2602 };
2603 
2604 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2605 {
2606 	const struct arg_dev_net *adn = arg;
2607 	const struct net_device *dev = adn->dev;
2608 
2609 	if ((rt->dst.dev == dev || !dev) &&
2610 	    rt != adn->net->ipv6.ip6_null_entry)
2611 		return -1;
2612 
2613 	return 0;
2614 }
2615 
2616 void rt6_ifdown(struct net *net, struct net_device *dev)
2617 {
2618 	struct arg_dev_net adn = {
2619 		.dev = dev,
2620 		.net = net,
2621 	};
2622 
2623 	fib6_clean_all(net, fib6_ifdown, &adn);
2624 	icmp6_clean_all(fib6_ifdown, &adn);
2625 	rt6_uncached_list_flush_dev(net, dev);
2626 }
2627 
2628 struct rt6_mtu_change_arg {
2629 	struct net_device *dev;
2630 	unsigned int mtu;
2631 };
2632 
2633 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2634 {
2635 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2636 	struct inet6_dev *idev;
2637 
2638 	/* In IPv6 pmtu discovery is not optional,
2639 	   so that RTAX_MTU lock cannot disable it.
2640 	   We still use this lock to block changes
2641 	   caused by addrconf/ndisc.
2642 	*/
2643 
2644 	idev = __in6_dev_get(arg->dev);
2645 	if (!idev)
2646 		return 0;
2647 
2648 	/* For administrative MTU increase, there is no way to discover
2649 	   IPv6 PMTU increase, so PMTU increase should be updated here.
2650 	   Since RFC 1981 doesn't include administrative MTU increase
2651 	   update PMTU increase is a MUST. (i.e. jumbo frame)
2652 	 */
2653 	/*
2654 	   If new MTU is less than route PMTU, this new MTU will be the
2655 	   lowest MTU in the path, update the route PMTU to reflect PMTU
2656 	   decreases; if new MTU is greater than route PMTU, and the
2657 	   old MTU is the lowest MTU in the path, update the route PMTU
2658 	   to reflect the increase. In this case if the other nodes' MTU
2659 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2660 	   PMTU discouvery.
2661 	 */
2662 	if (rt->dst.dev == arg->dev &&
2663 	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2664 		if (rt->rt6i_flags & RTF_CACHE) {
2665 			/* For RTF_CACHE with rt6i_pmtu == 0
2666 			 * (i.e. a redirected route),
2667 			 * the metrics of its rt->dst.from has already
2668 			 * been updated.
2669 			 */
2670 			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2671 				rt->rt6i_pmtu = arg->mtu;
2672 		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
2673 			   (dst_mtu(&rt->dst) < arg->mtu &&
2674 			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2675 			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2676 		}
2677 	}
2678 	return 0;
2679 }
2680 
2681 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2682 {
2683 	struct rt6_mtu_change_arg arg = {
2684 		.dev = dev,
2685 		.mtu = mtu,
2686 	};
2687 
2688 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2689 }
2690 
2691 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2692 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2693 	[RTA_OIF]               = { .type = NLA_U32 },
2694 	[RTA_IIF]		= { .type = NLA_U32 },
2695 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2696 	[RTA_METRICS]           = { .type = NLA_NESTED },
2697 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2698 	[RTA_PREF]              = { .type = NLA_U8 },
2699 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
2700 	[RTA_ENCAP]		= { .type = NLA_NESTED },
2701 };
2702 
2703 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2704 			      struct fib6_config *cfg)
2705 {
2706 	struct rtmsg *rtm;
2707 	struct nlattr *tb[RTA_MAX+1];
2708 	unsigned int pref;
2709 	int err;
2710 
2711 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2712 	if (err < 0)
2713 		goto errout;
2714 
2715 	err = -EINVAL;
2716 	rtm = nlmsg_data(nlh);
2717 	memset(cfg, 0, sizeof(*cfg));
2718 
2719 	cfg->fc_table = rtm->rtm_table;
2720 	cfg->fc_dst_len = rtm->rtm_dst_len;
2721 	cfg->fc_src_len = rtm->rtm_src_len;
2722 	cfg->fc_flags = RTF_UP;
2723 	cfg->fc_protocol = rtm->rtm_protocol;
2724 	cfg->fc_type = rtm->rtm_type;
2725 
2726 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2727 	    rtm->rtm_type == RTN_BLACKHOLE ||
2728 	    rtm->rtm_type == RTN_PROHIBIT ||
2729 	    rtm->rtm_type == RTN_THROW)
2730 		cfg->fc_flags |= RTF_REJECT;
2731 
2732 	if (rtm->rtm_type == RTN_LOCAL)
2733 		cfg->fc_flags |= RTF_LOCAL;
2734 
2735 	if (rtm->rtm_flags & RTM_F_CLONED)
2736 		cfg->fc_flags |= RTF_CACHE;
2737 
2738 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2739 	cfg->fc_nlinfo.nlh = nlh;
2740 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2741 
2742 	if (tb[RTA_GATEWAY]) {
2743 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2744 		cfg->fc_flags |= RTF_GATEWAY;
2745 	}
2746 
2747 	if (tb[RTA_DST]) {
2748 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2749 
2750 		if (nla_len(tb[RTA_DST]) < plen)
2751 			goto errout;
2752 
2753 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2754 	}
2755 
2756 	if (tb[RTA_SRC]) {
2757 		int plen = (rtm->rtm_src_len + 7) >> 3;
2758 
2759 		if (nla_len(tb[RTA_SRC]) < plen)
2760 			goto errout;
2761 
2762 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2763 	}
2764 
2765 	if (tb[RTA_PREFSRC])
2766 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2767 
2768 	if (tb[RTA_OIF])
2769 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2770 
2771 	if (tb[RTA_PRIORITY])
2772 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2773 
2774 	if (tb[RTA_METRICS]) {
2775 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2776 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2777 	}
2778 
2779 	if (tb[RTA_TABLE])
2780 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2781 
2782 	if (tb[RTA_MULTIPATH]) {
2783 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2784 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2785 	}
2786 
2787 	if (tb[RTA_PREF]) {
2788 		pref = nla_get_u8(tb[RTA_PREF]);
2789 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
2790 		    pref != ICMPV6_ROUTER_PREF_HIGH)
2791 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
2792 		cfg->fc_flags |= RTF_PREF(pref);
2793 	}
2794 
2795 	if (tb[RTA_ENCAP])
2796 		cfg->fc_encap = tb[RTA_ENCAP];
2797 
2798 	if (tb[RTA_ENCAP_TYPE])
2799 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2800 
2801 	err = 0;
2802 errout:
2803 	return err;
2804 }
2805 
2806 struct rt6_nh {
2807 	struct rt6_info *rt6_info;
2808 	struct fib6_config r_cfg;
2809 	struct mx6_config mxc;
2810 	struct list_head next;
2811 };
2812 
2813 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2814 {
2815 	struct rt6_nh *nh;
2816 
2817 	list_for_each_entry(nh, rt6_nh_list, next) {
2818 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2819 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2820 		        nh->r_cfg.fc_ifindex);
2821 	}
2822 }
2823 
2824 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2825 				 struct rt6_info *rt, struct fib6_config *r_cfg)
2826 {
2827 	struct rt6_nh *nh;
2828 	struct rt6_info *rtnh;
2829 	int err = -EEXIST;
2830 
2831 	list_for_each_entry(nh, rt6_nh_list, next) {
2832 		/* check if rt6_info already exists */
2833 		rtnh = nh->rt6_info;
2834 
2835 		if (rtnh->dst.dev == rt->dst.dev &&
2836 		    rtnh->rt6i_idev == rt->rt6i_idev &&
2837 		    ipv6_addr_equal(&rtnh->rt6i_gateway,
2838 				    &rt->rt6i_gateway))
2839 			return err;
2840 	}
2841 
2842 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2843 	if (!nh)
2844 		return -ENOMEM;
2845 	nh->rt6_info = rt;
2846 	err = ip6_convert_metrics(&nh->mxc, r_cfg);
2847 	if (err) {
2848 		kfree(nh);
2849 		return err;
2850 	}
2851 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2852 	list_add_tail(&nh->next, rt6_nh_list);
2853 
2854 	return 0;
2855 }
2856 
2857 static int ip6_route_multipath_add(struct fib6_config *cfg)
2858 {
2859 	struct fib6_config r_cfg;
2860 	struct rtnexthop *rtnh;
2861 	struct rt6_info *rt;
2862 	struct rt6_nh *err_nh;
2863 	struct rt6_nh *nh, *nh_safe;
2864 	int remaining;
2865 	int attrlen;
2866 	int err = 1;
2867 	int nhn = 0;
2868 	int replace = (cfg->fc_nlinfo.nlh &&
2869 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2870 	LIST_HEAD(rt6_nh_list);
2871 
2872 	remaining = cfg->fc_mp_len;
2873 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2874 
2875 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
2876 	 * rt6_info structs per nexthop
2877 	 */
2878 	while (rtnh_ok(rtnh, remaining)) {
2879 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2880 		if (rtnh->rtnh_ifindex)
2881 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2882 
2883 		attrlen = rtnh_attrlen(rtnh);
2884 		if (attrlen > 0) {
2885 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2886 
2887 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2888 			if (nla) {
2889 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
2890 				r_cfg.fc_flags |= RTF_GATEWAY;
2891 			}
2892 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2893 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2894 			if (nla)
2895 				r_cfg.fc_encap_type = nla_get_u16(nla);
2896 		}
2897 
2898 		err = ip6_route_info_create(&r_cfg, &rt);
2899 		if (err)
2900 			goto cleanup;
2901 
2902 		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2903 		if (err) {
2904 			dst_free(&rt->dst);
2905 			goto cleanup;
2906 		}
2907 
2908 		rtnh = rtnh_next(rtnh, &remaining);
2909 	}
2910 
2911 	err_nh = NULL;
2912 	list_for_each_entry(nh, &rt6_nh_list, next) {
2913 		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2914 		/* nh->rt6_info is used or freed at this point, reset to NULL*/
2915 		nh->rt6_info = NULL;
2916 		if (err) {
2917 			if (replace && nhn)
2918 				ip6_print_replace_route_err(&rt6_nh_list);
2919 			err_nh = nh;
2920 			goto add_errout;
2921 		}
2922 
2923 		/* Because each route is added like a single route we remove
2924 		 * these flags after the first nexthop: if there is a collision,
2925 		 * we have already failed to add the first nexthop:
2926 		 * fib6_add_rt2node() has rejected it; when replacing, old
2927 		 * nexthops have been replaced by first new, the rest should
2928 		 * be added to it.
2929 		 */
2930 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2931 						     NLM_F_REPLACE);
2932 		nhn++;
2933 	}
2934 
2935 	goto cleanup;
2936 
2937 add_errout:
2938 	/* Delete routes that were already added */
2939 	list_for_each_entry(nh, &rt6_nh_list, next) {
2940 		if (err_nh == nh)
2941 			break;
2942 		ip6_route_del(&nh->r_cfg);
2943 	}
2944 
2945 cleanup:
2946 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2947 		if (nh->rt6_info)
2948 			dst_free(&nh->rt6_info->dst);
2949 		kfree(nh->mxc.mx);
2950 		list_del(&nh->next);
2951 		kfree(nh);
2952 	}
2953 
2954 	return err;
2955 }
2956 
2957 static int ip6_route_multipath_del(struct fib6_config *cfg)
2958 {
2959 	struct fib6_config r_cfg;
2960 	struct rtnexthop *rtnh;
2961 	int remaining;
2962 	int attrlen;
2963 	int err = 1, last_err = 0;
2964 
2965 	remaining = cfg->fc_mp_len;
2966 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2967 
2968 	/* Parse a Multipath Entry */
2969 	while (rtnh_ok(rtnh, remaining)) {
2970 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2971 		if (rtnh->rtnh_ifindex)
2972 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2973 
2974 		attrlen = rtnh_attrlen(rtnh);
2975 		if (attrlen > 0) {
2976 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2977 
2978 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2979 			if (nla) {
2980 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2981 				r_cfg.fc_flags |= RTF_GATEWAY;
2982 			}
2983 		}
2984 		err = ip6_route_del(&r_cfg);
2985 		if (err)
2986 			last_err = err;
2987 
2988 		rtnh = rtnh_next(rtnh, &remaining);
2989 	}
2990 
2991 	return last_err;
2992 }
2993 
2994 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2995 {
2996 	struct fib6_config cfg;
2997 	int err;
2998 
2999 	err = rtm_to_fib6_config(skb, nlh, &cfg);
3000 	if (err < 0)
3001 		return err;
3002 
3003 	if (cfg.fc_mp)
3004 		return ip6_route_multipath_del(&cfg);
3005 	else
3006 		return ip6_route_del(&cfg);
3007 }
3008 
3009 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3010 {
3011 	struct fib6_config cfg;
3012 	int err;
3013 
3014 	err = rtm_to_fib6_config(skb, nlh, &cfg);
3015 	if (err < 0)
3016 		return err;
3017 
3018 	if (cfg.fc_mp)
3019 		return ip6_route_multipath_add(&cfg);
3020 	else
3021 		return ip6_route_add(&cfg);
3022 }
3023 
3024 static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3025 {
3026 	return NLMSG_ALIGN(sizeof(struct rtmsg))
3027 	       + nla_total_size(16) /* RTA_SRC */
3028 	       + nla_total_size(16) /* RTA_DST */
3029 	       + nla_total_size(16) /* RTA_GATEWAY */
3030 	       + nla_total_size(16) /* RTA_PREFSRC */
3031 	       + nla_total_size(4) /* RTA_TABLE */
3032 	       + nla_total_size(4) /* RTA_IIF */
3033 	       + nla_total_size(4) /* RTA_OIF */
3034 	       + nla_total_size(4) /* RTA_PRIORITY */
3035 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3036 	       + nla_total_size(sizeof(struct rta_cacheinfo))
3037 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3038 	       + nla_total_size(1) /* RTA_PREF */
3039 	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
3040 }
3041 
3042 static int rt6_fill_node(struct net *net,
3043 			 struct sk_buff *skb, struct rt6_info *rt,
3044 			 struct in6_addr *dst, struct in6_addr *src,
3045 			 int iif, int type, u32 portid, u32 seq,
3046 			 int prefix, int nowait, unsigned int flags)
3047 {
3048 	u32 metrics[RTAX_MAX];
3049 	struct rtmsg *rtm;
3050 	struct nlmsghdr *nlh;
3051 	long expires;
3052 	u32 table;
3053 
3054 	if (prefix) {	/* user wants prefix routes only */
3055 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3056 			/* success since this is not a prefix route */
3057 			return 1;
3058 		}
3059 	}
3060 
3061 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3062 	if (!nlh)
3063 		return -EMSGSIZE;
3064 
3065 	rtm = nlmsg_data(nlh);
3066 	rtm->rtm_family = AF_INET6;
3067 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
3068 	rtm->rtm_src_len = rt->rt6i_src.plen;
3069 	rtm->rtm_tos = 0;
3070 	if (rt->rt6i_table)
3071 		table = rt->rt6i_table->tb6_id;
3072 	else
3073 		table = RT6_TABLE_UNSPEC;
3074 	rtm->rtm_table = table;
3075 	if (nla_put_u32(skb, RTA_TABLE, table))
3076 		goto nla_put_failure;
3077 	if (rt->rt6i_flags & RTF_REJECT) {
3078 		switch (rt->dst.error) {
3079 		case -EINVAL:
3080 			rtm->rtm_type = RTN_BLACKHOLE;
3081 			break;
3082 		case -EACCES:
3083 			rtm->rtm_type = RTN_PROHIBIT;
3084 			break;
3085 		case -EAGAIN:
3086 			rtm->rtm_type = RTN_THROW;
3087 			break;
3088 		default:
3089 			rtm->rtm_type = RTN_UNREACHABLE;
3090 			break;
3091 		}
3092 	}
3093 	else if (rt->rt6i_flags & RTF_LOCAL)
3094 		rtm->rtm_type = RTN_LOCAL;
3095 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3096 		rtm->rtm_type = RTN_LOCAL;
3097 	else
3098 		rtm->rtm_type = RTN_UNICAST;
3099 	rtm->rtm_flags = 0;
3100 	if (!netif_carrier_ok(rt->dst.dev)) {
3101 		rtm->rtm_flags |= RTNH_F_LINKDOWN;
3102 		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3103 			rtm->rtm_flags |= RTNH_F_DEAD;
3104 	}
3105 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3106 	rtm->rtm_protocol = rt->rt6i_protocol;
3107 	if (rt->rt6i_flags & RTF_DYNAMIC)
3108 		rtm->rtm_protocol = RTPROT_REDIRECT;
3109 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
3110 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3111 			rtm->rtm_protocol = RTPROT_RA;
3112 		else
3113 			rtm->rtm_protocol = RTPROT_KERNEL;
3114 	}
3115 
3116 	if (rt->rt6i_flags & RTF_CACHE)
3117 		rtm->rtm_flags |= RTM_F_CLONED;
3118 
3119 	if (dst) {
3120 		if (nla_put_in6_addr(skb, RTA_DST, dst))
3121 			goto nla_put_failure;
3122 		rtm->rtm_dst_len = 128;
3123 	} else if (rtm->rtm_dst_len)
3124 		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3125 			goto nla_put_failure;
3126 #ifdef CONFIG_IPV6_SUBTREES
3127 	if (src) {
3128 		if (nla_put_in6_addr(skb, RTA_SRC, src))
3129 			goto nla_put_failure;
3130 		rtm->rtm_src_len = 128;
3131 	} else if (rtm->rtm_src_len &&
3132 		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3133 		goto nla_put_failure;
3134 #endif
3135 	if (iif) {
3136 #ifdef CONFIG_IPV6_MROUTE
3137 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3138 			int err = ip6mr_get_route(net, skb, rtm, nowait);
3139 			if (err <= 0) {
3140 				if (!nowait) {
3141 					if (err == 0)
3142 						return 0;
3143 					goto nla_put_failure;
3144 				} else {
3145 					if (err == -EMSGSIZE)
3146 						goto nla_put_failure;
3147 				}
3148 			}
3149 		} else
3150 #endif
3151 			if (nla_put_u32(skb, RTA_IIF, iif))
3152 				goto nla_put_failure;
3153 	} else if (dst) {
3154 		struct in6_addr saddr_buf;
3155 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3156 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3157 			goto nla_put_failure;
3158 	}
3159 
3160 	if (rt->rt6i_prefsrc.plen) {
3161 		struct in6_addr saddr_buf;
3162 		saddr_buf = rt->rt6i_prefsrc.addr;
3163 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3164 			goto nla_put_failure;
3165 	}
3166 
3167 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3168 	if (rt->rt6i_pmtu)
3169 		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3170 	if (rtnetlink_put_metrics(skb, metrics) < 0)
3171 		goto nla_put_failure;
3172 
3173 	if (rt->rt6i_flags & RTF_GATEWAY) {
3174 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3175 			goto nla_put_failure;
3176 	}
3177 
3178 	if (rt->dst.dev &&
3179 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3180 		goto nla_put_failure;
3181 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3182 		goto nla_put_failure;
3183 
3184 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3185 
3186 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3187 		goto nla_put_failure;
3188 
3189 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3190 		goto nla_put_failure;
3191 
3192 	lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3193 
3194 	nlmsg_end(skb, nlh);
3195 	return 0;
3196 
3197 nla_put_failure:
3198 	nlmsg_cancel(skb, nlh);
3199 	return -EMSGSIZE;
3200 }
3201 
3202 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3203 {
3204 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3205 	int prefix;
3206 
3207 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3208 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3209 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3210 	} else
3211 		prefix = 0;
3212 
3213 	return rt6_fill_node(arg->net,
3214 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3215 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3216 		     prefix, 0, NLM_F_MULTI);
3217 }
3218 
3219 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3220 {
3221 	struct net *net = sock_net(in_skb->sk);
3222 	struct nlattr *tb[RTA_MAX+1];
3223 	struct rt6_info *rt;
3224 	struct sk_buff *skb;
3225 	struct rtmsg *rtm;
3226 	struct flowi6 fl6;
3227 	int err, iif = 0, oif = 0;
3228 
3229 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3230 	if (err < 0)
3231 		goto errout;
3232 
3233 	err = -EINVAL;
3234 	memset(&fl6, 0, sizeof(fl6));
3235 
3236 	if (tb[RTA_SRC]) {
3237 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3238 			goto errout;
3239 
3240 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3241 	}
3242 
3243 	if (tb[RTA_DST]) {
3244 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3245 			goto errout;
3246 
3247 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3248 	}
3249 
3250 	if (tb[RTA_IIF])
3251 		iif = nla_get_u32(tb[RTA_IIF]);
3252 
3253 	if (tb[RTA_OIF])
3254 		oif = nla_get_u32(tb[RTA_OIF]);
3255 
3256 	if (tb[RTA_MARK])
3257 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3258 
3259 	if (iif) {
3260 		struct net_device *dev;
3261 		int flags = 0;
3262 
3263 		dev = __dev_get_by_index(net, iif);
3264 		if (!dev) {
3265 			err = -ENODEV;
3266 			goto errout;
3267 		}
3268 
3269 		fl6.flowi6_iif = iif;
3270 
3271 		if (!ipv6_addr_any(&fl6.saddr))
3272 			flags |= RT6_LOOKUP_F_HAS_SADDR;
3273 
3274 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3275 							       flags);
3276 	} else {
3277 		fl6.flowi6_oif = oif;
3278 
3279 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3280 	}
3281 
3282 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3283 	if (!skb) {
3284 		ip6_rt_put(rt);
3285 		err = -ENOBUFS;
3286 		goto errout;
3287 	}
3288 
3289 	/* Reserve room for dummy headers, this skb can pass
3290 	   through good chunk of routing engine.
3291 	 */
3292 	skb_reset_mac_header(skb);
3293 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3294 
3295 	skb_dst_set(skb, &rt->dst);
3296 
3297 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3298 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3299 			    nlh->nlmsg_seq, 0, 0, 0);
3300 	if (err < 0) {
3301 		kfree_skb(skb);
3302 		goto errout;
3303 	}
3304 
3305 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3306 errout:
3307 	return err;
3308 }
3309 
3310 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3311 		     unsigned int nlm_flags)
3312 {
3313 	struct sk_buff *skb;
3314 	struct net *net = info->nl_net;
3315 	u32 seq;
3316 	int err;
3317 
3318 	err = -ENOBUFS;
3319 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3320 
3321 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3322 	if (!skb)
3323 		goto errout;
3324 
3325 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3326 				event, info->portid, seq, 0, 0, nlm_flags);
3327 	if (err < 0) {
3328 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3329 		WARN_ON(err == -EMSGSIZE);
3330 		kfree_skb(skb);
3331 		goto errout;
3332 	}
3333 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3334 		    info->nlh, gfp_any());
3335 	return;
3336 errout:
3337 	if (err < 0)
3338 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3339 }
3340 
3341 static int ip6_route_dev_notify(struct notifier_block *this,
3342 				unsigned long event, void *ptr)
3343 {
3344 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3345 	struct net *net = dev_net(dev);
3346 
3347 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3348 		net->ipv6.ip6_null_entry->dst.dev = dev;
3349 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3350 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3351 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3352 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3353 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3354 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3355 #endif
3356 	}
3357 
3358 	return NOTIFY_OK;
3359 }
3360 
3361 /*
3362  *	/proc
3363  */
3364 
3365 #ifdef CONFIG_PROC_FS
3366 
/*
 * File operations for the per-namespace "ipv6_route" proc entry (created in
 * ip6_route_net_init_late()).  Opening goes through ipv6_route_open();
 * reading and seeking use the generic seq_file helpers.
 */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
3374 
3375 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3376 {
3377 	struct net *net = (struct net *)seq->private;
3378 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3379 		   net->ipv6.rt6_stats->fib_nodes,
3380 		   net->ipv6.rt6_stats->fib_route_nodes,
3381 		   net->ipv6.rt6_stats->fib_rt_alloc,
3382 		   net->ipv6.rt6_stats->fib_rt_entries,
3383 		   net->ipv6.rt6_stats->fib_rt_cache,
3384 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3385 		   net->ipv6.rt6_stats->fib_discarded_routes);
3386 
3387 	return 0;
3388 }
3389 
/*
 * open() handler for "rt6_stats": binds rt6_stats_seq_show() as the
 * single-shot seq_file show routine via single_open_net().
 */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
3394 
/*
 * File operations for the per-namespace "rt6_stats" proc entry (created in
 * ip6_route_net_init_late()).  Opened with rt6_stats_seq_open() and released
 * with the matching single_release_net().
 */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
3402 #endif	/* CONFIG_PROC_FS */
3403 
3404 #ifdef CONFIG_SYSCTL
3405 
3406 static
3407 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3408 			      void __user *buffer, size_t *lenp, loff_t *ppos)
3409 {
3410 	struct net *net;
3411 	int delay;
3412 	if (!write)
3413 		return -EINVAL;
3414 
3415 	net = (struct net *)ctl->extra1;
3416 	delay = net->ipv6.sysctl.flush_delay;
3417 	proc_dointvec(ctl, write, buffer, lenp, ppos);
3418 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3419 	return 0;
3420 }
3421 
/*
 * Template for the per-namespace IPv6 route sysctl table.
 *
 * ipv6_route_sysctl_init() duplicates this array and re-points each entry's
 * ->data at the owning namespace BY ARRAY INDEX, so the order of the entries
 * here must stay in sync with the table[N] assignments in that function.
 *
 * Notes on individual entries:
 *  - "flush" is write-only (mode 0200); its handler rejects reads.
 *  - "gc_min_interval" and "gc_min_interval_ms" are backed by the same
 *    variable (ip6_rt_gc_min_interval) and differ only in the proc handler
 *    used to convert the value.
 *  - The empty trailing entry terminates the table.
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
3495 
3496 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3497 {
3498 	struct ctl_table *table;
3499 
3500 	table = kmemdup(ipv6_route_table_template,
3501 			sizeof(ipv6_route_table_template),
3502 			GFP_KERNEL);
3503 
3504 	if (table) {
3505 		table[0].data = &net->ipv6.sysctl.flush_delay;
3506 		table[0].extra1 = net;
3507 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3508 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3509 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3510 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3511 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3512 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3513 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3514 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3515 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3516 
3517 		/* Don't export sysctls to unprivileged users */
3518 		if (net->user_ns != &init_user_ns)
3519 			table[0].procname = NULL;
3520 	}
3521 
3522 	return table;
3523 }
3524 #endif
3525 
3526 static int __net_init ip6_route_net_init(struct net *net)
3527 {
3528 	int ret = -ENOMEM;
3529 
3530 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3531 	       sizeof(net->ipv6.ip6_dst_ops));
3532 
3533 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3534 		goto out_ip6_dst_ops;
3535 
3536 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3537 					   sizeof(*net->ipv6.ip6_null_entry),
3538 					   GFP_KERNEL);
3539 	if (!net->ipv6.ip6_null_entry)
3540 		goto out_ip6_dst_entries;
3541 	net->ipv6.ip6_null_entry->dst.path =
3542 		(struct dst_entry *)net->ipv6.ip6_null_entry;
3543 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3544 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3545 			 ip6_template_metrics, true);
3546 
3547 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3548 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3549 					       sizeof(*net->ipv6.ip6_prohibit_entry),
3550 					       GFP_KERNEL);
3551 	if (!net->ipv6.ip6_prohibit_entry)
3552 		goto out_ip6_null_entry;
3553 	net->ipv6.ip6_prohibit_entry->dst.path =
3554 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3555 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3556 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3557 			 ip6_template_metrics, true);
3558 
3559 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3560 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3561 					       GFP_KERNEL);
3562 	if (!net->ipv6.ip6_blk_hole_entry)
3563 		goto out_ip6_prohibit_entry;
3564 	net->ipv6.ip6_blk_hole_entry->dst.path =
3565 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3566 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3567 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3568 			 ip6_template_metrics, true);
3569 #endif
3570 
3571 	net->ipv6.sysctl.flush_delay = 0;
3572 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3573 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3574 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3575 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3576 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3577 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3578 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3579 
3580 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3581 
3582 	ret = 0;
3583 out:
3584 	return ret;
3585 
3586 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3587 out_ip6_prohibit_entry:
3588 	kfree(net->ipv6.ip6_prohibit_entry);
3589 out_ip6_null_entry:
3590 	kfree(net->ipv6.ip6_null_entry);
3591 #endif
3592 out_ip6_dst_entries:
3593 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3594 out_ip6_dst_ops:
3595 	goto out;
3596 }
3597 
3598 static void __net_exit ip6_route_net_exit(struct net *net)
3599 {
3600 	kfree(net->ipv6.ip6_null_entry);
3601 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3602 	kfree(net->ipv6.ip6_prohibit_entry);
3603 	kfree(net->ipv6.ip6_blk_hole_entry);
3604 #endif
3605 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3606 }
3607 
/*
 * Late per-namespace init: create the "ipv6_route" and "rt6_stats" entries
 * in the namespace's proc_net directory (only with CONFIG_PROC_FS).
 *
 * Always returns 0.
 * NOTE(review): the results of proc_create() are not checked, so a failure
 * to create either entry goes unreported.
 */
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
#endif
	return 0;
}
3616 
/*
 * Late per-namespace exit: remove the proc entries created by
 * ip6_route_net_init_late() (only with CONFIG_PROC_FS).
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
3624 
/* Per-namespace hooks for the core route state (special entries, sysctl defaults). */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
3629 
3630 static int __net_init ipv6_inetpeer_init(struct net *net)
3631 {
3632 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3633 
3634 	if (!bp)
3635 		return -ENOMEM;
3636 	inet_peer_base_init(bp);
3637 	net->ipv6.peers = bp;
3638 	return 0;
3639 }
3640 
3641 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3642 {
3643 	struct inet_peer_base *bp = net->ipv6.peers;
3644 
3645 	net->ipv6.peers = NULL;
3646 	inetpeer_invalidate_tree(bp);
3647 	kfree(bp);
3648 }
3649 
/* Per-namespace hooks managing the IPv6 inet_peer base (net->ipv6.peers). */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
3654 
/*
 * Per-namespace hooks for the proc entries; registered by ip6_route_init()
 * after the FIB, xfrm6 and FIB rules have been initialised.
 */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
3659 
/* Netdevice notifier that binds the special route entries to the loopback device. */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
3664 
3665 int __init ip6_route_init(void)
3666 {
3667 	int ret;
3668 	int cpu;
3669 
3670 	ret = -ENOMEM;
3671 	ip6_dst_ops_template.kmem_cachep =
3672 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3673 				  SLAB_HWCACHE_ALIGN, NULL);
3674 	if (!ip6_dst_ops_template.kmem_cachep)
3675 		goto out;
3676 
3677 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3678 	if (ret)
3679 		goto out_kmem_cache;
3680 
3681 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3682 	if (ret)
3683 		goto out_dst_entries;
3684 
3685 	ret = register_pernet_subsys(&ip6_route_net_ops);
3686 	if (ret)
3687 		goto out_register_inetpeer;
3688 
3689 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3690 
3691 	/* Registering of the loopback is done before this portion of code,
3692 	 * the loopback reference in rt6_info will not be taken, do it
3693 	 * manually for init_net */
3694 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3695 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3696   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3697 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3698 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3699 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3700 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3701   #endif
3702 	ret = fib6_init();
3703 	if (ret)
3704 		goto out_register_subsys;
3705 
3706 	ret = xfrm6_init();
3707 	if (ret)
3708 		goto out_fib6_init;
3709 
3710 	ret = fib6_rules_init();
3711 	if (ret)
3712 		goto xfrm6_init;
3713 
3714 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3715 	if (ret)
3716 		goto fib6_rules_init;
3717 
3718 	ret = -ENOBUFS;
3719 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3720 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3721 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3722 		goto out_register_late_subsys;
3723 
3724 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3725 	if (ret)
3726 		goto out_register_late_subsys;
3727 
3728 	for_each_possible_cpu(cpu) {
3729 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3730 
3731 		INIT_LIST_HEAD(&ul->head);
3732 		spin_lock_init(&ul->lock);
3733 	}
3734 
3735 out:
3736 	return ret;
3737 
3738 out_register_late_subsys:
3739 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3740 fib6_rules_init:
3741 	fib6_rules_cleanup();
3742 xfrm6_init:
3743 	xfrm6_fini();
3744 out_fib6_init:
3745 	fib6_gc_cleanup();
3746 out_register_subsys:
3747 	unregister_pernet_subsys(&ip6_route_net_ops);
3748 out_register_inetpeer:
3749 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3750 out_dst_entries:
3751 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3752 out_kmem_cache:
3753 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3754 	goto out;
3755 }
3756 
/*
 * Undo ip6_route_init(): unregister the netdevice notifier and the pernet
 * operations, shut down FIB rules, xfrm6 and the FIB garbage collector, and
 * release the blackhole dst counter and the rt6_info slab cache.
 *
 * NOTE(review): ipv6_inetpeer_ops is unregistered before ip6_route_net_ops
 * here, whereas the error path of ip6_route_init() unregisters them in the
 * opposite (reverse-of-registration) order - confirm this is intentional.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
3769