xref: /openbmc/linux/net/ipv6/route.c (revision cd4d09ec)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65 #include <trace/events/fib6.h>
66 
67 #include <asm/uaccess.h>
68 
69 #ifdef CONFIG_SYSCTL
70 #include <linux/sysctl.h>
71 #endif
72 
/* Result of next-hop neighbour reachability evaluation (rt6_check_neigh).
 * Negative values are failures; find_match() treats RT6_NUD_FAIL_DO_RR as
 * "usable, but rotate the round-robin pointer".
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* route must not be used */
	RT6_NUD_FAIL_PROBE = -2,	/* neigh in NUD_FAILED (router pref) */
	RT6_NUD_FAIL_DO_RR = -1,	/* no neigh entry; request round-robin */
	RT6_NUD_SUCCEED = 1		/* next hop (probably) reachable */
};
79 
80 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
81 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
82 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
83 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
84 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85 static void		ip6_dst_destroy(struct dst_entry *);
86 static void		ip6_dst_ifdown(struct dst_entry *,
87 				       struct net_device *dev, int how);
88 static int		 ip6_dst_gc(struct dst_ops *ops);
89 
90 static int		ip6_pkt_discard(struct sk_buff *skb);
91 static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
92 static int		ip6_pkt_prohibit(struct sk_buff *skb);
93 static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
94 static void		ip6_link_failure(struct sk_buff *skb);
95 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 					   struct sk_buff *skb, u32 mtu);
97 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 					struct sk_buff *skb);
99 static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
100 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
101 
102 #ifdef CONFIG_IPV6_ROUTE_INFO
103 static struct rt6_info *rt6_add_route_info(struct net *net,
104 					   const struct in6_addr *prefix, int prefixlen,
105 					   const struct in6_addr *gwaddr, int ifindex,
106 					   unsigned int pref);
107 static struct rt6_info *rt6_get_route_info(struct net *net,
108 					   const struct in6_addr *prefix, int prefixlen,
109 					   const struct in6_addr *gwaddr, int ifindex);
110 #endif
111 
/* Per-CPU list of uncached (DST_NOCACHE) rt6_info entries.  Entries are
 * added by rt6_uncached_list_add() and rewired to the loopback device by
 * rt6_uncached_list_flush_dev() when their device goes away.
 */
struct uncached_list {
	spinlock_t		lock;	/* protects head */
	struct list_head	head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
118 
/* Mark @rt as uncached and link it on this CPU's uncached list, recording
 * the list in rt6i_uncached_list so rt6_uncached_list_del() can find it
 * even if it runs on a different CPU.
 */
static void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	rt->dst.flags |= DST_NOCACHE;
	rt->rt6i_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}
130 
/* Unlink @rt from the uncached list it was added to, if any.  Safe to
 * call on routes that were never added (list head stays empty after
 * rt6_info_init()).
 */
static void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		struct uncached_list *ul = rt->rt6i_uncached_list;

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		spin_unlock_bh(&ul->lock);
	}
}
141 
/* Re-point every uncached route that references @dev at the namespace's
 * loopback device so @dev can be unregistered.  Both the idev reference
 * and the dst.dev reference are migrated; nothing to do when @dev is the
 * loopback device itself.
 */
static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			/* swap the inet6_dev reference over to loopback */
			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			/* take the new device ref before dropping the old */
			if (rt_dev == dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}
173 
/* RTF_PCPU clones keep their metrics in the parent route they were copied
 * from (dst.from); hand back a writable pointer into that parent.
 */
static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
	return dst_metrics_write_ptr(rt->dst.from);
}
178 
179 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
180 {
181 	struct rt6_info *rt = (struct rt6_info *)dst;
182 
183 	if (rt->rt6i_flags & RTF_PCPU)
184 		return rt6_pcpu_cow_metrics(rt);
185 	else if (rt->rt6i_flags & RTF_CACHE)
186 		return NULL;
187 	else
188 		return dst_cow_metrics_generic(dst, old);
189 }
190 
191 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
192 					     struct sk_buff *skb,
193 					     const void *daddr)
194 {
195 	struct in6_addr *p = &rt->rt6i_gateway;
196 
197 	if (!ipv6_addr_any(p))
198 		return (const void *) p;
199 	else if (skb)
200 		return &ipv6_hdr(skb)->daddr;
201 	return daddr;
202 }
203 
204 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
205 					  struct sk_buff *skb,
206 					  const void *daddr)
207 {
208 	struct rt6_info *rt = (struct rt6_info *) dst;
209 	struct neighbour *n;
210 
211 	daddr = choose_neigh_daddr(rt, skb, daddr);
212 	n = __ipv6_neigh_lookup(dst->dev, daddr);
213 	if (n)
214 		return n;
215 	return neigh_create(&nd_tbl, daddr, dst->dev);
216 }
217 
/* dst_ops template for regular IPv6 routes; each handler is defined in
 * this file except __ip6_local_out.
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};
235 
236 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
237 {
238 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
239 
240 	return mtu ? : dst->dev->mtu;
241 }
242 
/* Blackhole dsts ignore PMTU updates; intentionally empty. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}
247 
/* Blackhole dsts ignore redirects; intentionally empty. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
252 
/* dst_ops for blackhole routes: like ip6_dst_ops_template but with no-op
 * PMTU/redirect handlers and no gc/ifdown hooks.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	dst_cow_metrics_generic,
	.neigh_lookup		=	ip6_neigh_lookup,
};
264 
/* Metrics template for the special route entries below; hop limit is left
 * at 0 (unset).
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
268 
/* Template for the per-netns null route: returned when lookup finds
 * nothing; discards traffic with -ENETUNREACH.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
283 
284 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
285 
/* Template for the per-netns prohibit route: discards with -EACCES. */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
300 
/* Template for the per-netns blackhole route: drops silently (-EINVAL,
 * dst_discard on both paths).
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
315 
316 #endif
317 
/* Zero everything in @rt that follows the embedded dst_entry (dst_alloc
 * already set up the dst itself) and initialize the list heads so the
 * empty-list checks elsewhere work before the route is linked anywhere.
 */
static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_siblings);
	INIT_LIST_HEAD(&rt->rt6i_uncached);
}
326 
/* Allocate a dst with this netns's ip6_dst_ops and zero the rt6-specific
 * tail; returns NULL on allocation failure.
 */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
					struct net_device *dev,
					int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					0, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt)
		rt6_info_init(rt);

	return rt;
}
340 
/* Allocate a route plus its per-cpu clone slots.  Each slot starts NULL
 * (filled lazily by rt6_make_pcpu_route()).  On percpu allocation failure
 * the partially built route is destroyed and NULL is returned.
 */
static struct rt6_info *ip6_dst_alloc(struct net *net,
				      struct net_device *dev,
				      int flags)
{
	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);

	if (rt) {
		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
		if (rt->rt6i_pcpu) {
			int cpu;

			for_each_possible_cpu(cpu) {
				struct rt6_info **p;

				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
				/* no one shares rt */
				*p =  NULL;
			}
		} else {
			dst_destroy((struct dst_entry *)rt);
			return NULL;
		}
	}

	return rt;
}
367 
/* dst_ops->destroy hook: release everything a route owns — generic
 * metrics, the per-cpu clone array, its uncached-list linkage, the idev
 * reference, and finally the dst.from reference.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	/* clear before release so no one sees a dangling from pointer */
	dst->from = NULL;
	dst_release(from);
}
387 
/* dst_ops->ifdown hook: when @dev is going away (and is not loopback),
 * migrate this route's inet6_dev reference to the namespace loopback so
 * the device can be released.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev) {
		if (idev && idev->dev == dev) {
			struct inet6_dev *loopback_idev =
				in6_dev_get(loopback_dev);
			if (loopback_idev) {
				rt->rt6i_idev = loopback_idev;
				in6_dev_put(idev);
			}
		}
	}
}
407 
408 static bool __rt6_check_expired(const struct rt6_info *rt)
409 {
410 	if (rt->rt6i_flags & RTF_EXPIRES)
411 		return time_after(jiffies, rt->dst.expires);
412 	else
413 		return false;
414 }
415 
416 static bool rt6_check_expired(const struct rt6_info *rt)
417 {
418 	if (rt->rt6i_flags & RTF_EXPIRES) {
419 		if (time_after(jiffies, rt->dst.expires))
420 			return true;
421 	} else if (rt->dst.from) {
422 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
423 	}
424 	return false;
425 }
426 
427 /* Multipath route selection:
428  *   Hash based function using packet header and flowlabel.
429  * Adapted from fib_info_hashfn()
430  */
431 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
432 			       const struct flowi6 *fl6)
433 {
434 	return get_hash_from_flowi6(fl6) % candidate_count;
435 }
436 
437 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
438 					     struct flowi6 *fl6, int oif,
439 					     int strict)
440 {
441 	struct rt6_info *sibling, *next_sibling;
442 	int route_choosen;
443 
444 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
445 	/* Don't change the route, if route_choosen == 0
446 	 * (siblings does not include ourself)
447 	 */
448 	if (route_choosen)
449 		list_for_each_entry_safe(sibling, next_sibling,
450 				&match->rt6i_siblings, rt6i_siblings) {
451 			route_choosen--;
452 			if (route_choosen == 0) {
453 				if (rt6_score_route(sibling, oif, strict) < 0)
454 					break;
455 				match = sibling;
456 				break;
457 			}
458 		}
459 	return match;
460 }
461 
/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

/* Scan the leaf chain starting at @rt for the route matching the lookup's
 * device constraints.  With an @oif, an exact device match wins; loopback
 * routes whose idev matches @oif are remembered as a fallback.  With no
 * @oif, the first route whose device owns @saddr wins.  When nothing
 * matches and RT6_LOOKUP_F_IFACE is set, the null entry is returned.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* nothing to constrain on: keep the head route */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE)
						continue;
					/* prefer a loopback route whose idev
					 * does match the requested oif
					 */
					if (local &&
					    local->rt6i_idev->dev->ifindex == oif)
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
512 
#ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred neighbour-solicitation request queued by rt6_probe(); carries
 * the gateway address to probe and a held reference on the device.
 */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;		/* gateway to solicit */
	struct net_device *dev;		/* held via dev_hold() */
};
519 
/* Workqueue handler for rt6_probe(): send a neighbour solicitation to the
 * target's solicited-node multicast address, then drop the device ref and
 * free the request.
 */
static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
	dev_put(work->dev);
	kfree(work);
}
531 
/* Schedule a deferred reachability probe of @rt's gateway (gateway routes
 * only).  When a neighbour entry exists, the probe is rate-limited to one
 * per rtr_probe_interval and gated through __neigh_set_probe_once() under
 * the neigh lock; with no neighbour entry, a probe is always queued.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		/* re-check under the lock and apply the rate limit */
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);	/* released by rt6_probe_deferred() */
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
/* Router reachability probing is only built with CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
583 
584 /*
585  * Default Router Selection (RFC 2461 6.3.6)
586  */
587 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
588 {
589 	struct net_device *dev = rt->dst.dev;
590 	if (!oif || dev->ifindex == oif)
591 		return 2;
592 	if ((dev->flags & IFF_LOOPBACK) &&
593 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
594 		return 1;
595 	return 0;
596 }
597 
/* Evaluate next-hop neighbour reachability for @rt.  Non-gateway and
 * RTF_NONEXTHOP routes trivially succeed.  Otherwise the neighbour's NUD
 * state decides; with no neighbour entry the result depends on whether
 * router-preference probing is available (see enum rt6_nud_state).
 */
static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		/* not yet failed: give it the benefit of the doubt */
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
628 
/* Score @rt for router selection; higher is better.  Returns a negative
 * rt6_nud_state value when the route must not be used (device mismatch
 * under RT6_LOOKUP_F_IFACE, or a failed reachability check under
 * RT6_LOOKUP_F_REACHABLE).
 */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	/* fold the RA router-preference bits in above the device score */
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}
647 
/* Score @rt and fold it into the running best candidate.  @mpri holds the
 * best score so far and @match the route that scored it; both are updated
 * when @rt scores strictly higher.  *do_rr is set when the winning route
 * asked for round-robin rotation (RT6_NUD_FAIL_DO_RR).  Expired routes
 * and routes on carrier-less links (when the idev opts in) are skipped.
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *dev = rt->dst.dev;

	if (dev && !netif_carrier_ok(dev) &&
	    idev->cnf.ignore_routes_with_linkdown)
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}
684 
/* Find the best route among @fn's leaf chain entries sharing @metric.
 * The scan starts at the round-robin head @rr_head, then wraps around
 * from fn->leaf back up to @rr_head.  The first route seen with a
 * different metric starts a continuation list (@cont) that is only
 * scanned when nothing at @metric matched.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	/* wrap around: the part of the chain before rr_head */
	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	/* fall back to the higher-metric continuation */
	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}
721 
/* Select the route to use from fib6 node @fn.  Scanning starts at the
 * node's round-robin pointer; when the chosen route requested rotation
 * (do_rr), rr_ptr is advanced to the next same-metric sibling so equal
 * routes are used in turn.  Falls back to the null entry when nothing
 * matched.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
	bool do_rr = false;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
749 
750 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
751 {
752 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
753 }
754 
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Handle a Route Information option (RFC 4191) from a Router
 * Advertisement received on @dev from @gwaddr: validate length/prefix
 * fields and preference, then add, refresh, or (lifetime 0) delete the
 * corresponding RTF_ROUTEINFO route.  Returns 0 on success, -EINVAL on a
 * malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	/* a zero-length prefix names the default router itself */
	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex);

	/* lifetime 0 withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif
830 
831 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
832 					struct in6_addr *saddr)
833 {
834 	struct fib6_node *pn;
835 	while (1) {
836 		if (fn->fn_flags & RTN_TL_ROOT)
837 			return NULL;
838 		pn = fn->parent;
839 		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
840 			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
841 		else
842 			fn = pn;
843 		if (fn->fn_flags & RTN_RTINFO)
844 			return fn;
845 	}
846 }
847 
/* Simple (non-caching) policy lookup in @table: find the fib6 node for
 * the flow, apply device and multipath selection, and backtrack up the
 * tree when only the null entry matched.  The returned dst has a
 * reference taken (dst_use) while tb6_lock is still held.
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	/* ECMP selection only when no output device was requested */
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);

	return rt;

}
875 
876 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
877 				    int flags)
878 {
879 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
880 }
881 EXPORT_SYMBOL_GPL(ip6_route_lookup);
882 
883 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
884 			    const struct in6_addr *saddr, int oif, int strict)
885 {
886 	struct flowi6 fl6 = {
887 		.flowi6_oif = oif,
888 		.daddr = *daddr,
889 	};
890 	struct dst_entry *dst;
891 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
892 
893 	if (saddr) {
894 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
895 		flags |= RT6_LOOKUP_F_HAS_SADDR;
896 	}
897 
898 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
899 	if (dst->error == 0)
900 		return (struct rt6_info *) dst;
901 
902 	dst_release(dst);
903 
904 	return NULL;
905 }
906 EXPORT_SYMBOL(rt6_lookup);
907 
/* ip6_ins_rt is called with FREE table->tb6_lock.
   It takes new route entry, the addition fails by any reason the
   route is freed. In any case, if caller does not hold it, it may
   be destroyed.
 */

/* Insert @rt into its table's fib6 tree under the table write lock.
 * Returns fib6_add()'s result (0 on success, negative errno otherwise).
 */
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
			struct mx6_config *mxc)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, mxc);
	write_unlock_bh(&table->tb6_lock);

	return err;
}
927 
928 int ip6_ins_rt(struct rt6_info *rt)
929 {
930 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
931 	struct mx6_config mxc = { .mx = NULL, };
932 
933 	return __ip6_ins_rt(rt, &info, &mxc);
934 }
935 
/* Create an RTF_CACHE clone of @ort for destination @daddr (and @saddr
 * under CONFIG_IPV6_SUBTREES).  If @ort is itself a cache or pcpu clone,
 * clone from its parent (dst.from) instead.  The clone is a /128 host
 * route; RTF_ANYCAST is set when @daddr equals a non-/128 prefix address
 * of a non-gateway origin route.
 */
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;

	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);

	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}
975 
/* Allocate an RTF_PCPU clone of @rt (same device and dst flags); used to
 * populate a CPU's slot in rt->rt6i_pcpu.  Returns NULL on allocation
 * failure.
 */
static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt;

	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
				  rt->dst.dev, rt->dst.flags);

	if (!pcpu_rt)
		return NULL;
	ip6_rt_copy_init(pcpu_rt, rt);
	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
	pcpu_rt->rt6i_flags |= RTF_PCPU;
	return pcpu_rt;
}
990 
/* It should be called with read_lock_bh(&tb6_lock) acquired */

/* Return this CPU's cached clone of @rt with a reference held, or NULL
 * when the slot has not been populated yet (see rt6_make_pcpu_route()).
 */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, **p;

	p = this_cpu_ptr(rt->rt6i_pcpu);
	pcpu_rt = *p;

	if (pcpu_rt) {
		dst_hold(&pcpu_rt->dst);
		rt6_dst_from_metrics_check(pcpu_rt);
	}
	return pcpu_rt;
}
1005 
/* Allocate and install a per-cpu clone of @rt for this CPU, returning it
 * with a reference held.  cmpxchg guards against a concurrent installer
 * (the loser's clone is destroyed and the winner's returned).  Falls back
 * to the null entry when allocation fails, and to @rt itself when the
 * route was removed from the tree before the lock was re-taken.
 */
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct fib6_table *table = rt->rt6i_table;
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	read_lock_bh(&table->tb6_lock);
	if (rt->rt6i_pcpu) {
		p = this_cpu_ptr(rt->rt6i_pcpu);
		prev = cmpxchg(p, NULL, pcpu_rt);
		if (prev) {
			/* If someone did it before us, return prev instead */
			dst_destroy(&pcpu_rt->dst);
			pcpu_rt = prev;
		}
	} else {
		/* rt has been removed from the fib6 tree
		 * before we have a chance to acquire the read_lock.
		 * In this case, don't brother to create a pcpu rt
		 * since rt is going away anyway.  The next
		 * dst_check() will trigger a re-lookup.
		 */
		dst_destroy(&pcpu_rt->dst);
		pcpu_rt = rt;
	}
	dst_hold(&pcpu_rt->dst);
	rt6_dst_from_metrics_check(pcpu_rt);
	read_unlock_bh(&table->tb6_lock);
	return pcpu_rt;
}
1043 
/* Core policy routing lookup for @table.  Selects a route (retrying with
 * backtracking, and once more without the reachability requirement), then
 * returns one of:
 *  - the route itself (null entry or RTF_CACHE hit),
 *  - a fresh uncached RTF_CACHE clone (FLOWI_FLAG_KNOWN_NH, non-gateway),
 *  - this CPU's pcpu clone (the common case).
 * The returned dst always carries a reference.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	/* hosts (forwarding off) must prefer reachable routers */
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}


	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		/* take the ref while still under the table lock */
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);

		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt)
			rt6_uncached_list_add(uncached_rt);
		else
			uncached_rt = net->ipv6.ip6_null_entry;

		dst_hold(&uncached_rt->dst);

		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
		return pcpu_rt;

	}
}
1141 
1142 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1143 					    struct flowi6 *fl6, int flags)
1144 {
1145 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1146 }
1147 
1148 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1149 						struct net_device *dev,
1150 						struct flowi6 *fl6, int flags)
1151 {
1152 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1153 		flags |= RT6_LOOKUP_F_IFACE;
1154 
1155 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1156 }
1157 
/* Route an incoming packet: build a flow key from its IPv6 header and
 * attach the resulting dst to @skb.
 */
void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip_tunnel_info *tun_info;
	struct flowi6 fl6 = {
		/* Use the L3 master (e.g. VRF) ifindex when one exists. */
		.flowi6_iif = l3mdev_fib_oif(skb->dev),
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	/* Carry a received (non-TX) tunnel id in the flow key so
	 * routing rules can match on it.
	 */
	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
	skb_dst_drop(skb);
	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
1179 
1180 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1181 					     struct flowi6 *fl6, int flags)
1182 {
1183 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1184 }
1185 
/* Resolve an output route for @fl6.
 *
 * An L3 master device (VRF) may supply the dst directly.  Otherwise
 * choose lookup flags: require an interface match when the socket is
 * bound to a device, the destination needs strict scoping, or an oif
 * was given without a source address; when no source address is set,
 * apply the socket's source-address preferences.
 */
struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
				    struct flowi6 *fl6)
{
	struct dst_entry *dst;
	int flags = 0;
	bool any_src;

	dst = l3mdev_rt6_dst_by_oif(net, fl6);
	if (dst)
		return dst;

	/* Locally generated traffic is treated as arriving on loopback. */
	fl6->flowi6_iif = LOOPBACK_IFINDEX;

	any_src = ipv6_addr_any(&fl6->saddr);
	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
	    (fl6->flowi6_oif && any_src))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!any_src)
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL(ip6_route_output);
1212 
/* Clone @dst_orig into a blackhole route: same addressing, metrics and
 * device, but input/output handlers that discard every packet.
 * Consumes the caller's reference on @dst_orig.  Returns the new dst or
 * ERR_PTR(-ENOMEM).
 */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		rt6_info_init(rt);

		new = &rt->dst;
		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_out;

		dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

		rt->rt6i_gateway = ort->rt6i_gateway;
		/* Never mark the clone as a per-cpu route. */
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		/* NOTE(review): dst_free() on an entry still holding the
		 * initial reference appears to defer actual freeing to the
		 * dst GC once the refcount drops — confirm against
		 * dst_alloc(initial_ref=1)/dst_free semantics.
		 */
		dst_free(new);
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
1247 
1248 /*
1249  *	Destination cache support functions
1250  */
1251 
/* If this route inherits metrics from a parent (dst.from) and the
 * parent's metrics block has been replaced since we last looked, point
 * our metrics at the parent's current block (shared read-only).
 */
static void rt6_dst_from_metrics_check(struct rt6_info *rt)
{
	if (rt->dst.from &&
	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
}
1258 
1259 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1260 {
1261 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1262 		return NULL;
1263 
1264 	if (rt6_check_expired(rt))
1265 		return NULL;
1266 
1267 	return &rt->dst;
1268 }
1269 
1270 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1271 {
1272 	if (!__rt6_check_expired(rt) &&
1273 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1274 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1275 		return &rt->dst;
1276 	else
1277 		return NULL;
1278 }
1279 
1280 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1281 {
1282 	struct rt6_info *rt;
1283 
1284 	rt = (struct rt6_info *) dst;
1285 
1286 	/* All IPV6 dsts are created with ->obsolete set to the value
1287 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1288 	 * into this function always.
1289 	 */
1290 
1291 	rt6_dst_from_metrics_check(rt);
1292 
1293 	if (rt->rt6i_flags & RTF_PCPU ||
1294 	    (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1295 		return rt6_dst_from_check(rt, cookie);
1296 	else
1297 		return rt6_check(rt, cookie);
1298 }
1299 
/* dst_ops->negative_advice: the caller's cached route looks stale.
 * Expired RTF_CACHE clones are deleted from the tree (ip6_del_rt()
 * consumes the caller's reference); any non-cache route is simply
 * released.  Returning NULL makes the caller perform a fresh lookup.
 */
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}
1317 
/* dst_ops->link_failure: the neighbour for this route is unreachable.
 * Report destination-unreachable to the sender, then invalidate the
 * route: delete cached clones outright, or bump the fib node's serial
 * number so cached dsts fail their next rt6_check() cookie comparison.
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* ip6_del_rt() consumes a reference; take one. */
			dst_hold(&rt->dst);
			ip6_del_rt(rt);
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
			rt->rt6i_node->fn_sernum = -1;
		}
	}
}
1334 
/* Record a newly learned path MTU on @rt and (re)arm its expiry using
 * the per-netns mtu_expires sysctl.
 */
static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
{
	struct net *net = dev_net(rt->dst.dev);

	rt->rt6i_flags |= RTF_MODIFIED;
	rt->rt6i_pmtu = mtu;
	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
}
1343 
1344 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1345 {
1346 	return !(rt->rt6i_flags & RTF_CACHE) &&
1347 		(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1348 }
1349 
/* Apply a PMTU update to @dst.  The new value is clamped to at least
 * IPV6_MIN_MTU and ignored unless it shrinks the current MTU.  Routes
 * that may not be modified in place get an RTF_CACHE clone carrying the
 * new MTU; addresses for the clone come from @iph, or from @sk when no
 * header is available.
 */
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	if (rt6->rt6i_flags & RTF_LOCAL)
		return;

	dst_confirm(dst);
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	if (mtu >= dst_mtu(dst))
		return;

	if (!rt6_cache_allowed_for_pmtu(rt6)) {
		rt6_do_update_pmtu(rt6, mtu);
	} else {
		const struct in6_addr *daddr, *saddr;
		struct rt6_info *nrt6;

		if (iph) {
			daddr = &iph->daddr;
			saddr = &iph->saddr;
		} else if (sk) {
			daddr = &sk->sk_v6_daddr;
			saddr = &inet6_sk(sk)->saddr;
		} else {
			/* No way to identify the flow; drop the update. */
			return;
		}
		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);

			/* ip6_ins_rt(nrt6) will bump the
			 * rt6->rt6i_node->fn_sernum
			 * which will fail the next rt6_check() and
			 * invalidate the sk->sk_dst_cache.
			 */
			ip6_ins_rt(nrt6);
		}
	}
}
1391 
1392 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1393 			       struct sk_buff *skb, u32 mtu)
1394 {
1395 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1396 }
1397 
1398 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1399 		     int oif, u32 mark)
1400 {
1401 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1402 	struct dst_entry *dst;
1403 	struct flowi6 fl6;
1404 
1405 	memset(&fl6, 0, sizeof(fl6));
1406 	fl6.flowi6_oif = oif;
1407 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1408 	fl6.daddr = iph->daddr;
1409 	fl6.saddr = iph->saddr;
1410 	fl6.flowlabel = ip6_flowinfo(iph);
1411 
1412 	dst = ip6_route_output(net, NULL, &fl6);
1413 	if (!dst->error)
1414 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1415 	dst_release(dst);
1416 }
1417 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1418 
/* Socket-flavoured PMTU update: use the socket's bound device and mark. */
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	ip6_update_pmtu(skb, sock_net(sk), mtu,
			sk->sk_bound_dev_if, sk->sk_mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1425 
/* Handle redirects */

/* Flow key for redirect validation: the base flowi6 plus the gateway
 * (the Redirect's source address) that must match a route's nexthop.
 * Must begin with the flowi6 so it can be passed where a plain flowi6
 * is expected and cast back in __ip6_route_redirect().
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
1431 
/* Find the route a received Redirect should apply to: a gateway route
 * to fl6->daddr via the interface the Redirect arrived on, whose
 * nexthop equals the Redirect's source.  Falls back to the null entry
 * when nothing matches.  Returns the route with a reference held.
 */
static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/* Get the "current" route for this destination and
	 * check if the redirect has come from approriate router.
	 *
	 * RFC 4861 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt6_check_expired(rt))
			continue;
		if (rt->dst.error)
			break;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	else if (rt->dst.error) {
		rt = net->ipv6.ip6_null_entry;
		goto out;
	}

	/* Nothing matched at this node: retry in a less specific one. */
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
	return rt;
};
1489 
1490 static struct dst_entry *ip6_route_redirect(struct net *net,
1491 					const struct flowi6 *fl6,
1492 					const struct in6_addr *gateway)
1493 {
1494 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1495 	struct ip6rd_flowi rdfl;
1496 
1497 	rdfl.fl6 = *fl6;
1498 	rdfl.gateway = *gateway;
1499 
1500 	return fib6_rule_lookup(net, &rdfl.fl6,
1501 				flags, __ip6_route_redirect);
1502 }
1503 
1504 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1505 {
1506 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1507 	struct dst_entry *dst;
1508 	struct flowi6 fl6;
1509 
1510 	memset(&fl6, 0, sizeof(fl6));
1511 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1512 	fl6.flowi6_oif = oif;
1513 	fl6.flowi6_mark = mark;
1514 	fl6.daddr = iph->daddr;
1515 	fl6.saddr = iph->saddr;
1516 	fl6.flowlabel = ip6_flowinfo(iph);
1517 
1518 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1519 	rt6_do_redirect(dst, NULL, skb);
1520 	dst_release(dst);
1521 }
1522 EXPORT_SYMBOL_GPL(ip6_redirect);
1523 
/* Like ip6_redirect(), but for Redirect messages carrying no usable
 * offending header: build the flow from the redirect message itself
 * (msg->dest as destination, the outer header's daddr — our own
 * address — as source).
 */
void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
			    u32 mark)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = msg->dest;
	fl6.saddr = iph->daddr;

	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
1543 
/* Socket-flavoured redirect: use the socket's bound device and mark. */
void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1549 
1550 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1551 {
1552 	struct net_device *dev = dst->dev;
1553 	unsigned int mtu = dst_mtu(dst);
1554 	struct net *net = dev_net(dev);
1555 
1556 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1557 
1558 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1559 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1560 
1561 	/*
1562 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1563 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1564 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1565 	 * rely only on pmtu discovery"
1566 	 */
1567 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1568 		mtu = IPV6_MAXPLEN;
1569 	return mtu;
1570 }
1571 
1572 static unsigned int ip6_mtu(const struct dst_entry *dst)
1573 {
1574 	const struct rt6_info *rt = (const struct rt6_info *)dst;
1575 	unsigned int mtu = rt->rt6i_pmtu;
1576 	struct inet6_dev *idev;
1577 
1578 	if (mtu)
1579 		goto out;
1580 
1581 	mtu = dst_metric_raw(dst, RTAX_MTU);
1582 	if (mtu)
1583 		goto out;
1584 
1585 	mtu = IPV6_MIN_MTU;
1586 
1587 	rcu_read_lock();
1588 	idev = __in6_dev_get(dst->dev);
1589 	if (idev)
1590 		mtu = idev->cnf.mtu6;
1591 	rcu_read_unlock();
1592 
1593 out:
1594 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1595 }
1596 
/* Singly-linked (via dst.next) list of ICMPv6 dst entries awaiting
 * garbage collection; protected by icmp6_dst_lock.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1599 
/* Allocate an ephemeral dst for sending an ICMPv6 message toward
 * fl6->daddr on @dev.  The entry is not inserted into the fib; it is
 * chained onto icmp6_dst_gc_list to be reaped by icmp6_dst_gc() once
 * its refcount drops.  Returns the (xfrm-resolved) dst or ERR_PTR().
 */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output  = ip6_output;
	atomic_set(&rt->dst.__refcnt, 1);
	/* Host route to the destination itself: gateway == daddr. */
	rt->rt6i_gateway  = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev     = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	/* Queue for later reaping by icmp6_dst_gc(). */
	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	fib6_force_start_gc(net);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}
1639 
/* Reap unreferenced entries from icmp6_dst_gc_list.  Returns the number
 * of entries still in use (non-zero means another pass is needed).
 */
int icmp6_dst_gc(void)
{
	struct dst_entry *dst, **pprev;
	int more = 0;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;

	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			/* Unlink and free; keep pprev at the same slot. */
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
			++more;
		}
	}

	spin_unlock_bh(&icmp6_dst_lock);

	return more;
}
1662 
/* Walk icmp6_dst_gc_list and free every entry for which @func returns
 * non-zero; entries it rejects stay on the list.
 */
static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
			    void *arg)
{
	struct dst_entry *dst, **pprev;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;
	while ((dst = *pprev) != NULL) {
		struct rt6_info *rt = (struct rt6_info *) dst;
		if (func(rt, arg)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
		}
	}
	spin_unlock_bh(&icmp6_dst_lock);
}
1681 
/* dst_ops->gc: decide whether to run the fib6 garbage collector, driven
 * by the per-netns route GC sysctls.  Returns non-zero when the table
 * is still over rt_max_size (signals allocation pressure to dst core).
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	/* Skip GC when we ran recently and are under the size cap. */
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	/* Expire more aggressively on each consecutive pass... */
	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	/* ...and decay the aggressiveness by the elasticity factor. */
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}
1706 
/* Convert the RTA_METRICS netlink attribute stream in @cfg into the
 * metrics array consumed by fib6 insertion.  On success mxc->mx points
 * at a kzalloc'd array the caller must kfree.  Returns 0 or -errno.
 */
static int ip6_convert_metrics(struct mx6_config *mxc,
			       const struct fib6_config *cfg)
{
	bool ecn_ca = false;
	struct nlattr *nla;
	int remaining;
	u32 *mp;

	/* No metrics attribute: nothing to convert. */
	if (!cfg->fc_mx)
		return 0;

	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
	if (unlikely(!mp))
		return -ENOMEM;

	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
		int type = nla_type(nla);
		u32 val;

		if (!type)
			continue;
		if (unlikely(type > RTAX_MAX))
			goto err;

		if (type == RTAX_CC_ALGO) {
			/* Congestion-control algorithm arrives as a
			 * name string; translate it to its key.
			 */
			char tmp[TCP_CA_NAME_MAX];

			nla_strlcpy(tmp, nla, sizeof(tmp));
			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
			if (val == TCP_CA_UNSPEC)
				goto err;
		} else {
			val = nla_get_u32(nla);
		}
		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
			goto err;

		/* Metrics array is 0-based; netlink types are 1-based. */
		mp[type - 1] = val;
		__set_bit(type - 1, mxc->mx_valid);
	}

	if (ecn_ca) {
		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
	}

	mxc->mx = mp;
	return 0;
 err:
	kfree(mp);
	return -EINVAL;
}
1759 
/* Build a new rt6_info from a netlink/ioctl route request.
 *
 * Validates @cfg, resolves the device, table and gateway, and returns a
 * fully initialized but not-yet-inserted route, or ERR_PTR(-errno).  On
 * success the route holds references on its device and inet6_dev; on
 * failure every reference acquired here is dropped.
 */
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;
	int err = -EINVAL;

	/* IPv6 prefixes cannot exceed 128 bits. */
	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		goto out;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-prefix routes require subtree support. */
	if (cfg->fc_src_len)
		goto out;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	/* Without NLM_F_CREATE only look up an existing table, but warn
	 * and fall back to creating one to preserve legacy behaviour.
	 */
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Choose the input handler by destination type. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		/* Lightweight tunnel encap: hook its handlers in front
		 * of the normal input/output paths.
		 */
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* Reject variants differ only in error code + handlers. */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_out;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0))
			goto out;

		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* No device given: adopt the one the
				 * gateway route resolves through.
				 */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* The nexthop itself must be on-link. */
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		/* Preferred source must be an address on the device. */
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return rt;
out:
	/* Error path: release everything acquired above. */
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);

	return ERR_PTR(err);
}
2005 
/* Create a route from @cfg and insert it into the fib.  Converts any
 * netlink metrics first; frees the route on every error path.
 * Returns 0 or a negative errno.
 */
int ip6_route_add(struct fib6_config *cfg)
{
	struct mx6_config mxc = { .mx = NULL, };
	struct rt6_info *rt;
	int err;

	rt = ip6_route_info_create(cfg);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto out;
	}

	err = ip6_convert_metrics(&mxc, cfg);
	if (err)
		goto out;

	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);

	/* Insertion copies the metrics; the temporary array can go. */
	kfree(mxc.mx);

	return err;
out:
	if (rt)
		dst_free(&rt->dst);

	return err;
}
2034 
/* Remove @rt from its fib table.  Consumes the caller's reference on
 * @rt in all cases.  The null entry and uncached (DST_NOCACHE) routes
 * are not in the tree and yield -ENOENT.
 */
static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->dst.dev);

	if (rt == net->ipv6.ip6_null_entry ||
	    rt->dst.flags & DST_NOCACHE) {
		err = -ENOENT;
		goto out;
	}

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_del(rt, info);
	write_unlock_bh(&table->tb6_lock);

out:
	ip6_rt_put(rt);
	return err;
}
2056 
/* Delete @rt with default netlink info; consumes the caller's reference. */
int ip6_del_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->dst.dev),
	};
	return __ip6_del_rt(rt, &info);
}
2064 
/* Delete the route matching @cfg (prefix, and optionally device,
 * gateway and metric).  Returns 0 on success, -ESRCH when no route
 * matches.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			/* Cached clones are skipped unless the request
			 * explicitly targets them.
			 */
			if ((rt->rt6i_flags & RTF_CACHE) &&
			    !(cfg->fc_flags & RTF_CACHE))
				continue;
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			/* Hold the route, drop the read lock, then let
			 * __ip6_del_rt() take the write lock itself.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
2106 
/* Handle a received ICMPv6 Redirect for the route @dst: validate the
 * message per RFC 4861, update the neighbour cache for the new target,
 * and install an RTF_CACHE clone routing msg->dest via that target.
 * @sk is unused here; @skb holds the Redirect message.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* dest == target means the destination is itself on-link;
	 * otherwise the target must be a link-local router address.
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* Routers, and hosts configured to ignore them, drop redirects. */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt->rt6i_flags & RTF_REJECT) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* The superseded cache entry is no longer needed. */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}
2222 
2223 /*
2224  *	Misc support functions
2225  */
2226 
/* Make @rt a child of @from: hold a reference to the parent dst and
 * share its metrics read-only.  @from must not itself be derived
 * (BUG_ON prevents ->from chains).
 */
static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
	BUG_ON(from->dst.from);

	/* The child follows the parent's lifetime, not its own timer. */
	rt->rt6i_flags &= ~RTF_EXPIRES;
	dst_hold(&from->dst);
	rt->dst.from = &from->dst;
	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}
2236 
/* Initialize @rt as a copy of @ort: handlers, keys, flags and table,
 * taking references on the inet6_dev, the parent dst (via
 * rt6_set_from()) and the lwtunnel state.
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
{
	rt->dst.input = ort->dst.input;
	rt->dst.output = ort->dst.output;
	rt->rt6i_dst = ort->rt6i_dst;
	rt->dst.error = ort->dst.error;
	rt->rt6i_idev = ort->rt6i_idev;
	if (rt->rt6i_idev)
		in6_dev_hold(rt->rt6i_idev);
	rt->dst.lastuse = jiffies;
	rt->rt6i_gateway = ort->rt6i_gateway;
	rt->rt6i_flags = ort->rt6i_flags;
	rt6_set_from(rt, ort);
	rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->rt6i_src;
#endif
	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
	rt->rt6i_table = ort->rt6i_table;
	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}
2258 
2259 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Look up the RTF_ROUTEINFO route (learnt from an RA Route Information
 * option) for @prefix/@prefixlen via gateway @gwaddr on @ifindex.
 *
 * Returns the route with a dst reference held, or NULL if the table,
 * the node or a matching route does not exist.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	/* Several routes may share the node; match device, flags and
	 * gateway before taking the reference.
	 */
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);
		break;
	}
out:
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
2291 
/* Install a route learnt from an RA Route Information option and
 * return it (with a dst reference) via rt6_get_route_info().
 *
 * The return value of ip6_route_add() is deliberately ignored: the
 * subsequent lookup reports success or failure by returning either
 * the installed route or NULL.
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= ifindex,
		.fc_dst_len	= prefixlen,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
				  RTF_UP | RTF_PREF(pref),
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	/* Use the l3mdev (VRF) table when the interface has one. */
	cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
	cfg.fc_dst = *prefix;
	cfg.fc_gateway = *gwaddr;

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	ip6_route_add(&cfg);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
}
2320 #endif
2321 
/* Find the RA-learnt (RTF_ADDRCONF|RTF_DEFAULT) default route via
 * gateway @addr on device @dev.  Returns the route with a dst
 * reference held, or NULL if not present.
 */
struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (dev == rt->dst.dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	/* Take the reference while still holding the table lock. */
	if (rt)
		dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
2343 
/* Install an RA-learnt default route via @gwaddr on @dev with router
 * preference @pref, then look it up again (taking a reference) via
 * rt6_get_dflt_router().  As with rt6_add_route_info(), the lookup
 * result — not ip6_route_add()'s return value — signals success.
 */
struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= dev->ifindex,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	cfg.fc_gateway = *gwaddr;

	ip6_route_add(&cfg);

	return rt6_get_dflt_router(gwaddr, dev);
}
2365 
/* Delete all RA-learnt default routes in @net, except on interfaces
 * whose accept_ra == 2 (always accept RAs, even when forwarding).
 */
void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (!table)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
			/* Hold the route, drop the read lock (deletion
			 * needs the table for writing), delete, and
			 * rescan from the top since the list may have
			 * changed underneath us.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}
2389 
2390 static void rtmsg_to_fib6_config(struct net *net,
2391 				 struct in6_rtmsg *rtmsg,
2392 				 struct fib6_config *cfg)
2393 {
2394 	memset(cfg, 0, sizeof(*cfg));
2395 
2396 	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2397 			 : RT6_TABLE_MAIN;
2398 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2399 	cfg->fc_metric = rtmsg->rtmsg_metric;
2400 	cfg->fc_expires = rtmsg->rtmsg_info;
2401 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2402 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2403 	cfg->fc_flags = rtmsg->rtmsg_flags;
2404 
2405 	cfg->fc_nlinfo.nl_net = net;
2406 
2407 	cfg->fc_dst = rtmsg->rtmsg_dst;
2408 	cfg->fc_src = rtmsg->rtmsg_src;
2409 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2410 }
2411 
2412 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2413 {
2414 	struct fib6_config cfg;
2415 	struct in6_rtmsg rtmsg;
2416 	int err;
2417 
2418 	switch (cmd) {
2419 	case SIOCADDRT:		/* Add a route */
2420 	case SIOCDELRT:		/* Delete a route */
2421 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2422 			return -EPERM;
2423 		err = copy_from_user(&rtmsg, arg,
2424 				     sizeof(struct in6_rtmsg));
2425 		if (err)
2426 			return -EFAULT;
2427 
2428 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2429 
2430 		rtnl_lock();
2431 		switch (cmd) {
2432 		case SIOCADDRT:
2433 			err = ip6_route_add(&cfg);
2434 			break;
2435 		case SIOCDELRT:
2436 			err = ip6_route_del(&cfg);
2437 			break;
2438 		default:
2439 			err = -EINVAL;
2440 		}
2441 		rtnl_unlock();
2442 
2443 		return err;
2444 	}
2445 
2446 	return -EINVAL;
2447 }
2448 
2449 /*
2450  *	Drop the packet on the floor
2451  */
2452 
2453 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2454 {
2455 	int type;
2456 	struct dst_entry *dst = skb_dst(skb);
2457 	switch (ipstats_mib_noroutes) {
2458 	case IPSTATS_MIB_INNOROUTES:
2459 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2460 		if (type == IPV6_ADDR_ANY) {
2461 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2462 				      IPSTATS_MIB_INADDRERRORS);
2463 			break;
2464 		}
2465 		/* FALLTHROUGH */
2466 	case IPSTATS_MIB_OUTNOROUTES:
2467 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2468 			      ipstats_mib_noroutes);
2469 		break;
2470 	}
2471 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2472 	kfree_skb(skb);
2473 	return 0;
2474 }
2475 
/* dst.input handler for blackhole/unreachable routes. */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2480 
/* dst.output handler for blackhole/unreachable routes. */
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	/* ip6_pkt_drop() reports against skb->dev; point it at the
	 * route's device for the output path.
	 */
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
2486 
/* dst.input handler for administratively prohibited routes. */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2491 
/* dst.output handler for administratively prohibited routes. */
static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
2497 
2498 /*
2499  *	Allocate a dst for local (unicast / anycast) address.
2500  */
2501 
2502 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2503 				    const struct in6_addr *addr,
2504 				    bool anycast)
2505 {
2506 	u32 tb_id;
2507 	struct net *net = dev_net(idev->dev);
2508 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2509 					    DST_NOCOUNT);
2510 	if (!rt)
2511 		return ERR_PTR(-ENOMEM);
2512 
2513 	in6_dev_hold(idev);
2514 
2515 	rt->dst.flags |= DST_HOST;
2516 	rt->dst.input = ip6_input;
2517 	rt->dst.output = ip6_output;
2518 	rt->rt6i_idev = idev;
2519 
2520 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2521 	if (anycast)
2522 		rt->rt6i_flags |= RTF_ANYCAST;
2523 	else
2524 		rt->rt6i_flags |= RTF_LOCAL;
2525 
2526 	rt->rt6i_gateway  = *addr;
2527 	rt->rt6i_dst.addr = *addr;
2528 	rt->rt6i_dst.plen = 128;
2529 	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2530 	rt->rt6i_table = fib6_get_table(net, tb_id);
2531 	rt->dst.flags |= DST_NOCACHE;
2532 
2533 	atomic_set(&rt->dst.__refcnt, 1);
2534 
2535 	return rt;
2536 }
2537 
2538 int ip6_route_get_saddr(struct net *net,
2539 			struct rt6_info *rt,
2540 			const struct in6_addr *daddr,
2541 			unsigned int prefs,
2542 			struct in6_addr *saddr)
2543 {
2544 	struct inet6_dev *idev =
2545 		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2546 	int err = 0;
2547 	if (rt && rt->rt6i_prefsrc.plen)
2548 		*saddr = rt->rt6i_prefsrc.addr;
2549 	else
2550 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2551 					 daddr, prefs, saddr);
2552 	return err;
2553 }
2554 
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;	/* only match routes on this device; NULL = any */
	struct net *net;	/* namespace whose null entry must be skipped */
	struct in6_addr *addr;	/* the address being removed */
};
2561 
2562 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2563 {
2564 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2565 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2566 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2567 
2568 	if (((void *)rt->dst.dev == dev || !dev) &&
2569 	    rt != net->ipv6.ip6_null_entry &&
2570 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2571 		/* remove prefsrc entry */
2572 		rt->rt6i_prefsrc.plen = 0;
2573 	}
2574 	return 0;
2575 }
2576 
/* Address @ifp is being deleted: walk the FIB and clear the
 * preferred-source setting of every route that still references it.
 */
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net *net = dev_net(ifp->idev->dev);
	struct arg_dev_net_ip adni = {
		.dev = ifp->idev->dev,
		.net = net,
		.addr = &ifp->addr,
	};
	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}
2587 
2588 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2589 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2590 
2591 /* Remove routers and update dst entries when gateway turn into host. */
2592 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2593 {
2594 	struct in6_addr *gateway = (struct in6_addr *)arg;
2595 
2596 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2597 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2598 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2599 		return -1;
2600 	}
2601 	return 0;
2602 }
2603 
/* Run fib6_clean_tohost() over every route in @net; used when
 * @gateway stops acting as a router and becomes a plain host.
 */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
2608 
/* Argument bundle for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL = match all */
	struct net *net;	/* namespace whose null entry must survive */
};
2613 
2614 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2615 {
2616 	const struct arg_dev_net *adn = arg;
2617 	const struct net_device *dev = adn->dev;
2618 
2619 	if ((rt->dst.dev == dev || !dev) &&
2620 	    rt != adn->net->ipv6.ip6_null_entry)
2621 		return -1;
2622 
2623 	return 0;
2624 }
2625 
/* Device @dev is going down (or @dev == NULL: flush everything):
 * remove matching routes via fib6_clean_all() and icmp6_clean_all(),
 * then flush the per-cpu uncached-route list for the device.
 */
void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, &adn);
	icmp6_clean_all(fib6_ifdown, &adn);
	if (dev)
		rt6_uncached_list_flush_dev(net, dev);
}
2638 
/* Argument bundle for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2643 
/* fib6_clean_all() callback: propagate a device MTU change into the
 * routes using that device.  Always returns 0 (no route is removed).
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, an ICMP TOO BIG message will lead to
	   further PMTU discovery.
	 */
	if (rt->dst.dev == arg->dev &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* For RTF_CACHE with rt6i_pmtu == 0
			 * (i.e. a redirected route),
			 * the metrics of its rt->dst.from has already
			 * been updated.
			 */
			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
				rt->rt6i_pmtu = arg->mtu;
		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
			   (dst_mtu(&rt->dst) < arg->mtu &&
			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		}
	}
	return 0;
}
2691 
/* Device @dev's MTU changed to @mtu: walk its namespace's FIB and let
 * rt6_mtu_change_route() update the affected routes.
 */
void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
{
	struct rt6_mtu_change_arg arg = {
		.dev = dev,
		.mtu = mtu,
	};

	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
}
2701 
/* Netlink attribute validation policy for RTM_*ROUTE requests.
 *
 * NOTE(review): rtm_to_fib6_config()/inet6_rtm_getroute() also read
 * RTA_DST, RTA_SRC, RTA_PREFSRC, RTA_TABLE and RTA_MARK, which have no
 * entry here and are therefore only checked by the explicit nla_len()
 * tests at their use sites (or not at all) — confirm this is intended.
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]              = { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
	[RTA_EXPIRES]		= { .type = NLA_U32 },
};
2714 
/* Translate an RTM_NEWROUTE/RTM_DELROUTE netlink request into a
 * struct fib6_config.  Returns 0 on success or a negative errno for
 * a malformed message.
 *
 * Note: cfg->fc_mx/fc_mp/fc_encap point into the request skb; they
 * are only valid while @skb is.
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	unsigned int pref;
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_type = rtm->rtm_type;

	/* All the reject-style route types share the RTF_REJECT flag. */
	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	if (rtm->rtm_flags & RTM_F_CLONED)
		cfg->fc_flags |= RTF_CACHE;

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	/* Prefixes may be sent truncated to their prefix length. */
	if (tb[RTA_DST]) {
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	/* An explicit RTA_TABLE overrides the header's rtm_table. */
	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
	}

	if (tb[RTA_PREF]) {
		pref = nla_get_u8(tb[RTA_PREF]);
		if (pref != ICMPV6_ROUTER_PREF_LOW &&
		    pref != ICMPV6_ROUTER_PREF_HIGH)
			pref = ICMPV6_ROUTER_PREF_MEDIUM;
		cfg->fc_flags |= RTF_PREF(pref);
	}

	if (tb[RTA_ENCAP])
		cfg->fc_encap = tb[RTA_ENCAP];

	if (tb[RTA_ENCAP_TYPE])
		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);

	/* Only a finite lifetime sets RTF_EXPIRES; an infinite one
	 * leaves the route permanent.
	 */
	if (tb[RTA_EXPIRES]) {
		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);

		if (addrconf_finite_timeout(timeout)) {
			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
			cfg->fc_flags |= RTF_EXPIRES;
		}
	}

	err = 0;
errout:
	return err;
}
2826 
/* One parsed nexthop of a multipath request, queued on rt6_nh_list. */
struct rt6_nh {
	struct rt6_info *rt6_info;	/* route to insert; NULL once inserted/consumed */
	struct fib6_config r_cfg;	/* per-nexthop copy of the request config */
	struct mx6_config mxc;		/* converted metrics (mxc.mx is kmalloc'd) */
	struct list_head next;
};
2833 
2834 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2835 {
2836 	struct rt6_nh *nh;
2837 
2838 	list_for_each_entry(nh, rt6_nh_list, next) {
2839 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2840 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2841 		        nh->r_cfg.fc_ifindex);
2842 	}
2843 }
2844 
/* Queue @rt on @rt6_nh_list unless an equivalent nexthop (same device,
 * idev and gateway) is already present.
 *
 * On success (0) the list takes ownership of @rt and of the metrics
 * buffer allocated by ip6_convert_metrics() (freed via nh->mxc.mx in
 * the caller's cleanup path).  Returns -EEXIST for a duplicate or
 * -ENOMEM / the ip6_convert_metrics() error, in which case the caller
 * still owns @rt.
 */
static int ip6_route_info_append(struct list_head *rt6_nh_list,
				 struct rt6_info *rt, struct fib6_config *r_cfg)
{
	struct rt6_nh *nh;
	struct rt6_info *rtnh;
	int err = -EEXIST;

	list_for_each_entry(nh, rt6_nh_list, next) {
		/* check if rt6_info already exists */
		rtnh = nh->rt6_info;

		if (rtnh->dst.dev == rt->dst.dev &&
		    rtnh->rt6i_idev == rt->rt6i_idev &&
		    ipv6_addr_equal(&rtnh->rt6i_gateway,
				    &rt->rt6i_gateway))
			return err;
	}

	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
	if (!nh)
		return -ENOMEM;
	nh->rt6_info = rt;
	err = ip6_convert_metrics(&nh->mxc, r_cfg);
	if (err) {
		kfree(nh);
		return err;
	}
	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
	list_add_tail(&nh->next, rt6_nh_list);

	return 0;
}
2877 
/* Add a multipath route: parse each RTA_MULTIPATH nexthop into its own
 * rt6_info, then insert them one by one.  If any insertion fails, the
 * nexthops already inserted by this request are deleted again so the
 * operation is all-or-nothing (modulo the NLM_F_REPLACE caveat logged
 * by ip6_print_replace_route_err()).
 */
static int ip6_route_multipath_add(struct fib6_config *cfg)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	struct rt6_info *rt;
	struct rt6_nh *err_nh;
	struct rt6_nh *nh, *nh_safe;
	int remaining;
	int attrlen;
	int err = 1;
	int nhn = 0;
	int replace = (cfg->fc_nlinfo.nlh &&
		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
	LIST_HEAD(rt6_nh_list);

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
	 * rt6_info structs per nexthop
	 */
	while (rtnh_ok(rtnh, remaining)) {
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				r_cfg.fc_gateway = nla_get_in6_addr(nla);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
			if (nla)
				r_cfg.fc_encap_type = nla_get_u16(nla);
		}

		rt = ip6_route_info_create(&r_cfg);
		if (IS_ERR(rt)) {
			err = PTR_ERR(rt);
			rt = NULL;
			goto cleanup;
		}

		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
		if (err) {
			/* On append failure we still own rt. */
			dst_free(&rt->dst);
			goto cleanup;
		}

		rtnh = rtnh_next(rtnh, &remaining);
	}

	err_nh = NULL;
	list_for_each_entry(nh, &rt6_nh_list, next) {
		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
		/* nh->rt6_info is used or freed at this point, reset to NULL*/
		nh->rt6_info = NULL;
		if (err) {
			if (replace && nhn)
				ip6_print_replace_route_err(&rt6_nh_list);
			err_nh = nh;
			goto add_errout;
		}

		/* Because each route is added like a single route we remove
		 * these flags after the first nexthop: if there is a collision,
		 * we have already failed to add the first nexthop:
		 * fib6_add_rt2node() has rejected it; when replacing, old
		 * nexthops have been replaced by first new, the rest should
		 * be added to it.
		 *
		 * NOTE(review): nlh is dereferenced unconditionally here,
		 * while the "replace" computation above guards against a
		 * NULL nlh.  Callers reach this only from rtnetlink, where
		 * nlh is always set — confirm no other caller exists.
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
						     NLM_F_REPLACE);
		nhn++;
	}

	goto cleanup;

add_errout:
	/* Delete routes that were already added */
	list_for_each_entry(nh, &rt6_nh_list, next) {
		if (err_nh == nh)
			break;
		ip6_route_del(&nh->r_cfg);
	}

cleanup:
	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
		/* rt6_info is only non-NULL for entries never inserted. */
		if (nh->rt6_info)
			dst_free(&nh->rt6_info->dst);
		kfree(nh->mxc.mx);
		list_del(&nh->next);
		kfree(nh);
	}

	return err;
}
2980 
2981 static int ip6_route_multipath_del(struct fib6_config *cfg)
2982 {
2983 	struct fib6_config r_cfg;
2984 	struct rtnexthop *rtnh;
2985 	int remaining;
2986 	int attrlen;
2987 	int err = 1, last_err = 0;
2988 
2989 	remaining = cfg->fc_mp_len;
2990 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2991 
2992 	/* Parse a Multipath Entry */
2993 	while (rtnh_ok(rtnh, remaining)) {
2994 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2995 		if (rtnh->rtnh_ifindex)
2996 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2997 
2998 		attrlen = rtnh_attrlen(rtnh);
2999 		if (attrlen > 0) {
3000 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3001 
3002 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3003 			if (nla) {
3004 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3005 				r_cfg.fc_flags |= RTF_GATEWAY;
3006 			}
3007 		}
3008 		err = ip6_route_del(&r_cfg);
3009 		if (err)
3010 			last_err = err;
3011 
3012 		rtnh = rtnh_next(rtnh, &remaining);
3013 	}
3014 
3015 	return last_err;
3016 }
3017 
3018 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3019 {
3020 	struct fib6_config cfg;
3021 	int err;
3022 
3023 	err = rtm_to_fib6_config(skb, nlh, &cfg);
3024 	if (err < 0)
3025 		return err;
3026 
3027 	if (cfg.fc_mp)
3028 		return ip6_route_multipath_del(&cfg);
3029 	else
3030 		return ip6_route_del(&cfg);
3031 }
3032 
3033 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3034 {
3035 	struct fib6_config cfg;
3036 	int err;
3037 
3038 	err = rtm_to_fib6_config(skb, nlh, &cfg);
3039 	if (err < 0)
3040 		return err;
3041 
3042 	if (cfg.fc_mp)
3043 		return ip6_route_multipath_add(&cfg);
3044 	else
3045 		return ip6_route_add(&cfg);
3046 }
3047 
/* Worst-case netlink message size for one route notification.  Must
 * stay in sync with the attributes emitted by rt6_fill_node().
 */
static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
{
	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
	       + nla_total_size(sizeof(struct rta_cacheinfo))
	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
	       + nla_total_size(1) /* RTA_PREF */
	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
}
3065 
/* Fill @skb with one RTM_* message of @type describing route @rt.
 *
 * @dst/@src: when non-NULL (getroute replies), reported as /128
 *	addresses instead of the route's own prefixes.
 * @iif: input interface for getroute replies (0 for output lookups).
 * @prefix: non-zero restricts dumps to RTF_PREFIX_RT routes; a
 *	filtered-out route returns 1 ("success, nothing written").
 * @nowait: passed through to ip6mr_get_route() for multicast cases.
 *
 * Returns 0 on success, 1 when filtered, -EMSGSIZE if @skb is full.
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	u32 metrics[RTAX_MAX];
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* Map the dst error back onto the route type userspace installed. */
	if (rt->rt6i_flags & RTF_REJECT) {
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	/* NOTE(review): rt->dst.dev is dereferenced unconditionally here
	 * but NULL-checked before RTA_OIF below — confirm it cannot be
	 * NULL on this path.
	 */
	if (!netif_carrier_ok(rt->dst.dev)) {
		rtm->rtm_flags |= RTNH_F_LINKDOWN;
		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
			rtm->rtm_flags |= RTNH_F_DEAD;
	}
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		if (nla_put_in6_addr(skb, RTA_DST, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* Report a per-route PMTU override in place of the shared MTU metric. */
	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt6i_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
		goto nla_put_failure;

	/* NOTE(review): return value ignored — an encap attribute that
	 * fails to fit would be silently omitted; confirm intended.
	 */
	lwtunnel_fill_encap(skb, rt->dst.lwtstate);

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
3225 
3226 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3227 {
3228 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3229 	int prefix;
3230 
3231 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3232 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3233 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3234 	} else
3235 		prefix = 0;
3236 
3237 	return rt6_fill_node(arg->net,
3238 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3239 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3240 		     prefix, 0, NLM_F_MULTI);
3241 }
3242 
/* RTM_GETROUTE handler: resolve the route for the addresses in the
 * request (input-side lookup when RTA_IIF is given, output-side
 * otherwise) and unicast a single RTM_NEWROUTE reply to the sender.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	int err, iif = 0, oif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_MARK])
		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);

	if (iif) {
		/* Input-side lookup as if the packet arrived on @iif. */
		struct net_device *dev;
		int flags = 0;

		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
							       flags);
	} else {
		/* Output-side lookup. */
		fl6.flowi6_oif = oif;

		if (netif_index_is_l3_master(net, oif)) {
			fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
					   FLOWI_FLAG_SKIP_NH_OIF;
		}

		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
	}

	/* NOTE(review): rt is not checked for a lookup failure here;
	 * presumably these lookups always return a dst (e.g. the null
	 * entry) with any error carried in rt->dst.error — confirm.
	 */
	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		ip6_rt_put(rt);
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	/* The skb takes over the lookup's reference on rt. */
	skb_dst_set(skb, &rt->dst);

	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
3338 
3339 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3340 		     unsigned int nlm_flags)
3341 {
3342 	struct sk_buff *skb;
3343 	struct net *net = info->nl_net;
3344 	u32 seq;
3345 	int err;
3346 
3347 	err = -ENOBUFS;
3348 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3349 
3350 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3351 	if (!skb)
3352 		goto errout;
3353 
3354 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3355 				event, info->portid, seq, 0, 0, nlm_flags);
3356 	if (err < 0) {
3357 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3358 		WARN_ON(err == -EMSGSIZE);
3359 		kfree_skb(skb);
3360 		goto errout;
3361 	}
3362 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3363 		    info->nlh, gfp_any());
3364 	return;
3365 errout:
3366 	if (err < 0)
3367 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3368 }
3369 
3370 static int ip6_route_dev_notify(struct notifier_block *this,
3371 				unsigned long event, void *ptr)
3372 {
3373 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3374 	struct net *net = dev_net(dev);
3375 
3376 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3377 		net->ipv6.ip6_null_entry->dst.dev = dev;
3378 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3379 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3380 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3381 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3382 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3383 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3384 #endif
3385 	}
3386 
3387 	return NOTIFY_OK;
3388 }
3389 
3390 /*
3391  *	/proc
3392  */
3393 
3394 #ifdef CONFIG_PROC_FS
3395 
/* seq_file operations for /proc/net/ipv6_route (per-netns; registered
 * in ip6_route_net_init_late()).  ipv6_route_open is defined earlier
 * in this file.
 */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
3403 
3404 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3405 {
3406 	struct net *net = (struct net *)seq->private;
3407 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3408 		   net->ipv6.rt6_stats->fib_nodes,
3409 		   net->ipv6.rt6_stats->fib_route_nodes,
3410 		   net->ipv6.rt6_stats->fib_rt_alloc,
3411 		   net->ipv6.rt6_stats->fib_rt_entries,
3412 		   net->ipv6.rt6_stats->fib_rt_cache,
3413 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3414 		   net->ipv6.rt6_stats->fib_discarded_routes);
3415 
3416 	return 0;
3417 }
3418 
/* open() for /proc/net/rt6_stats: single-record seq_file backed by
 * rt6_stats_seq_show().
 */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
3423 
/* File operations for /proc/net/rt6_stats (registered per-netns in
 * ip6_route_net_init_late()).
 */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
3431 #endif	/* CONFIG_PROC_FS */
3432 
3433 #ifdef CONFIG_SYSCTL
3434 
3435 static
3436 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3437 			      void __user *buffer, size_t *lenp, loff_t *ppos)
3438 {
3439 	struct net *net;
3440 	int delay;
3441 	if (!write)
3442 		return -EINVAL;
3443 
3444 	net = (struct net *)ctl->extra1;
3445 	delay = net->ipv6.sysctl.flush_delay;
3446 	proc_dointvec(ctl, write, buffer, lenp, ppos);
3447 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3448 	return 0;
3449 }
3450 
/* Template for the per-namespace net.ipv6.route sysctl table.  Entry
 * order matters: ipv6_route_sysctl_init() kmemdup()s this array and
 * repoints each .data (and "flush"'s .extra1) by index, so new entries
 * must be added in both places consistently.  The init_net .data
 * pointers here only serve as the initial values for the copy.
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* same backing value as gc_min_interval, in milliseconds */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }	/* sentinel */
};
3524 
3525 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3526 {
3527 	struct ctl_table *table;
3528 
3529 	table = kmemdup(ipv6_route_table_template,
3530 			sizeof(ipv6_route_table_template),
3531 			GFP_KERNEL);
3532 
3533 	if (table) {
3534 		table[0].data = &net->ipv6.sysctl.flush_delay;
3535 		table[0].extra1 = net;
3536 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3537 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3538 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3539 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3540 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3541 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3542 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3543 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3544 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3545 
3546 		/* Don't export sysctls to unprivileged users */
3547 		if (net->user_ns != &init_user_ns)
3548 			table[0].procname = NULL;
3549 	}
3550 
3551 	return table;
3552 }
3553 #endif
3554 
/* Core per-netns setup for the IPv6 routing engine: clone the dst_ops
 * template, allocate this namespace's copies of the special route
 * entries (null and, with multiple tables, prohibit/blackhole), and
 * seed the sysctl defaults.  Errors unwind through the goto ladder in
 * reverse allocation order.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	/* Each special entry is a private copy of the template so its
	 * dst.path/dst.ops can point at this namespace's instances.
	 */
	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* sysctl defaults; tunable via /proc/sys/net/ipv6/route */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
3626 
/* Per-netns teardown: free the special route entries allocated in
 * ip6_route_net_init() and release the dst entry counter.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
3636 
3637 static int __net_init ip6_route_net_init_late(struct net *net)
3638 {
3639 #ifdef CONFIG_PROC_FS
3640 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3641 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3642 #endif
3643 	return 0;
3644 }
3645 
/* Late per-netns teardown: remove the /proc/net entries created in
 * ip6_route_net_init_late().
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
3653 
/* Core per-netns routing state (special entries, dst_ops, sysctl defaults) */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
3658 
3659 static int __net_init ipv6_inetpeer_init(struct net *net)
3660 {
3661 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3662 
3663 	if (!bp)
3664 		return -ENOMEM;
3665 	inet_peer_base_init(bp);
3666 	net->ipv6.peers = bp;
3667 	return 0;
3668 }
3669 
3670 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3671 {
3672 	struct inet_peer_base *bp = net->ipv6.peers;
3673 
3674 	net->ipv6.peers = NULL;
3675 	inetpeer_invalidate_tree(bp);
3676 	kfree(bp);
3677 }
3678 
/* Per-netns inetpeer base lifetime */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
3683 
/* Registered after the core pernet ops; handles the /proc entries */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
3688 
/* Binds the special route entries to loopback on NETDEV_REGISTER */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
3693 
/* Subsystem init for IPv6 routing.  Order matters: the dst kmem cache
 * and pernet state must exist before fib6/xfrm6/fib6-rules and the
 * rtnetlink handlers that use them.  Failures unwind through the goto
 * ladder in reverse registration order.
 */
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	/* blackhole dsts share the same slab as regular rt6_info */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

	/* per-cpu lists of uncached routes */
	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
3785 
/* Subsystem teardown: undo ip6_route_init() registrations in reverse
 * order.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
3798