xref: /openbmc/linux/net/ipv6/route.c (revision 781095f903f398148cd0b646d3984234a715f29e)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65 #include <trace/events/fib6.h>
66 
67 #include <asm/uaccess.h>
68 
69 #ifdef CONFIG_SYSCTL
70 #include <linux/sysctl.h>
71 #endif
72 
/*
 * Result codes produced while scoring a route (see rt6_check_neigh() and
 * rt6_score_route()).  Every failure value is negative.
 */
enum rt6_nud_state {
	/* find_match() skips the route entirely. */
	RT6_NUD_FAIL_HARD	= -3,
	/* Neighbour is in NUD_FAILED state (CONFIG_IPV6_ROUTER_PREF only). */
	RT6_NUD_FAIL_PROBE	= -2,
	/* find_match() scores the route as 0 and asks for round-robin. */
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1,
};
79 
80 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
81 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
82 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
83 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
84 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85 static void		ip6_dst_destroy(struct dst_entry *);
86 static void		ip6_dst_ifdown(struct dst_entry *,
87 				       struct net_device *dev, int how);
88 static int		 ip6_dst_gc(struct dst_ops *ops);
89 
90 static int		ip6_pkt_discard(struct sk_buff *skb);
91 static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
92 static int		ip6_pkt_prohibit(struct sk_buff *skb);
93 static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
94 static void		ip6_link_failure(struct sk_buff *skb);
95 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 					   struct sk_buff *skb, u32 mtu);
97 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 					struct sk_buff *skb);
99 static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
100 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
101 
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Route-information helpers used by rt6_route_rcv(); defined later. */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
#endif
111 
112 struct uncached_list {
113 	spinlock_t		lock;
114 	struct list_head	head;
115 };
116 
117 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
118 
119 static void rt6_uncached_list_add(struct rt6_info *rt)
120 {
121 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
122 
123 	rt->dst.flags |= DST_NOCACHE;
124 	rt->rt6i_uncached_list = ul;
125 
126 	spin_lock_bh(&ul->lock);
127 	list_add_tail(&rt->rt6i_uncached, &ul->head);
128 	spin_unlock_bh(&ul->lock);
129 }
130 
131 static void rt6_uncached_list_del(struct rt6_info *rt)
132 {
133 	if (!list_empty(&rt->rt6i_uncached)) {
134 		struct uncached_list *ul = rt->rt6i_uncached_list;
135 
136 		spin_lock_bh(&ul->lock);
137 		list_del(&rt->rt6i_uncached);
138 		spin_unlock_bh(&ul->lock);
139 	}
140 }
141 
142 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
143 {
144 	struct net_device *loopback_dev = net->loopback_dev;
145 	int cpu;
146 
147 	if (dev == loopback_dev)
148 		return;
149 
150 	for_each_possible_cpu(cpu) {
151 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
152 		struct rt6_info *rt;
153 
154 		spin_lock_bh(&ul->lock);
155 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
156 			struct inet6_dev *rt_idev = rt->rt6i_idev;
157 			struct net_device *rt_dev = rt->dst.dev;
158 
159 			if (rt_idev->dev == dev) {
160 				rt->rt6i_idev = in6_dev_get(loopback_dev);
161 				in6_dev_put(rt_idev);
162 			}
163 
164 			if (rt_dev == dev) {
165 				rt->dst.dev = loopback_dev;
166 				dev_hold(rt->dst.dev);
167 				dev_put(rt_dev);
168 			}
169 		}
170 		spin_unlock_bh(&ul->lock);
171 	}
172 }
173 
174 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
175 {
176 	return dst_metrics_write_ptr(rt->dst.from);
177 }
178 
179 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
180 {
181 	struct rt6_info *rt = (struct rt6_info *)dst;
182 
183 	if (rt->rt6i_flags & RTF_PCPU)
184 		return rt6_pcpu_cow_metrics(rt);
185 	else if (rt->rt6i_flags & RTF_CACHE)
186 		return NULL;
187 	else
188 		return dst_cow_metrics_generic(dst, old);
189 }
190 
191 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
192 					     struct sk_buff *skb,
193 					     const void *daddr)
194 {
195 	struct in6_addr *p = &rt->rt6i_gateway;
196 
197 	if (!ipv6_addr_any(p))
198 		return (const void *) p;
199 	else if (skb)
200 		return &ipv6_hdr(skb)->daddr;
201 	return daddr;
202 }
203 
204 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
205 					  struct sk_buff *skb,
206 					  const void *daddr)
207 {
208 	struct rt6_info *rt = (struct rt6_info *) dst;
209 	struct neighbour *n;
210 
211 	daddr = choose_neigh_daddr(rt, skb, daddr);
212 	n = __ipv6_neigh_lookup(dst->dev, daddr);
213 	if (n)
214 		return n;
215 	return neigh_create(&nd_tbl, daddr, dst->dev);
216 }
217 
218 static struct dst_ops ip6_dst_ops_template = {
219 	.family			=	AF_INET6,
220 	.gc			=	ip6_dst_gc,
221 	.gc_thresh		=	1024,
222 	.check			=	ip6_dst_check,
223 	.default_advmss		=	ip6_default_advmss,
224 	.mtu			=	ip6_mtu,
225 	.cow_metrics		=	ipv6_cow_metrics,
226 	.destroy		=	ip6_dst_destroy,
227 	.ifdown			=	ip6_dst_ifdown,
228 	.negative_advice	=	ip6_negative_advice,
229 	.link_failure		=	ip6_link_failure,
230 	.update_pmtu		=	ip6_rt_update_pmtu,
231 	.redirect		=	rt6_do_redirect,
232 	.local_out		=	__ip6_local_out,
233 	.neigh_lookup		=	ip6_neigh_lookup,
234 };
235 
236 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
237 {
238 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
239 
240 	return mtu ? : dst->dev->mtu;
241 }
242 
243 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
244 					 struct sk_buff *skb, u32 mtu)
245 {
246 }
247 
/* dst_ops->redirect for blackhole dsts: deliberately does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* no-op */
}
252 
253 static struct dst_ops ip6_dst_blackhole_ops = {
254 	.family			=	AF_INET6,
255 	.destroy		=	ip6_dst_destroy,
256 	.check			=	ip6_dst_check,
257 	.mtu			=	ip6_blackhole_mtu,
258 	.default_advmss		=	ip6_default_advmss,
259 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
260 	.redirect		=	ip6_rt_blackhole_redirect,
261 	.cow_metrics		=	dst_cow_metrics_generic,
262 	.neigh_lookup		=	ip6_neigh_lookup,
263 };
264 
265 static const u32 ip6_template_metrics[RTAX_MAX] = {
266 	[RTAX_HOPLIMIT - 1] = 0,
267 };
268 
269 static const struct rt6_info ip6_null_entry_template = {
270 	.dst = {
271 		.__refcnt	= ATOMIC_INIT(1),
272 		.__use		= 1,
273 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
274 		.error		= -ENETUNREACH,
275 		.input		= ip6_pkt_discard,
276 		.output		= ip6_pkt_discard_out,
277 	},
278 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
279 	.rt6i_protocol  = RTPROT_KERNEL,
280 	.rt6i_metric	= ~(u32) 0,
281 	.rt6i_ref	= ATOMIC_INIT(1),
282 };
283 
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Template for the "prohibit" route: a reject entry whose dst reports
 * -EACCES and uses the ip6_pkt_prohibit*() handlers.
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= RTF_REJECT | RTF_NONEXTHOP,
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32)0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route: a reject entry whose dst reports
 * -EINVAL and uses the generic dst_discard*() handlers.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= RTF_REJECT | RTF_NONEXTHOP,
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32)0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
317 
318 static void rt6_info_init(struct rt6_info *rt)
319 {
320 	struct dst_entry *dst = &rt->dst;
321 
322 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
323 	INIT_LIST_HEAD(&rt->rt6i_siblings);
324 	INIT_LIST_HEAD(&rt->rt6i_uncached);
325 }
326 
327 /* allocate dst with ip6_dst_ops */
328 static struct rt6_info *__ip6_dst_alloc(struct net *net,
329 					struct net_device *dev,
330 					int flags)
331 {
332 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
333 					0, DST_OBSOLETE_FORCE_CHK, flags);
334 
335 	if (rt)
336 		rt6_info_init(rt);
337 
338 	return rt;
339 }
340 
341 static struct rt6_info *ip6_dst_alloc(struct net *net,
342 				      struct net_device *dev,
343 				      int flags)
344 {
345 	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
346 
347 	if (rt) {
348 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
349 		if (rt->rt6i_pcpu) {
350 			int cpu;
351 
352 			for_each_possible_cpu(cpu) {
353 				struct rt6_info **p;
354 
355 				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
356 				/* no one shares rt */
357 				*p =  NULL;
358 			}
359 		} else {
360 			dst_destroy((struct dst_entry *)rt);
361 			return NULL;
362 		}
363 	}
364 
365 	return rt;
366 }
367 
368 static void ip6_dst_destroy(struct dst_entry *dst)
369 {
370 	struct rt6_info *rt = (struct rt6_info *)dst;
371 	struct dst_entry *from = dst->from;
372 	struct inet6_dev *idev;
373 
374 	dst_destroy_metrics_generic(dst);
375 	free_percpu(rt->rt6i_pcpu);
376 	rt6_uncached_list_del(rt);
377 
378 	idev = rt->rt6i_idev;
379 	if (idev) {
380 		rt->rt6i_idev = NULL;
381 		in6_dev_put(idev);
382 	}
383 
384 	dst->from = NULL;
385 	dst_release(from);
386 }
387 
388 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
389 			   int how)
390 {
391 	struct rt6_info *rt = (struct rt6_info *)dst;
392 	struct inet6_dev *idev = rt->rt6i_idev;
393 	struct net_device *loopback_dev =
394 		dev_net(dev)->loopback_dev;
395 
396 	if (dev != loopback_dev) {
397 		if (idev && idev->dev == dev) {
398 			struct inet6_dev *loopback_idev =
399 				in6_dev_get(loopback_dev);
400 			if (loopback_idev) {
401 				rt->rt6i_idev = loopback_idev;
402 				in6_dev_put(idev);
403 			}
404 		}
405 	}
406 }
407 
408 static bool __rt6_check_expired(const struct rt6_info *rt)
409 {
410 	if (rt->rt6i_flags & RTF_EXPIRES)
411 		return time_after(jiffies, rt->dst.expires);
412 	else
413 		return false;
414 }
415 
416 static bool rt6_check_expired(const struct rt6_info *rt)
417 {
418 	if (rt->rt6i_flags & RTF_EXPIRES) {
419 		if (time_after(jiffies, rt->dst.expires))
420 			return true;
421 	} else if (rt->dst.from) {
422 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
423 	}
424 	return false;
425 }
426 
/*
 * Multipath route selection: map the flow hash of @fl6 onto an index in
 * [0, candidate_count).  Adapted from fib_info_hashfn().
 * @candidate_count must be non-zero.
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	return get_hash_from_flowi6(fl6) % candidate_count;
}
436 
437 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
438 					     struct flowi6 *fl6, int oif,
439 					     int strict)
440 {
441 	struct rt6_info *sibling, *next_sibling;
442 	int route_choosen;
443 
444 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
445 	/* Don't change the route, if route_choosen == 0
446 	 * (siblings does not include ourself)
447 	 */
448 	if (route_choosen)
449 		list_for_each_entry_safe(sibling, next_sibling,
450 				&match->rt6i_siblings, rt6i_siblings) {
451 			route_choosen--;
452 			if (route_choosen == 0) {
453 				if (rt6_score_route(sibling, oif, strict) < 0)
454 					break;
455 				match = sibling;
456 				break;
457 			}
458 		}
459 	return match;
460 }
461 
462 /*
463  *	Route lookup. Any table->tb6_lock is implied.
464  */
465 
466 static inline struct rt6_info *rt6_device_match(struct net *net,
467 						    struct rt6_info *rt,
468 						    const struct in6_addr *saddr,
469 						    int oif,
470 						    int flags)
471 {
472 	struct rt6_info *local = NULL;
473 	struct rt6_info *sprt;
474 
475 	if (!oif && ipv6_addr_any(saddr))
476 		goto out;
477 
478 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
479 		struct net_device *dev = sprt->dst.dev;
480 
481 		if (oif) {
482 			if (dev->ifindex == oif)
483 				return sprt;
484 			if (dev->flags & IFF_LOOPBACK) {
485 				if (!sprt->rt6i_idev ||
486 				    sprt->rt6i_idev->dev->ifindex != oif) {
487 					if (flags & RT6_LOOKUP_F_IFACE)
488 						continue;
489 					if (local &&
490 					    local->rt6i_idev->dev->ifindex == oif)
491 						continue;
492 				}
493 				local = sprt;
494 			}
495 		} else {
496 			if (ipv6_chk_addr(net, saddr, dev,
497 					  flags & RT6_LOOKUP_F_IFACE))
498 				return sprt;
499 		}
500 	}
501 
502 	if (oif) {
503 		if (local)
504 			return local;
505 
506 		if (flags & RT6_LOOKUP_F_IFACE)
507 			return net->ipv6.ip6_null_entry;
508 	}
509 out:
510 	return rt;
511 }
512 
#ifdef CONFIG_IPV6_ROUTER_PREF
/* A probe request queued by rt6_probe() and run from a workqueue. */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};

/*
 * Work handler: send a neighbour solicitation for the stored target to
 * the address computed by addrconf_addr_solict_mult(), then drop the
 * device reference taken in rt6_probe() and free the request.
 */
static void rt6_probe_deferred(struct work_struct *w)
{
	struct __rt6_probe_work *req =
		container_of(w, struct __rt6_probe_work, work);
	struct in6_addr mcaddr;

	addrconf_addr_solict_mult(&req->target, &mcaddr);
	ndisc_send_ns(req->dev, &req->target, &mcaddr, NULL);
	dev_put(req->dev);
	kfree(req);
}

/*
 * Router Reachability Probing for gateway routes.
 *
 * Okay, this does not seem to be appropriate for now, however, we need
 * to check if it is really so.  Router Reachability Probe MUST be
 * rate-limited to no more than one per minute.
 *
 * A probe is queued when the gateway has no neighbour entry, or when the
 * entry is not NUD_VALID and was last updated more than
 * rtr_probe_interval ago.  The solicitation itself is sent later from
 * rt6_probe_deferred().
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *req = NULL;
	struct neighbour *neigh;

	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (!neigh) {
		req = kmalloc(sizeof(*req), GFP_ATOMIC);
	} else if (!(neigh->nud_state & NUD_VALID)) {
		write_lock(&neigh->lock);
		/* Re-check the state now that the lock is held. */
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			req = kmalloc(sizeof(*req), GFP_ATOMIC);
			if (req)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	}

	if (req) {
		INIT_WORK(&req->work, rt6_probe_deferred);
		req->target = rt->rt6i_gateway;
		/* Reference is released by rt6_probe_deferred(). */
		dev_hold(rt->dst.dev);
		req->dev = rt->dst.dev;
		schedule_work(&req->work);
	}

	rcu_read_unlock_bh();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
583 
584 /*
585  * Default Router Selection (RFC 2461 6.3.6)
586  */
587 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
588 {
589 	struct net_device *dev = rt->dst.dev;
590 	if (!oif || dev->ifindex == oif)
591 		return 2;
592 	if ((dev->flags & IFF_LOOPBACK) &&
593 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
594 		return 1;
595 	return 0;
596 }
597 
598 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
599 {
600 	struct neighbour *neigh;
601 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
602 
603 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
604 	    !(rt->rt6i_flags & RTF_GATEWAY))
605 		return RT6_NUD_SUCCEED;
606 
607 	rcu_read_lock_bh();
608 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
609 	if (neigh) {
610 		read_lock(&neigh->lock);
611 		if (neigh->nud_state & NUD_VALID)
612 			ret = RT6_NUD_SUCCEED;
613 #ifdef CONFIG_IPV6_ROUTER_PREF
614 		else if (!(neigh->nud_state & NUD_FAILED))
615 			ret = RT6_NUD_SUCCEED;
616 		else
617 			ret = RT6_NUD_FAIL_PROBE;
618 #endif
619 		read_unlock(&neigh->lock);
620 	} else {
621 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
622 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
623 	}
624 	rcu_read_unlock_bh();
625 
626 	return ret;
627 }
628 
629 static int rt6_score_route(struct rt6_info *rt, int oif,
630 			   int strict)
631 {
632 	int m;
633 
634 	m = rt6_check_dev(rt, oif);
635 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
636 		return RT6_NUD_FAIL_HARD;
637 #ifdef CONFIG_IPV6_ROUTER_PREF
638 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
639 #endif
640 	if (strict & RT6_LOOKUP_F_REACHABLE) {
641 		int n = rt6_check_neigh(rt);
642 		if (n < 0)
643 			return n;
644 	}
645 	return m;
646 }
647 
648 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
649 				   int *mpri, struct rt6_info *match,
650 				   bool *do_rr)
651 {
652 	int m;
653 	bool match_do_rr = false;
654 	struct inet6_dev *idev = rt->rt6i_idev;
655 	struct net_device *dev = rt->dst.dev;
656 
657 	if (dev && !netif_carrier_ok(dev) &&
658 	    idev->cnf.ignore_routes_with_linkdown)
659 		goto out;
660 
661 	if (rt6_check_expired(rt))
662 		goto out;
663 
664 	m = rt6_score_route(rt, oif, strict);
665 	if (m == RT6_NUD_FAIL_DO_RR) {
666 		match_do_rr = true;
667 		m = 0; /* lowest valid score */
668 	} else if (m == RT6_NUD_FAIL_HARD) {
669 		goto out;
670 	}
671 
672 	if (strict & RT6_LOOKUP_F_REACHABLE)
673 		rt6_probe(rt);
674 
675 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
676 	if (m > *mpri) {
677 		*do_rr = match_do_rr;
678 		*mpri = m;
679 		match = rt;
680 	}
681 out:
682 	return match;
683 }
684 
685 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
686 				     struct rt6_info *rr_head,
687 				     u32 metric, int oif, int strict,
688 				     bool *do_rr)
689 {
690 	struct rt6_info *rt, *match, *cont;
691 	int mpri = -1;
692 
693 	match = NULL;
694 	cont = NULL;
695 	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
696 		if (rt->rt6i_metric != metric) {
697 			cont = rt;
698 			break;
699 		}
700 
701 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
702 	}
703 
704 	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
705 		if (rt->rt6i_metric != metric) {
706 			cont = rt;
707 			break;
708 		}
709 
710 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
711 	}
712 
713 	if (match || !cont)
714 		return match;
715 
716 	for (rt = cont; rt; rt = rt->dst.rt6_next)
717 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
718 
719 	return match;
720 }
721 
722 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
723 {
724 	struct rt6_info *match, *rt0;
725 	struct net *net;
726 	bool do_rr = false;
727 
728 	rt0 = fn->rr_ptr;
729 	if (!rt0)
730 		fn->rr_ptr = rt0 = fn->leaf;
731 
732 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
733 			     &do_rr);
734 
735 	if (do_rr) {
736 		struct rt6_info *next = rt0->dst.rt6_next;
737 
738 		/* no entries matched; do round-robin */
739 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
740 			next = fn->leaf;
741 
742 		if (next != rt0)
743 			fn->rr_ptr = next;
744 	}
745 
746 	net = dev_net(rt0->dst.dev);
747 	return match ? match : net->ipv6.ip6_null_entry;
748 }
749 
750 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
751 {
752 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
753 }
754 
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received route information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option; interpreted as a struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Returns 0 on success, or -EINVAL when the option is too short, when its
 * length and prefix_len fields are inconsistent, or when the route
 * preference is ICMPV6_ROUTER_PREF_INVALID.
 *
 * An existing matching route is deleted when the advertised lifetime is
 * zero; otherwise it is created or updated and its expiry is refreshed.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	/*
	 * @len is signed while sizeof() is unsigned: without the explicit
	 * sign check a negative length would be converted to a huge
	 * unsigned value and slip past the minimum-size test.
	 */
	if (len < 0 || (size_t)len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* The option carries a full address: use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	/* A zero-length prefix refers to the default router entry. */
	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex);

	/* Lifetime 0 withdraws the route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif
830 
831 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
832 					struct in6_addr *saddr)
833 {
834 	struct fib6_node *pn;
835 	while (1) {
836 		if (fn->fn_flags & RTN_TL_ROOT)
837 			return NULL;
838 		pn = fn->parent;
839 		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
840 			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
841 		else
842 			fn = pn;
843 		if (fn->fn_flags & RTN_RTINFO)
844 			return fn;
845 	}
846 }
847 
848 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
849 					     struct fib6_table *table,
850 					     struct flowi6 *fl6, int flags)
851 {
852 	struct fib6_node *fn;
853 	struct rt6_info *rt;
854 
855 	read_lock_bh(&table->tb6_lock);
856 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
857 restart:
858 	rt = fn->leaf;
859 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
860 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
861 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
862 	if (rt == net->ipv6.ip6_null_entry) {
863 		fn = fib6_backtrack(fn, &fl6->saddr);
864 		if (fn)
865 			goto restart;
866 	}
867 	dst_use(&rt->dst, jiffies);
868 	read_unlock_bh(&table->tb6_lock);
869 
870 	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
871 
872 	return rt;
873 
874 }
875 
876 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
877 				    int flags)
878 {
879 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
880 }
881 EXPORT_SYMBOL_GPL(ip6_route_lookup);
882 
883 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
884 			    const struct in6_addr *saddr, int oif, int strict)
885 {
886 	struct flowi6 fl6 = {
887 		.flowi6_oif = oif,
888 		.daddr = *daddr,
889 	};
890 	struct dst_entry *dst;
891 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
892 
893 	if (saddr) {
894 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
895 		flags |= RT6_LOOKUP_F_HAS_SADDR;
896 	}
897 
898 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
899 	if (dst->error == 0)
900 		return (struct rt6_info *) dst;
901 
902 	dst_release(dst);
903 
904 	return NULL;
905 }
906 EXPORT_SYMBOL(rt6_lookup);
907 
908 /* ip6_ins_rt is called with FREE table->tb6_lock.
909    It takes new route entry, the addition fails by any reason the
910    route is freed. In any case, if caller does not hold it, it may
911    be destroyed.
912  */
913 
914 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
915 			struct mx6_config *mxc)
916 {
917 	int err;
918 	struct fib6_table *table;
919 
920 	table = rt->rt6i_table;
921 	write_lock_bh(&table->tb6_lock);
922 	err = fib6_add(&table->tb6_root, rt, info, mxc);
923 	write_unlock_bh(&table->tb6_lock);
924 
925 	return err;
926 }
927 
928 int ip6_ins_rt(struct rt6_info *rt)
929 {
930 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
931 	struct mx6_config mxc = { .mx = NULL, };
932 
933 	return __ip6_ins_rt(rt, &info, &mxc);
934 }
935 
936 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
937 					   const struct in6_addr *daddr,
938 					   const struct in6_addr *saddr)
939 {
940 	struct rt6_info *rt;
941 
942 	/*
943 	 *	Clone the route.
944 	 */
945 
946 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
947 		ort = (struct rt6_info *)ort->dst.from;
948 
949 	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
950 
951 	if (!rt)
952 		return NULL;
953 
954 	ip6_rt_copy_init(rt, ort);
955 	rt->rt6i_flags |= RTF_CACHE;
956 	rt->rt6i_metric = 0;
957 	rt->dst.flags |= DST_HOST;
958 	rt->rt6i_dst.addr = *daddr;
959 	rt->rt6i_dst.plen = 128;
960 
961 	if (!rt6_is_gw_or_nonexthop(ort)) {
962 		if (ort->rt6i_dst.plen != 128 &&
963 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
964 			rt->rt6i_flags |= RTF_ANYCAST;
965 #ifdef CONFIG_IPV6_SUBTREES
966 		if (rt->rt6i_src.plen && saddr) {
967 			rt->rt6i_src.addr = *saddr;
968 			rt->rt6i_src.plen = 128;
969 		}
970 #endif
971 	}
972 
973 	return rt;
974 }
975 
976 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
977 {
978 	struct rt6_info *pcpu_rt;
979 
980 	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
981 				  rt->dst.dev, rt->dst.flags);
982 
983 	if (!pcpu_rt)
984 		return NULL;
985 	ip6_rt_copy_init(pcpu_rt, rt);
986 	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
987 	pcpu_rt->rt6i_flags |= RTF_PCPU;
988 	return pcpu_rt;
989 }
990 
991 /* It should be called with read_lock_bh(&tb6_lock) acquired */
992 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
993 {
994 	struct rt6_info *pcpu_rt, **p;
995 
996 	p = this_cpu_ptr(rt->rt6i_pcpu);
997 	pcpu_rt = *p;
998 
999 	if (pcpu_rt) {
1000 		dst_hold(&pcpu_rt->dst);
1001 		rt6_dst_from_metrics_check(pcpu_rt);
1002 	}
1003 	return pcpu_rt;
1004 }
1005 
1006 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1007 {
1008 	struct fib6_table *table = rt->rt6i_table;
1009 	struct rt6_info *pcpu_rt, *prev, **p;
1010 
1011 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1012 	if (!pcpu_rt) {
1013 		struct net *net = dev_net(rt->dst.dev);
1014 
1015 		dst_hold(&net->ipv6.ip6_null_entry->dst);
1016 		return net->ipv6.ip6_null_entry;
1017 	}
1018 
1019 	read_lock_bh(&table->tb6_lock);
1020 	if (rt->rt6i_pcpu) {
1021 		p = this_cpu_ptr(rt->rt6i_pcpu);
1022 		prev = cmpxchg(p, NULL, pcpu_rt);
1023 		if (prev) {
1024 			/* If someone did it before us, return prev instead */
1025 			dst_destroy(&pcpu_rt->dst);
1026 			pcpu_rt = prev;
1027 		}
1028 	} else {
1029 		/* rt has been removed from the fib6 tree
1030 		 * before we have a chance to acquire the read_lock.
1031 		 * In this case, don't brother to create a pcpu rt
1032 		 * since rt is going away anyway.  The next
1033 		 * dst_check() will trigger a re-lookup.
1034 		 */
1035 		dst_destroy(&pcpu_rt->dst);
1036 		pcpu_rt = rt;
1037 	}
1038 	dst_hold(&pcpu_rt->dst);
1039 	rt6_dst_from_metrics_check(pcpu_rt);
1040 	read_unlock_bh(&table->tb6_lock);
1041 	return pcpu_rt;
1042 }
1043 
1044 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
1045 				      struct flowi6 *fl6, int flags)
1046 {
1047 	struct fib6_node *fn, *saved_fn;
1048 	struct rt6_info *rt;
1049 	int strict = 0;
1050 
1051 	strict |= flags & RT6_LOOKUP_F_IFACE;
1052 	if (net->ipv6.devconf_all->forwarding == 0)
1053 		strict |= RT6_LOOKUP_F_REACHABLE;
1054 
1055 	read_lock_bh(&table->tb6_lock);
1056 
1057 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1058 	saved_fn = fn;
1059 
1060 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1061 		oif = 0;
1062 
1063 redo_rt6_select:
1064 	rt = rt6_select(fn, oif, strict);
1065 	if (rt->rt6i_nsiblings)
1066 		rt = rt6_multipath_select(rt, fl6, oif, strict);
1067 	if (rt == net->ipv6.ip6_null_entry) {
1068 		fn = fib6_backtrack(fn, &fl6->saddr);
1069 		if (fn)
1070 			goto redo_rt6_select;
1071 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1072 			/* also consider unreachable route */
1073 			strict &= ~RT6_LOOKUP_F_REACHABLE;
1074 			fn = saved_fn;
1075 			goto redo_rt6_select;
1076 		}
1077 	}
1078 
1079 
1080 	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1081 		dst_use(&rt->dst, jiffies);
1082 		read_unlock_bh(&table->tb6_lock);
1083 
1084 		rt6_dst_from_metrics_check(rt);
1085 
1086 		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1087 		return rt;
1088 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1089 			    !(rt->rt6i_flags & RTF_GATEWAY))) {
1090 		/* Create a RTF_CACHE clone which will not be
1091 		 * owned by the fib6 tree.  It is for the special case where
1092 		 * the daddr in the skb during the neighbor look-up is different
1093 		 * from the fl6->daddr used to look-up route here.
1094 		 */
1095 
1096 		struct rt6_info *uncached_rt;
1097 
1098 		dst_use(&rt->dst, jiffies);
1099 		read_unlock_bh(&table->tb6_lock);
1100 
1101 		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1102 		dst_release(&rt->dst);
1103 
1104 		if (uncached_rt)
1105 			rt6_uncached_list_add(uncached_rt);
1106 		else
1107 			uncached_rt = net->ipv6.ip6_null_entry;
1108 
1109 		dst_hold(&uncached_rt->dst);
1110 
1111 		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
1112 		return uncached_rt;
1113 
1114 	} else {
1115 		/* Get a percpu copy */
1116 
1117 		struct rt6_info *pcpu_rt;
1118 
1119 		rt->dst.lastuse = jiffies;
1120 		rt->dst.__use++;
1121 		pcpu_rt = rt6_get_pcpu_route(rt);
1122 
1123 		if (pcpu_rt) {
1124 			read_unlock_bh(&table->tb6_lock);
1125 		} else {
1126 			/* We have to do the read_unlock first
1127 			 * because rt6_make_pcpu_route() may trigger
1128 			 * ip6_dst_gc() which will take the write_lock.
1129 			 */
1130 			dst_hold(&rt->dst);
1131 			read_unlock_bh(&table->tb6_lock);
1132 			pcpu_rt = rt6_make_pcpu_route(rt);
1133 			dst_release(&rt->dst);
1134 		}
1135 
1136 		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
1137 		return pcpu_rt;
1138 
1139 	}
1140 }
1141 
1142 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1143 					    struct flowi6 *fl6, int flags)
1144 {
1145 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1146 }
1147 
1148 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1149 						struct net_device *dev,
1150 						struct flowi6 *fl6, int flags)
1151 {
1152 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1153 		flags |= RT6_LOOKUP_F_IFACE;
1154 
1155 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1156 }
1157 
1158 void ip6_route_input(struct sk_buff *skb)
1159 {
1160 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1161 	struct net *net = dev_net(skb->dev);
1162 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1163 	struct ip_tunnel_info *tun_info;
1164 	struct flowi6 fl6 = {
1165 		.flowi6_iif = l3mdev_fib_oif(skb->dev),
1166 		.daddr = iph->daddr,
1167 		.saddr = iph->saddr,
1168 		.flowlabel = ip6_flowinfo(iph),
1169 		.flowi6_mark = skb->mark,
1170 		.flowi6_proto = iph->nexthdr,
1171 	};
1172 
1173 	tun_info = skb_tunnel_info(skb);
1174 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1175 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1176 	skb_dst_drop(skb);
1177 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1178 }
1179 
1180 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1181 					     struct flowi6 *fl6, int flags)
1182 {
1183 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1184 }
1185 
1186 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1187 					 struct flowi6 *fl6, int flags)
1188 {
1189 	struct dst_entry *dst;
1190 	bool any_src;
1191 
1192 	dst = l3mdev_rt6_dst_by_oif(net, fl6);
1193 	if (dst)
1194 		return dst;
1195 
1196 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1197 
1198 	any_src = ipv6_addr_any(&fl6->saddr);
1199 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1200 	    (fl6->flowi6_oif && any_src))
1201 		flags |= RT6_LOOKUP_F_IFACE;
1202 
1203 	if (!any_src)
1204 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1205 	else if (sk)
1206 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1207 
1208 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1209 }
1210 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1211 
1212 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1213 {
1214 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1215 	struct dst_entry *new = NULL;
1216 
1217 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1218 	if (rt) {
1219 		rt6_info_init(rt);
1220 
1221 		new = &rt->dst;
1222 		new->__use = 1;
1223 		new->input = dst_discard;
1224 		new->output = dst_discard_out;
1225 
1226 		dst_copy_metrics(new, &ort->dst);
1227 		rt->rt6i_idev = ort->rt6i_idev;
1228 		if (rt->rt6i_idev)
1229 			in6_dev_hold(rt->rt6i_idev);
1230 
1231 		rt->rt6i_gateway = ort->rt6i_gateway;
1232 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1233 		rt->rt6i_metric = 0;
1234 
1235 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1236 #ifdef CONFIG_IPV6_SUBTREES
1237 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1238 #endif
1239 
1240 		dst_free(new);
1241 	}
1242 
1243 	dst_release(dst_orig);
1244 	return new ? new : ERR_PTR(-ENOMEM);
1245 }
1246 
1247 /*
1248  *	Destination cache support functions
1249  */
1250 
1251 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1252 {
1253 	if (rt->dst.from &&
1254 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1255 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1256 }
1257 
1258 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1259 {
1260 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1261 		return NULL;
1262 
1263 	if (rt6_check_expired(rt))
1264 		return NULL;
1265 
1266 	return &rt->dst;
1267 }
1268 
1269 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1270 {
1271 	if (!__rt6_check_expired(rt) &&
1272 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1273 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1274 		return &rt->dst;
1275 	else
1276 		return NULL;
1277 }
1278 
1279 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1280 {
1281 	struct rt6_info *rt;
1282 
1283 	rt = (struct rt6_info *) dst;
1284 
1285 	/* All IPV6 dsts are created with ->obsolete set to the value
1286 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1287 	 * into this function always.
1288 	 */
1289 
1290 	rt6_dst_from_metrics_check(rt);
1291 
1292 	if (rt->rt6i_flags & RTF_PCPU ||
1293 	    (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1294 		return rt6_dst_from_check(rt, cookie);
1295 	else
1296 		return rt6_check(rt, cookie);
1297 }
1298 
1299 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1300 {
1301 	struct rt6_info *rt = (struct rt6_info *) dst;
1302 
1303 	if (rt) {
1304 		if (rt->rt6i_flags & RTF_CACHE) {
1305 			if (rt6_check_expired(rt)) {
1306 				ip6_del_rt(rt);
1307 				dst = NULL;
1308 			}
1309 		} else {
1310 			dst_release(dst);
1311 			dst = NULL;
1312 		}
1313 	}
1314 	return dst;
1315 }
1316 
1317 static void ip6_link_failure(struct sk_buff *skb)
1318 {
1319 	struct rt6_info *rt;
1320 
1321 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1322 
1323 	rt = (struct rt6_info *) skb_dst(skb);
1324 	if (rt) {
1325 		if (rt->rt6i_flags & RTF_CACHE) {
1326 			dst_hold(&rt->dst);
1327 			ip6_del_rt(rt);
1328 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1329 			rt->rt6i_node->fn_sernum = -1;
1330 		}
1331 	}
1332 }
1333 
1334 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1335 {
1336 	struct net *net = dev_net(rt->dst.dev);
1337 
1338 	rt->rt6i_flags |= RTF_MODIFIED;
1339 	rt->rt6i_pmtu = mtu;
1340 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1341 }
1342 
1343 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1344 {
1345 	return !(rt->rt6i_flags & RTF_CACHE) &&
1346 		(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1347 }
1348 
1349 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1350 				 const struct ipv6hdr *iph, u32 mtu)
1351 {
1352 	struct rt6_info *rt6 = (struct rt6_info *)dst;
1353 
1354 	if (rt6->rt6i_flags & RTF_LOCAL)
1355 		return;
1356 
1357 	dst_confirm(dst);
1358 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1359 	if (mtu >= dst_mtu(dst))
1360 		return;
1361 
1362 	if (!rt6_cache_allowed_for_pmtu(rt6)) {
1363 		rt6_do_update_pmtu(rt6, mtu);
1364 	} else {
1365 		const struct in6_addr *daddr, *saddr;
1366 		struct rt6_info *nrt6;
1367 
1368 		if (iph) {
1369 			daddr = &iph->daddr;
1370 			saddr = &iph->saddr;
1371 		} else if (sk) {
1372 			daddr = &sk->sk_v6_daddr;
1373 			saddr = &inet6_sk(sk)->saddr;
1374 		} else {
1375 			return;
1376 		}
1377 		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1378 		if (nrt6) {
1379 			rt6_do_update_pmtu(nrt6, mtu);
1380 
1381 			/* ip6_ins_rt(nrt6) will bump the
1382 			 * rt6->rt6i_node->fn_sernum
1383 			 * which will fail the next rt6_check() and
1384 			 * invalidate the sk->sk_dst_cache.
1385 			 */
1386 			ip6_ins_rt(nrt6);
1387 		}
1388 	}
1389 }
1390 
1391 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1392 			       struct sk_buff *skb, u32 mtu)
1393 {
1394 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1395 }
1396 
1397 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1398 		     int oif, u32 mark)
1399 {
1400 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1401 	struct dst_entry *dst;
1402 	struct flowi6 fl6;
1403 
1404 	memset(&fl6, 0, sizeof(fl6));
1405 	fl6.flowi6_oif = oif;
1406 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1407 	fl6.daddr = iph->daddr;
1408 	fl6.saddr = iph->saddr;
1409 	fl6.flowlabel = ip6_flowinfo(iph);
1410 
1411 	dst = ip6_route_output(net, NULL, &fl6);
1412 	if (!dst->error)
1413 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1414 	dst_release(dst);
1415 }
1416 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1417 
1418 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1419 {
1420 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1421 			sk->sk_bound_dev_if, sk->sk_mark);
1422 }
1423 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1424 
1425 /* Handle redirects.
 *
 * struct ip6rd_flowi is the lookup key used while validating a redirect:
 * the ordinary flow plus the address of the router that sent it.
 *
 * ip6_route_redirect() passes &rdfl.fl6 to fib6_rule_lookup() and
 * __ip6_route_redirect() casts that flowi6 pointer back to
 * struct ip6rd_flowi, so @fl6 must remain the first member.
 */
1426 struct ip6rd_flowi {
1427 	struct flowi6 fl6;
1428 	struct in6_addr gateway;
1429 };
1430 
1431 static struct rt6_info *__ip6_route_redirect(struct net *net,
1432 					     struct fib6_table *table,
1433 					     struct flowi6 *fl6,
1434 					     int flags)
1435 {
1436 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1437 	struct rt6_info *rt;
1438 	struct fib6_node *fn;
1439 
1440 	/* Get the "current" route for this destination and
1441 	 * check if the redirect has come from approriate router.
1442 	 *
1443 	 * RFC 4861 specifies that redirects should only be
1444 	 * accepted if they come from the nexthop to the target.
1445 	 * Due to the way the routes are chosen, this notion
1446 	 * is a bit fuzzy and one might need to check all possible
1447 	 * routes.
1448 	 */
1449 
1450 	read_lock_bh(&table->tb6_lock);
1451 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1452 restart:
1453 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1454 		if (rt6_check_expired(rt))
1455 			continue;
1456 		if (rt->dst.error)
1457 			break;
1458 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1459 			continue;
1460 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1461 			continue;
1462 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1463 			continue;
1464 		break;
1465 	}
1466 
1467 	if (!rt)
1468 		rt = net->ipv6.ip6_null_entry;
1469 	else if (rt->dst.error) {
1470 		rt = net->ipv6.ip6_null_entry;
1471 		goto out;
1472 	}
1473 
1474 	if (rt == net->ipv6.ip6_null_entry) {
1475 		fn = fib6_backtrack(fn, &fl6->saddr);
1476 		if (fn)
1477 			goto restart;
1478 	}
1479 
1480 out:
1481 	dst_hold(&rt->dst);
1482 
1483 	read_unlock_bh(&table->tb6_lock);
1484 
1485 	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1486 	return rt;
1487 };
1488 
1489 static struct dst_entry *ip6_route_redirect(struct net *net,
1490 					const struct flowi6 *fl6,
1491 					const struct in6_addr *gateway)
1492 {
1493 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1494 	struct ip6rd_flowi rdfl;
1495 
1496 	rdfl.fl6 = *fl6;
1497 	rdfl.gateway = *gateway;
1498 
1499 	return fib6_rule_lookup(net, &rdfl.fl6,
1500 				flags, __ip6_route_redirect);
1501 }
1502 
1503 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1504 {
1505 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1506 	struct dst_entry *dst;
1507 	struct flowi6 fl6;
1508 
1509 	memset(&fl6, 0, sizeof(fl6));
1510 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1511 	fl6.flowi6_oif = oif;
1512 	fl6.flowi6_mark = mark;
1513 	fl6.daddr = iph->daddr;
1514 	fl6.saddr = iph->saddr;
1515 	fl6.flowlabel = ip6_flowinfo(iph);
1516 
1517 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1518 	rt6_do_redirect(dst, NULL, skb);
1519 	dst_release(dst);
1520 }
1521 EXPORT_SYMBOL_GPL(ip6_redirect);
1522 
1523 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1524 			    u32 mark)
1525 {
1526 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1527 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1528 	struct dst_entry *dst;
1529 	struct flowi6 fl6;
1530 
1531 	memset(&fl6, 0, sizeof(fl6));
1532 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1533 	fl6.flowi6_oif = oif;
1534 	fl6.flowi6_mark = mark;
1535 	fl6.daddr = msg->dest;
1536 	fl6.saddr = iph->daddr;
1537 
1538 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1539 	rt6_do_redirect(dst, NULL, skb);
1540 	dst_release(dst);
1541 }
1542 
1543 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1544 {
1545 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1546 }
1547 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1548 
1549 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1550 {
1551 	struct net_device *dev = dst->dev;
1552 	unsigned int mtu = dst_mtu(dst);
1553 	struct net *net = dev_net(dev);
1554 
1555 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1556 
1557 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1558 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1559 
1560 	/*
1561 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1562 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1563 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1564 	 * rely only on pmtu discovery"
1565 	 */
1566 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1567 		mtu = IPV6_MAXPLEN;
1568 	return mtu;
1569 }
1570 
1571 static unsigned int ip6_mtu(const struct dst_entry *dst)
1572 {
1573 	const struct rt6_info *rt = (const struct rt6_info *)dst;
1574 	unsigned int mtu = rt->rt6i_pmtu;
1575 	struct inet6_dev *idev;
1576 
1577 	if (mtu)
1578 		goto out;
1579 
1580 	mtu = dst_metric_raw(dst, RTAX_MTU);
1581 	if (mtu)
1582 		goto out;
1583 
1584 	mtu = IPV6_MIN_MTU;
1585 
1586 	rcu_read_lock();
1587 	idev = __in6_dev_get(dst->dev);
1588 	if (idev)
1589 		mtu = idev->cnf.mtu6;
1590 	rcu_read_unlock();
1591 
1592 out:
1593 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1594 }
1595 
/*
 * Singly linked list (chained through dst_entry.next) of the routes
 * created by icmp6_dst_alloc(), together with the spinlock that guards
 * it.  icmp6_dst_gc() and icmp6_clean_all() unlink and free entries
 * from this list while holding the lock.
 */
1596 static struct dst_entry *icmp6_dst_gc_list;
1597 static DEFINE_SPINLOCK(icmp6_dst_lock);
1598 
1599 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1600 				  struct flowi6 *fl6)
1601 {
1602 	struct dst_entry *dst;
1603 	struct rt6_info *rt;
1604 	struct inet6_dev *idev = in6_dev_get(dev);
1605 	struct net *net = dev_net(dev);
1606 
1607 	if (unlikely(!idev))
1608 		return ERR_PTR(-ENODEV);
1609 
1610 	rt = ip6_dst_alloc(net, dev, 0);
1611 	if (unlikely(!rt)) {
1612 		in6_dev_put(idev);
1613 		dst = ERR_PTR(-ENOMEM);
1614 		goto out;
1615 	}
1616 
1617 	rt->dst.flags |= DST_HOST;
1618 	rt->dst.output  = ip6_output;
1619 	atomic_set(&rt->dst.__refcnt, 1);
1620 	rt->rt6i_gateway  = fl6->daddr;
1621 	rt->rt6i_dst.addr = fl6->daddr;
1622 	rt->rt6i_dst.plen = 128;
1623 	rt->rt6i_idev     = idev;
1624 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1625 
1626 	spin_lock_bh(&icmp6_dst_lock);
1627 	rt->dst.next = icmp6_dst_gc_list;
1628 	icmp6_dst_gc_list = &rt->dst;
1629 	spin_unlock_bh(&icmp6_dst_lock);
1630 
1631 	fib6_force_start_gc(net);
1632 
1633 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1634 
1635 out:
1636 	return dst;
1637 }
1638 
1639 int icmp6_dst_gc(void)
1640 {
1641 	struct dst_entry *dst, **pprev;
1642 	int more = 0;
1643 
1644 	spin_lock_bh(&icmp6_dst_lock);
1645 	pprev = &icmp6_dst_gc_list;
1646 
1647 	while ((dst = *pprev) != NULL) {
1648 		if (!atomic_read(&dst->__refcnt)) {
1649 			*pprev = dst->next;
1650 			dst_free(dst);
1651 		} else {
1652 			pprev = &dst->next;
1653 			++more;
1654 		}
1655 	}
1656 
1657 	spin_unlock_bh(&icmp6_dst_lock);
1658 
1659 	return more;
1660 }
1661 
1662 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1663 			    void *arg)
1664 {
1665 	struct dst_entry *dst, **pprev;
1666 
1667 	spin_lock_bh(&icmp6_dst_lock);
1668 	pprev = &icmp6_dst_gc_list;
1669 	while ((dst = *pprev) != NULL) {
1670 		struct rt6_info *rt = (struct rt6_info *) dst;
1671 		if (func(rt, arg)) {
1672 			*pprev = dst->next;
1673 			dst_free(dst);
1674 		} else {
1675 			pprev = &dst->next;
1676 		}
1677 	}
1678 	spin_unlock_bh(&icmp6_dst_lock);
1679 }
1680 
1681 static int ip6_dst_gc(struct dst_ops *ops)
1682 {
1683 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1684 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1685 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1686 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1687 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1688 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1689 	int entries;
1690 
1691 	entries = dst_entries_get_fast(ops);
1692 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1693 	    entries <= rt_max_size)
1694 		goto out;
1695 
1696 	net->ipv6.ip6_rt_gc_expire++;
1697 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1698 	entries = dst_entries_get_slow(ops);
1699 	if (entries < ops->gc_thresh)
1700 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1701 out:
1702 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1703 	return entries > rt_max_size;
1704 }
1705 
1706 static int ip6_convert_metrics(struct mx6_config *mxc,
1707 			       const struct fib6_config *cfg)
1708 {
1709 	bool ecn_ca = false;
1710 	struct nlattr *nla;
1711 	int remaining;
1712 	u32 *mp;
1713 
1714 	if (!cfg->fc_mx)
1715 		return 0;
1716 
1717 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1718 	if (unlikely(!mp))
1719 		return -ENOMEM;
1720 
1721 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1722 		int type = nla_type(nla);
1723 		u32 val;
1724 
1725 		if (!type)
1726 			continue;
1727 		if (unlikely(type > RTAX_MAX))
1728 			goto err;
1729 
1730 		if (type == RTAX_CC_ALGO) {
1731 			char tmp[TCP_CA_NAME_MAX];
1732 
1733 			nla_strlcpy(tmp, nla, sizeof(tmp));
1734 			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1735 			if (val == TCP_CA_UNSPEC)
1736 				goto err;
1737 		} else {
1738 			val = nla_get_u32(nla);
1739 		}
1740 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1741 			goto err;
1742 
1743 		mp[type - 1] = val;
1744 		__set_bit(type - 1, mxc->mx_valid);
1745 	}
1746 
1747 	if (ecn_ca) {
1748 		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1749 		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1750 	}
1751 
1752 	mxc->mx = mp;
1753 	return 0;
1754  err:
1755 	kfree(mp);
1756 	return -EINVAL;
1757 }
1758 
1759 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1760 {
1761 	struct net *net = cfg->fc_nlinfo.nl_net;
1762 	struct rt6_info *rt = NULL;
1763 	struct net_device *dev = NULL;
1764 	struct inet6_dev *idev = NULL;
1765 	struct fib6_table *table;
1766 	int addr_type;
1767 	int err = -EINVAL;
1768 
1769 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1770 		goto out;
1771 #ifndef CONFIG_IPV6_SUBTREES
1772 	if (cfg->fc_src_len)
1773 		goto out;
1774 #endif
1775 	if (cfg->fc_ifindex) {
1776 		err = -ENODEV;
1777 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1778 		if (!dev)
1779 			goto out;
1780 		idev = in6_dev_get(dev);
1781 		if (!idev)
1782 			goto out;
1783 	}
1784 
1785 	if (cfg->fc_metric == 0)
1786 		cfg->fc_metric = IP6_RT_PRIO_USER;
1787 
1788 	err = -ENOBUFS;
1789 	if (cfg->fc_nlinfo.nlh &&
1790 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1791 		table = fib6_get_table(net, cfg->fc_table);
1792 		if (!table) {
1793 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1794 			table = fib6_new_table(net, cfg->fc_table);
1795 		}
1796 	} else {
1797 		table = fib6_new_table(net, cfg->fc_table);
1798 	}
1799 
1800 	if (!table)
1801 		goto out;
1802 
1803 	rt = ip6_dst_alloc(net, NULL,
1804 			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1805 
1806 	if (!rt) {
1807 		err = -ENOMEM;
1808 		goto out;
1809 	}
1810 
1811 	if (cfg->fc_flags & RTF_EXPIRES)
1812 		rt6_set_expires(rt, jiffies +
1813 				clock_t_to_jiffies(cfg->fc_expires));
1814 	else
1815 		rt6_clean_expires(rt);
1816 
1817 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1818 		cfg->fc_protocol = RTPROT_BOOT;
1819 	rt->rt6i_protocol = cfg->fc_protocol;
1820 
1821 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1822 
1823 	if (addr_type & IPV6_ADDR_MULTICAST)
1824 		rt->dst.input = ip6_mc_input;
1825 	else if (cfg->fc_flags & RTF_LOCAL)
1826 		rt->dst.input = ip6_input;
1827 	else
1828 		rt->dst.input = ip6_forward;
1829 
1830 	rt->dst.output = ip6_output;
1831 
1832 	if (cfg->fc_encap) {
1833 		struct lwtunnel_state *lwtstate;
1834 
1835 		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1836 					   cfg->fc_encap, AF_INET6, cfg,
1837 					   &lwtstate);
1838 		if (err)
1839 			goto out;
1840 		rt->dst.lwtstate = lwtstate_get(lwtstate);
1841 		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1842 			rt->dst.lwtstate->orig_output = rt->dst.output;
1843 			rt->dst.output = lwtunnel_output;
1844 		}
1845 		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1846 			rt->dst.lwtstate->orig_input = rt->dst.input;
1847 			rt->dst.input = lwtunnel_input;
1848 		}
1849 	}
1850 
1851 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1852 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1853 	if (rt->rt6i_dst.plen == 128)
1854 		rt->dst.flags |= DST_HOST;
1855 
1856 #ifdef CONFIG_IPV6_SUBTREES
1857 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1858 	rt->rt6i_src.plen = cfg->fc_src_len;
1859 #endif
1860 
1861 	rt->rt6i_metric = cfg->fc_metric;
1862 
1863 	/* We cannot add true routes via loopback here,
1864 	   they would result in kernel looping; promote them to reject routes
1865 	 */
1866 	if ((cfg->fc_flags & RTF_REJECT) ||
1867 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1868 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1869 	     !(cfg->fc_flags & RTF_LOCAL))) {
1870 		/* hold loopback dev/idev if we haven't done so. */
1871 		if (dev != net->loopback_dev) {
1872 			if (dev) {
1873 				dev_put(dev);
1874 				in6_dev_put(idev);
1875 			}
1876 			dev = net->loopback_dev;
1877 			dev_hold(dev);
1878 			idev = in6_dev_get(dev);
1879 			if (!idev) {
1880 				err = -ENODEV;
1881 				goto out;
1882 			}
1883 		}
1884 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1885 		switch (cfg->fc_type) {
1886 		case RTN_BLACKHOLE:
1887 			rt->dst.error = -EINVAL;
1888 			rt->dst.output = dst_discard_out;
1889 			rt->dst.input = dst_discard;
1890 			break;
1891 		case RTN_PROHIBIT:
1892 			rt->dst.error = -EACCES;
1893 			rt->dst.output = ip6_pkt_prohibit_out;
1894 			rt->dst.input = ip6_pkt_prohibit;
1895 			break;
1896 		case RTN_THROW:
1897 		case RTN_UNREACHABLE:
1898 		default:
1899 			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1900 					: (cfg->fc_type == RTN_UNREACHABLE)
1901 					? -EHOSTUNREACH : -ENETUNREACH;
1902 			rt->dst.output = ip6_pkt_discard_out;
1903 			rt->dst.input = ip6_pkt_discard;
1904 			break;
1905 		}
1906 		goto install_route;
1907 	}
1908 
1909 	if (cfg->fc_flags & RTF_GATEWAY) {
1910 		const struct in6_addr *gw_addr;
1911 		int gwa_type;
1912 
1913 		gw_addr = &cfg->fc_gateway;
1914 		gwa_type = ipv6_addr_type(gw_addr);
1915 
1916 		/* if gw_addr is local we will fail to detect this in case
1917 		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1918 		 * will return already-added prefix route via interface that
1919 		 * prefix route was assigned to, which might be non-loopback.
1920 		 */
1921 		err = -EINVAL;
1922 		if (ipv6_chk_addr_and_flags(net, gw_addr,
1923 					    gwa_type & IPV6_ADDR_LINKLOCAL ?
1924 					    dev : NULL, 0, 0))
1925 			goto out;
1926 
1927 		rt->rt6i_gateway = *gw_addr;
1928 
1929 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1930 			struct rt6_info *grt;
1931 
1932 			/* IPv6 strictly inhibits using not link-local
1933 			   addresses as nexthop address.
1934 			   Otherwise, router will not able to send redirects.
1935 			   It is very good, but in some (rare!) circumstances
1936 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1937 			   some exceptions. --ANK
1938 			 */
1939 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1940 				goto out;
1941 
1942 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1943 
1944 			err = -EHOSTUNREACH;
1945 			if (!grt)
1946 				goto out;
1947 			if (dev) {
1948 				if (dev != grt->dst.dev) {
1949 					ip6_rt_put(grt);
1950 					goto out;
1951 				}
1952 			} else {
1953 				dev = grt->dst.dev;
1954 				idev = grt->rt6i_idev;
1955 				dev_hold(dev);
1956 				in6_dev_hold(grt->rt6i_idev);
1957 			}
1958 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1959 				err = 0;
1960 			ip6_rt_put(grt);
1961 
1962 			if (err)
1963 				goto out;
1964 		}
1965 		err = -EINVAL;
1966 		if (!dev || (dev->flags & IFF_LOOPBACK))
1967 			goto out;
1968 	}
1969 
1970 	err = -ENODEV;
1971 	if (!dev)
1972 		goto out;
1973 
1974 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1975 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1976 			err = -EINVAL;
1977 			goto out;
1978 		}
1979 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1980 		rt->rt6i_prefsrc.plen = 128;
1981 	} else
1982 		rt->rt6i_prefsrc.plen = 0;
1983 
1984 	rt->rt6i_flags = cfg->fc_flags;
1985 
1986 install_route:
1987 	rt->dst.dev = dev;
1988 	rt->rt6i_idev = idev;
1989 	rt->rt6i_table = table;
1990 
1991 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1992 
1993 	return rt;
1994 out:
1995 	if (dev)
1996 		dev_put(dev);
1997 	if (idev)
1998 		in6_dev_put(idev);
1999 	if (rt)
2000 		dst_free(&rt->dst);
2001 
2002 	return ERR_PTR(err);
2003 }
2004 
2005 int ip6_route_add(struct fib6_config *cfg)
2006 {
2007 	struct mx6_config mxc = { .mx = NULL, };
2008 	struct rt6_info *rt;
2009 	int err;
2010 
2011 	rt = ip6_route_info_create(cfg);
2012 	if (IS_ERR(rt)) {
2013 		err = PTR_ERR(rt);
2014 		rt = NULL;
2015 		goto out;
2016 	}
2017 
2018 	err = ip6_convert_metrics(&mxc, cfg);
2019 	if (err)
2020 		goto out;
2021 
2022 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2023 
2024 	kfree(mxc.mx);
2025 
2026 	return err;
2027 out:
2028 	if (rt)
2029 		dst_free(&rt->dst);
2030 
2031 	return err;
2032 }
2033 
2034 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2035 {
2036 	int err;
2037 	struct fib6_table *table;
2038 	struct net *net = dev_net(rt->dst.dev);
2039 
2040 	if (rt == net->ipv6.ip6_null_entry ||
2041 	    rt->dst.flags & DST_NOCACHE) {
2042 		err = -ENOENT;
2043 		goto out;
2044 	}
2045 
2046 	table = rt->rt6i_table;
2047 	write_lock_bh(&table->tb6_lock);
2048 	err = fib6_del(rt, info);
2049 	write_unlock_bh(&table->tb6_lock);
2050 
2051 out:
2052 	ip6_rt_put(rt);
2053 	return err;
2054 }
2055 
2056 int ip6_del_rt(struct rt6_info *rt)
2057 {
2058 	struct nl_info info = {
2059 		.nl_net = dev_net(rt->dst.dev),
2060 	};
2061 	return __ip6_del_rt(rt, &info);
2062 }
2063 
2064 static int ip6_route_del(struct fib6_config *cfg)
2065 {
2066 	struct fib6_table *table;
2067 	struct fib6_node *fn;
2068 	struct rt6_info *rt;
2069 	int err = -ESRCH;
2070 
2071 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2072 	if (!table)
2073 		return err;
2074 
2075 	read_lock_bh(&table->tb6_lock);
2076 
2077 	fn = fib6_locate(&table->tb6_root,
2078 			 &cfg->fc_dst, cfg->fc_dst_len,
2079 			 &cfg->fc_src, cfg->fc_src_len);
2080 
2081 	if (fn) {
2082 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2083 			if ((rt->rt6i_flags & RTF_CACHE) &&
2084 			    !(cfg->fc_flags & RTF_CACHE))
2085 				continue;
2086 			if (cfg->fc_ifindex &&
2087 			    (!rt->dst.dev ||
2088 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
2089 				continue;
2090 			if (cfg->fc_flags & RTF_GATEWAY &&
2091 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2092 				continue;
2093 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2094 				continue;
2095 			dst_hold(&rt->dst);
2096 			read_unlock_bh(&table->tb6_lock);
2097 
2098 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2099 		}
2100 	}
2101 	read_unlock_bh(&table->tb6_lock);
2102 
2103 	return err;
2104 }
2105 
2106 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2107 {
2108 	struct netevent_redirect netevent;
2109 	struct rt6_info *rt, *nrt = NULL;
2110 	struct ndisc_options ndopts;
2111 	struct inet6_dev *in6_dev;
2112 	struct neighbour *neigh;
2113 	struct rd_msg *msg;
2114 	int optlen, on_link;
2115 	u8 *lladdr;
2116 
2117 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2118 	optlen -= sizeof(*msg);
2119 
2120 	if (optlen < 0) {
2121 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2122 		return;
2123 	}
2124 
2125 	msg = (struct rd_msg *)icmp6_hdr(skb);
2126 
2127 	if (ipv6_addr_is_multicast(&msg->dest)) {
2128 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2129 		return;
2130 	}
2131 
2132 	on_link = 0;
2133 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2134 		on_link = 1;
2135 	} else if (ipv6_addr_type(&msg->target) !=
2136 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2137 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2138 		return;
2139 	}
2140 
2141 	in6_dev = __in6_dev_get(skb->dev);
2142 	if (!in6_dev)
2143 		return;
2144 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2145 		return;
2146 
2147 	/* RFC2461 8.1:
2148 	 *	The IP source address of the Redirect MUST be the same as the current
2149 	 *	first-hop router for the specified ICMP Destination Address.
2150 	 */
2151 
2152 	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
2153 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2154 		return;
2155 	}
2156 
2157 	lladdr = NULL;
2158 	if (ndopts.nd_opts_tgt_lladdr) {
2159 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2160 					     skb->dev);
2161 		if (!lladdr) {
2162 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2163 			return;
2164 		}
2165 	}
2166 
2167 	rt = (struct rt6_info *) dst;
2168 	if (rt->rt6i_flags & RTF_REJECT) {
2169 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2170 		return;
2171 	}
2172 
2173 	/* Redirect received -> path was valid.
2174 	 * Look, redirects are sent only in response to data packets,
2175 	 * so that this nexthop apparently is reachable. --ANK
2176 	 */
2177 	dst_confirm(&rt->dst);
2178 
2179 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2180 	if (!neigh)
2181 		return;
2182 
2183 	/*
2184 	 *	We have finally decided to accept it.
2185 	 */
2186 
2187 	neigh_update(neigh, lladdr, NUD_STALE,
2188 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
2189 		     NEIGH_UPDATE_F_OVERRIDE|
2190 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2191 				     NEIGH_UPDATE_F_ISROUTER))
2192 		     );
2193 
2194 	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2195 	if (!nrt)
2196 		goto out;
2197 
2198 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2199 	if (on_link)
2200 		nrt->rt6i_flags &= ~RTF_GATEWAY;
2201 
2202 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2203 
2204 	if (ip6_ins_rt(nrt))
2205 		goto out;
2206 
2207 	netevent.old = &rt->dst;
2208 	netevent.new = &nrt->dst;
2209 	netevent.daddr = &msg->dest;
2210 	netevent.neigh = neigh;
2211 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2212 
2213 	if (rt->rt6i_flags & RTF_CACHE) {
2214 		rt = (struct rt6_info *) dst_clone(&rt->dst);
2215 		ip6_del_rt(rt);
2216 	}
2217 
2218 out:
2219 	neigh_release(neigh);
2220 }
2221 
2222 /*
2223  *	Misc support functions
2224  */
2225 
2226 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2227 {
2228 	BUG_ON(from->dst.from);
2229 
2230 	rt->rt6i_flags &= ~RTF_EXPIRES;
2231 	dst_hold(&from->dst);
2232 	rt->dst.from = &from->dst;
2233 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2234 }
2235 
2236 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2237 {
2238 	rt->dst.input = ort->dst.input;
2239 	rt->dst.output = ort->dst.output;
2240 	rt->rt6i_dst = ort->rt6i_dst;
2241 	rt->dst.error = ort->dst.error;
2242 	rt->rt6i_idev = ort->rt6i_idev;
2243 	if (rt->rt6i_idev)
2244 		in6_dev_hold(rt->rt6i_idev);
2245 	rt->dst.lastuse = jiffies;
2246 	rt->rt6i_gateway = ort->rt6i_gateway;
2247 	rt->rt6i_flags = ort->rt6i_flags;
2248 	rt6_set_from(rt, ort);
2249 	rt->rt6i_metric = ort->rt6i_metric;
2250 #ifdef CONFIG_IPV6_SUBTREES
2251 	rt->rt6i_src = ort->rt6i_src;
2252 #endif
2253 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2254 	rt->rt6i_table = ort->rt6i_table;
2255 	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2256 }
2257 
2258 #ifdef CONFIG_IPV6_ROUTE_INFO
2259 static struct rt6_info *rt6_get_route_info(struct net *net,
2260 					   const struct in6_addr *prefix, int prefixlen,
2261 					   const struct in6_addr *gwaddr, int ifindex)
2262 {
2263 	struct fib6_node *fn;
2264 	struct rt6_info *rt = NULL;
2265 	struct fib6_table *table;
2266 
2267 	table = fib6_get_table(net, RT6_TABLE_INFO);
2268 	if (!table)
2269 		return NULL;
2270 
2271 	read_lock_bh(&table->tb6_lock);
2272 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2273 	if (!fn)
2274 		goto out;
2275 
2276 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2277 		if (rt->dst.dev->ifindex != ifindex)
2278 			continue;
2279 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2280 			continue;
2281 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2282 			continue;
2283 		dst_hold(&rt->dst);
2284 		break;
2285 	}
2286 out:
2287 	read_unlock_bh(&table->tb6_lock);
2288 	return rt;
2289 }
2290 
2291 static struct rt6_info *rt6_add_route_info(struct net *net,
2292 					   const struct in6_addr *prefix, int prefixlen,
2293 					   const struct in6_addr *gwaddr, int ifindex,
2294 					   unsigned int pref)
2295 {
2296 	struct fib6_config cfg = {
2297 		.fc_metric	= IP6_RT_PRIO_USER,
2298 		.fc_ifindex	= ifindex,
2299 		.fc_dst_len	= prefixlen,
2300 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2301 				  RTF_UP | RTF_PREF(pref),
2302 		.fc_nlinfo.portid = 0,
2303 		.fc_nlinfo.nlh = NULL,
2304 		.fc_nlinfo.nl_net = net,
2305 	};
2306 
2307 	cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
2308 	cfg.fc_dst = *prefix;
2309 	cfg.fc_gateway = *gwaddr;
2310 
2311 	/* We should treat it as a default route if prefix length is 0. */
2312 	if (!prefixlen)
2313 		cfg.fc_flags |= RTF_DEFAULT;
2314 
2315 	ip6_route_add(&cfg);
2316 
2317 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2318 }
2319 #endif
2320 
2321 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2322 {
2323 	struct rt6_info *rt;
2324 	struct fib6_table *table;
2325 
2326 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2327 	if (!table)
2328 		return NULL;
2329 
2330 	read_lock_bh(&table->tb6_lock);
2331 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2332 		if (dev == rt->dst.dev &&
2333 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2334 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2335 			break;
2336 	}
2337 	if (rt)
2338 		dst_hold(&rt->dst);
2339 	read_unlock_bh(&table->tb6_lock);
2340 	return rt;
2341 }
2342 
2343 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2344 				     struct net_device *dev,
2345 				     unsigned int pref)
2346 {
2347 	struct fib6_config cfg = {
2348 		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2349 		.fc_metric	= IP6_RT_PRIO_USER,
2350 		.fc_ifindex	= dev->ifindex,
2351 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2352 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2353 		.fc_nlinfo.portid = 0,
2354 		.fc_nlinfo.nlh = NULL,
2355 		.fc_nlinfo.nl_net = dev_net(dev),
2356 	};
2357 
2358 	cfg.fc_gateway = *gwaddr;
2359 
2360 	ip6_route_add(&cfg);
2361 
2362 	return rt6_get_dflt_router(gwaddr, dev);
2363 }
2364 
2365 void rt6_purge_dflt_routers(struct net *net)
2366 {
2367 	struct rt6_info *rt;
2368 	struct fib6_table *table;
2369 
2370 	/* NOTE: Keep consistent with rt6_get_dflt_router */
2371 	table = fib6_get_table(net, RT6_TABLE_DFLT);
2372 	if (!table)
2373 		return;
2374 
2375 restart:
2376 	read_lock_bh(&table->tb6_lock);
2377 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2378 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2379 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2380 			dst_hold(&rt->dst);
2381 			read_unlock_bh(&table->tb6_lock);
2382 			ip6_del_rt(rt);
2383 			goto restart;
2384 		}
2385 	}
2386 	read_unlock_bh(&table->tb6_lock);
2387 }
2388 
2389 static void rtmsg_to_fib6_config(struct net *net,
2390 				 struct in6_rtmsg *rtmsg,
2391 				 struct fib6_config *cfg)
2392 {
2393 	memset(cfg, 0, sizeof(*cfg));
2394 
2395 	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2396 			 : RT6_TABLE_MAIN;
2397 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2398 	cfg->fc_metric = rtmsg->rtmsg_metric;
2399 	cfg->fc_expires = rtmsg->rtmsg_info;
2400 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2401 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2402 	cfg->fc_flags = rtmsg->rtmsg_flags;
2403 
2404 	cfg->fc_nlinfo.nl_net = net;
2405 
2406 	cfg->fc_dst = rtmsg->rtmsg_dst;
2407 	cfg->fc_src = rtmsg->rtmsg_src;
2408 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2409 }
2410 
2411 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2412 {
2413 	struct fib6_config cfg;
2414 	struct in6_rtmsg rtmsg;
2415 	int err;
2416 
2417 	switch (cmd) {
2418 	case SIOCADDRT:		/* Add a route */
2419 	case SIOCDELRT:		/* Delete a route */
2420 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2421 			return -EPERM;
2422 		err = copy_from_user(&rtmsg, arg,
2423 				     sizeof(struct in6_rtmsg));
2424 		if (err)
2425 			return -EFAULT;
2426 
2427 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2428 
2429 		rtnl_lock();
2430 		switch (cmd) {
2431 		case SIOCADDRT:
2432 			err = ip6_route_add(&cfg);
2433 			break;
2434 		case SIOCDELRT:
2435 			err = ip6_route_del(&cfg);
2436 			break;
2437 		default:
2438 			err = -EINVAL;
2439 		}
2440 		rtnl_unlock();
2441 
2442 		return err;
2443 	}
2444 
2445 	return -EINVAL;
2446 }
2447 
2448 /*
2449  *	Drop the packet on the floor
2450  */
2451 
2452 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2453 {
2454 	int type;
2455 	struct dst_entry *dst = skb_dst(skb);
2456 	switch (ipstats_mib_noroutes) {
2457 	case IPSTATS_MIB_INNOROUTES:
2458 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2459 		if (type == IPV6_ADDR_ANY) {
2460 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2461 				      IPSTATS_MIB_INADDRERRORS);
2462 			break;
2463 		}
2464 		/* FALLTHROUGH */
2465 	case IPSTATS_MIB_OUTNOROUTES:
2466 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2467 			      ipstats_mib_noroutes);
2468 		break;
2469 	}
2470 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2471 	kfree_skb(skb);
2472 	return 0;
2473 }
2474 
2475 static int ip6_pkt_discard(struct sk_buff *skb)
2476 {
2477 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2478 }
2479 
2480 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2481 {
2482 	skb->dev = skb_dst(skb)->dev;
2483 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2484 }
2485 
2486 static int ip6_pkt_prohibit(struct sk_buff *skb)
2487 {
2488 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2489 }
2490 
2491 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2492 {
2493 	skb->dev = skb_dst(skb)->dev;
2494 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2495 }
2496 
2497 /*
2498  *	Allocate a dst for local (unicast / anycast) address.
2499  */
2500 
2501 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2502 				    const struct in6_addr *addr,
2503 				    bool anycast)
2504 {
2505 	u32 tb_id;
2506 	struct net *net = dev_net(idev->dev);
2507 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2508 					    DST_NOCOUNT);
2509 	if (!rt)
2510 		return ERR_PTR(-ENOMEM);
2511 
2512 	in6_dev_hold(idev);
2513 
2514 	rt->dst.flags |= DST_HOST;
2515 	rt->dst.input = ip6_input;
2516 	rt->dst.output = ip6_output;
2517 	rt->rt6i_idev = idev;
2518 
2519 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2520 	if (anycast)
2521 		rt->rt6i_flags |= RTF_ANYCAST;
2522 	else
2523 		rt->rt6i_flags |= RTF_LOCAL;
2524 
2525 	rt->rt6i_gateway  = *addr;
2526 	rt->rt6i_dst.addr = *addr;
2527 	rt->rt6i_dst.plen = 128;
2528 	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2529 	rt->rt6i_table = fib6_get_table(net, tb_id);
2530 	rt->dst.flags |= DST_NOCACHE;
2531 
2532 	atomic_set(&rt->dst.__refcnt, 1);
2533 
2534 	return rt;
2535 }
2536 
2537 int ip6_route_get_saddr(struct net *net,
2538 			struct rt6_info *rt,
2539 			const struct in6_addr *daddr,
2540 			unsigned int prefs,
2541 			struct in6_addr *saddr)
2542 {
2543 	struct inet6_dev *idev =
2544 		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2545 	int err = 0;
2546 	if (rt && rt->rt6i_prefsrc.plen)
2547 		*saddr = rt->rt6i_prefsrc.addr;
2548 	else
2549 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2550 					 daddr, prefs, saddr);
2551 	return err;
2552 }
2553 
2554 /* remove deleted ip from prefsrc entries */
2555 struct arg_dev_net_ip {
2556 	struct net_device *dev;
2557 	struct net *net;
2558 	struct in6_addr *addr;
2559 };
2560 
2561 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2562 {
2563 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2564 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2565 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2566 
2567 	if (((void *)rt->dst.dev == dev || !dev) &&
2568 	    rt != net->ipv6.ip6_null_entry &&
2569 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2570 		/* remove prefsrc entry */
2571 		rt->rt6i_prefsrc.plen = 0;
2572 	}
2573 	return 0;
2574 }
2575 
2576 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2577 {
2578 	struct net *net = dev_net(ifp->idev->dev);
2579 	struct arg_dev_net_ip adni = {
2580 		.dev = ifp->idev->dev,
2581 		.net = net,
2582 		.addr = &ifp->addr,
2583 	};
2584 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2585 }
2586 
2587 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2588 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2589 
2590 /* Remove routers and update dst entries when gateway turn into host. */
2591 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2592 {
2593 	struct in6_addr *gateway = (struct in6_addr *)arg;
2594 
2595 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2596 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2597 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2598 		return -1;
2599 	}
2600 	return 0;
2601 }
2602 
2603 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2604 {
2605 	fib6_clean_all(net, fib6_clean_tohost, gateway);
2606 }
2607 
2608 struct arg_dev_net {
2609 	struct net_device *dev;
2610 	struct net *net;
2611 };
2612 
2613 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2614 {
2615 	const struct arg_dev_net *adn = arg;
2616 	const struct net_device *dev = adn->dev;
2617 
2618 	if ((rt->dst.dev == dev || !dev) &&
2619 	    rt != adn->net->ipv6.ip6_null_entry)
2620 		return -1;
2621 
2622 	return 0;
2623 }
2624 
2625 void rt6_ifdown(struct net *net, struct net_device *dev)
2626 {
2627 	struct arg_dev_net adn = {
2628 		.dev = dev,
2629 		.net = net,
2630 	};
2631 
2632 	fib6_clean_all(net, fib6_ifdown, &adn);
2633 	icmp6_clean_all(fib6_ifdown, &adn);
2634 	if (dev)
2635 		rt6_uncached_list_flush_dev(net, dev);
2636 }
2637 
2638 struct rt6_mtu_change_arg {
2639 	struct net_device *dev;
2640 	unsigned int mtu;
2641 };
2642 
2643 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2644 {
2645 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2646 	struct inet6_dev *idev;
2647 
2648 	/* In IPv6 pmtu discovery is not optional,
2649 	   so that RTAX_MTU lock cannot disable it.
2650 	   We still use this lock to block changes
2651 	   caused by addrconf/ndisc.
2652 	*/
2653 
2654 	idev = __in6_dev_get(arg->dev);
2655 	if (!idev)
2656 		return 0;
2657 
2658 	/* For administrative MTU increase, there is no way to discover
2659 	   IPv6 PMTU increase, so PMTU increase should be updated here.
2660 	   Since RFC 1981 doesn't include administrative MTU increase
2661 	   update PMTU increase is a MUST. (i.e. jumbo frame)
2662 	 */
2663 	/*
2664 	   If new MTU is less than route PMTU, this new MTU will be the
2665 	   lowest MTU in the path, update the route PMTU to reflect PMTU
2666 	   decreases; if new MTU is greater than route PMTU, and the
2667 	   old MTU is the lowest MTU in the path, update the route PMTU
2668 	   to reflect the increase. In this case if the other nodes' MTU
2669 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2670 	   PMTU discouvery.
2671 	 */
2672 	if (rt->dst.dev == arg->dev &&
2673 	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2674 		if (rt->rt6i_flags & RTF_CACHE) {
2675 			/* For RTF_CACHE with rt6i_pmtu == 0
2676 			 * (i.e. a redirected route),
2677 			 * the metrics of its rt->dst.from has already
2678 			 * been updated.
2679 			 */
2680 			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2681 				rt->rt6i_pmtu = arg->mtu;
2682 		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
2683 			   (dst_mtu(&rt->dst) < arg->mtu &&
2684 			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2685 			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2686 		}
2687 	}
2688 	return 0;
2689 }
2690 
2691 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2692 {
2693 	struct rt6_mtu_change_arg arg = {
2694 		.dev = dev,
2695 		.mtu = mtu,
2696 	};
2697 
2698 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2699 }
2700 
2701 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2702 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2703 	[RTA_OIF]               = { .type = NLA_U32 },
2704 	[RTA_IIF]		= { .type = NLA_U32 },
2705 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2706 	[RTA_METRICS]           = { .type = NLA_NESTED },
2707 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2708 	[RTA_PREF]              = { .type = NLA_U8 },
2709 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
2710 	[RTA_ENCAP]		= { .type = NLA_NESTED },
2711 	[RTA_EXPIRES]		= { .type = NLA_U32 },
2712 };
2713 
2714 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2715 			      struct fib6_config *cfg)
2716 {
2717 	struct rtmsg *rtm;
2718 	struct nlattr *tb[RTA_MAX+1];
2719 	unsigned int pref;
2720 	int err;
2721 
2722 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2723 	if (err < 0)
2724 		goto errout;
2725 
2726 	err = -EINVAL;
2727 	rtm = nlmsg_data(nlh);
2728 	memset(cfg, 0, sizeof(*cfg));
2729 
2730 	cfg->fc_table = rtm->rtm_table;
2731 	cfg->fc_dst_len = rtm->rtm_dst_len;
2732 	cfg->fc_src_len = rtm->rtm_src_len;
2733 	cfg->fc_flags = RTF_UP;
2734 	cfg->fc_protocol = rtm->rtm_protocol;
2735 	cfg->fc_type = rtm->rtm_type;
2736 
2737 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2738 	    rtm->rtm_type == RTN_BLACKHOLE ||
2739 	    rtm->rtm_type == RTN_PROHIBIT ||
2740 	    rtm->rtm_type == RTN_THROW)
2741 		cfg->fc_flags |= RTF_REJECT;
2742 
2743 	if (rtm->rtm_type == RTN_LOCAL)
2744 		cfg->fc_flags |= RTF_LOCAL;
2745 
2746 	if (rtm->rtm_flags & RTM_F_CLONED)
2747 		cfg->fc_flags |= RTF_CACHE;
2748 
2749 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2750 	cfg->fc_nlinfo.nlh = nlh;
2751 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2752 
2753 	if (tb[RTA_GATEWAY]) {
2754 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2755 		cfg->fc_flags |= RTF_GATEWAY;
2756 	}
2757 
2758 	if (tb[RTA_DST]) {
2759 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2760 
2761 		if (nla_len(tb[RTA_DST]) < plen)
2762 			goto errout;
2763 
2764 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2765 	}
2766 
2767 	if (tb[RTA_SRC]) {
2768 		int plen = (rtm->rtm_src_len + 7) >> 3;
2769 
2770 		if (nla_len(tb[RTA_SRC]) < plen)
2771 			goto errout;
2772 
2773 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2774 	}
2775 
2776 	if (tb[RTA_PREFSRC])
2777 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2778 
2779 	if (tb[RTA_OIF])
2780 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2781 
2782 	if (tb[RTA_PRIORITY])
2783 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2784 
2785 	if (tb[RTA_METRICS]) {
2786 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2787 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2788 	}
2789 
2790 	if (tb[RTA_TABLE])
2791 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2792 
2793 	if (tb[RTA_MULTIPATH]) {
2794 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2795 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2796 	}
2797 
2798 	if (tb[RTA_PREF]) {
2799 		pref = nla_get_u8(tb[RTA_PREF]);
2800 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
2801 		    pref != ICMPV6_ROUTER_PREF_HIGH)
2802 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
2803 		cfg->fc_flags |= RTF_PREF(pref);
2804 	}
2805 
2806 	if (tb[RTA_ENCAP])
2807 		cfg->fc_encap = tb[RTA_ENCAP];
2808 
2809 	if (tb[RTA_ENCAP_TYPE])
2810 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2811 
2812 	if (tb[RTA_EXPIRES]) {
2813 		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2814 
2815 		if (addrconf_finite_timeout(timeout)) {
2816 			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2817 			cfg->fc_flags |= RTF_EXPIRES;
2818 		}
2819 	}
2820 
2821 	err = 0;
2822 errout:
2823 	return err;
2824 }
2825 
/*
 * One pending nexthop of a multipath route request, queued on a local list
 * while ip6_route_multipath_add() builds and inserts the routes.
 */
struct rt6_nh {
	/* route built for this nexthop; reset to NULL once insertion was attempted */
	struct rt6_info *rt6_info;
	/* copy of the per-nexthop config, used to delete the route on rollback */
	struct fib6_config r_cfg;
	/* metrics filled in by ip6_convert_metrics(); mxc.mx is kfree()d on cleanup */
	struct mx6_config mxc;
	/* linkage on the nexthop list */
	struct list_head next;
};
2832 
2833 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2834 {
2835 	struct rt6_nh *nh;
2836 
2837 	list_for_each_entry(nh, rt6_nh_list, next) {
2838 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2839 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2840 		        nh->r_cfg.fc_ifindex);
2841 	}
2842 }
2843 
2844 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2845 				 struct rt6_info *rt, struct fib6_config *r_cfg)
2846 {
2847 	struct rt6_nh *nh;
2848 	struct rt6_info *rtnh;
2849 	int err = -EEXIST;
2850 
2851 	list_for_each_entry(nh, rt6_nh_list, next) {
2852 		/* check if rt6_info already exists */
2853 		rtnh = nh->rt6_info;
2854 
2855 		if (rtnh->dst.dev == rt->dst.dev &&
2856 		    rtnh->rt6i_idev == rt->rt6i_idev &&
2857 		    ipv6_addr_equal(&rtnh->rt6i_gateway,
2858 				    &rt->rt6i_gateway))
2859 			return err;
2860 	}
2861 
2862 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2863 	if (!nh)
2864 		return -ENOMEM;
2865 	nh->rt6_info = rt;
2866 	err = ip6_convert_metrics(&nh->mxc, r_cfg);
2867 	if (err) {
2868 		kfree(nh);
2869 		return err;
2870 	}
2871 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2872 	list_add_tail(&nh->next, rt6_nh_list);
2873 
2874 	return 0;
2875 }
2876 
2877 static int ip6_route_multipath_add(struct fib6_config *cfg)
2878 {
2879 	struct fib6_config r_cfg;
2880 	struct rtnexthop *rtnh;
2881 	struct rt6_info *rt;
2882 	struct rt6_nh *err_nh;
2883 	struct rt6_nh *nh, *nh_safe;
2884 	int remaining;
2885 	int attrlen;
2886 	int err = 1;
2887 	int nhn = 0;
2888 	int replace = (cfg->fc_nlinfo.nlh &&
2889 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2890 	LIST_HEAD(rt6_nh_list);
2891 
2892 	remaining = cfg->fc_mp_len;
2893 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2894 
2895 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
2896 	 * rt6_info structs per nexthop
2897 	 */
2898 	while (rtnh_ok(rtnh, remaining)) {
2899 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2900 		if (rtnh->rtnh_ifindex)
2901 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2902 
2903 		attrlen = rtnh_attrlen(rtnh);
2904 		if (attrlen > 0) {
2905 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2906 
2907 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2908 			if (nla) {
2909 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
2910 				r_cfg.fc_flags |= RTF_GATEWAY;
2911 			}
2912 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2913 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2914 			if (nla)
2915 				r_cfg.fc_encap_type = nla_get_u16(nla);
2916 		}
2917 
2918 		rt = ip6_route_info_create(&r_cfg);
2919 		if (IS_ERR(rt)) {
2920 			err = PTR_ERR(rt);
2921 			rt = NULL;
2922 			goto cleanup;
2923 		}
2924 
2925 		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2926 		if (err) {
2927 			dst_free(&rt->dst);
2928 			goto cleanup;
2929 		}
2930 
2931 		rtnh = rtnh_next(rtnh, &remaining);
2932 	}
2933 
2934 	err_nh = NULL;
2935 	list_for_each_entry(nh, &rt6_nh_list, next) {
2936 		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2937 		/* nh->rt6_info is used or freed at this point, reset to NULL*/
2938 		nh->rt6_info = NULL;
2939 		if (err) {
2940 			if (replace && nhn)
2941 				ip6_print_replace_route_err(&rt6_nh_list);
2942 			err_nh = nh;
2943 			goto add_errout;
2944 		}
2945 
2946 		/* Because each route is added like a single route we remove
2947 		 * these flags after the first nexthop: if there is a collision,
2948 		 * we have already failed to add the first nexthop:
2949 		 * fib6_add_rt2node() has rejected it; when replacing, old
2950 		 * nexthops have been replaced by first new, the rest should
2951 		 * be added to it.
2952 		 */
2953 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2954 						     NLM_F_REPLACE);
2955 		nhn++;
2956 	}
2957 
2958 	goto cleanup;
2959 
2960 add_errout:
2961 	/* Delete routes that were already added */
2962 	list_for_each_entry(nh, &rt6_nh_list, next) {
2963 		if (err_nh == nh)
2964 			break;
2965 		ip6_route_del(&nh->r_cfg);
2966 	}
2967 
2968 cleanup:
2969 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2970 		if (nh->rt6_info)
2971 			dst_free(&nh->rt6_info->dst);
2972 		kfree(nh->mxc.mx);
2973 		list_del(&nh->next);
2974 		kfree(nh);
2975 	}
2976 
2977 	return err;
2978 }
2979 
2980 static int ip6_route_multipath_del(struct fib6_config *cfg)
2981 {
2982 	struct fib6_config r_cfg;
2983 	struct rtnexthop *rtnh;
2984 	int remaining;
2985 	int attrlen;
2986 	int err = 1, last_err = 0;
2987 
2988 	remaining = cfg->fc_mp_len;
2989 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2990 
2991 	/* Parse a Multipath Entry */
2992 	while (rtnh_ok(rtnh, remaining)) {
2993 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2994 		if (rtnh->rtnh_ifindex)
2995 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2996 
2997 		attrlen = rtnh_attrlen(rtnh);
2998 		if (attrlen > 0) {
2999 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3000 
3001 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3002 			if (nla) {
3003 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3004 				r_cfg.fc_flags |= RTF_GATEWAY;
3005 			}
3006 		}
3007 		err = ip6_route_del(&r_cfg);
3008 		if (err)
3009 			last_err = err;
3010 
3011 		rtnh = rtnh_next(rtnh, &remaining);
3012 	}
3013 
3014 	return last_err;
3015 }
3016 
3017 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3018 {
3019 	struct fib6_config cfg;
3020 	int err;
3021 
3022 	err = rtm_to_fib6_config(skb, nlh, &cfg);
3023 	if (err < 0)
3024 		return err;
3025 
3026 	if (cfg.fc_mp)
3027 		return ip6_route_multipath_del(&cfg);
3028 	else
3029 		return ip6_route_del(&cfg);
3030 }
3031 
3032 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3033 {
3034 	struct fib6_config cfg;
3035 	int err;
3036 
3037 	err = rtm_to_fib6_config(skb, nlh, &cfg);
3038 	if (err < 0)
3039 		return err;
3040 
3041 	if (cfg.fc_mp)
3042 		return ip6_route_multipath_add(&cfg);
3043 	else
3044 		return ip6_route_add(&cfg);
3045 }
3046 
3047 static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3048 {
3049 	return NLMSG_ALIGN(sizeof(struct rtmsg))
3050 	       + nla_total_size(16) /* RTA_SRC */
3051 	       + nla_total_size(16) /* RTA_DST */
3052 	       + nla_total_size(16) /* RTA_GATEWAY */
3053 	       + nla_total_size(16) /* RTA_PREFSRC */
3054 	       + nla_total_size(4) /* RTA_TABLE */
3055 	       + nla_total_size(4) /* RTA_IIF */
3056 	       + nla_total_size(4) /* RTA_OIF */
3057 	       + nla_total_size(4) /* RTA_PRIORITY */
3058 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3059 	       + nla_total_size(sizeof(struct rta_cacheinfo))
3060 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3061 	       + nla_total_size(1) /* RTA_PREF */
3062 	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
3063 }
3064 
3065 static int rt6_fill_node(struct net *net,
3066 			 struct sk_buff *skb, struct rt6_info *rt,
3067 			 struct in6_addr *dst, struct in6_addr *src,
3068 			 int iif, int type, u32 portid, u32 seq,
3069 			 int prefix, int nowait, unsigned int flags)
3070 {
3071 	u32 metrics[RTAX_MAX];
3072 	struct rtmsg *rtm;
3073 	struct nlmsghdr *nlh;
3074 	long expires;
3075 	u32 table;
3076 
3077 	if (prefix) {	/* user wants prefix routes only */
3078 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3079 			/* success since this is not a prefix route */
3080 			return 1;
3081 		}
3082 	}
3083 
3084 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3085 	if (!nlh)
3086 		return -EMSGSIZE;
3087 
3088 	rtm = nlmsg_data(nlh);
3089 	rtm->rtm_family = AF_INET6;
3090 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
3091 	rtm->rtm_src_len = rt->rt6i_src.plen;
3092 	rtm->rtm_tos = 0;
3093 	if (rt->rt6i_table)
3094 		table = rt->rt6i_table->tb6_id;
3095 	else
3096 		table = RT6_TABLE_UNSPEC;
3097 	rtm->rtm_table = table;
3098 	if (nla_put_u32(skb, RTA_TABLE, table))
3099 		goto nla_put_failure;
3100 	if (rt->rt6i_flags & RTF_REJECT) {
3101 		switch (rt->dst.error) {
3102 		case -EINVAL:
3103 			rtm->rtm_type = RTN_BLACKHOLE;
3104 			break;
3105 		case -EACCES:
3106 			rtm->rtm_type = RTN_PROHIBIT;
3107 			break;
3108 		case -EAGAIN:
3109 			rtm->rtm_type = RTN_THROW;
3110 			break;
3111 		default:
3112 			rtm->rtm_type = RTN_UNREACHABLE;
3113 			break;
3114 		}
3115 	}
3116 	else if (rt->rt6i_flags & RTF_LOCAL)
3117 		rtm->rtm_type = RTN_LOCAL;
3118 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3119 		rtm->rtm_type = RTN_LOCAL;
3120 	else
3121 		rtm->rtm_type = RTN_UNICAST;
3122 	rtm->rtm_flags = 0;
3123 	if (!netif_carrier_ok(rt->dst.dev)) {
3124 		rtm->rtm_flags |= RTNH_F_LINKDOWN;
3125 		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3126 			rtm->rtm_flags |= RTNH_F_DEAD;
3127 	}
3128 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3129 	rtm->rtm_protocol = rt->rt6i_protocol;
3130 	if (rt->rt6i_flags & RTF_DYNAMIC)
3131 		rtm->rtm_protocol = RTPROT_REDIRECT;
3132 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
3133 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3134 			rtm->rtm_protocol = RTPROT_RA;
3135 		else
3136 			rtm->rtm_protocol = RTPROT_KERNEL;
3137 	}
3138 
3139 	if (rt->rt6i_flags & RTF_CACHE)
3140 		rtm->rtm_flags |= RTM_F_CLONED;
3141 
3142 	if (dst) {
3143 		if (nla_put_in6_addr(skb, RTA_DST, dst))
3144 			goto nla_put_failure;
3145 		rtm->rtm_dst_len = 128;
3146 	} else if (rtm->rtm_dst_len)
3147 		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3148 			goto nla_put_failure;
3149 #ifdef CONFIG_IPV6_SUBTREES
3150 	if (src) {
3151 		if (nla_put_in6_addr(skb, RTA_SRC, src))
3152 			goto nla_put_failure;
3153 		rtm->rtm_src_len = 128;
3154 	} else if (rtm->rtm_src_len &&
3155 		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3156 		goto nla_put_failure;
3157 #endif
3158 	if (iif) {
3159 #ifdef CONFIG_IPV6_MROUTE
3160 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3161 			int err = ip6mr_get_route(net, skb, rtm, nowait);
3162 			if (err <= 0) {
3163 				if (!nowait) {
3164 					if (err == 0)
3165 						return 0;
3166 					goto nla_put_failure;
3167 				} else {
3168 					if (err == -EMSGSIZE)
3169 						goto nla_put_failure;
3170 				}
3171 			}
3172 		} else
3173 #endif
3174 			if (nla_put_u32(skb, RTA_IIF, iif))
3175 				goto nla_put_failure;
3176 	} else if (dst) {
3177 		struct in6_addr saddr_buf;
3178 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3179 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3180 			goto nla_put_failure;
3181 	}
3182 
3183 	if (rt->rt6i_prefsrc.plen) {
3184 		struct in6_addr saddr_buf;
3185 		saddr_buf = rt->rt6i_prefsrc.addr;
3186 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3187 			goto nla_put_failure;
3188 	}
3189 
3190 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3191 	if (rt->rt6i_pmtu)
3192 		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3193 	if (rtnetlink_put_metrics(skb, metrics) < 0)
3194 		goto nla_put_failure;
3195 
3196 	if (rt->rt6i_flags & RTF_GATEWAY) {
3197 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3198 			goto nla_put_failure;
3199 	}
3200 
3201 	if (rt->dst.dev &&
3202 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3203 		goto nla_put_failure;
3204 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3205 		goto nla_put_failure;
3206 
3207 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3208 
3209 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3210 		goto nla_put_failure;
3211 
3212 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3213 		goto nla_put_failure;
3214 
3215 	lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3216 
3217 	nlmsg_end(skb, nlh);
3218 	return 0;
3219 
3220 nla_put_failure:
3221 	nlmsg_cancel(skb, nlh);
3222 	return -EMSGSIZE;
3223 }
3224 
3225 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3226 {
3227 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3228 	int prefix;
3229 
3230 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3231 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3232 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3233 	} else
3234 		prefix = 0;
3235 
3236 	return rt6_fill_node(arg->net,
3237 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3238 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3239 		     prefix, 0, NLM_F_MULTI);
3240 }
3241 
3242 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3243 {
3244 	struct net *net = sock_net(in_skb->sk);
3245 	struct nlattr *tb[RTA_MAX+1];
3246 	struct rt6_info *rt;
3247 	struct sk_buff *skb;
3248 	struct rtmsg *rtm;
3249 	struct flowi6 fl6;
3250 	int err, iif = 0, oif = 0;
3251 
3252 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3253 	if (err < 0)
3254 		goto errout;
3255 
3256 	err = -EINVAL;
3257 	memset(&fl6, 0, sizeof(fl6));
3258 
3259 	if (tb[RTA_SRC]) {
3260 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3261 			goto errout;
3262 
3263 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3264 	}
3265 
3266 	if (tb[RTA_DST]) {
3267 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3268 			goto errout;
3269 
3270 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3271 	}
3272 
3273 	if (tb[RTA_IIF])
3274 		iif = nla_get_u32(tb[RTA_IIF]);
3275 
3276 	if (tb[RTA_OIF])
3277 		oif = nla_get_u32(tb[RTA_OIF]);
3278 
3279 	if (tb[RTA_MARK])
3280 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3281 
3282 	if (iif) {
3283 		struct net_device *dev;
3284 		int flags = 0;
3285 
3286 		dev = __dev_get_by_index(net, iif);
3287 		if (!dev) {
3288 			err = -ENODEV;
3289 			goto errout;
3290 		}
3291 
3292 		fl6.flowi6_iif = iif;
3293 
3294 		if (!ipv6_addr_any(&fl6.saddr))
3295 			flags |= RT6_LOOKUP_F_HAS_SADDR;
3296 
3297 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3298 							       flags);
3299 	} else {
3300 		fl6.flowi6_oif = oif;
3301 
3302 		if (netif_index_is_l3_master(net, oif)) {
3303 			fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3304 					   FLOWI_FLAG_SKIP_NH_OIF;
3305 		}
3306 
3307 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3308 	}
3309 
3310 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3311 	if (!skb) {
3312 		ip6_rt_put(rt);
3313 		err = -ENOBUFS;
3314 		goto errout;
3315 	}
3316 
3317 	/* Reserve room for dummy headers, this skb can pass
3318 	   through good chunk of routing engine.
3319 	 */
3320 	skb_reset_mac_header(skb);
3321 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3322 
3323 	skb_dst_set(skb, &rt->dst);
3324 
3325 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3326 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3327 			    nlh->nlmsg_seq, 0, 0, 0);
3328 	if (err < 0) {
3329 		kfree_skb(skb);
3330 		goto errout;
3331 	}
3332 
3333 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3334 errout:
3335 	return err;
3336 }
3337 
3338 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3339 		     unsigned int nlm_flags)
3340 {
3341 	struct sk_buff *skb;
3342 	struct net *net = info->nl_net;
3343 	u32 seq;
3344 	int err;
3345 
3346 	err = -ENOBUFS;
3347 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3348 
3349 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3350 	if (!skb)
3351 		goto errout;
3352 
3353 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3354 				event, info->portid, seq, 0, 0, nlm_flags);
3355 	if (err < 0) {
3356 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3357 		WARN_ON(err == -EMSGSIZE);
3358 		kfree_skb(skb);
3359 		goto errout;
3360 	}
3361 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3362 		    info->nlh, gfp_any());
3363 	return;
3364 errout:
3365 	if (err < 0)
3366 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3367 }
3368 
3369 static int ip6_route_dev_notify(struct notifier_block *this,
3370 				unsigned long event, void *ptr)
3371 {
3372 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3373 	struct net *net = dev_net(dev);
3374 
3375 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3376 		net->ipv6.ip6_null_entry->dst.dev = dev;
3377 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3378 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3379 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3380 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3381 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3382 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3383 #endif
3384 	}
3385 
3386 	return NOTIFY_OK;
3387 }
3388 
3389 /*
3390  *	/proc
3391  */
3392 
3393 #ifdef CONFIG_PROC_FS
3394 
3395 static const struct file_operations ipv6_route_proc_fops = {
3396 	.owner		= THIS_MODULE,
3397 	.open		= ipv6_route_open,
3398 	.read		= seq_read,
3399 	.llseek		= seq_lseek,
3400 	.release	= seq_release_net,
3401 };
3402 
3403 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3404 {
3405 	struct net *net = (struct net *)seq->private;
3406 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3407 		   net->ipv6.rt6_stats->fib_nodes,
3408 		   net->ipv6.rt6_stats->fib_route_nodes,
3409 		   net->ipv6.rt6_stats->fib_rt_alloc,
3410 		   net->ipv6.rt6_stats->fib_rt_entries,
3411 		   net->ipv6.rt6_stats->fib_rt_cache,
3412 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3413 		   net->ipv6.rt6_stats->fib_discarded_routes);
3414 
3415 	return 0;
3416 }
3417 
3418 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3419 {
3420 	return single_open_net(inode, file, rt6_stats_seq_show);
3421 }
3422 
3423 static const struct file_operations rt6_stats_seq_fops = {
3424 	.owner	 = THIS_MODULE,
3425 	.open	 = rt6_stats_seq_open,
3426 	.read	 = seq_read,
3427 	.llseek	 = seq_lseek,
3428 	.release = single_release_net,
3429 };
3430 #endif	/* CONFIG_PROC_FS */
3431 
3432 #ifdef CONFIG_SYSCTL
3433 
3434 static
3435 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3436 			      void __user *buffer, size_t *lenp, loff_t *ppos)
3437 {
3438 	struct net *net;
3439 	int delay;
3440 	if (!write)
3441 		return -EINVAL;
3442 
3443 	net = (struct net *)ctl->extra1;
3444 	delay = net->ipv6.sysctl.flush_delay;
3445 	proc_dointvec(ctl, write, buffer, lenp, ppos);
3446 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3447 	return 0;
3448 }
3449 
3450 struct ctl_table ipv6_route_table_template[] = {
3451 	{
3452 		.procname	=	"flush",
3453 		.data		=	&init_net.ipv6.sysctl.flush_delay,
3454 		.maxlen		=	sizeof(int),
3455 		.mode		=	0200,
3456 		.proc_handler	=	ipv6_sysctl_rtcache_flush
3457 	},
3458 	{
3459 		.procname	=	"gc_thresh",
3460 		.data		=	&ip6_dst_ops_template.gc_thresh,
3461 		.maxlen		=	sizeof(int),
3462 		.mode		=	0644,
3463 		.proc_handler	=	proc_dointvec,
3464 	},
3465 	{
3466 		.procname	=	"max_size",
3467 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
3468 		.maxlen		=	sizeof(int),
3469 		.mode		=	0644,
3470 		.proc_handler	=	proc_dointvec,
3471 	},
3472 	{
3473 		.procname	=	"gc_min_interval",
3474 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3475 		.maxlen		=	sizeof(int),
3476 		.mode		=	0644,
3477 		.proc_handler	=	proc_dointvec_jiffies,
3478 	},
3479 	{
3480 		.procname	=	"gc_timeout",
3481 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3482 		.maxlen		=	sizeof(int),
3483 		.mode		=	0644,
3484 		.proc_handler	=	proc_dointvec_jiffies,
3485 	},
3486 	{
3487 		.procname	=	"gc_interval",
3488 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
3489 		.maxlen		=	sizeof(int),
3490 		.mode		=	0644,
3491 		.proc_handler	=	proc_dointvec_jiffies,
3492 	},
3493 	{
3494 		.procname	=	"gc_elasticity",
3495 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3496 		.maxlen		=	sizeof(int),
3497 		.mode		=	0644,
3498 		.proc_handler	=	proc_dointvec,
3499 	},
3500 	{
3501 		.procname	=	"mtu_expires",
3502 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3503 		.maxlen		=	sizeof(int),
3504 		.mode		=	0644,
3505 		.proc_handler	=	proc_dointvec_jiffies,
3506 	},
3507 	{
3508 		.procname	=	"min_adv_mss",
3509 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
3510 		.maxlen		=	sizeof(int),
3511 		.mode		=	0644,
3512 		.proc_handler	=	proc_dointvec,
3513 	},
3514 	{
3515 		.procname	=	"gc_min_interval_ms",
3516 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3517 		.maxlen		=	sizeof(int),
3518 		.mode		=	0644,
3519 		.proc_handler	=	proc_dointvec_ms_jiffies,
3520 	},
3521 	{ }
3522 };
3523 
3524 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3525 {
3526 	struct ctl_table *table;
3527 
3528 	table = kmemdup(ipv6_route_table_template,
3529 			sizeof(ipv6_route_table_template),
3530 			GFP_KERNEL);
3531 
3532 	if (table) {
3533 		table[0].data = &net->ipv6.sysctl.flush_delay;
3534 		table[0].extra1 = net;
3535 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3536 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3537 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3538 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3539 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3540 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3541 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3542 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3543 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3544 
3545 		/* Don't export sysctls to unprivileged users */
3546 		if (net->user_ns != &init_user_ns)
3547 			table[0].procname = NULL;
3548 	}
3549 
3550 	return table;
3551 }
3552 #endif
3553 
3554 static int __net_init ip6_route_net_init(struct net *net)
3555 {
3556 	int ret = -ENOMEM;
3557 
3558 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3559 	       sizeof(net->ipv6.ip6_dst_ops));
3560 
3561 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3562 		goto out_ip6_dst_ops;
3563 
3564 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3565 					   sizeof(*net->ipv6.ip6_null_entry),
3566 					   GFP_KERNEL);
3567 	if (!net->ipv6.ip6_null_entry)
3568 		goto out_ip6_dst_entries;
3569 	net->ipv6.ip6_null_entry->dst.path =
3570 		(struct dst_entry *)net->ipv6.ip6_null_entry;
3571 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3572 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3573 			 ip6_template_metrics, true);
3574 
3575 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3576 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3577 					       sizeof(*net->ipv6.ip6_prohibit_entry),
3578 					       GFP_KERNEL);
3579 	if (!net->ipv6.ip6_prohibit_entry)
3580 		goto out_ip6_null_entry;
3581 	net->ipv6.ip6_prohibit_entry->dst.path =
3582 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3583 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3584 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3585 			 ip6_template_metrics, true);
3586 
3587 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3588 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3589 					       GFP_KERNEL);
3590 	if (!net->ipv6.ip6_blk_hole_entry)
3591 		goto out_ip6_prohibit_entry;
3592 	net->ipv6.ip6_blk_hole_entry->dst.path =
3593 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3594 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3595 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3596 			 ip6_template_metrics, true);
3597 #endif
3598 
3599 	net->ipv6.sysctl.flush_delay = 0;
3600 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3601 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3602 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3603 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3604 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3605 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3606 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3607 
3608 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3609 
3610 	ret = 0;
3611 out:
3612 	return ret;
3613 
3614 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3615 out_ip6_prohibit_entry:
3616 	kfree(net->ipv6.ip6_prohibit_entry);
3617 out_ip6_null_entry:
3618 	kfree(net->ipv6.ip6_null_entry);
3619 #endif
3620 out_ip6_dst_entries:
3621 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3622 out_ip6_dst_ops:
3623 	goto out;
3624 }
3625 
3626 static void __net_exit ip6_route_net_exit(struct net *net)
3627 {
3628 	kfree(net->ipv6.ip6_null_entry);
3629 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3630 	kfree(net->ipv6.ip6_prohibit_entry);
3631 	kfree(net->ipv6.ip6_blk_hole_entry);
3632 #endif
3633 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3634 }
3635 
3636 static int __net_init ip6_route_net_init_late(struct net *net)
3637 {
3638 #ifdef CONFIG_PROC_FS
3639 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3640 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3641 #endif
3642 	return 0;
3643 }
3644 
3645 static void __net_exit ip6_route_net_exit_late(struct net *net)
3646 {
3647 #ifdef CONFIG_PROC_FS
3648 	remove_proc_entry("ipv6_route", net->proc_net);
3649 	remove_proc_entry("rt6_stats", net->proc_net);
3650 #endif
3651 }
3652 
3653 static struct pernet_operations ip6_route_net_ops = {
3654 	.init = ip6_route_net_init,
3655 	.exit = ip6_route_net_exit,
3656 };
3657 
3658 static int __net_init ipv6_inetpeer_init(struct net *net)
3659 {
3660 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3661 
3662 	if (!bp)
3663 		return -ENOMEM;
3664 	inet_peer_base_init(bp);
3665 	net->ipv6.peers = bp;
3666 	return 0;
3667 }
3668 
3669 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3670 {
3671 	struct inet_peer_base *bp = net->ipv6.peers;
3672 
3673 	net->ipv6.peers = NULL;
3674 	inetpeer_invalidate_tree(bp);
3675 	kfree(bp);
3676 }
3677 
3678 static struct pernet_operations ipv6_inetpeer_ops = {
3679 	.init	=	ipv6_inetpeer_init,
3680 	.exit	=	ipv6_inetpeer_exit,
3681 };
3682 
3683 static struct pernet_operations ip6_route_net_late_ops = {
3684 	.init = ip6_route_net_init_late,
3685 	.exit = ip6_route_net_exit_late,
3686 };
3687 
3688 static struct notifier_block ip6_route_dev_notifier = {
3689 	.notifier_call = ip6_route_dev_notify,
3690 	.priority = 0,
3691 };
3692 
3693 int __init ip6_route_init(void)
3694 {
3695 	int ret;
3696 	int cpu;
3697 
3698 	ret = -ENOMEM;
3699 	ip6_dst_ops_template.kmem_cachep =
3700 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3701 				  SLAB_HWCACHE_ALIGN, NULL);
3702 	if (!ip6_dst_ops_template.kmem_cachep)
3703 		goto out;
3704 
3705 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3706 	if (ret)
3707 		goto out_kmem_cache;
3708 
3709 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3710 	if (ret)
3711 		goto out_dst_entries;
3712 
3713 	ret = register_pernet_subsys(&ip6_route_net_ops);
3714 	if (ret)
3715 		goto out_register_inetpeer;
3716 
3717 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3718 
3719 	/* Registering of the loopback is done before this portion of code,
3720 	 * the loopback reference in rt6_info will not be taken, do it
3721 	 * manually for init_net */
3722 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3723 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3724   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3725 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3726 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3727 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3728 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3729   #endif
3730 	ret = fib6_init();
3731 	if (ret)
3732 		goto out_register_subsys;
3733 
3734 	ret = xfrm6_init();
3735 	if (ret)
3736 		goto out_fib6_init;
3737 
3738 	ret = fib6_rules_init();
3739 	if (ret)
3740 		goto xfrm6_init;
3741 
3742 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3743 	if (ret)
3744 		goto fib6_rules_init;
3745 
3746 	ret = -ENOBUFS;
3747 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3748 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3749 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3750 		goto out_register_late_subsys;
3751 
3752 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3753 	if (ret)
3754 		goto out_register_late_subsys;
3755 
3756 	for_each_possible_cpu(cpu) {
3757 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3758 
3759 		INIT_LIST_HEAD(&ul->head);
3760 		spin_lock_init(&ul->lock);
3761 	}
3762 
3763 out:
3764 	return ret;
3765 
3766 out_register_late_subsys:
3767 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3768 fib6_rules_init:
3769 	fib6_rules_cleanup();
3770 xfrm6_init:
3771 	xfrm6_fini();
3772 out_fib6_init:
3773 	fib6_gc_cleanup();
3774 out_register_subsys:
3775 	unregister_pernet_subsys(&ip6_route_net_ops);
3776 out_register_inetpeer:
3777 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3778 out_dst_entries:
3779 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3780 out_kmem_cache:
3781 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3782 	goto out;
3783 }
3784 
/*
 * Tear down what ip6_route_init() set up.
 *
 * The calls run in roughly the reverse order of initialisation: the
 * netdevice notifier first, then the late pernet ops, FIB rules, xfrm6,
 * the FIB, the inetpeer and route pernet ops, and finally the blackhole
 * dst counter and the rt6_info slab cache.
 *
 * NOTE(review): the rtnetlink route handlers registered by
 * ip6_route_init() are not unregistered here; presumably a caller does
 * that - confirm.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	/* Destroyed last: the slab cache was the first thing created. */
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
3797