xref: /openbmc/linux/net/ipv6/route.c (revision 372892ec1151c895c7dec362f3246f089690cfc7)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 
65 #include <asm/uaccess.h>
66 
67 #ifdef CONFIG_SYSCTL
68 #include <linux/sysctl.h>
69 #endif
70 
/*
 * Verdicts produced while scoring a route's next hop.  Every failure
 * value is negative so callers can simply test "< 0"; success is positive.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* find_match() skips the route */
	RT6_NUD_FAIL_PROBE	= -2,	/* gateway neighbour is in NUD_FAILED */
	RT6_NUD_FAIL_DO_RR	= -1,	/* scored as 0; asks for round-robin */
	RT6_NUD_SUCCEED		= 1,
};
77 
78 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
79 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
80 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
81 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
82 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
83 static void		ip6_dst_destroy(struct dst_entry *);
84 static void		ip6_dst_ifdown(struct dst_entry *,
85 				       struct net_device *dev, int how);
86 static int		 ip6_dst_gc(struct dst_ops *ops);
87 
88 static int		ip6_pkt_discard(struct sk_buff *skb);
89 static int		ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
90 static int		ip6_pkt_prohibit(struct sk_buff *skb);
91 static int		ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
92 static void		ip6_link_failure(struct sk_buff *skb);
93 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
94 					   struct sk_buff *skb, u32 mtu);
95 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
96 					struct sk_buff *skb);
97 static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
98 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
99 
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Route-information helpers used by rt6_route_rcv(); declared ahead of use. */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
#endif
109 
110 struct uncached_list {
111 	spinlock_t		lock;
112 	struct list_head	head;
113 };
114 
115 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
116 
117 static void rt6_uncached_list_add(struct rt6_info *rt)
118 {
119 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
120 
121 	rt->dst.flags |= DST_NOCACHE;
122 	rt->rt6i_uncached_list = ul;
123 
124 	spin_lock_bh(&ul->lock);
125 	list_add_tail(&rt->rt6i_uncached, &ul->head);
126 	spin_unlock_bh(&ul->lock);
127 }
128 
129 static void rt6_uncached_list_del(struct rt6_info *rt)
130 {
131 	if (!list_empty(&rt->rt6i_uncached)) {
132 		struct uncached_list *ul = rt->rt6i_uncached_list;
133 
134 		spin_lock_bh(&ul->lock);
135 		list_del(&rt->rt6i_uncached);
136 		spin_unlock_bh(&ul->lock);
137 	}
138 }
139 
140 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
141 {
142 	struct net_device *loopback_dev = net->loopback_dev;
143 	int cpu;
144 
145 	for_each_possible_cpu(cpu) {
146 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
147 		struct rt6_info *rt;
148 
149 		spin_lock_bh(&ul->lock);
150 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
151 			struct inet6_dev *rt_idev = rt->rt6i_idev;
152 			struct net_device *rt_dev = rt->dst.dev;
153 
154 			if (rt_idev && (rt_idev->dev == dev || !dev) &&
155 			    rt_idev->dev != loopback_dev) {
156 				rt->rt6i_idev = in6_dev_get(loopback_dev);
157 				in6_dev_put(rt_idev);
158 			}
159 
160 			if (rt_dev && (rt_dev == dev || !dev) &&
161 			    rt_dev != loopback_dev) {
162 				rt->dst.dev = loopback_dev;
163 				dev_hold(rt->dst.dev);
164 				dev_put(rt_dev);
165 			}
166 		}
167 		spin_unlock_bh(&ul->lock);
168 	}
169 }
170 
171 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
172 {
173 	return dst_metrics_write_ptr(rt->dst.from);
174 }
175 
176 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
177 {
178 	struct rt6_info *rt = (struct rt6_info *)dst;
179 
180 	if (rt->rt6i_flags & RTF_PCPU)
181 		return rt6_pcpu_cow_metrics(rt);
182 	else if (rt->rt6i_flags & RTF_CACHE)
183 		return NULL;
184 	else
185 		return dst_cow_metrics_generic(dst, old);
186 }
187 
188 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
189 					     struct sk_buff *skb,
190 					     const void *daddr)
191 {
192 	struct in6_addr *p = &rt->rt6i_gateway;
193 
194 	if (!ipv6_addr_any(p))
195 		return (const void *) p;
196 	else if (skb)
197 		return &ipv6_hdr(skb)->daddr;
198 	return daddr;
199 }
200 
201 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
202 					  struct sk_buff *skb,
203 					  const void *daddr)
204 {
205 	struct rt6_info *rt = (struct rt6_info *) dst;
206 	struct neighbour *n;
207 
208 	daddr = choose_neigh_daddr(rt, skb, daddr);
209 	n = __ipv6_neigh_lookup(dst->dev, daddr);
210 	if (n)
211 		return n;
212 	return neigh_create(&nd_tbl, daddr, dst->dev);
213 }
214 
215 static struct dst_ops ip6_dst_ops_template = {
216 	.family			=	AF_INET6,
217 	.gc			=	ip6_dst_gc,
218 	.gc_thresh		=	1024,
219 	.check			=	ip6_dst_check,
220 	.default_advmss		=	ip6_default_advmss,
221 	.mtu			=	ip6_mtu,
222 	.cow_metrics		=	ipv6_cow_metrics,
223 	.destroy		=	ip6_dst_destroy,
224 	.ifdown			=	ip6_dst_ifdown,
225 	.negative_advice	=	ip6_negative_advice,
226 	.link_failure		=	ip6_link_failure,
227 	.update_pmtu		=	ip6_rt_update_pmtu,
228 	.redirect		=	rt6_do_redirect,
229 	.local_out		=	__ip6_local_out,
230 	.neigh_lookup		=	ip6_neigh_lookup,
231 };
232 
233 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
234 {
235 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
236 
237 	return mtu ? : dst->dev->mtu;
238 }
239 
240 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
241 					 struct sk_buff *skb, u32 mtu)
242 {
243 }
244 
/* Redirects are deliberately ignored on blackhole routes. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
249 
250 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
251 					 unsigned long old)
252 {
253 	return NULL;
254 }
255 
256 static struct dst_ops ip6_dst_blackhole_ops = {
257 	.family			=	AF_INET6,
258 	.destroy		=	ip6_dst_destroy,
259 	.check			=	ip6_dst_check,
260 	.mtu			=	ip6_blackhole_mtu,
261 	.default_advmss		=	ip6_default_advmss,
262 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
263 	.redirect		=	ip6_rt_blackhole_redirect,
264 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
265 	.neigh_lookup		=	ip6_neigh_lookup,
266 };
267 
268 static const u32 ip6_template_metrics[RTAX_MAX] = {
269 	[RTAX_HOPLIMIT - 1] = 0,
270 };
271 
272 static const struct rt6_info ip6_null_entry_template = {
273 	.dst = {
274 		.__refcnt	= ATOMIC_INIT(1),
275 		.__use		= 1,
276 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
277 		.error		= -ENETUNREACH,
278 		.input		= ip6_pkt_discard,
279 		.output		= ip6_pkt_discard_out,
280 	},
281 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
282 	.rt6i_protocol  = RTPROT_KERNEL,
283 	.rt6i_metric	= ~(u32) 0,
284 	.rt6i_ref	= ATOMIC_INIT(1),
285 };
286 
287 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
288 
289 static const struct rt6_info ip6_prohibit_entry_template = {
290 	.dst = {
291 		.__refcnt	= ATOMIC_INIT(1),
292 		.__use		= 1,
293 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
294 		.error		= -EACCES,
295 		.input		= ip6_pkt_prohibit,
296 		.output		= ip6_pkt_prohibit_out,
297 	},
298 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
299 	.rt6i_protocol  = RTPROT_KERNEL,
300 	.rt6i_metric	= ~(u32) 0,
301 	.rt6i_ref	= ATOMIC_INIT(1),
302 };
303 
304 static const struct rt6_info ip6_blk_hole_entry_template = {
305 	.dst = {
306 		.__refcnt	= ATOMIC_INIT(1),
307 		.__use		= 1,
308 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
309 		.error		= -EINVAL,
310 		.input		= dst_discard,
311 		.output		= dst_discard_sk,
312 	},
313 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
314 	.rt6i_protocol  = RTPROT_KERNEL,
315 	.rt6i_metric	= ~(u32) 0,
316 	.rt6i_ref	= ATOMIC_INIT(1),
317 };
318 
319 #endif
320 
321 /* allocate dst with ip6_dst_ops */
322 static struct rt6_info *__ip6_dst_alloc(struct net *net,
323 					struct net_device *dev,
324 					int flags)
325 {
326 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
327 					0, DST_OBSOLETE_FORCE_CHK, flags);
328 
329 	if (rt) {
330 		struct dst_entry *dst = &rt->dst;
331 
332 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
333 		INIT_LIST_HEAD(&rt->rt6i_siblings);
334 		INIT_LIST_HEAD(&rt->rt6i_uncached);
335 	}
336 	return rt;
337 }
338 
339 static struct rt6_info *ip6_dst_alloc(struct net *net,
340 				      struct net_device *dev,
341 				      int flags)
342 {
343 	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
344 
345 	if (rt) {
346 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
347 		if (rt->rt6i_pcpu) {
348 			int cpu;
349 
350 			for_each_possible_cpu(cpu) {
351 				struct rt6_info **p;
352 
353 				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
354 				/* no one shares rt */
355 				*p =  NULL;
356 			}
357 		} else {
358 			dst_destroy((struct dst_entry *)rt);
359 			return NULL;
360 		}
361 	}
362 
363 	return rt;
364 }
365 
366 static void ip6_dst_destroy(struct dst_entry *dst)
367 {
368 	struct rt6_info *rt = (struct rt6_info *)dst;
369 	struct dst_entry *from = dst->from;
370 	struct inet6_dev *idev;
371 
372 	dst_destroy_metrics_generic(dst);
373 	free_percpu(rt->rt6i_pcpu);
374 	rt6_uncached_list_del(rt);
375 
376 	idev = rt->rt6i_idev;
377 	if (idev) {
378 		rt->rt6i_idev = NULL;
379 		in6_dev_put(idev);
380 	}
381 
382 	dst->from = NULL;
383 	dst_release(from);
384 }
385 
386 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
387 			   int how)
388 {
389 	struct rt6_info *rt = (struct rt6_info *)dst;
390 	struct inet6_dev *idev = rt->rt6i_idev;
391 	struct net_device *loopback_dev =
392 		dev_net(dev)->loopback_dev;
393 
394 	if (dev != loopback_dev) {
395 		if (idev && idev->dev == dev) {
396 			struct inet6_dev *loopback_idev =
397 				in6_dev_get(loopback_dev);
398 			if (loopback_idev) {
399 				rt->rt6i_idev = loopback_idev;
400 				in6_dev_put(idev);
401 			}
402 		}
403 	}
404 }
405 
406 static bool rt6_check_expired(const struct rt6_info *rt)
407 {
408 	if (rt->rt6i_flags & RTF_EXPIRES) {
409 		if (time_after(jiffies, rt->dst.expires))
410 			return true;
411 	} else if (rt->dst.from) {
412 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
413 	}
414 	return false;
415 }
416 
/* Multipath route selection:
 *   Map the flow hash of @fl6 onto the range [0, candidate_count).
 * Adapted from fib_info_hashfn()
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	unsigned int hash = get_hash_from_flowi6(fl6);

	return hash % candidate_count;
}
426 
427 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
428 					     struct flowi6 *fl6, int oif,
429 					     int strict)
430 {
431 	struct rt6_info *sibling, *next_sibling;
432 	int route_choosen;
433 
434 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
435 	/* Don't change the route, if route_choosen == 0
436 	 * (siblings does not include ourself)
437 	 */
438 	if (route_choosen)
439 		list_for_each_entry_safe(sibling, next_sibling,
440 				&match->rt6i_siblings, rt6i_siblings) {
441 			route_choosen--;
442 			if (route_choosen == 0) {
443 				if (rt6_score_route(sibling, oif, strict) < 0)
444 					break;
445 				match = sibling;
446 				break;
447 			}
448 		}
449 	return match;
450 }
451 
452 /*
453  *	Route lookup. Any table->tb6_lock is implied.
454  */
455 
456 static inline struct rt6_info *rt6_device_match(struct net *net,
457 						    struct rt6_info *rt,
458 						    const struct in6_addr *saddr,
459 						    int oif,
460 						    int flags)
461 {
462 	struct rt6_info *local = NULL;
463 	struct rt6_info *sprt;
464 
465 	if (!oif && ipv6_addr_any(saddr))
466 		goto out;
467 
468 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
469 		struct net_device *dev = sprt->dst.dev;
470 
471 		if (oif) {
472 			if (dev->ifindex == oif)
473 				return sprt;
474 			if (dev->flags & IFF_LOOPBACK) {
475 				if (!sprt->rt6i_idev ||
476 				    sprt->rt6i_idev->dev->ifindex != oif) {
477 					if (flags & RT6_LOOKUP_F_IFACE && oif)
478 						continue;
479 					if (local && (!oif ||
480 						      local->rt6i_idev->dev->ifindex == oif))
481 						continue;
482 				}
483 				local = sprt;
484 			}
485 		} else {
486 			if (ipv6_chk_addr(net, saddr, dev,
487 					  flags & RT6_LOOKUP_F_IFACE))
488 				return sprt;
489 		}
490 	}
491 
492 	if (oif) {
493 		if (local)
494 			return local;
495 
496 		if (flags & RT6_LOOKUP_F_IFACE)
497 			return net->ipv6.ip6_null_entry;
498 	}
499 out:
500 	return rt;
501 }
502 
503 #ifdef CONFIG_IPV6_ROUTER_PREF
504 struct __rt6_probe_work {
505 	struct work_struct work;
506 	struct in6_addr target;
507 	struct net_device *dev;
508 };
509 
510 static void rt6_probe_deferred(struct work_struct *w)
511 {
512 	struct in6_addr mcaddr;
513 	struct __rt6_probe_work *work =
514 		container_of(w, struct __rt6_probe_work, work);
515 
516 	addrconf_addr_solict_mult(&work->target, &mcaddr);
517 	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, NULL);
518 	dev_put(work->dev);
519 	kfree(work);
520 }
521 
522 static void rt6_probe(struct rt6_info *rt)
523 {
524 	struct __rt6_probe_work *work;
525 	struct neighbour *neigh;
526 	/*
527 	 * Okay, this does not seem to be appropriate
528 	 * for now, however, we need to check if it
529 	 * is really so; aka Router Reachability Probing.
530 	 *
531 	 * Router Reachability Probe MUST be rate-limited
532 	 * to no more than one per minute.
533 	 */
534 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
535 		return;
536 	rcu_read_lock_bh();
537 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
538 	if (neigh) {
539 		if (neigh->nud_state & NUD_VALID)
540 			goto out;
541 
542 		work = NULL;
543 		write_lock(&neigh->lock);
544 		if (!(neigh->nud_state & NUD_VALID) &&
545 		    time_after(jiffies,
546 			       neigh->updated +
547 			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
548 			work = kmalloc(sizeof(*work), GFP_ATOMIC);
549 			if (work)
550 				__neigh_set_probe_once(neigh);
551 		}
552 		write_unlock(&neigh->lock);
553 	} else {
554 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
555 	}
556 
557 	if (work) {
558 		INIT_WORK(&work->work, rt6_probe_deferred);
559 		work->target = rt->rt6i_gateway;
560 		dev_hold(rt->dst.dev);
561 		work->dev = rt->dst.dev;
562 		schedule_work(&work->work);
563 	}
564 
565 out:
566 	rcu_read_unlock_bh();
567 }
568 #else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* nothing to do */
}
572 #endif
573 
574 /*
575  * Default Router Selection (RFC 2461 6.3.6)
576  */
577 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
578 {
579 	struct net_device *dev = rt->dst.dev;
580 	if (!oif || dev->ifindex == oif)
581 		return 2;
582 	if ((dev->flags & IFF_LOOPBACK) &&
583 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
584 		return 1;
585 	return 0;
586 }
587 
588 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
589 {
590 	struct neighbour *neigh;
591 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
592 
593 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
594 	    !(rt->rt6i_flags & RTF_GATEWAY))
595 		return RT6_NUD_SUCCEED;
596 
597 	rcu_read_lock_bh();
598 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
599 	if (neigh) {
600 		read_lock(&neigh->lock);
601 		if (neigh->nud_state & NUD_VALID)
602 			ret = RT6_NUD_SUCCEED;
603 #ifdef CONFIG_IPV6_ROUTER_PREF
604 		else if (!(neigh->nud_state & NUD_FAILED))
605 			ret = RT6_NUD_SUCCEED;
606 		else
607 			ret = RT6_NUD_FAIL_PROBE;
608 #endif
609 		read_unlock(&neigh->lock);
610 	} else {
611 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
612 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
613 	}
614 	rcu_read_unlock_bh();
615 
616 	return ret;
617 }
618 
619 static int rt6_score_route(struct rt6_info *rt, int oif,
620 			   int strict)
621 {
622 	int m;
623 
624 	m = rt6_check_dev(rt, oif);
625 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
626 		return RT6_NUD_FAIL_HARD;
627 #ifdef CONFIG_IPV6_ROUTER_PREF
628 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
629 #endif
630 	if (strict & RT6_LOOKUP_F_REACHABLE) {
631 		int n = rt6_check_neigh(rt);
632 		if (n < 0)
633 			return n;
634 	}
635 	return m;
636 }
637 
638 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
639 				   int *mpri, struct rt6_info *match,
640 				   bool *do_rr)
641 {
642 	int m;
643 	bool match_do_rr = false;
644 	struct inet6_dev *idev = rt->rt6i_idev;
645 	struct net_device *dev = rt->dst.dev;
646 
647 	if (dev && !netif_carrier_ok(dev) &&
648 	    idev->cnf.ignore_routes_with_linkdown)
649 		goto out;
650 
651 	if (rt6_check_expired(rt))
652 		goto out;
653 
654 	m = rt6_score_route(rt, oif, strict);
655 	if (m == RT6_NUD_FAIL_DO_RR) {
656 		match_do_rr = true;
657 		m = 0; /* lowest valid score */
658 	} else if (m == RT6_NUD_FAIL_HARD) {
659 		goto out;
660 	}
661 
662 	if (strict & RT6_LOOKUP_F_REACHABLE)
663 		rt6_probe(rt);
664 
665 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
666 	if (m > *mpri) {
667 		*do_rr = match_do_rr;
668 		*mpri = m;
669 		match = rt;
670 	}
671 out:
672 	return match;
673 }
674 
675 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
676 				     struct rt6_info *rr_head,
677 				     u32 metric, int oif, int strict,
678 				     bool *do_rr)
679 {
680 	struct rt6_info *rt, *match, *cont;
681 	int mpri = -1;
682 
683 	match = NULL;
684 	cont = NULL;
685 	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
686 		if (rt->rt6i_metric != metric) {
687 			cont = rt;
688 			break;
689 		}
690 
691 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
692 	}
693 
694 	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
695 		if (rt->rt6i_metric != metric) {
696 			cont = rt;
697 			break;
698 		}
699 
700 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
701 	}
702 
703 	if (match || !cont)
704 		return match;
705 
706 	for (rt = cont; rt; rt = rt->dst.rt6_next)
707 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
708 
709 	return match;
710 }
711 
712 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
713 {
714 	struct rt6_info *match, *rt0;
715 	struct net *net;
716 	bool do_rr = false;
717 
718 	rt0 = fn->rr_ptr;
719 	if (!rt0)
720 		fn->rr_ptr = rt0 = fn->leaf;
721 
722 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
723 			     &do_rr);
724 
725 	if (do_rr) {
726 		struct rt6_info *next = rt0->dst.rt6_next;
727 
728 		/* no entries matched; do round-robin */
729 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
730 			next = fn->leaf;
731 
732 		if (next != rt0)
733 			fn->rr_ptr = next;
734 	}
735 
736 	net = dev_net(rt0->dst.dev);
737 	return match ? match : net->ipv6.ip6_null_entry;
738 }
739 
740 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
741 {
742 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
743 }
744 
745 #ifdef CONFIG_IPV6_ROUTE_INFO
746 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
747 		  const struct in6_addr *gwaddr)
748 {
749 	struct net *net = dev_net(dev);
750 	struct route_info *rinfo = (struct route_info *) opt;
751 	struct in6_addr prefix_buf, *prefix;
752 	unsigned int pref;
753 	unsigned long lifetime;
754 	struct rt6_info *rt;
755 
756 	if (len < sizeof(struct route_info)) {
757 		return -EINVAL;
758 	}
759 
760 	/* Sanity check for prefix_len and length */
761 	if (rinfo->length > 3) {
762 		return -EINVAL;
763 	} else if (rinfo->prefix_len > 128) {
764 		return -EINVAL;
765 	} else if (rinfo->prefix_len > 64) {
766 		if (rinfo->length < 2) {
767 			return -EINVAL;
768 		}
769 	} else if (rinfo->prefix_len > 0) {
770 		if (rinfo->length < 1) {
771 			return -EINVAL;
772 		}
773 	}
774 
775 	pref = rinfo->route_pref;
776 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
777 		return -EINVAL;
778 
779 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
780 
781 	if (rinfo->length == 3)
782 		prefix = (struct in6_addr *)rinfo->prefix;
783 	else {
784 		/* this function is safe */
785 		ipv6_addr_prefix(&prefix_buf,
786 				 (struct in6_addr *)rinfo->prefix,
787 				 rinfo->prefix_len);
788 		prefix = &prefix_buf;
789 	}
790 
791 	if (rinfo->prefix_len == 0)
792 		rt = rt6_get_dflt_router(gwaddr, dev);
793 	else
794 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
795 					gwaddr, dev->ifindex);
796 
797 	if (rt && !lifetime) {
798 		ip6_del_rt(rt);
799 		rt = NULL;
800 	}
801 
802 	if (!rt && lifetime)
803 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
804 					pref);
805 	else if (rt)
806 		rt->rt6i_flags = RTF_ROUTEINFO |
807 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
808 
809 	if (rt) {
810 		if (!addrconf_finite_timeout(lifetime))
811 			rt6_clean_expires(rt);
812 		else
813 			rt6_set_expires(rt, jiffies + HZ * lifetime);
814 
815 		ip6_rt_put(rt);
816 	}
817 	return 0;
818 }
819 #endif
820 
821 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
822 					struct in6_addr *saddr)
823 {
824 	struct fib6_node *pn;
825 	while (1) {
826 		if (fn->fn_flags & RTN_TL_ROOT)
827 			return NULL;
828 		pn = fn->parent;
829 		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
830 			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
831 		else
832 			fn = pn;
833 		if (fn->fn_flags & RTN_RTINFO)
834 			return fn;
835 	}
836 }
837 
838 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
839 					     struct fib6_table *table,
840 					     struct flowi6 *fl6, int flags)
841 {
842 	struct fib6_node *fn;
843 	struct rt6_info *rt;
844 
845 	read_lock_bh(&table->tb6_lock);
846 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
847 restart:
848 	rt = fn->leaf;
849 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
850 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
851 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
852 	if (rt == net->ipv6.ip6_null_entry) {
853 		fn = fib6_backtrack(fn, &fl6->saddr);
854 		if (fn)
855 			goto restart;
856 	}
857 	dst_use(&rt->dst, jiffies);
858 	read_unlock_bh(&table->tb6_lock);
859 	return rt;
860 
861 }
862 
863 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
864 				    int flags)
865 {
866 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
867 }
868 EXPORT_SYMBOL_GPL(ip6_route_lookup);
869 
870 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
871 			    const struct in6_addr *saddr, int oif, int strict)
872 {
873 	struct flowi6 fl6 = {
874 		.flowi6_oif = oif,
875 		.daddr = *daddr,
876 	};
877 	struct dst_entry *dst;
878 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
879 
880 	if (saddr) {
881 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
882 		flags |= RT6_LOOKUP_F_HAS_SADDR;
883 	}
884 
885 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
886 	if (dst->error == 0)
887 		return (struct rt6_info *) dst;
888 
889 	dst_release(dst);
890 
891 	return NULL;
892 }
893 EXPORT_SYMBOL(rt6_lookup);
894 
895 /* ip6_ins_rt is called with FREE table->tb6_lock.
896    It takes new route entry, the addition fails by any reason the
897    route is freed. In any case, if caller does not hold it, it may
898    be destroyed.
899  */
900 
901 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
902 			struct mx6_config *mxc)
903 {
904 	int err;
905 	struct fib6_table *table;
906 
907 	table = rt->rt6i_table;
908 	write_lock_bh(&table->tb6_lock);
909 	err = fib6_add(&table->tb6_root, rt, info, mxc);
910 	write_unlock_bh(&table->tb6_lock);
911 
912 	return err;
913 }
914 
915 int ip6_ins_rt(struct rt6_info *rt)
916 {
917 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
918 	struct mx6_config mxc = { .mx = NULL, };
919 
920 	return __ip6_ins_rt(rt, &info, &mxc);
921 }
922 
923 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
924 					   const struct in6_addr *daddr,
925 					   const struct in6_addr *saddr)
926 {
927 	struct rt6_info *rt;
928 
929 	/*
930 	 *	Clone the route.
931 	 */
932 
933 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
934 		ort = (struct rt6_info *)ort->dst.from;
935 
936 	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
937 
938 	if (!rt)
939 		return NULL;
940 
941 	ip6_rt_copy_init(rt, ort);
942 	rt->rt6i_flags |= RTF_CACHE;
943 	rt->rt6i_metric = 0;
944 	rt->dst.flags |= DST_HOST;
945 	rt->rt6i_dst.addr = *daddr;
946 	rt->rt6i_dst.plen = 128;
947 
948 	if (!rt6_is_gw_or_nonexthop(ort)) {
949 		if (ort->rt6i_dst.plen != 128 &&
950 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
951 			rt->rt6i_flags |= RTF_ANYCAST;
952 #ifdef CONFIG_IPV6_SUBTREES
953 		if (rt->rt6i_src.plen && saddr) {
954 			rt->rt6i_src.addr = *saddr;
955 			rt->rt6i_src.plen = 128;
956 		}
957 #endif
958 	}
959 
960 	return rt;
961 }
962 
963 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
964 {
965 	struct rt6_info *pcpu_rt;
966 
967 	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
968 				  rt->dst.dev, rt->dst.flags);
969 
970 	if (!pcpu_rt)
971 		return NULL;
972 	ip6_rt_copy_init(pcpu_rt, rt);
973 	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
974 	pcpu_rt->rt6i_flags |= RTF_PCPU;
975 	return pcpu_rt;
976 }
977 
978 /* It should be called with read_lock_bh(&tb6_lock) acquired */
979 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
980 {
981 	struct rt6_info *pcpu_rt, **p;
982 
983 	p = this_cpu_ptr(rt->rt6i_pcpu);
984 	pcpu_rt = *p;
985 
986 	if (pcpu_rt) {
987 		dst_hold(&pcpu_rt->dst);
988 		rt6_dst_from_metrics_check(pcpu_rt);
989 	}
990 	return pcpu_rt;
991 }
992 
993 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
994 {
995 	struct fib6_table *table = rt->rt6i_table;
996 	struct rt6_info *pcpu_rt, *prev, **p;
997 
998 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
999 	if (!pcpu_rt) {
1000 		struct net *net = dev_net(rt->dst.dev);
1001 
1002 		dst_hold(&net->ipv6.ip6_null_entry->dst);
1003 		return net->ipv6.ip6_null_entry;
1004 	}
1005 
1006 	read_lock_bh(&table->tb6_lock);
1007 	if (rt->rt6i_pcpu) {
1008 		p = this_cpu_ptr(rt->rt6i_pcpu);
1009 		prev = cmpxchg(p, NULL, pcpu_rt);
1010 		if (prev) {
1011 			/* If someone did it before us, return prev instead */
1012 			dst_destroy(&pcpu_rt->dst);
1013 			pcpu_rt = prev;
1014 		}
1015 	} else {
1016 		/* rt has been removed from the fib6 tree
1017 		 * before we have a chance to acquire the read_lock.
1018 		 * In this case, don't brother to create a pcpu rt
1019 		 * since rt is going away anyway.  The next
1020 		 * dst_check() will trigger a re-lookup.
1021 		 */
1022 		dst_destroy(&pcpu_rt->dst);
1023 		pcpu_rt = rt;
1024 	}
1025 	dst_hold(&pcpu_rt->dst);
1026 	rt6_dst_from_metrics_check(pcpu_rt);
1027 	read_unlock_bh(&table->tb6_lock);
1028 	return pcpu_rt;
1029 }
1030 
1031 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
1032 				      struct flowi6 *fl6, int flags)
1033 {
1034 	struct fib6_node *fn, *saved_fn;
1035 	struct rt6_info *rt;
1036 	int strict = 0;
1037 
1038 	strict |= flags & RT6_LOOKUP_F_IFACE;
1039 	if (net->ipv6.devconf_all->forwarding == 0)
1040 		strict |= RT6_LOOKUP_F_REACHABLE;
1041 
1042 	read_lock_bh(&table->tb6_lock);
1043 
1044 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1045 	saved_fn = fn;
1046 
1047 redo_rt6_select:
1048 	rt = rt6_select(fn, oif, strict);
1049 	if (rt->rt6i_nsiblings)
1050 		rt = rt6_multipath_select(rt, fl6, oif, strict);
1051 	if (rt == net->ipv6.ip6_null_entry) {
1052 		fn = fib6_backtrack(fn, &fl6->saddr);
1053 		if (fn)
1054 			goto redo_rt6_select;
1055 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1056 			/* also consider unreachable route */
1057 			strict &= ~RT6_LOOKUP_F_REACHABLE;
1058 			fn = saved_fn;
1059 			goto redo_rt6_select;
1060 		}
1061 	}
1062 
1063 
1064 	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1065 		dst_use(&rt->dst, jiffies);
1066 		read_unlock_bh(&table->tb6_lock);
1067 
1068 		rt6_dst_from_metrics_check(rt);
1069 		return rt;
1070 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1071 			    !(rt->rt6i_flags & RTF_GATEWAY))) {
1072 		/* Create a RTF_CACHE clone which will not be
1073 		 * owned by the fib6 tree.  It is for the special case where
1074 		 * the daddr in the skb during the neighbor look-up is different
1075 		 * from the fl6->daddr used to look-up route here.
1076 		 */
1077 
1078 		struct rt6_info *uncached_rt;
1079 
1080 		dst_use(&rt->dst, jiffies);
1081 		read_unlock_bh(&table->tb6_lock);
1082 
1083 		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1084 		dst_release(&rt->dst);
1085 
1086 		if (uncached_rt)
1087 			rt6_uncached_list_add(uncached_rt);
1088 		else
1089 			uncached_rt = net->ipv6.ip6_null_entry;
1090 
1091 		dst_hold(&uncached_rt->dst);
1092 		return uncached_rt;
1093 
1094 	} else {
1095 		/* Get a percpu copy */
1096 
1097 		struct rt6_info *pcpu_rt;
1098 
1099 		rt->dst.lastuse = jiffies;
1100 		rt->dst.__use++;
1101 		pcpu_rt = rt6_get_pcpu_route(rt);
1102 
1103 		if (pcpu_rt) {
1104 			read_unlock_bh(&table->tb6_lock);
1105 		} else {
1106 			/* We have to do the read_unlock first
1107 			 * because rt6_make_pcpu_route() may trigger
1108 			 * ip6_dst_gc() which will take the write_lock.
1109 			 */
1110 			dst_hold(&rt->dst);
1111 			read_unlock_bh(&table->tb6_lock);
1112 			pcpu_rt = rt6_make_pcpu_route(rt);
1113 			dst_release(&rt->dst);
1114 		}
1115 
1116 		return pcpu_rt;
1117 
1118 	}
1119 }
1120 
1121 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1122 					    struct flowi6 *fl6, int flags)
1123 {
1124 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1125 }
1126 
1127 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1128 						struct net_device *dev,
1129 						struct flowi6 *fl6, int flags)
1130 {
1131 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1132 		flags |= RT6_LOOKUP_F_IFACE;
1133 
1134 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1135 }
1136 
1137 void ip6_route_input(struct sk_buff *skb)
1138 {
1139 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1140 	struct net *net = dev_net(skb->dev);
1141 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1142 	struct ip_tunnel_info *tun_info;
1143 	struct flowi6 fl6 = {
1144 		.flowi6_iif = skb->dev->ifindex,
1145 		.daddr = iph->daddr,
1146 		.saddr = iph->saddr,
1147 		.flowlabel = ip6_flowinfo(iph),
1148 		.flowi6_mark = skb->mark,
1149 		.flowi6_proto = iph->nexthdr,
1150 	};
1151 
1152 	tun_info = skb_tunnel_info(skb);
1153 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1154 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1155 	skb_dst_drop(skb);
1156 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1157 }
1158 
1159 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1160 					     struct flowi6 *fl6, int flags)
1161 {
1162 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1163 }
1164 
1165 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1166 				    struct flowi6 *fl6)
1167 {
1168 	int flags = 0;
1169 
1170 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1171 
1172 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1173 		flags |= RT6_LOOKUP_F_IFACE;
1174 
1175 	if (!ipv6_addr_any(&fl6->saddr))
1176 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1177 	else if (sk)
1178 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1179 
1180 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1181 }
1182 EXPORT_SYMBOL(ip6_route_output);
1183 
1184 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1185 {
1186 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1187 	struct dst_entry *new = NULL;
1188 
1189 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1190 	if (rt) {
1191 		new = &rt->dst;
1192 
1193 		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1194 
1195 		new->__use = 1;
1196 		new->input = dst_discard;
1197 		new->output = dst_discard_sk;
1198 
1199 		if (dst_metrics_read_only(&ort->dst))
1200 			new->_metrics = ort->dst._metrics;
1201 		else
1202 			dst_copy_metrics(new, &ort->dst);
1203 		rt->rt6i_idev = ort->rt6i_idev;
1204 		if (rt->rt6i_idev)
1205 			in6_dev_hold(rt->rt6i_idev);
1206 
1207 		rt->rt6i_gateway = ort->rt6i_gateway;
1208 		rt->rt6i_flags = ort->rt6i_flags;
1209 		rt->rt6i_metric = 0;
1210 
1211 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1212 #ifdef CONFIG_IPV6_SUBTREES
1213 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1214 #endif
1215 
1216 		dst_free(new);
1217 	}
1218 
1219 	dst_release(dst_orig);
1220 	return new ? new : ERR_PTR(-ENOMEM);
1221 }
1222 
1223 /*
1224  *	Destination cache support functions
1225  */
1226 
1227 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1228 {
1229 	if (rt->dst.from &&
1230 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1231 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1232 }
1233 
1234 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1235 {
1236 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1237 		return NULL;
1238 
1239 	if (rt6_check_expired(rt))
1240 		return NULL;
1241 
1242 	return &rt->dst;
1243 }
1244 
1245 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1246 {
1247 	if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1248 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1249 		return &rt->dst;
1250 	else
1251 		return NULL;
1252 }
1253 
1254 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1255 {
1256 	struct rt6_info *rt;
1257 
1258 	rt = (struct rt6_info *) dst;
1259 
1260 	/* All IPV6 dsts are created with ->obsolete set to the value
1261 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1262 	 * into this function always.
1263 	 */
1264 
1265 	rt6_dst_from_metrics_check(rt);
1266 
1267 	if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
1268 		return rt6_dst_from_check(rt, cookie);
1269 	else
1270 		return rt6_check(rt, cookie);
1271 }
1272 
1273 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1274 {
1275 	struct rt6_info *rt = (struct rt6_info *) dst;
1276 
1277 	if (rt) {
1278 		if (rt->rt6i_flags & RTF_CACHE) {
1279 			if (rt6_check_expired(rt)) {
1280 				ip6_del_rt(rt);
1281 				dst = NULL;
1282 			}
1283 		} else {
1284 			dst_release(dst);
1285 			dst = NULL;
1286 		}
1287 	}
1288 	return dst;
1289 }
1290 
1291 static void ip6_link_failure(struct sk_buff *skb)
1292 {
1293 	struct rt6_info *rt;
1294 
1295 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1296 
1297 	rt = (struct rt6_info *) skb_dst(skb);
1298 	if (rt) {
1299 		if (rt->rt6i_flags & RTF_CACHE) {
1300 			dst_hold(&rt->dst);
1301 			if (ip6_del_rt(rt))
1302 				dst_free(&rt->dst);
1303 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1304 			rt->rt6i_node->fn_sernum = -1;
1305 		}
1306 	}
1307 }
1308 
1309 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1310 {
1311 	struct net *net = dev_net(rt->dst.dev);
1312 
1313 	rt->rt6i_flags |= RTF_MODIFIED;
1314 	rt->rt6i_pmtu = mtu;
1315 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1316 }
1317 
1318 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1319 				 const struct ipv6hdr *iph, u32 mtu)
1320 {
1321 	struct rt6_info *rt6 = (struct rt6_info *)dst;
1322 
1323 	if (rt6->rt6i_flags & RTF_LOCAL)
1324 		return;
1325 
1326 	dst_confirm(dst);
1327 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1328 	if (mtu >= dst_mtu(dst))
1329 		return;
1330 
1331 	if (rt6->rt6i_flags & RTF_CACHE) {
1332 		rt6_do_update_pmtu(rt6, mtu);
1333 	} else {
1334 		const struct in6_addr *daddr, *saddr;
1335 		struct rt6_info *nrt6;
1336 
1337 		if (iph) {
1338 			daddr = &iph->daddr;
1339 			saddr = &iph->saddr;
1340 		} else if (sk) {
1341 			daddr = &sk->sk_v6_daddr;
1342 			saddr = &inet6_sk(sk)->saddr;
1343 		} else {
1344 			return;
1345 		}
1346 		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1347 		if (nrt6) {
1348 			rt6_do_update_pmtu(nrt6, mtu);
1349 
1350 			/* ip6_ins_rt(nrt6) will bump the
1351 			 * rt6->rt6i_node->fn_sernum
1352 			 * which will fail the next rt6_check() and
1353 			 * invalidate the sk->sk_dst_cache.
1354 			 */
1355 			ip6_ins_rt(nrt6);
1356 		}
1357 	}
1358 }
1359 
1360 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1361 			       struct sk_buff *skb, u32 mtu)
1362 {
1363 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1364 }
1365 
1366 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1367 		     int oif, u32 mark)
1368 {
1369 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1370 	struct dst_entry *dst;
1371 	struct flowi6 fl6;
1372 
1373 	memset(&fl6, 0, sizeof(fl6));
1374 	fl6.flowi6_oif = oif;
1375 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1376 	fl6.daddr = iph->daddr;
1377 	fl6.saddr = iph->saddr;
1378 	fl6.flowlabel = ip6_flowinfo(iph);
1379 
1380 	dst = ip6_route_output(net, NULL, &fl6);
1381 	if (!dst->error)
1382 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1383 	dst_release(dst);
1384 }
1385 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1386 
1387 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1388 {
1389 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1390 			sk->sk_bound_dev_if, sk->sk_mark);
1391 }
1392 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1393 
/* Handle redirects */

/* Flow key used for redirect lookups: an ordinary flowi6 extended with
 * the address of the router that sent the redirect.
 *
 * @fl6 must stay the first member: __ip6_route_redirect() receives a
 * pointer to it and casts that pointer back to struct ip6rd_flowi.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;		/* regular flow description */
	struct in6_addr gateway;	/* source address of the redirect */
};
1399 
1400 static struct rt6_info *__ip6_route_redirect(struct net *net,
1401 					     struct fib6_table *table,
1402 					     struct flowi6 *fl6,
1403 					     int flags)
1404 {
1405 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1406 	struct rt6_info *rt;
1407 	struct fib6_node *fn;
1408 
1409 	/* Get the "current" route for this destination and
1410 	 * check if the redirect has come from approriate router.
1411 	 *
1412 	 * RFC 4861 specifies that redirects should only be
1413 	 * accepted if they come from the nexthop to the target.
1414 	 * Due to the way the routes are chosen, this notion
1415 	 * is a bit fuzzy and one might need to check all possible
1416 	 * routes.
1417 	 */
1418 
1419 	read_lock_bh(&table->tb6_lock);
1420 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1421 restart:
1422 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1423 		if (rt6_check_expired(rt))
1424 			continue;
1425 		if (rt->dst.error)
1426 			break;
1427 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1428 			continue;
1429 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1430 			continue;
1431 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1432 			continue;
1433 		break;
1434 	}
1435 
1436 	if (!rt)
1437 		rt = net->ipv6.ip6_null_entry;
1438 	else if (rt->dst.error) {
1439 		rt = net->ipv6.ip6_null_entry;
1440 		goto out;
1441 	}
1442 
1443 	if (rt == net->ipv6.ip6_null_entry) {
1444 		fn = fib6_backtrack(fn, &fl6->saddr);
1445 		if (fn)
1446 			goto restart;
1447 	}
1448 
1449 out:
1450 	dst_hold(&rt->dst);
1451 
1452 	read_unlock_bh(&table->tb6_lock);
1453 
1454 	return rt;
1455 };
1456 
1457 static struct dst_entry *ip6_route_redirect(struct net *net,
1458 					const struct flowi6 *fl6,
1459 					const struct in6_addr *gateway)
1460 {
1461 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1462 	struct ip6rd_flowi rdfl;
1463 
1464 	rdfl.fl6 = *fl6;
1465 	rdfl.gateway = *gateway;
1466 
1467 	return fib6_rule_lookup(net, &rdfl.fl6,
1468 				flags, __ip6_route_redirect);
1469 }
1470 
1471 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1472 {
1473 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1474 	struct dst_entry *dst;
1475 	struct flowi6 fl6;
1476 
1477 	memset(&fl6, 0, sizeof(fl6));
1478 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1479 	fl6.flowi6_oif = oif;
1480 	fl6.flowi6_mark = mark;
1481 	fl6.daddr = iph->daddr;
1482 	fl6.saddr = iph->saddr;
1483 	fl6.flowlabel = ip6_flowinfo(iph);
1484 
1485 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1486 	rt6_do_redirect(dst, NULL, skb);
1487 	dst_release(dst);
1488 }
1489 EXPORT_SYMBOL_GPL(ip6_redirect);
1490 
1491 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1492 			    u32 mark)
1493 {
1494 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1495 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1496 	struct dst_entry *dst;
1497 	struct flowi6 fl6;
1498 
1499 	memset(&fl6, 0, sizeof(fl6));
1500 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1501 	fl6.flowi6_oif = oif;
1502 	fl6.flowi6_mark = mark;
1503 	fl6.daddr = msg->dest;
1504 	fl6.saddr = iph->daddr;
1505 
1506 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1507 	rt6_do_redirect(dst, NULL, skb);
1508 	dst_release(dst);
1509 }
1510 
1511 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1512 {
1513 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1514 }
1515 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1516 
1517 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1518 {
1519 	struct net_device *dev = dst->dev;
1520 	unsigned int mtu = dst_mtu(dst);
1521 	struct net *net = dev_net(dev);
1522 
1523 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1524 
1525 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1526 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1527 
1528 	/*
1529 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1530 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1531 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1532 	 * rely only on pmtu discovery"
1533 	 */
1534 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1535 		mtu = IPV6_MAXPLEN;
1536 	return mtu;
1537 }
1538 
1539 static unsigned int ip6_mtu(const struct dst_entry *dst)
1540 {
1541 	const struct rt6_info *rt = (const struct rt6_info *)dst;
1542 	unsigned int mtu = rt->rt6i_pmtu;
1543 	struct inet6_dev *idev;
1544 
1545 	if (mtu)
1546 		goto out;
1547 
1548 	mtu = dst_metric_raw(dst, RTAX_MTU);
1549 	if (mtu)
1550 		goto out;
1551 
1552 	mtu = IPV6_MIN_MTU;
1553 
1554 	rcu_read_lock();
1555 	idev = __in6_dev_get(dst->dev);
1556 	if (idev)
1557 		mtu = idev->cnf.mtu6;
1558 	rcu_read_unlock();
1559 
1560 out:
1561 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1562 }
1563 
/* Singly linked list (chained through dst->next) of the dst entries
 * created by icmp6_dst_alloc().  icmp6_dst_gc() and icmp6_clean_all()
 * unlink and free entries from it.
 */
static struct dst_entry *icmp6_dst_gc_list;
/* Protects icmp6_dst_gc_list; always taken with bottom halves disabled. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
1566 
1567 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1568 				  struct flowi6 *fl6)
1569 {
1570 	struct dst_entry *dst;
1571 	struct rt6_info *rt;
1572 	struct inet6_dev *idev = in6_dev_get(dev);
1573 	struct net *net = dev_net(dev);
1574 
1575 	if (unlikely(!idev))
1576 		return ERR_PTR(-ENODEV);
1577 
1578 	rt = ip6_dst_alloc(net, dev, 0);
1579 	if (unlikely(!rt)) {
1580 		in6_dev_put(idev);
1581 		dst = ERR_PTR(-ENOMEM);
1582 		goto out;
1583 	}
1584 
1585 	rt->dst.flags |= DST_HOST;
1586 	rt->dst.output  = ip6_output;
1587 	atomic_set(&rt->dst.__refcnt, 1);
1588 	rt->rt6i_gateway  = fl6->daddr;
1589 	rt->rt6i_dst.addr = fl6->daddr;
1590 	rt->rt6i_dst.plen = 128;
1591 	rt->rt6i_idev     = idev;
1592 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1593 
1594 	spin_lock_bh(&icmp6_dst_lock);
1595 	rt->dst.next = icmp6_dst_gc_list;
1596 	icmp6_dst_gc_list = &rt->dst;
1597 	spin_unlock_bh(&icmp6_dst_lock);
1598 
1599 	fib6_force_start_gc(net);
1600 
1601 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1602 
1603 out:
1604 	return dst;
1605 }
1606 
1607 int icmp6_dst_gc(void)
1608 {
1609 	struct dst_entry *dst, **pprev;
1610 	int more = 0;
1611 
1612 	spin_lock_bh(&icmp6_dst_lock);
1613 	pprev = &icmp6_dst_gc_list;
1614 
1615 	while ((dst = *pprev) != NULL) {
1616 		if (!atomic_read(&dst->__refcnt)) {
1617 			*pprev = dst->next;
1618 			dst_free(dst);
1619 		} else {
1620 			pprev = &dst->next;
1621 			++more;
1622 		}
1623 	}
1624 
1625 	spin_unlock_bh(&icmp6_dst_lock);
1626 
1627 	return more;
1628 }
1629 
1630 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1631 			    void *arg)
1632 {
1633 	struct dst_entry *dst, **pprev;
1634 
1635 	spin_lock_bh(&icmp6_dst_lock);
1636 	pprev = &icmp6_dst_gc_list;
1637 	while ((dst = *pprev) != NULL) {
1638 		struct rt6_info *rt = (struct rt6_info *) dst;
1639 		if (func(rt, arg)) {
1640 			*pprev = dst->next;
1641 			dst_free(dst);
1642 		} else {
1643 			pprev = &dst->next;
1644 		}
1645 	}
1646 	spin_unlock_bh(&icmp6_dst_lock);
1647 }
1648 
1649 static int ip6_dst_gc(struct dst_ops *ops)
1650 {
1651 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1652 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1653 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1654 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1655 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1656 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1657 	int entries;
1658 
1659 	entries = dst_entries_get_fast(ops);
1660 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1661 	    entries <= rt_max_size)
1662 		goto out;
1663 
1664 	net->ipv6.ip6_rt_gc_expire++;
1665 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1666 	entries = dst_entries_get_slow(ops);
1667 	if (entries < ops->gc_thresh)
1668 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1669 out:
1670 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1671 	return entries > rt_max_size;
1672 }
1673 
1674 static int ip6_convert_metrics(struct mx6_config *mxc,
1675 			       const struct fib6_config *cfg)
1676 {
1677 	bool ecn_ca = false;
1678 	struct nlattr *nla;
1679 	int remaining;
1680 	u32 *mp;
1681 
1682 	if (!cfg->fc_mx)
1683 		return 0;
1684 
1685 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1686 	if (unlikely(!mp))
1687 		return -ENOMEM;
1688 
1689 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1690 		int type = nla_type(nla);
1691 		u32 val;
1692 
1693 		if (!type)
1694 			continue;
1695 		if (unlikely(type > RTAX_MAX))
1696 			goto err;
1697 
1698 		if (type == RTAX_CC_ALGO) {
1699 			char tmp[TCP_CA_NAME_MAX];
1700 
1701 			nla_strlcpy(tmp, nla, sizeof(tmp));
1702 			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1703 			if (val == TCP_CA_UNSPEC)
1704 				goto err;
1705 		} else {
1706 			val = nla_get_u32(nla);
1707 		}
1708 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1709 			goto err;
1710 
1711 		mp[type - 1] = val;
1712 		__set_bit(type - 1, mxc->mx_valid);
1713 	}
1714 
1715 	if (ecn_ca) {
1716 		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1717 		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1718 	}
1719 
1720 	mxc->mx = mp;
1721 	return 0;
1722  err:
1723 	kfree(mp);
1724 	return -EINVAL;
1725 }
1726 
/* Build, but do not insert, a route described by @cfg.
 *
 * On success 0 is returned and *@rt_ret points to the new rt6_info,
 * which then owns the device and inet6_dev references taken here.  On
 * failure a negative errno is returned, *@rt_ret is set to NULL and
 * every reference and allocation made along the way is released.
 *
 * Side effects on @cfg: a zero fc_metric becomes IP6_RT_PRIO_USER, an
 * RTPROT_UNSPEC fc_protocol becomes RTPROT_BOOT, and fc_nlinfo.nl_net is
 * set to the namespace of the device finally used.
 */
int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* a source prefix is only accepted when subtrees are compiled in */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	/* An explicit interface must exist and have IPv6 state; both
	 * lookups take a reference that is dropped at "out" on failure.
	 */
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	/* Without NLM_F_CREATE an existing table is preferred; a missing
	 * one is still created, after a warning.
	 */
	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	/* only RTF_ADDRCONF routes are allocated without DST_NOCOUNT */
	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* choose the input handler from the destination type / flags */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	/* Optional lightweight-tunnel encapsulation: build its state and,
	 * where the tunnel asks for it, route output and/or input through
	 * the lwtunnel handlers while remembering the original ones.
	 */
	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* the route type selects the error code and the handlers */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_sk;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0))
			goto out;

		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			/* the gateway itself must be reachable ... */
			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				/* ... through the requested device ... */
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* no device given: adopt the one (and the
				 * idev) of the route to the gateway
				 */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* ... and not via yet another gateway */
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	/* a preferred source address must be an address of this device */
	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* hand the dev/idev references over to the route */
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	*rt_ret = rt;

	return 0;
out:
	/* failure: drop whatever was acquired so far */
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);

	*rt_ret = NULL;

	return err;
}
1974 
1975 int ip6_route_add(struct fib6_config *cfg)
1976 {
1977 	struct mx6_config mxc = { .mx = NULL, };
1978 	struct rt6_info *rt = NULL;
1979 	int err;
1980 
1981 	err = ip6_route_info_create(cfg, &rt);
1982 	if (err)
1983 		goto out;
1984 
1985 	err = ip6_convert_metrics(&mxc, cfg);
1986 	if (err)
1987 		goto out;
1988 
1989 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1990 
1991 	kfree(mxc.mx);
1992 
1993 	return err;
1994 out:
1995 	if (rt)
1996 		dst_free(&rt->dst);
1997 
1998 	return err;
1999 }
2000 
2001 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2002 {
2003 	int err;
2004 	struct fib6_table *table;
2005 	struct net *net = dev_net(rt->dst.dev);
2006 
2007 	if (rt == net->ipv6.ip6_null_entry) {
2008 		err = -ENOENT;
2009 		goto out;
2010 	}
2011 
2012 	table = rt->rt6i_table;
2013 	write_lock_bh(&table->tb6_lock);
2014 	err = fib6_del(rt, info);
2015 	write_unlock_bh(&table->tb6_lock);
2016 
2017 out:
2018 	ip6_rt_put(rt);
2019 	return err;
2020 }
2021 
2022 int ip6_del_rt(struct rt6_info *rt)
2023 {
2024 	struct nl_info info = {
2025 		.nl_net = dev_net(rt->dst.dev),
2026 	};
2027 	return __ip6_del_rt(rt, &info);
2028 }
2029 
2030 static int ip6_route_del(struct fib6_config *cfg)
2031 {
2032 	struct fib6_table *table;
2033 	struct fib6_node *fn;
2034 	struct rt6_info *rt;
2035 	int err = -ESRCH;
2036 
2037 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2038 	if (!table)
2039 		return err;
2040 
2041 	read_lock_bh(&table->tb6_lock);
2042 
2043 	fn = fib6_locate(&table->tb6_root,
2044 			 &cfg->fc_dst, cfg->fc_dst_len,
2045 			 &cfg->fc_src, cfg->fc_src_len);
2046 
2047 	if (fn) {
2048 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2049 			if ((rt->rt6i_flags & RTF_CACHE) &&
2050 			    !(cfg->fc_flags & RTF_CACHE))
2051 				continue;
2052 			if (cfg->fc_ifindex &&
2053 			    (!rt->dst.dev ||
2054 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
2055 				continue;
2056 			if (cfg->fc_flags & RTF_GATEWAY &&
2057 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2058 				continue;
2059 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2060 				continue;
2061 			dst_hold(&rt->dst);
2062 			read_unlock_bh(&table->tb6_lock);
2063 
2064 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2065 		}
2066 	}
2067 	read_unlock_bh(&table->tb6_lock);
2068 
2069 	return err;
2070 }
2071 
/* Act on a received redirect message.
 *
 * @dst: route the redirect was matched against (the result of the
 *       redirect lookup); the namespace's null entry means the sender
 *       is not a valid next hop and the message is ignored
 * @sk:  not used in this function
 * @skb: packet carrying the redirect; icmp6_hdr(skb) is read as a
 *       struct rd_msg
 *
 * After validation the neighbour entry for the redirect target is
 * updated and an RTF_CACHE|RTF_DYNAMIC clone of @dst for the redirected
 * destination is inserted, with the target as gateway unless the
 * destination is on-link.  A previously used RTF_CACHE route is
 * deleted.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	/* number of option bytes that follow the fixed redirect header */
	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* target == destination means the destination is a neighbour;
	 * otherwise the target must be a link-local unicast address
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* ignored when forwarding is on or redirects are not accepted */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	/* optional target link-layer address option */
	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt == net->ipv6.ip6_null_entry) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* the reference taken here is dropped at "out" */
	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	/* an off-link target is a router, so flag the neighbour as one */
	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out;

	/* tell netevent listeners about the old and the new route */
	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* The old cached route is superseded.  dst_clone() takes the extra
	 * reference that ip6_del_rt() drops.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}
2188 
2189 /*
2190  *	Misc support functions
2191  */
2192 
2193 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2194 {
2195 	BUG_ON(from->dst.from);
2196 
2197 	rt->rt6i_flags &= ~RTF_EXPIRES;
2198 	dst_hold(&from->dst);
2199 	rt->dst.from = &from->dst;
2200 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2201 }
2202 
2203 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2204 {
2205 	rt->dst.input = ort->dst.input;
2206 	rt->dst.output = ort->dst.output;
2207 	rt->rt6i_dst = ort->rt6i_dst;
2208 	rt->dst.error = ort->dst.error;
2209 	rt->rt6i_idev = ort->rt6i_idev;
2210 	if (rt->rt6i_idev)
2211 		in6_dev_hold(rt->rt6i_idev);
2212 	rt->dst.lastuse = jiffies;
2213 	rt->rt6i_gateway = ort->rt6i_gateway;
2214 	rt->rt6i_flags = ort->rt6i_flags;
2215 	rt6_set_from(rt, ort);
2216 	rt->rt6i_metric = ort->rt6i_metric;
2217 #ifdef CONFIG_IPV6_SUBTREES
2218 	rt->rt6i_src = ort->rt6i_src;
2219 #endif
2220 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2221 	rt->rt6i_table = ort->rt6i_table;
2222 	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2223 }
2224 
2225 #ifdef CONFIG_IPV6_ROUTE_INFO
2226 static struct rt6_info *rt6_get_route_info(struct net *net,
2227 					   const struct in6_addr *prefix, int prefixlen,
2228 					   const struct in6_addr *gwaddr, int ifindex)
2229 {
2230 	struct fib6_node *fn;
2231 	struct rt6_info *rt = NULL;
2232 	struct fib6_table *table;
2233 
2234 	table = fib6_get_table(net, RT6_TABLE_INFO);
2235 	if (!table)
2236 		return NULL;
2237 
2238 	read_lock_bh(&table->tb6_lock);
2239 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2240 	if (!fn)
2241 		goto out;
2242 
2243 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2244 		if (rt->dst.dev->ifindex != ifindex)
2245 			continue;
2246 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2247 			continue;
2248 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2249 			continue;
2250 		dst_hold(&rt->dst);
2251 		break;
2252 	}
2253 out:
2254 	read_unlock_bh(&table->tb6_lock);
2255 	return rt;
2256 }
2257 
2258 static struct rt6_info *rt6_add_route_info(struct net *net,
2259 					   const struct in6_addr *prefix, int prefixlen,
2260 					   const struct in6_addr *gwaddr, int ifindex,
2261 					   unsigned int pref)
2262 {
2263 	struct fib6_config cfg = {
2264 		.fc_table	= RT6_TABLE_INFO,
2265 		.fc_metric	= IP6_RT_PRIO_USER,
2266 		.fc_ifindex	= ifindex,
2267 		.fc_dst_len	= prefixlen,
2268 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2269 				  RTF_UP | RTF_PREF(pref),
2270 		.fc_nlinfo.portid = 0,
2271 		.fc_nlinfo.nlh = NULL,
2272 		.fc_nlinfo.nl_net = net,
2273 	};
2274 
2275 	cfg.fc_dst = *prefix;
2276 	cfg.fc_gateway = *gwaddr;
2277 
2278 	/* We should treat it as a default route if prefix length is 0. */
2279 	if (!prefixlen)
2280 		cfg.fc_flags |= RTF_DEFAULT;
2281 
2282 	ip6_route_add(&cfg);
2283 
2284 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2285 }
2286 #endif
2287 
2288 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2289 {
2290 	struct rt6_info *rt;
2291 	struct fib6_table *table;
2292 
2293 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2294 	if (!table)
2295 		return NULL;
2296 
2297 	read_lock_bh(&table->tb6_lock);
2298 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2299 		if (dev == rt->dst.dev &&
2300 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2301 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2302 			break;
2303 	}
2304 	if (rt)
2305 		dst_hold(&rt->dst);
2306 	read_unlock_bh(&table->tb6_lock);
2307 	return rt;
2308 }
2309 
2310 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2311 				     struct net_device *dev,
2312 				     unsigned int pref)
2313 {
2314 	struct fib6_config cfg = {
2315 		.fc_table	= RT6_TABLE_DFLT,
2316 		.fc_metric	= IP6_RT_PRIO_USER,
2317 		.fc_ifindex	= dev->ifindex,
2318 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2319 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2320 		.fc_nlinfo.portid = 0,
2321 		.fc_nlinfo.nlh = NULL,
2322 		.fc_nlinfo.nl_net = dev_net(dev),
2323 	};
2324 
2325 	cfg.fc_gateway = *gwaddr;
2326 
2327 	ip6_route_add(&cfg);
2328 
2329 	return rt6_get_dflt_router(gwaddr, dev);
2330 }
2331 
2332 void rt6_purge_dflt_routers(struct net *net)
2333 {
2334 	struct rt6_info *rt;
2335 	struct fib6_table *table;
2336 
2337 	/* NOTE: Keep consistent with rt6_get_dflt_router */
2338 	table = fib6_get_table(net, RT6_TABLE_DFLT);
2339 	if (!table)
2340 		return;
2341 
2342 restart:
2343 	read_lock_bh(&table->tb6_lock);
2344 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2345 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2346 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2347 			dst_hold(&rt->dst);
2348 			read_unlock_bh(&table->tb6_lock);
2349 			ip6_del_rt(rt);
2350 			goto restart;
2351 		}
2352 	}
2353 	read_unlock_bh(&table->tb6_lock);
2354 }
2355 
2356 static void rtmsg_to_fib6_config(struct net *net,
2357 				 struct in6_rtmsg *rtmsg,
2358 				 struct fib6_config *cfg)
2359 {
2360 	memset(cfg, 0, sizeof(*cfg));
2361 
2362 	cfg->fc_table = RT6_TABLE_MAIN;
2363 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2364 	cfg->fc_metric = rtmsg->rtmsg_metric;
2365 	cfg->fc_expires = rtmsg->rtmsg_info;
2366 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2367 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2368 	cfg->fc_flags = rtmsg->rtmsg_flags;
2369 
2370 	cfg->fc_nlinfo.nl_net = net;
2371 
2372 	cfg->fc_dst = rtmsg->rtmsg_dst;
2373 	cfg->fc_src = rtmsg->rtmsg_src;
2374 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2375 }
2376 
2377 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2378 {
2379 	struct fib6_config cfg;
2380 	struct in6_rtmsg rtmsg;
2381 	int err;
2382 
2383 	switch (cmd) {
2384 	case SIOCADDRT:		/* Add a route */
2385 	case SIOCDELRT:		/* Delete a route */
2386 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2387 			return -EPERM;
2388 		err = copy_from_user(&rtmsg, arg,
2389 				     sizeof(struct in6_rtmsg));
2390 		if (err)
2391 			return -EFAULT;
2392 
2393 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2394 
2395 		rtnl_lock();
2396 		switch (cmd) {
2397 		case SIOCADDRT:
2398 			err = ip6_route_add(&cfg);
2399 			break;
2400 		case SIOCDELRT:
2401 			err = ip6_route_del(&cfg);
2402 			break;
2403 		default:
2404 			err = -EINVAL;
2405 		}
2406 		rtnl_unlock();
2407 
2408 		return err;
2409 	}
2410 
2411 	return -EINVAL;
2412 }
2413 
2414 /*
2415  *	Drop the packet on the floor
2416  */
2417 
2418 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2419 {
2420 	int type;
2421 	struct dst_entry *dst = skb_dst(skb);
2422 	switch (ipstats_mib_noroutes) {
2423 	case IPSTATS_MIB_INNOROUTES:
2424 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2425 		if (type == IPV6_ADDR_ANY) {
2426 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2427 				      IPSTATS_MIB_INADDRERRORS);
2428 			break;
2429 		}
2430 		/* FALLTHROUGH */
2431 	case IPSTATS_MIB_OUTNOROUTES:
2432 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2433 			      ipstats_mib_noroutes);
2434 		break;
2435 	}
2436 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2437 	kfree_skb(skb);
2438 	return 0;
2439 }
2440 
2441 static int ip6_pkt_discard(struct sk_buff *skb)
2442 {
2443 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2444 }
2445 
2446 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2447 {
2448 	skb->dev = skb_dst(skb)->dev;
2449 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2450 }
2451 
2452 static int ip6_pkt_prohibit(struct sk_buff *skb)
2453 {
2454 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2455 }
2456 
2457 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2458 {
2459 	skb->dev = skb_dst(skb)->dev;
2460 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2461 }
2462 
2463 /*
2464  *	Allocate a dst for local (unicast / anycast) address.
2465  */
2466 
2467 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2468 				    const struct in6_addr *addr,
2469 				    bool anycast)
2470 {
2471 	struct net *net = dev_net(idev->dev);
2472 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2473 					    DST_NOCOUNT);
2474 	if (!rt)
2475 		return ERR_PTR(-ENOMEM);
2476 
2477 	in6_dev_hold(idev);
2478 
2479 	rt->dst.flags |= DST_HOST;
2480 	rt->dst.input = ip6_input;
2481 	rt->dst.output = ip6_output;
2482 	rt->rt6i_idev = idev;
2483 
2484 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2485 	if (anycast)
2486 		rt->rt6i_flags |= RTF_ANYCAST;
2487 	else
2488 		rt->rt6i_flags |= RTF_LOCAL;
2489 
2490 	rt->rt6i_gateway  = *addr;
2491 	rt->rt6i_dst.addr = *addr;
2492 	rt->rt6i_dst.plen = 128;
2493 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2494 
2495 	atomic_set(&rt->dst.__refcnt, 1);
2496 
2497 	return rt;
2498 }
2499 
2500 int ip6_route_get_saddr(struct net *net,
2501 			struct rt6_info *rt,
2502 			const struct in6_addr *daddr,
2503 			unsigned int prefs,
2504 			struct in6_addr *saddr)
2505 {
2506 	struct inet6_dev *idev =
2507 		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2508 	int err = 0;
2509 	if (rt && rt->rt6i_prefsrc.plen)
2510 		*saddr = rt->rt6i_prefsrc.addr;
2511 	else
2512 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2513 					 daddr, prefs, saddr);
2514 	return err;
2515 }
2516 
2517 /* remove deleted ip from prefsrc entries */
2518 struct arg_dev_net_ip {
2519 	struct net_device *dev;
2520 	struct net *net;
2521 	struct in6_addr *addr;
2522 };
2523 
2524 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2525 {
2526 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2527 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2528 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2529 
2530 	if (((void *)rt->dst.dev == dev || !dev) &&
2531 	    rt != net->ipv6.ip6_null_entry &&
2532 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2533 		/* remove prefsrc entry */
2534 		rt->rt6i_prefsrc.plen = 0;
2535 	}
2536 	return 0;
2537 }
2538 
2539 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2540 {
2541 	struct net *net = dev_net(ifp->idev->dev);
2542 	struct arg_dev_net_ip adni = {
2543 		.dev = ifp->idev->dev,
2544 		.net = net,
2545 		.addr = &ifp->addr,
2546 	};
2547 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2548 }
2549 
2550 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2551 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2552 
2553 /* Remove routers and update dst entries when gateway turn into host. */
2554 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2555 {
2556 	struct in6_addr *gateway = (struct in6_addr *)arg;
2557 
2558 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2559 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2560 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2561 		return -1;
2562 	}
2563 	return 0;
2564 }
2565 
2566 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2567 {
2568 	fib6_clean_all(net, fib6_clean_tohost, gateway);
2569 }
2570 
2571 struct arg_dev_net {
2572 	struct net_device *dev;
2573 	struct net *net;
2574 };
2575 
2576 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2577 {
2578 	const struct arg_dev_net *adn = arg;
2579 	const struct net_device *dev = adn->dev;
2580 
2581 	if ((rt->dst.dev == dev || !dev) &&
2582 	    rt != adn->net->ipv6.ip6_null_entry)
2583 		return -1;
2584 
2585 	return 0;
2586 }
2587 
2588 void rt6_ifdown(struct net *net, struct net_device *dev)
2589 {
2590 	struct arg_dev_net adn = {
2591 		.dev = dev,
2592 		.net = net,
2593 	};
2594 
2595 	fib6_clean_all(net, fib6_ifdown, &adn);
2596 	icmp6_clean_all(fib6_ifdown, &adn);
2597 	rt6_uncached_list_flush_dev(net, dev);
2598 }
2599 
2600 struct rt6_mtu_change_arg {
2601 	struct net_device *dev;
2602 	unsigned int mtu;
2603 };
2604 
2605 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2606 {
2607 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2608 	struct inet6_dev *idev;
2609 
2610 	/* In IPv6 pmtu discovery is not optional,
2611 	   so that RTAX_MTU lock cannot disable it.
2612 	   We still use this lock to block changes
2613 	   caused by addrconf/ndisc.
2614 	*/
2615 
2616 	idev = __in6_dev_get(arg->dev);
2617 	if (!idev)
2618 		return 0;
2619 
2620 	/* For administrative MTU increase, there is no way to discover
2621 	   IPv6 PMTU increase, so PMTU increase should be updated here.
2622 	   Since RFC 1981 doesn't include administrative MTU increase
2623 	   update PMTU increase is a MUST. (i.e. jumbo frame)
2624 	 */
2625 	/*
2626 	   If new MTU is less than route PMTU, this new MTU will be the
2627 	   lowest MTU in the path, update the route PMTU to reflect PMTU
2628 	   decreases; if new MTU is greater than route PMTU, and the
2629 	   old MTU is the lowest MTU in the path, update the route PMTU
2630 	   to reflect the increase. In this case if the other nodes' MTU
2631 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2632 	   PMTU discouvery.
2633 	 */
2634 	if (rt->dst.dev == arg->dev &&
2635 	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2636 		if (rt->rt6i_flags & RTF_CACHE) {
2637 			/* For RTF_CACHE with rt6i_pmtu == 0
2638 			 * (i.e. a redirected route),
2639 			 * the metrics of its rt->dst.from has already
2640 			 * been updated.
2641 			 */
2642 			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2643 				rt->rt6i_pmtu = arg->mtu;
2644 		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
2645 			   (dst_mtu(&rt->dst) < arg->mtu &&
2646 			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2647 			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2648 		}
2649 	}
2650 	return 0;
2651 }
2652 
2653 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2654 {
2655 	struct rt6_mtu_change_arg arg = {
2656 		.dev = dev,
2657 		.mtu = mtu,
2658 	};
2659 
2660 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2661 }
2662 
2663 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2664 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2665 	[RTA_OIF]               = { .type = NLA_U32 },
2666 	[RTA_IIF]		= { .type = NLA_U32 },
2667 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2668 	[RTA_METRICS]           = { .type = NLA_NESTED },
2669 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2670 	[RTA_PREF]              = { .type = NLA_U8 },
2671 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
2672 	[RTA_ENCAP]		= { .type = NLA_NESTED },
2673 };
2674 
2675 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2676 			      struct fib6_config *cfg)
2677 {
2678 	struct rtmsg *rtm;
2679 	struct nlattr *tb[RTA_MAX+1];
2680 	unsigned int pref;
2681 	int err;
2682 
2683 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2684 	if (err < 0)
2685 		goto errout;
2686 
2687 	err = -EINVAL;
2688 	rtm = nlmsg_data(nlh);
2689 	memset(cfg, 0, sizeof(*cfg));
2690 
2691 	cfg->fc_table = rtm->rtm_table;
2692 	cfg->fc_dst_len = rtm->rtm_dst_len;
2693 	cfg->fc_src_len = rtm->rtm_src_len;
2694 	cfg->fc_flags = RTF_UP;
2695 	cfg->fc_protocol = rtm->rtm_protocol;
2696 	cfg->fc_type = rtm->rtm_type;
2697 
2698 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2699 	    rtm->rtm_type == RTN_BLACKHOLE ||
2700 	    rtm->rtm_type == RTN_PROHIBIT ||
2701 	    rtm->rtm_type == RTN_THROW)
2702 		cfg->fc_flags |= RTF_REJECT;
2703 
2704 	if (rtm->rtm_type == RTN_LOCAL)
2705 		cfg->fc_flags |= RTF_LOCAL;
2706 
2707 	if (rtm->rtm_flags & RTM_F_CLONED)
2708 		cfg->fc_flags |= RTF_CACHE;
2709 
2710 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2711 	cfg->fc_nlinfo.nlh = nlh;
2712 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2713 
2714 	if (tb[RTA_GATEWAY]) {
2715 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2716 		cfg->fc_flags |= RTF_GATEWAY;
2717 	}
2718 
2719 	if (tb[RTA_DST]) {
2720 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2721 
2722 		if (nla_len(tb[RTA_DST]) < plen)
2723 			goto errout;
2724 
2725 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2726 	}
2727 
2728 	if (tb[RTA_SRC]) {
2729 		int plen = (rtm->rtm_src_len + 7) >> 3;
2730 
2731 		if (nla_len(tb[RTA_SRC]) < plen)
2732 			goto errout;
2733 
2734 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2735 	}
2736 
2737 	if (tb[RTA_PREFSRC])
2738 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2739 
2740 	if (tb[RTA_OIF])
2741 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2742 
2743 	if (tb[RTA_PRIORITY])
2744 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2745 
2746 	if (tb[RTA_METRICS]) {
2747 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2748 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2749 	}
2750 
2751 	if (tb[RTA_TABLE])
2752 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2753 
2754 	if (tb[RTA_MULTIPATH]) {
2755 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2756 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2757 	}
2758 
2759 	if (tb[RTA_PREF]) {
2760 		pref = nla_get_u8(tb[RTA_PREF]);
2761 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
2762 		    pref != ICMPV6_ROUTER_PREF_HIGH)
2763 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
2764 		cfg->fc_flags |= RTF_PREF(pref);
2765 	}
2766 
2767 	if (tb[RTA_ENCAP])
2768 		cfg->fc_encap = tb[RTA_ENCAP];
2769 
2770 	if (tb[RTA_ENCAP_TYPE])
2771 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2772 
2773 	err = 0;
2774 errout:
2775 	return err;
2776 }
2777 
2778 struct rt6_nh {
2779 	struct rt6_info *rt6_info;
2780 	struct fib6_config r_cfg;
2781 	struct mx6_config mxc;
2782 	struct list_head next;
2783 };
2784 
2785 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2786 {
2787 	struct rt6_nh *nh;
2788 
2789 	list_for_each_entry(nh, rt6_nh_list, next) {
2790 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2791 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2792 		        nh->r_cfg.fc_ifindex);
2793 	}
2794 }
2795 
2796 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2797 				 struct rt6_info *rt, struct fib6_config *r_cfg)
2798 {
2799 	struct rt6_nh *nh;
2800 	struct rt6_info *rtnh;
2801 	int err = -EEXIST;
2802 
2803 	list_for_each_entry(nh, rt6_nh_list, next) {
2804 		/* check if rt6_info already exists */
2805 		rtnh = nh->rt6_info;
2806 
2807 		if (rtnh->dst.dev == rt->dst.dev &&
2808 		    rtnh->rt6i_idev == rt->rt6i_idev &&
2809 		    ipv6_addr_equal(&rtnh->rt6i_gateway,
2810 				    &rt->rt6i_gateway))
2811 			return err;
2812 	}
2813 
2814 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2815 	if (!nh)
2816 		return -ENOMEM;
2817 	nh->rt6_info = rt;
2818 	err = ip6_convert_metrics(&nh->mxc, r_cfg);
2819 	if (err) {
2820 		kfree(nh);
2821 		return err;
2822 	}
2823 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2824 	list_add_tail(&nh->next, rt6_nh_list);
2825 
2826 	return 0;
2827 }
2828 
2829 static int ip6_route_multipath_add(struct fib6_config *cfg)
2830 {
2831 	struct fib6_config r_cfg;
2832 	struct rtnexthop *rtnh;
2833 	struct rt6_info *rt;
2834 	struct rt6_nh *err_nh;
2835 	struct rt6_nh *nh, *nh_safe;
2836 	int remaining;
2837 	int attrlen;
2838 	int err = 1;
2839 	int nhn = 0;
2840 	int replace = (cfg->fc_nlinfo.nlh &&
2841 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2842 	LIST_HEAD(rt6_nh_list);
2843 
2844 	remaining = cfg->fc_mp_len;
2845 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2846 
2847 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
2848 	 * rt6_info structs per nexthop
2849 	 */
2850 	while (rtnh_ok(rtnh, remaining)) {
2851 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2852 		if (rtnh->rtnh_ifindex)
2853 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2854 
2855 		attrlen = rtnh_attrlen(rtnh);
2856 		if (attrlen > 0) {
2857 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2858 
2859 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2860 			if (nla) {
2861 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
2862 				r_cfg.fc_flags |= RTF_GATEWAY;
2863 			}
2864 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2865 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2866 			if (nla)
2867 				r_cfg.fc_encap_type = nla_get_u16(nla);
2868 		}
2869 
2870 		err = ip6_route_info_create(&r_cfg, &rt);
2871 		if (err)
2872 			goto cleanup;
2873 
2874 		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2875 		if (err) {
2876 			dst_free(&rt->dst);
2877 			goto cleanup;
2878 		}
2879 
2880 		rtnh = rtnh_next(rtnh, &remaining);
2881 	}
2882 
2883 	err_nh = NULL;
2884 	list_for_each_entry(nh, &rt6_nh_list, next) {
2885 		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2886 		/* nh->rt6_info is used or freed at this point, reset to NULL*/
2887 		nh->rt6_info = NULL;
2888 		if (err) {
2889 			if (replace && nhn)
2890 				ip6_print_replace_route_err(&rt6_nh_list);
2891 			err_nh = nh;
2892 			goto add_errout;
2893 		}
2894 
2895 		/* Because each route is added like a single route we remove
2896 		 * these flags after the first nexthop: if there is a collision,
2897 		 * we have already failed to add the first nexthop:
2898 		 * fib6_add_rt2node() has rejected it; when replacing, old
2899 		 * nexthops have been replaced by first new, the rest should
2900 		 * be added to it.
2901 		 */
2902 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2903 						     NLM_F_REPLACE);
2904 		nhn++;
2905 	}
2906 
2907 	goto cleanup;
2908 
2909 add_errout:
2910 	/* Delete routes that were already added */
2911 	list_for_each_entry(nh, &rt6_nh_list, next) {
2912 		if (err_nh == nh)
2913 			break;
2914 		ip6_route_del(&nh->r_cfg);
2915 	}
2916 
2917 cleanup:
2918 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2919 		if (nh->rt6_info)
2920 			dst_free(&nh->rt6_info->dst);
2921 		kfree(nh->mxc.mx);
2922 		list_del(&nh->next);
2923 		kfree(nh);
2924 	}
2925 
2926 	return err;
2927 }
2928 
2929 static int ip6_route_multipath_del(struct fib6_config *cfg)
2930 {
2931 	struct fib6_config r_cfg;
2932 	struct rtnexthop *rtnh;
2933 	int remaining;
2934 	int attrlen;
2935 	int err = 1, last_err = 0;
2936 
2937 	remaining = cfg->fc_mp_len;
2938 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2939 
2940 	/* Parse a Multipath Entry */
2941 	while (rtnh_ok(rtnh, remaining)) {
2942 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2943 		if (rtnh->rtnh_ifindex)
2944 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2945 
2946 		attrlen = rtnh_attrlen(rtnh);
2947 		if (attrlen > 0) {
2948 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2949 
2950 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2951 			if (nla) {
2952 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2953 				r_cfg.fc_flags |= RTF_GATEWAY;
2954 			}
2955 		}
2956 		err = ip6_route_del(&r_cfg);
2957 		if (err)
2958 			last_err = err;
2959 
2960 		rtnh = rtnh_next(rtnh, &remaining);
2961 	}
2962 
2963 	return last_err;
2964 }
2965 
2966 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2967 {
2968 	struct fib6_config cfg;
2969 	int err;
2970 
2971 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2972 	if (err < 0)
2973 		return err;
2974 
2975 	if (cfg.fc_mp)
2976 		return ip6_route_multipath_del(&cfg);
2977 	else
2978 		return ip6_route_del(&cfg);
2979 }
2980 
2981 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2982 {
2983 	struct fib6_config cfg;
2984 	int err;
2985 
2986 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2987 	if (err < 0)
2988 		return err;
2989 
2990 	if (cfg.fc_mp)
2991 		return ip6_route_multipath_add(&cfg);
2992 	else
2993 		return ip6_route_add(&cfg);
2994 }
2995 
2996 static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
2997 {
2998 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2999 	       + nla_total_size(16) /* RTA_SRC */
3000 	       + nla_total_size(16) /* RTA_DST */
3001 	       + nla_total_size(16) /* RTA_GATEWAY */
3002 	       + nla_total_size(16) /* RTA_PREFSRC */
3003 	       + nla_total_size(4) /* RTA_TABLE */
3004 	       + nla_total_size(4) /* RTA_IIF */
3005 	       + nla_total_size(4) /* RTA_OIF */
3006 	       + nla_total_size(4) /* RTA_PRIORITY */
3007 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3008 	       + nla_total_size(sizeof(struct rta_cacheinfo))
3009 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3010 	       + nla_total_size(1) /* RTA_PREF */
3011 	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
3012 }
3013 
3014 static int rt6_fill_node(struct net *net,
3015 			 struct sk_buff *skb, struct rt6_info *rt,
3016 			 struct in6_addr *dst, struct in6_addr *src,
3017 			 int iif, int type, u32 portid, u32 seq,
3018 			 int prefix, int nowait, unsigned int flags)
3019 {
3020 	u32 metrics[RTAX_MAX];
3021 	struct rtmsg *rtm;
3022 	struct nlmsghdr *nlh;
3023 	long expires;
3024 	u32 table;
3025 
3026 	if (prefix) {	/* user wants prefix routes only */
3027 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3028 			/* success since this is not a prefix route */
3029 			return 1;
3030 		}
3031 	}
3032 
3033 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3034 	if (!nlh)
3035 		return -EMSGSIZE;
3036 
3037 	rtm = nlmsg_data(nlh);
3038 	rtm->rtm_family = AF_INET6;
3039 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
3040 	rtm->rtm_src_len = rt->rt6i_src.plen;
3041 	rtm->rtm_tos = 0;
3042 	if (rt->rt6i_table)
3043 		table = rt->rt6i_table->tb6_id;
3044 	else
3045 		table = RT6_TABLE_UNSPEC;
3046 	rtm->rtm_table = table;
3047 	if (nla_put_u32(skb, RTA_TABLE, table))
3048 		goto nla_put_failure;
3049 	if (rt->rt6i_flags & RTF_REJECT) {
3050 		switch (rt->dst.error) {
3051 		case -EINVAL:
3052 			rtm->rtm_type = RTN_BLACKHOLE;
3053 			break;
3054 		case -EACCES:
3055 			rtm->rtm_type = RTN_PROHIBIT;
3056 			break;
3057 		case -EAGAIN:
3058 			rtm->rtm_type = RTN_THROW;
3059 			break;
3060 		default:
3061 			rtm->rtm_type = RTN_UNREACHABLE;
3062 			break;
3063 		}
3064 	}
3065 	else if (rt->rt6i_flags & RTF_LOCAL)
3066 		rtm->rtm_type = RTN_LOCAL;
3067 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3068 		rtm->rtm_type = RTN_LOCAL;
3069 	else
3070 		rtm->rtm_type = RTN_UNICAST;
3071 	rtm->rtm_flags = 0;
3072 	if (!netif_carrier_ok(rt->dst.dev)) {
3073 		rtm->rtm_flags |= RTNH_F_LINKDOWN;
3074 		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3075 			rtm->rtm_flags |= RTNH_F_DEAD;
3076 	}
3077 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3078 	rtm->rtm_protocol = rt->rt6i_protocol;
3079 	if (rt->rt6i_flags & RTF_DYNAMIC)
3080 		rtm->rtm_protocol = RTPROT_REDIRECT;
3081 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
3082 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3083 			rtm->rtm_protocol = RTPROT_RA;
3084 		else
3085 			rtm->rtm_protocol = RTPROT_KERNEL;
3086 	}
3087 
3088 	if (rt->rt6i_flags & RTF_CACHE)
3089 		rtm->rtm_flags |= RTM_F_CLONED;
3090 
3091 	if (dst) {
3092 		if (nla_put_in6_addr(skb, RTA_DST, dst))
3093 			goto nla_put_failure;
3094 		rtm->rtm_dst_len = 128;
3095 	} else if (rtm->rtm_dst_len)
3096 		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3097 			goto nla_put_failure;
3098 #ifdef CONFIG_IPV6_SUBTREES
3099 	if (src) {
3100 		if (nla_put_in6_addr(skb, RTA_SRC, src))
3101 			goto nla_put_failure;
3102 		rtm->rtm_src_len = 128;
3103 	} else if (rtm->rtm_src_len &&
3104 		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3105 		goto nla_put_failure;
3106 #endif
3107 	if (iif) {
3108 #ifdef CONFIG_IPV6_MROUTE
3109 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3110 			int err = ip6mr_get_route(net, skb, rtm, nowait);
3111 			if (err <= 0) {
3112 				if (!nowait) {
3113 					if (err == 0)
3114 						return 0;
3115 					goto nla_put_failure;
3116 				} else {
3117 					if (err == -EMSGSIZE)
3118 						goto nla_put_failure;
3119 				}
3120 			}
3121 		} else
3122 #endif
3123 			if (nla_put_u32(skb, RTA_IIF, iif))
3124 				goto nla_put_failure;
3125 	} else if (dst) {
3126 		struct in6_addr saddr_buf;
3127 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3128 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3129 			goto nla_put_failure;
3130 	}
3131 
3132 	if (rt->rt6i_prefsrc.plen) {
3133 		struct in6_addr saddr_buf;
3134 		saddr_buf = rt->rt6i_prefsrc.addr;
3135 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3136 			goto nla_put_failure;
3137 	}
3138 
3139 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3140 	if (rt->rt6i_pmtu)
3141 		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3142 	if (rtnetlink_put_metrics(skb, metrics) < 0)
3143 		goto nla_put_failure;
3144 
3145 	if (rt->rt6i_flags & RTF_GATEWAY) {
3146 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3147 			goto nla_put_failure;
3148 	}
3149 
3150 	if (rt->dst.dev &&
3151 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3152 		goto nla_put_failure;
3153 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3154 		goto nla_put_failure;
3155 
3156 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3157 
3158 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3159 		goto nla_put_failure;
3160 
3161 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3162 		goto nla_put_failure;
3163 
3164 	lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3165 
3166 	nlmsg_end(skb, nlh);
3167 	return 0;
3168 
3169 nla_put_failure:
3170 	nlmsg_cancel(skb, nlh);
3171 	return -EMSGSIZE;
3172 }
3173 
3174 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3175 {
3176 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3177 	int prefix;
3178 
3179 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3180 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3181 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3182 	} else
3183 		prefix = 0;
3184 
3185 	return rt6_fill_node(arg->net,
3186 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3187 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3188 		     prefix, 0, NLM_F_MULTI);
3189 }
3190 
3191 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3192 {
3193 	struct net *net = sock_net(in_skb->sk);
3194 	struct nlattr *tb[RTA_MAX+1];
3195 	struct rt6_info *rt;
3196 	struct sk_buff *skb;
3197 	struct rtmsg *rtm;
3198 	struct flowi6 fl6;
3199 	int err, iif = 0, oif = 0;
3200 
3201 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3202 	if (err < 0)
3203 		goto errout;
3204 
3205 	err = -EINVAL;
3206 	memset(&fl6, 0, sizeof(fl6));
3207 
3208 	if (tb[RTA_SRC]) {
3209 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3210 			goto errout;
3211 
3212 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3213 	}
3214 
3215 	if (tb[RTA_DST]) {
3216 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3217 			goto errout;
3218 
3219 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3220 	}
3221 
3222 	if (tb[RTA_IIF])
3223 		iif = nla_get_u32(tb[RTA_IIF]);
3224 
3225 	if (tb[RTA_OIF])
3226 		oif = nla_get_u32(tb[RTA_OIF]);
3227 
3228 	if (tb[RTA_MARK])
3229 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3230 
3231 	if (iif) {
3232 		struct net_device *dev;
3233 		int flags = 0;
3234 
3235 		dev = __dev_get_by_index(net, iif);
3236 		if (!dev) {
3237 			err = -ENODEV;
3238 			goto errout;
3239 		}
3240 
3241 		fl6.flowi6_iif = iif;
3242 
3243 		if (!ipv6_addr_any(&fl6.saddr))
3244 			flags |= RT6_LOOKUP_F_HAS_SADDR;
3245 
3246 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3247 							       flags);
3248 	} else {
3249 		fl6.flowi6_oif = oif;
3250 
3251 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3252 	}
3253 
3254 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3255 	if (!skb) {
3256 		ip6_rt_put(rt);
3257 		err = -ENOBUFS;
3258 		goto errout;
3259 	}
3260 
3261 	/* Reserve room for dummy headers, this skb can pass
3262 	   through good chunk of routing engine.
3263 	 */
3264 	skb_reset_mac_header(skb);
3265 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3266 
3267 	skb_dst_set(skb, &rt->dst);
3268 
3269 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3270 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3271 			    nlh->nlmsg_seq, 0, 0, 0);
3272 	if (err < 0) {
3273 		kfree_skb(skb);
3274 		goto errout;
3275 	}
3276 
3277 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3278 errout:
3279 	return err;
3280 }
3281 
3282 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
3283 {
3284 	struct sk_buff *skb;
3285 	struct net *net = info->nl_net;
3286 	u32 seq;
3287 	int err;
3288 
3289 	err = -ENOBUFS;
3290 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3291 
3292 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3293 	if (!skb)
3294 		goto errout;
3295 
3296 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3297 				event, info->portid, seq, 0, 0, 0);
3298 	if (err < 0) {
3299 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3300 		WARN_ON(err == -EMSGSIZE);
3301 		kfree_skb(skb);
3302 		goto errout;
3303 	}
3304 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3305 		    info->nlh, gfp_any());
3306 	return;
3307 errout:
3308 	if (err < 0)
3309 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3310 }
3311 
3312 static int ip6_route_dev_notify(struct notifier_block *this,
3313 				unsigned long event, void *ptr)
3314 {
3315 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3316 	struct net *net = dev_net(dev);
3317 
3318 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3319 		net->ipv6.ip6_null_entry->dst.dev = dev;
3320 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3321 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3322 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3323 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3324 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3325 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3326 #endif
3327 	}
3328 
3329 	return NOTIFY_OK;
3330 }
3331 
3332 /*
3333  *	/proc
3334  */
3335 
3336 #ifdef CONFIG_PROC_FS
3337 
/*
 * File operations for the "ipv6_route" proc entry that
 * ip6_route_net_init_late() creates in each namespace.  Opening goes
 * through ipv6_route_open(); reading and seeking use the generic seq_file
 * helpers, and release uses the net-aware seq_release_net().
 */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
3345 
3346 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3347 {
3348 	struct net *net = (struct net *)seq->private;
3349 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3350 		   net->ipv6.rt6_stats->fib_nodes,
3351 		   net->ipv6.rt6_stats->fib_route_nodes,
3352 		   net->ipv6.rt6_stats->fib_rt_alloc,
3353 		   net->ipv6.rt6_stats->fib_rt_entries,
3354 		   net->ipv6.rt6_stats->fib_rt_cache,
3355 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3356 		   net->ipv6.rt6_stats->fib_discarded_routes);
3357 
3358 	return 0;
3359 }
3360 
/*
 * open() handler for the "rt6_stats" proc entry: sets up a single-record
 * seq_file via single_open_net() with rt6_stats_seq_show() as the show
 * callback and returns that helper's result.
 */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
3365 
/*
 * File operations for the "rt6_stats" proc entry that
 * ip6_route_net_init_late() creates in each namespace.  Paired with
 * rt6_stats_seq_open(); released through single_release_net().
 */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
3373 #endif	/* CONFIG_PROC_FS */
3374 
3375 #ifdef CONFIG_SYSCTL
3376 
3377 static
3378 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3379 			      void __user *buffer, size_t *lenp, loff_t *ppos)
3380 {
3381 	struct net *net;
3382 	int delay;
3383 	if (!write)
3384 		return -EINVAL;
3385 
3386 	net = (struct net *)ctl->extra1;
3387 	delay = net->ipv6.sysctl.flush_delay;
3388 	proc_dointvec(ctl, write, buffer, lenp, ppos);
3389 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3390 	return 0;
3391 }
3392 
/*
 * Template for the per-namespace IPv6 route sysctl table.
 *
 * ipv6_route_sysctl_init() duplicates this array and rewrites each
 * entry's .data pointer by numeric index (table[0] .. table[9]) so that
 * it refers to the new namespace instead of init_net.  The order of the
 * entries here must therefore stay in sync with that function.
 *
 * The array is terminated by an empty entry.
 */
struct ctl_table ipv6_route_table_template[] = {
	/* [0] write-only (mode 0200) trigger handled by ipv6_sysctl_rtcache_flush() */
	{
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	/* [1] */
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	/* [2] */
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	/* [3] same storage as "gc_min_interval_ms" ([9]), different handler */
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	/* [4] */
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	/* [5] */
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	/* [6] */
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	/* [7] */
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	/* [8] */
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	/* [9] same storage as "gc_min_interval" ([3]), accessed via the ms/jiffies handler */
	{
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
3466 
3467 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3468 {
3469 	struct ctl_table *table;
3470 
3471 	table = kmemdup(ipv6_route_table_template,
3472 			sizeof(ipv6_route_table_template),
3473 			GFP_KERNEL);
3474 
3475 	if (table) {
3476 		table[0].data = &net->ipv6.sysctl.flush_delay;
3477 		table[0].extra1 = net;
3478 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3479 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3480 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3481 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3482 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3483 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3484 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3485 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3486 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3487 
3488 		/* Don't export sysctls to unprivileged users */
3489 		if (net->user_ns != &init_user_ns)
3490 			table[0].procname = NULL;
3491 	}
3492 
3493 	return table;
3494 }
3495 #endif
3496 
3497 static int __net_init ip6_route_net_init(struct net *net)
3498 {
3499 	int ret = -ENOMEM;
3500 
3501 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3502 	       sizeof(net->ipv6.ip6_dst_ops));
3503 
3504 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3505 		goto out_ip6_dst_ops;
3506 
3507 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3508 					   sizeof(*net->ipv6.ip6_null_entry),
3509 					   GFP_KERNEL);
3510 	if (!net->ipv6.ip6_null_entry)
3511 		goto out_ip6_dst_entries;
3512 	net->ipv6.ip6_null_entry->dst.path =
3513 		(struct dst_entry *)net->ipv6.ip6_null_entry;
3514 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3515 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3516 			 ip6_template_metrics, true);
3517 
3518 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3519 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3520 					       sizeof(*net->ipv6.ip6_prohibit_entry),
3521 					       GFP_KERNEL);
3522 	if (!net->ipv6.ip6_prohibit_entry)
3523 		goto out_ip6_null_entry;
3524 	net->ipv6.ip6_prohibit_entry->dst.path =
3525 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3526 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3527 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3528 			 ip6_template_metrics, true);
3529 
3530 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3531 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3532 					       GFP_KERNEL);
3533 	if (!net->ipv6.ip6_blk_hole_entry)
3534 		goto out_ip6_prohibit_entry;
3535 	net->ipv6.ip6_blk_hole_entry->dst.path =
3536 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3537 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3538 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3539 			 ip6_template_metrics, true);
3540 #endif
3541 
3542 	net->ipv6.sysctl.flush_delay = 0;
3543 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3544 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3545 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3546 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3547 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3548 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3549 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3550 
3551 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3552 
3553 	ret = 0;
3554 out:
3555 	return ret;
3556 
3557 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3558 out_ip6_prohibit_entry:
3559 	kfree(net->ipv6.ip6_prohibit_entry);
3560 out_ip6_null_entry:
3561 	kfree(net->ipv6.ip6_null_entry);
3562 #endif
3563 out_ip6_dst_entries:
3564 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3565 out_ip6_dst_ops:
3566 	goto out;
3567 }
3568 
3569 static void __net_exit ip6_route_net_exit(struct net *net)
3570 {
3571 	kfree(net->ipv6.ip6_null_entry);
3572 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3573 	kfree(net->ipv6.ip6_prohibit_entry);
3574 	kfree(net->ipv6.ip6_blk_hole_entry);
3575 #endif
3576 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3577 }
3578 
/*
 * Late per-namespace init: with CONFIG_PROC_FS, creates the "ipv6_route"
 * and "rt6_stats" entries under the namespace's proc_net directory.
 *
 * The results of proc_create() are not checked, so this always
 * returns 0.
 */
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
#endif
	return 0;
}
3587 
/*
 * Late per-namespace exit: removes the proc entries created by
 * ip6_route_net_init_late().  A no-op without CONFIG_PROC_FS.
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
3595 
/*
 * Per-namespace hooks for the core route state (dst_ops copy, special
 * route entries, sysctl defaults); registered from ip6_route_init().
 */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
3600 
3601 static int __net_init ipv6_inetpeer_init(struct net *net)
3602 {
3603 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3604 
3605 	if (!bp)
3606 		return -ENOMEM;
3607 	inet_peer_base_init(bp);
3608 	net->ipv6.peers = bp;
3609 	return 0;
3610 }
3611 
3612 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3613 {
3614 	struct inet_peer_base *bp = net->ipv6.peers;
3615 
3616 	net->ipv6.peers = NULL;
3617 	inetpeer_invalidate_tree(bp);
3618 	kfree(bp);
3619 }
3620 
/*
 * Per-namespace hooks managing net->ipv6.peers; registered from
 * ip6_route_init() before ip6_route_net_ops.
 */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
3625 
/*
 * Per-namespace hooks that create/remove the route proc entries;
 * registered from ip6_route_init() after fib6, xfrm6 and the fib6 rules
 * have been initialised.
 */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
3630 
/*
 * Netdevice notifier block hooking ip6_route_dev_notify() into the
 * netdevice notification chain (registered in ip6_route_init()).
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
3635 
3636 int __init ip6_route_init(void)
3637 {
3638 	int ret;
3639 	int cpu;
3640 
3641 	ret = -ENOMEM;
3642 	ip6_dst_ops_template.kmem_cachep =
3643 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3644 				  SLAB_HWCACHE_ALIGN, NULL);
3645 	if (!ip6_dst_ops_template.kmem_cachep)
3646 		goto out;
3647 
3648 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3649 	if (ret)
3650 		goto out_kmem_cache;
3651 
3652 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3653 	if (ret)
3654 		goto out_dst_entries;
3655 
3656 	ret = register_pernet_subsys(&ip6_route_net_ops);
3657 	if (ret)
3658 		goto out_register_inetpeer;
3659 
3660 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3661 
3662 	/* Registering of the loopback is done before this portion of code,
3663 	 * the loopback reference in rt6_info will not be taken, do it
3664 	 * manually for init_net */
3665 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3666 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3667   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3668 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3669 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3670 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3671 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3672   #endif
3673 	ret = fib6_init();
3674 	if (ret)
3675 		goto out_register_subsys;
3676 
3677 	ret = xfrm6_init();
3678 	if (ret)
3679 		goto out_fib6_init;
3680 
3681 	ret = fib6_rules_init();
3682 	if (ret)
3683 		goto xfrm6_init;
3684 
3685 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3686 	if (ret)
3687 		goto fib6_rules_init;
3688 
3689 	ret = -ENOBUFS;
3690 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3691 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3692 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3693 		goto out_register_late_subsys;
3694 
3695 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3696 	if (ret)
3697 		goto out_register_late_subsys;
3698 
3699 	for_each_possible_cpu(cpu) {
3700 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3701 
3702 		INIT_LIST_HEAD(&ul->head);
3703 		spin_lock_init(&ul->lock);
3704 	}
3705 
3706 out:
3707 	return ret;
3708 
3709 out_register_late_subsys:
3710 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3711 fib6_rules_init:
3712 	fib6_rules_cleanup();
3713 xfrm6_init:
3714 	xfrm6_fini();
3715 out_fib6_init:
3716 	fib6_gc_cleanup();
3717 out_register_subsys:
3718 	unregister_pernet_subsys(&ip6_route_net_ops);
3719 out_register_inetpeer:
3720 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3721 out_dst_entries:
3722 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3723 out_kmem_cache:
3724 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3725 	goto out;
3726 }
3727 
/*
 * ip6_route_cleanup - undo ip6_route_init().
 *
 * Unregisters the netdevice notifier and the late per-namespace
 * operations, cleans up the fib6 rules, xfrm6 and fib6, unregisters the
 * remaining per-namespace operations, and finally destroys the blackhole
 * dst_ops entry counter and the rt6_info slab cache.
 *
 * NOTE(review): the rtnetlink handlers registered in ip6_route_init() are
 * not unregistered here; presumably that happens elsewhere - confirm.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
3740