xref: /openbmc/linux/net/ipv6/route.c (revision ab450605b35caa768ca33e86db9403229bf42be4)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61 #include <net/lwtunnel.h>
62 
63 #include <asm/uaccess.h>
64 
65 #ifdef CONFIG_SYSCTL
66 #include <linux/sysctl.h>
67 #endif
68 
/* Result of the next-hop neighbour check used while scoring routes.
 * Every failure is negative so that callers can test "< 0"; the single
 * success value is positive.
 */
enum rt6_nud_state {
	RT6_NUD_SUCCEED		=  1,	/* next hop is acceptable */
	RT6_NUD_FAIL_DO_RR	= -1,	/* find_match() scores it 0 and asks for round-robin */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour entry is in a failed state */
	RT6_NUD_FAIL_HARD	= -3,	/* find_match() skips the route entirely */
};
75 
76 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
77 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
79 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void		ip6_dst_destroy(struct dst_entry *);
82 static void		ip6_dst_ifdown(struct dst_entry *,
83 				       struct net_device *dev, int how);
84 static int		 ip6_dst_gc(struct dst_ops *ops);
85 
86 static int		ip6_pkt_discard(struct sk_buff *skb);
87 static int		ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
88 static int		ip6_pkt_prohibit(struct sk_buff *skb);
89 static int		ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
90 static void		ip6_link_failure(struct sk_buff *skb);
91 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92 					   struct sk_buff *skb, u32 mtu);
93 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94 					struct sk_buff *skb);
95 static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
96 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
97 
98 #ifdef CONFIG_IPV6_ROUTE_INFO
99 static struct rt6_info *rt6_add_route_info(struct net *net,
100 					   const struct in6_addr *prefix, int prefixlen,
101 					   const struct in6_addr *gwaddr, int ifindex,
102 					   unsigned int pref);
103 static struct rt6_info *rt6_get_route_info(struct net *net,
104 					   const struct in6_addr *prefix, int prefixlen,
105 					   const struct in6_addr *gwaddr, int ifindex);
106 #endif
107 
108 struct uncached_list {
109 	spinlock_t		lock;
110 	struct list_head	head;
111 };
112 
113 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
114 
115 static void rt6_uncached_list_add(struct rt6_info *rt)
116 {
117 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
118 
119 	rt->dst.flags |= DST_NOCACHE;
120 	rt->rt6i_uncached_list = ul;
121 
122 	spin_lock_bh(&ul->lock);
123 	list_add_tail(&rt->rt6i_uncached, &ul->head);
124 	spin_unlock_bh(&ul->lock);
125 }
126 
127 static void rt6_uncached_list_del(struct rt6_info *rt)
128 {
129 	if (!list_empty(&rt->rt6i_uncached)) {
130 		struct uncached_list *ul = rt->rt6i_uncached_list;
131 
132 		spin_lock_bh(&ul->lock);
133 		list_del(&rt->rt6i_uncached);
134 		spin_unlock_bh(&ul->lock);
135 	}
136 }
137 
138 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
139 {
140 	struct net_device *loopback_dev = net->loopback_dev;
141 	int cpu;
142 
143 	for_each_possible_cpu(cpu) {
144 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
145 		struct rt6_info *rt;
146 
147 		spin_lock_bh(&ul->lock);
148 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
149 			struct inet6_dev *rt_idev = rt->rt6i_idev;
150 			struct net_device *rt_dev = rt->dst.dev;
151 
152 			if (rt_idev && (rt_idev->dev == dev || !dev) &&
153 			    rt_idev->dev != loopback_dev) {
154 				rt->rt6i_idev = in6_dev_get(loopback_dev);
155 				in6_dev_put(rt_idev);
156 			}
157 
158 			if (rt_dev && (rt_dev == dev || !dev) &&
159 			    rt_dev != loopback_dev) {
160 				rt->dst.dev = loopback_dev;
161 				dev_hold(rt->dst.dev);
162 				dev_put(rt_dev);
163 			}
164 		}
165 		spin_unlock_bh(&ul->lock);
166 	}
167 }
168 
169 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
170 {
171 	return dst_metrics_write_ptr(rt->dst.from);
172 }
173 
174 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
175 {
176 	struct rt6_info *rt = (struct rt6_info *)dst;
177 
178 	if (rt->rt6i_flags & RTF_PCPU)
179 		return rt6_pcpu_cow_metrics(rt);
180 	else if (rt->rt6i_flags & RTF_CACHE)
181 		return NULL;
182 	else
183 		return dst_cow_metrics_generic(dst, old);
184 }
185 
186 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
187 					     struct sk_buff *skb,
188 					     const void *daddr)
189 {
190 	struct in6_addr *p = &rt->rt6i_gateway;
191 
192 	if (!ipv6_addr_any(p))
193 		return (const void *) p;
194 	else if (skb)
195 		return &ipv6_hdr(skb)->daddr;
196 	return daddr;
197 }
198 
199 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
200 					  struct sk_buff *skb,
201 					  const void *daddr)
202 {
203 	struct rt6_info *rt = (struct rt6_info *) dst;
204 	struct neighbour *n;
205 
206 	daddr = choose_neigh_daddr(rt, skb, daddr);
207 	n = __ipv6_neigh_lookup(dst->dev, daddr);
208 	if (n)
209 		return n;
210 	return neigh_create(&nd_tbl, daddr, dst->dev);
211 }
212 
213 static struct dst_ops ip6_dst_ops_template = {
214 	.family			=	AF_INET6,
215 	.gc			=	ip6_dst_gc,
216 	.gc_thresh		=	1024,
217 	.check			=	ip6_dst_check,
218 	.default_advmss		=	ip6_default_advmss,
219 	.mtu			=	ip6_mtu,
220 	.cow_metrics		=	ipv6_cow_metrics,
221 	.destroy		=	ip6_dst_destroy,
222 	.ifdown			=	ip6_dst_ifdown,
223 	.negative_advice	=	ip6_negative_advice,
224 	.link_failure		=	ip6_link_failure,
225 	.update_pmtu		=	ip6_rt_update_pmtu,
226 	.redirect		=	rt6_do_redirect,
227 	.local_out		=	__ip6_local_out,
228 	.neigh_lookup		=	ip6_neigh_lookup,
229 };
230 
231 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
232 {
233 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
234 
235 	return mtu ? : dst->dev->mtu;
236 }
237 
238 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
239 					 struct sk_buff *skb, u32 mtu)
240 {
241 }
242 
/* dst_ops->redirect callback for blackhole routes: intentionally a
 * no-op, all arguments are ignored.
 */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
}
247 
248 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
249 					 unsigned long old)
250 {
251 	return NULL;
252 }
253 
254 static struct dst_ops ip6_dst_blackhole_ops = {
255 	.family			=	AF_INET6,
256 	.destroy		=	ip6_dst_destroy,
257 	.check			=	ip6_dst_check,
258 	.mtu			=	ip6_blackhole_mtu,
259 	.default_advmss		=	ip6_default_advmss,
260 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
261 	.redirect		=	ip6_rt_blackhole_redirect,
262 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
263 	.neigh_lookup		=	ip6_neigh_lookup,
264 };
265 
266 static const u32 ip6_template_metrics[RTAX_MAX] = {
267 	[RTAX_HOPLIMIT - 1] = 0,
268 };
269 
270 static const struct rt6_info ip6_null_entry_template = {
271 	.dst = {
272 		.__refcnt	= ATOMIC_INIT(1),
273 		.__use		= 1,
274 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
275 		.error		= -ENETUNREACH,
276 		.input		= ip6_pkt_discard,
277 		.output		= ip6_pkt_discard_out,
278 	},
279 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
280 	.rt6i_protocol  = RTPROT_KERNEL,
281 	.rt6i_metric	= ~(u32) 0,
282 	.rt6i_ref	= ATOMIC_INIT(1),
283 };
284 
285 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
286 
287 static const struct rt6_info ip6_prohibit_entry_template = {
288 	.dst = {
289 		.__refcnt	= ATOMIC_INIT(1),
290 		.__use		= 1,
291 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
292 		.error		= -EACCES,
293 		.input		= ip6_pkt_prohibit,
294 		.output		= ip6_pkt_prohibit_out,
295 	},
296 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
297 	.rt6i_protocol  = RTPROT_KERNEL,
298 	.rt6i_metric	= ~(u32) 0,
299 	.rt6i_ref	= ATOMIC_INIT(1),
300 };
301 
302 static const struct rt6_info ip6_blk_hole_entry_template = {
303 	.dst = {
304 		.__refcnt	= ATOMIC_INIT(1),
305 		.__use		= 1,
306 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
307 		.error		= -EINVAL,
308 		.input		= dst_discard,
309 		.output		= dst_discard_sk,
310 	},
311 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
312 	.rt6i_protocol  = RTPROT_KERNEL,
313 	.rt6i_metric	= ~(u32) 0,
314 	.rt6i_ref	= ATOMIC_INIT(1),
315 };
316 
317 #endif
318 
319 /* allocate dst with ip6_dst_ops */
320 static struct rt6_info *__ip6_dst_alloc(struct net *net,
321 					struct net_device *dev,
322 					int flags,
323 					struct fib6_table *table)
324 {
325 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
326 					0, DST_OBSOLETE_FORCE_CHK, flags);
327 
328 	if (rt) {
329 		struct dst_entry *dst = &rt->dst;
330 
331 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
332 		INIT_LIST_HEAD(&rt->rt6i_siblings);
333 		INIT_LIST_HEAD(&rt->rt6i_uncached);
334 	}
335 	return rt;
336 }
337 
338 static struct rt6_info *ip6_dst_alloc(struct net *net,
339 				      struct net_device *dev,
340 				      int flags,
341 				      struct fib6_table *table)
342 {
343 	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags, table);
344 
345 	if (rt) {
346 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
347 		if (rt->rt6i_pcpu) {
348 			int cpu;
349 
350 			for_each_possible_cpu(cpu) {
351 				struct rt6_info **p;
352 
353 				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
354 				/* no one shares rt */
355 				*p =  NULL;
356 			}
357 		} else {
358 			dst_destroy((struct dst_entry *)rt);
359 			return NULL;
360 		}
361 	}
362 
363 	return rt;
364 }
365 
366 static void ip6_dst_destroy(struct dst_entry *dst)
367 {
368 	struct rt6_info *rt = (struct rt6_info *)dst;
369 	struct dst_entry *from = dst->from;
370 	struct inet6_dev *idev;
371 
372 	dst_destroy_metrics_generic(dst);
373 	free_percpu(rt->rt6i_pcpu);
374 	rt6_uncached_list_del(rt);
375 
376 	idev = rt->rt6i_idev;
377 	if (idev) {
378 		rt->rt6i_idev = NULL;
379 		in6_dev_put(idev);
380 	}
381 
382 	dst->from = NULL;
383 	dst_release(from);
384 }
385 
386 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
387 			   int how)
388 {
389 	struct rt6_info *rt = (struct rt6_info *)dst;
390 	struct inet6_dev *idev = rt->rt6i_idev;
391 	struct net_device *loopback_dev =
392 		dev_net(dev)->loopback_dev;
393 
394 	if (dev != loopback_dev) {
395 		if (idev && idev->dev == dev) {
396 			struct inet6_dev *loopback_idev =
397 				in6_dev_get(loopback_dev);
398 			if (loopback_idev) {
399 				rt->rt6i_idev = loopback_idev;
400 				in6_dev_put(idev);
401 			}
402 		}
403 	}
404 }
405 
406 static bool rt6_check_expired(const struct rt6_info *rt)
407 {
408 	if (rt->rt6i_flags & RTF_EXPIRES) {
409 		if (time_after(jiffies, rt->dst.expires))
410 			return true;
411 	} else if (rt->dst.from) {
412 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
413 	}
414 	return false;
415 }
416 
417 /* Multipath route selection:
418  *   Hash based function using packet header and flowlabel.
419  * Adapted from fib_info_hashfn()
420  */
421 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
422 			       const struct flowi6 *fl6)
423 {
424 	unsigned int val = fl6->flowi6_proto;
425 
426 	val ^= ipv6_addr_hash(&fl6->daddr);
427 	val ^= ipv6_addr_hash(&fl6->saddr);
428 
429 	/* Work only if this not encapsulated */
430 	switch (fl6->flowi6_proto) {
431 	case IPPROTO_UDP:
432 	case IPPROTO_TCP:
433 	case IPPROTO_SCTP:
434 		val ^= (__force u16)fl6->fl6_sport;
435 		val ^= (__force u16)fl6->fl6_dport;
436 		break;
437 
438 	case IPPROTO_ICMPV6:
439 		val ^= (__force u16)fl6->fl6_icmp_type;
440 		val ^= (__force u16)fl6->fl6_icmp_code;
441 		break;
442 	}
443 	/* RFC6438 recommands to use flowlabel */
444 	val ^= (__force u32)fl6->flowlabel;
445 
446 	/* Perhaps, we need to tune, this function? */
447 	val = val ^ (val >> 7) ^ (val >> 12);
448 	return val % candidate_count;
449 }
450 
451 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
452 					     struct flowi6 *fl6, int oif,
453 					     int strict)
454 {
455 	struct rt6_info *sibling, *next_sibling;
456 	int route_choosen;
457 
458 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
459 	/* Don't change the route, if route_choosen == 0
460 	 * (siblings does not include ourself)
461 	 */
462 	if (route_choosen)
463 		list_for_each_entry_safe(sibling, next_sibling,
464 				&match->rt6i_siblings, rt6i_siblings) {
465 			route_choosen--;
466 			if (route_choosen == 0) {
467 				if (rt6_score_route(sibling, oif, strict) < 0)
468 					break;
469 				match = sibling;
470 				break;
471 			}
472 		}
473 	return match;
474 }
475 
476 /*
477  *	Route lookup. Any table->tb6_lock is implied.
478  */
479 
480 static inline struct rt6_info *rt6_device_match(struct net *net,
481 						    struct rt6_info *rt,
482 						    const struct in6_addr *saddr,
483 						    int oif,
484 						    int flags)
485 {
486 	struct rt6_info *local = NULL;
487 	struct rt6_info *sprt;
488 
489 	if (!oif && ipv6_addr_any(saddr))
490 		goto out;
491 
492 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
493 		struct net_device *dev = sprt->dst.dev;
494 
495 		if (oif) {
496 			if (dev->ifindex == oif)
497 				return sprt;
498 			if (dev->flags & IFF_LOOPBACK) {
499 				if (!sprt->rt6i_idev ||
500 				    sprt->rt6i_idev->dev->ifindex != oif) {
501 					if (flags & RT6_LOOKUP_F_IFACE && oif)
502 						continue;
503 					if (local && (!oif ||
504 						      local->rt6i_idev->dev->ifindex == oif))
505 						continue;
506 				}
507 				local = sprt;
508 			}
509 		} else {
510 			if (ipv6_chk_addr(net, saddr, dev,
511 					  flags & RT6_LOOKUP_F_IFACE))
512 				return sprt;
513 		}
514 	}
515 
516 	if (oif) {
517 		if (local)
518 			return local;
519 
520 		if (flags & RT6_LOOKUP_F_IFACE)
521 			return net->ipv6.ip6_null_entry;
522 	}
523 out:
524 	return rt;
525 }
526 
527 #ifdef CONFIG_IPV6_ROUTER_PREF
528 struct __rt6_probe_work {
529 	struct work_struct work;
530 	struct in6_addr target;
531 	struct net_device *dev;
532 };
533 
534 static void rt6_probe_deferred(struct work_struct *w)
535 {
536 	struct in6_addr mcaddr;
537 	struct __rt6_probe_work *work =
538 		container_of(w, struct __rt6_probe_work, work);
539 
540 	addrconf_addr_solict_mult(&work->target, &mcaddr);
541 	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL);
542 	dev_put(work->dev);
543 	kfree(work);
544 }
545 
546 static void rt6_probe(struct rt6_info *rt)
547 {
548 	struct __rt6_probe_work *work;
549 	struct neighbour *neigh;
550 	/*
551 	 * Okay, this does not seem to be appropriate
552 	 * for now, however, we need to check if it
553 	 * is really so; aka Router Reachability Probing.
554 	 *
555 	 * Router Reachability Probe MUST be rate-limited
556 	 * to no more than one per minute.
557 	 */
558 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
559 		return;
560 	rcu_read_lock_bh();
561 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
562 	if (neigh) {
563 		if (neigh->nud_state & NUD_VALID)
564 			goto out;
565 
566 		work = NULL;
567 		write_lock(&neigh->lock);
568 		if (!(neigh->nud_state & NUD_VALID) &&
569 		    time_after(jiffies,
570 			       neigh->updated +
571 			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
572 			work = kmalloc(sizeof(*work), GFP_ATOMIC);
573 			if (work)
574 				__neigh_set_probe_once(neigh);
575 		}
576 		write_unlock(&neigh->lock);
577 	} else {
578 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
579 	}
580 
581 	if (work) {
582 		INIT_WORK(&work->work, rt6_probe_deferred);
583 		work->target = rt->rt6i_gateway;
584 		dev_hold(rt->dst.dev);
585 		work->dev = rt->dst.dev;
586 		schedule_work(&work->work);
587 	}
588 
589 out:
590 	rcu_read_unlock_bh();
591 }
592 #else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
596 #endif
597 
598 /*
599  * Default Router Selection (RFC 2461 6.3.6)
600  */
601 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
602 {
603 	struct net_device *dev = rt->dst.dev;
604 	if (!oif || dev->ifindex == oif)
605 		return 2;
606 	if ((dev->flags & IFF_LOOPBACK) &&
607 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
608 		return 1;
609 	return 0;
610 }
611 
612 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
613 {
614 	struct neighbour *neigh;
615 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
616 
617 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
618 	    !(rt->rt6i_flags & RTF_GATEWAY))
619 		return RT6_NUD_SUCCEED;
620 
621 	rcu_read_lock_bh();
622 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
623 	if (neigh) {
624 		read_lock(&neigh->lock);
625 		if (neigh->nud_state & NUD_VALID)
626 			ret = RT6_NUD_SUCCEED;
627 #ifdef CONFIG_IPV6_ROUTER_PREF
628 		else if (!(neigh->nud_state & NUD_FAILED))
629 			ret = RT6_NUD_SUCCEED;
630 		else
631 			ret = RT6_NUD_FAIL_PROBE;
632 #endif
633 		read_unlock(&neigh->lock);
634 	} else {
635 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
636 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
637 	}
638 	rcu_read_unlock_bh();
639 
640 	return ret;
641 }
642 
643 static int rt6_score_route(struct rt6_info *rt, int oif,
644 			   int strict)
645 {
646 	int m;
647 
648 	m = rt6_check_dev(rt, oif);
649 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
650 		return RT6_NUD_FAIL_HARD;
651 #ifdef CONFIG_IPV6_ROUTER_PREF
652 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
653 #endif
654 	if (strict & RT6_LOOKUP_F_REACHABLE) {
655 		int n = rt6_check_neigh(rt);
656 		if (n < 0)
657 			return n;
658 	}
659 	return m;
660 }
661 
662 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
663 				   int *mpri, struct rt6_info *match,
664 				   bool *do_rr)
665 {
666 	int m;
667 	bool match_do_rr = false;
668 	struct inet6_dev *idev = rt->rt6i_idev;
669 	struct net_device *dev = rt->dst.dev;
670 
671 	if (dev && !netif_carrier_ok(dev) &&
672 	    idev->cnf.ignore_routes_with_linkdown)
673 		goto out;
674 
675 	if (rt6_check_expired(rt))
676 		goto out;
677 
678 	m = rt6_score_route(rt, oif, strict);
679 	if (m == RT6_NUD_FAIL_DO_RR) {
680 		match_do_rr = true;
681 		m = 0; /* lowest valid score */
682 	} else if (m == RT6_NUD_FAIL_HARD) {
683 		goto out;
684 	}
685 
686 	if (strict & RT6_LOOKUP_F_REACHABLE)
687 		rt6_probe(rt);
688 
689 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
690 	if (m > *mpri) {
691 		*do_rr = match_do_rr;
692 		*mpri = m;
693 		match = rt;
694 	}
695 out:
696 	return match;
697 }
698 
699 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
700 				     struct rt6_info *rr_head,
701 				     u32 metric, int oif, int strict,
702 				     bool *do_rr)
703 {
704 	struct rt6_info *rt, *match, *cont;
705 	int mpri = -1;
706 
707 	match = NULL;
708 	cont = NULL;
709 	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
710 		if (rt->rt6i_metric != metric) {
711 			cont = rt;
712 			break;
713 		}
714 
715 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
716 	}
717 
718 	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
719 		if (rt->rt6i_metric != metric) {
720 			cont = rt;
721 			break;
722 		}
723 
724 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
725 	}
726 
727 	if (match || !cont)
728 		return match;
729 
730 	for (rt = cont; rt; rt = rt->dst.rt6_next)
731 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
732 
733 	return match;
734 }
735 
736 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
737 {
738 	struct rt6_info *match, *rt0;
739 	struct net *net;
740 	bool do_rr = false;
741 
742 	rt0 = fn->rr_ptr;
743 	if (!rt0)
744 		fn->rr_ptr = rt0 = fn->leaf;
745 
746 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
747 			     &do_rr);
748 
749 	if (do_rr) {
750 		struct rt6_info *next = rt0->dst.rt6_next;
751 
752 		/* no entries matched; do round-robin */
753 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
754 			next = fn->leaf;
755 
756 		if (next != rt0)
757 			fn->rr_ptr = next;
758 	}
759 
760 	net = dev_net(rt0->dst.dev);
761 	return match ? match : net->ipv6.ip6_null_entry;
762 }
763 
764 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
765 {
766 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
767 }
768 
769 #ifdef CONFIG_IPV6_ROUTE_INFO
770 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
771 		  const struct in6_addr *gwaddr)
772 {
773 	struct net *net = dev_net(dev);
774 	struct route_info *rinfo = (struct route_info *) opt;
775 	struct in6_addr prefix_buf, *prefix;
776 	unsigned int pref;
777 	unsigned long lifetime;
778 	struct rt6_info *rt;
779 
780 	if (len < sizeof(struct route_info)) {
781 		return -EINVAL;
782 	}
783 
784 	/* Sanity check for prefix_len and length */
785 	if (rinfo->length > 3) {
786 		return -EINVAL;
787 	} else if (rinfo->prefix_len > 128) {
788 		return -EINVAL;
789 	} else if (rinfo->prefix_len > 64) {
790 		if (rinfo->length < 2) {
791 			return -EINVAL;
792 		}
793 	} else if (rinfo->prefix_len > 0) {
794 		if (rinfo->length < 1) {
795 			return -EINVAL;
796 		}
797 	}
798 
799 	pref = rinfo->route_pref;
800 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
801 		return -EINVAL;
802 
803 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
804 
805 	if (rinfo->length == 3)
806 		prefix = (struct in6_addr *)rinfo->prefix;
807 	else {
808 		/* this function is safe */
809 		ipv6_addr_prefix(&prefix_buf,
810 				 (struct in6_addr *)rinfo->prefix,
811 				 rinfo->prefix_len);
812 		prefix = &prefix_buf;
813 	}
814 
815 	if (rinfo->prefix_len == 0)
816 		rt = rt6_get_dflt_router(gwaddr, dev);
817 	else
818 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
819 					gwaddr, dev->ifindex);
820 
821 	if (rt && !lifetime) {
822 		ip6_del_rt(rt);
823 		rt = NULL;
824 	}
825 
826 	if (!rt && lifetime)
827 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
828 					pref);
829 	else if (rt)
830 		rt->rt6i_flags = RTF_ROUTEINFO |
831 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
832 
833 	if (rt) {
834 		if (!addrconf_finite_timeout(lifetime))
835 			rt6_clean_expires(rt);
836 		else
837 			rt6_set_expires(rt, jiffies + HZ * lifetime);
838 
839 		ip6_rt_put(rt);
840 	}
841 	return 0;
842 }
843 #endif
844 
845 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
846 					struct in6_addr *saddr)
847 {
848 	struct fib6_node *pn;
849 	while (1) {
850 		if (fn->fn_flags & RTN_TL_ROOT)
851 			return NULL;
852 		pn = fn->parent;
853 		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
854 			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
855 		else
856 			fn = pn;
857 		if (fn->fn_flags & RTN_RTINFO)
858 			return fn;
859 	}
860 }
861 
862 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
863 					     struct fib6_table *table,
864 					     struct flowi6 *fl6, int flags)
865 {
866 	struct fib6_node *fn;
867 	struct rt6_info *rt;
868 
869 	read_lock_bh(&table->tb6_lock);
870 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
871 restart:
872 	rt = fn->leaf;
873 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
874 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
875 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
876 	if (rt == net->ipv6.ip6_null_entry) {
877 		fn = fib6_backtrack(fn, &fl6->saddr);
878 		if (fn)
879 			goto restart;
880 	}
881 	dst_use(&rt->dst, jiffies);
882 	read_unlock_bh(&table->tb6_lock);
883 	return rt;
884 
885 }
886 
887 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
888 				    int flags)
889 {
890 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
891 }
892 EXPORT_SYMBOL_GPL(ip6_route_lookup);
893 
894 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
895 			    const struct in6_addr *saddr, int oif, int strict)
896 {
897 	struct flowi6 fl6 = {
898 		.flowi6_oif = oif,
899 		.daddr = *daddr,
900 	};
901 	struct dst_entry *dst;
902 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
903 
904 	if (saddr) {
905 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
906 		flags |= RT6_LOOKUP_F_HAS_SADDR;
907 	}
908 
909 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
910 	if (dst->error == 0)
911 		return (struct rt6_info *) dst;
912 
913 	dst_release(dst);
914 
915 	return NULL;
916 }
917 EXPORT_SYMBOL(rt6_lookup);
918 
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */
924 
925 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
926 			struct mx6_config *mxc)
927 {
928 	int err;
929 	struct fib6_table *table;
930 
931 	table = rt->rt6i_table;
932 	write_lock_bh(&table->tb6_lock);
933 	err = fib6_add(&table->tb6_root, rt, info, mxc);
934 	write_unlock_bh(&table->tb6_lock);
935 
936 	return err;
937 }
938 
939 int ip6_ins_rt(struct rt6_info *rt)
940 {
941 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
942 	struct mx6_config mxc = { .mx = NULL, };
943 
944 	return __ip6_ins_rt(rt, &info, &mxc);
945 }
946 
947 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
948 					   const struct in6_addr *daddr,
949 					   const struct in6_addr *saddr)
950 {
951 	struct rt6_info *rt;
952 
953 	/*
954 	 *	Clone the route.
955 	 */
956 
957 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
958 		ort = (struct rt6_info *)ort->dst.from;
959 
960 	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev,
961 			     0, ort->rt6i_table);
962 
963 	if (!rt)
964 		return NULL;
965 
966 	ip6_rt_copy_init(rt, ort);
967 	rt->rt6i_flags |= RTF_CACHE;
968 	rt->rt6i_metric = 0;
969 	rt->dst.flags |= DST_HOST;
970 	rt->rt6i_dst.addr = *daddr;
971 	rt->rt6i_dst.plen = 128;
972 
973 	if (!rt6_is_gw_or_nonexthop(ort)) {
974 		if (ort->rt6i_dst.plen != 128 &&
975 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
976 			rt->rt6i_flags |= RTF_ANYCAST;
977 #ifdef CONFIG_IPV6_SUBTREES
978 		if (rt->rt6i_src.plen && saddr) {
979 			rt->rt6i_src.addr = *saddr;
980 			rt->rt6i_src.plen = 128;
981 		}
982 #endif
983 	}
984 
985 	return rt;
986 }
987 
988 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
989 {
990 	struct rt6_info *pcpu_rt;
991 
992 	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
993 				  rt->dst.dev, rt->dst.flags,
994 				  rt->rt6i_table);
995 
996 	if (!pcpu_rt)
997 		return NULL;
998 	ip6_rt_copy_init(pcpu_rt, rt);
999 	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1000 	pcpu_rt->rt6i_flags |= RTF_PCPU;
1001 	return pcpu_rt;
1002 }
1003 
1004 /* It should be called with read_lock_bh(&tb6_lock) acquired */
1005 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1006 {
1007 	struct rt6_info *pcpu_rt, *prev, **p;
1008 
1009 	p = this_cpu_ptr(rt->rt6i_pcpu);
1010 	pcpu_rt = *p;
1011 
1012 	if (pcpu_rt)
1013 		goto done;
1014 
1015 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1016 	if (!pcpu_rt) {
1017 		struct net *net = dev_net(rt->dst.dev);
1018 
1019 		pcpu_rt = net->ipv6.ip6_null_entry;
1020 		goto done;
1021 	}
1022 
1023 	prev = cmpxchg(p, NULL, pcpu_rt);
1024 	if (prev) {
1025 		/* If someone did it before us, return prev instead */
1026 		dst_destroy(&pcpu_rt->dst);
1027 		pcpu_rt = prev;
1028 	}
1029 
1030 done:
1031 	dst_hold(&pcpu_rt->dst);
1032 	rt6_dst_from_metrics_check(pcpu_rt);
1033 	return pcpu_rt;
1034 }
1035 
1036 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
1037 				      struct flowi6 *fl6, int flags)
1038 {
1039 	struct fib6_node *fn, *saved_fn;
1040 	struct rt6_info *rt;
1041 	int strict = 0;
1042 
1043 	strict |= flags & RT6_LOOKUP_F_IFACE;
1044 	if (net->ipv6.devconf_all->forwarding == 0)
1045 		strict |= RT6_LOOKUP_F_REACHABLE;
1046 
1047 	read_lock_bh(&table->tb6_lock);
1048 
1049 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1050 	saved_fn = fn;
1051 
1052 redo_rt6_select:
1053 	rt = rt6_select(fn, oif, strict);
1054 	if (rt->rt6i_nsiblings)
1055 		rt = rt6_multipath_select(rt, fl6, oif, strict);
1056 	if (rt == net->ipv6.ip6_null_entry) {
1057 		fn = fib6_backtrack(fn, &fl6->saddr);
1058 		if (fn)
1059 			goto redo_rt6_select;
1060 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1061 			/* also consider unreachable route */
1062 			strict &= ~RT6_LOOKUP_F_REACHABLE;
1063 			fn = saved_fn;
1064 			goto redo_rt6_select;
1065 		}
1066 	}
1067 
1068 
1069 	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1070 		dst_use(&rt->dst, jiffies);
1071 		read_unlock_bh(&table->tb6_lock);
1072 
1073 		rt6_dst_from_metrics_check(rt);
1074 		return rt;
1075 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1076 			    !(rt->rt6i_flags & RTF_GATEWAY))) {
1077 		/* Create a RTF_CACHE clone which will not be
1078 		 * owned by the fib6 tree.  It is for the special case where
1079 		 * the daddr in the skb during the neighbor look-up is different
1080 		 * from the fl6->daddr used to look-up route here.
1081 		 */
1082 
1083 		struct rt6_info *uncached_rt;
1084 
1085 		dst_use(&rt->dst, jiffies);
1086 		read_unlock_bh(&table->tb6_lock);
1087 
1088 		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1089 		dst_release(&rt->dst);
1090 
1091 		if (uncached_rt)
1092 			rt6_uncached_list_add(uncached_rt);
1093 		else
1094 			uncached_rt = net->ipv6.ip6_null_entry;
1095 
1096 		dst_hold(&uncached_rt->dst);
1097 		return uncached_rt;
1098 
1099 	} else {
1100 		/* Get a percpu copy */
1101 
1102 		struct rt6_info *pcpu_rt;
1103 
1104 		rt->dst.lastuse = jiffies;
1105 		rt->dst.__use++;
1106 		pcpu_rt = rt6_get_pcpu_route(rt);
1107 		read_unlock_bh(&table->tb6_lock);
1108 
1109 		return pcpu_rt;
1110 	}
1111 }
1112 
1113 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1114 					    struct flowi6 *fl6, int flags)
1115 {
1116 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1117 }
1118 
1119 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1120 						struct net_device *dev,
1121 						struct flowi6 *fl6, int flags)
1122 {
1123 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1124 		flags |= RT6_LOOKUP_F_IFACE;
1125 
1126 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1127 }
1128 
1129 void ip6_route_input(struct sk_buff *skb)
1130 {
1131 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1132 	struct net *net = dev_net(skb->dev);
1133 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1134 	struct flowi6 fl6 = {
1135 		.flowi6_iif = skb->dev->ifindex,
1136 		.daddr = iph->daddr,
1137 		.saddr = iph->saddr,
1138 		.flowlabel = ip6_flowinfo(iph),
1139 		.flowi6_mark = skb->mark,
1140 		.flowi6_proto = iph->nexthdr,
1141 	};
1142 
1143 	skb_dst_drop(skb);
1144 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1145 }
1146 
1147 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1148 					     struct flowi6 *fl6, int flags)
1149 {
1150 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1151 }
1152 
1153 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1154 				    struct flowi6 *fl6)
1155 {
1156 	int flags = 0;
1157 
1158 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1159 
1160 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1161 		flags |= RT6_LOOKUP_F_IFACE;
1162 
1163 	if (!ipv6_addr_any(&fl6->saddr))
1164 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1165 	else if (sk)
1166 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1167 
1168 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1169 }
1170 EXPORT_SYMBOL(ip6_route_output);
1171 
1172 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1173 {
1174 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1175 	struct dst_entry *new = NULL;
1176 
1177 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1178 	if (rt) {
1179 		new = &rt->dst;
1180 
1181 		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1182 
1183 		new->__use = 1;
1184 		new->input = dst_discard;
1185 		new->output = dst_discard_sk;
1186 
1187 		if (dst_metrics_read_only(&ort->dst))
1188 			new->_metrics = ort->dst._metrics;
1189 		else
1190 			dst_copy_metrics(new, &ort->dst);
1191 		rt->rt6i_idev = ort->rt6i_idev;
1192 		if (rt->rt6i_idev)
1193 			in6_dev_hold(rt->rt6i_idev);
1194 
1195 		rt->rt6i_gateway = ort->rt6i_gateway;
1196 		rt->rt6i_flags = ort->rt6i_flags;
1197 		rt->rt6i_metric = 0;
1198 
1199 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1200 #ifdef CONFIG_IPV6_SUBTREES
1201 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1202 #endif
1203 
1204 		dst_free(new);
1205 	}
1206 
1207 	dst_release(dst_orig);
1208 	return new ? new : ERR_PTR(-ENOMEM);
1209 }
1210 
1211 /*
1212  *	Destination cache support functions
1213  */
1214 
1215 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1216 {
1217 	if (rt->dst.from &&
1218 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1219 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1220 }
1221 
1222 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1223 {
1224 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1225 		return NULL;
1226 
1227 	if (rt6_check_expired(rt))
1228 		return NULL;
1229 
1230 	return &rt->dst;
1231 }
1232 
1233 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1234 {
1235 	if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1236 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1237 		return &rt->dst;
1238 	else
1239 		return NULL;
1240 }
1241 
1242 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1243 {
1244 	struct rt6_info *rt;
1245 
1246 	rt = (struct rt6_info *) dst;
1247 
1248 	/* All IPV6 dsts are created with ->obsolete set to the value
1249 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1250 	 * into this function always.
1251 	 */
1252 
1253 	rt6_dst_from_metrics_check(rt);
1254 
1255 	if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
1256 		return rt6_dst_from_check(rt, cookie);
1257 	else
1258 		return rt6_check(rt, cookie);
1259 }
1260 
1261 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1262 {
1263 	struct rt6_info *rt = (struct rt6_info *) dst;
1264 
1265 	if (rt) {
1266 		if (rt->rt6i_flags & RTF_CACHE) {
1267 			if (rt6_check_expired(rt)) {
1268 				ip6_del_rt(rt);
1269 				dst = NULL;
1270 			}
1271 		} else {
1272 			dst_release(dst);
1273 			dst = NULL;
1274 		}
1275 	}
1276 	return dst;
1277 }
1278 
1279 static void ip6_link_failure(struct sk_buff *skb)
1280 {
1281 	struct rt6_info *rt;
1282 
1283 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1284 
1285 	rt = (struct rt6_info *) skb_dst(skb);
1286 	if (rt) {
1287 		if (rt->rt6i_flags & RTF_CACHE) {
1288 			dst_hold(&rt->dst);
1289 			if (ip6_del_rt(rt))
1290 				dst_free(&rt->dst);
1291 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1292 			rt->rt6i_node->fn_sernum = -1;
1293 		}
1294 	}
1295 }
1296 
1297 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1298 {
1299 	struct net *net = dev_net(rt->dst.dev);
1300 
1301 	rt->rt6i_flags |= RTF_MODIFIED;
1302 	rt->rt6i_pmtu = mtu;
1303 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1304 }
1305 
1306 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1307 				 const struct ipv6hdr *iph, u32 mtu)
1308 {
1309 	struct rt6_info *rt6 = (struct rt6_info *)dst;
1310 
1311 	if (rt6->rt6i_flags & RTF_LOCAL)
1312 		return;
1313 
1314 	dst_confirm(dst);
1315 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1316 	if (mtu >= dst_mtu(dst))
1317 		return;
1318 
1319 	if (rt6->rt6i_flags & RTF_CACHE) {
1320 		rt6_do_update_pmtu(rt6, mtu);
1321 	} else {
1322 		const struct in6_addr *daddr, *saddr;
1323 		struct rt6_info *nrt6;
1324 
1325 		if (iph) {
1326 			daddr = &iph->daddr;
1327 			saddr = &iph->saddr;
1328 		} else if (sk) {
1329 			daddr = &sk->sk_v6_daddr;
1330 			saddr = &inet6_sk(sk)->saddr;
1331 		} else {
1332 			return;
1333 		}
1334 		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1335 		if (nrt6) {
1336 			rt6_do_update_pmtu(nrt6, mtu);
1337 
1338 			/* ip6_ins_rt(nrt6) will bump the
1339 			 * rt6->rt6i_node->fn_sernum
1340 			 * which will fail the next rt6_check() and
1341 			 * invalidate the sk->sk_dst_cache.
1342 			 */
1343 			ip6_ins_rt(nrt6);
1344 		}
1345 	}
1346 }
1347 
1348 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1349 			       struct sk_buff *skb, u32 mtu)
1350 {
1351 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1352 }
1353 
1354 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1355 		     int oif, u32 mark)
1356 {
1357 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1358 	struct dst_entry *dst;
1359 	struct flowi6 fl6;
1360 
1361 	memset(&fl6, 0, sizeof(fl6));
1362 	fl6.flowi6_oif = oif;
1363 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1364 	fl6.daddr = iph->daddr;
1365 	fl6.saddr = iph->saddr;
1366 	fl6.flowlabel = ip6_flowinfo(iph);
1367 
1368 	dst = ip6_route_output(net, NULL, &fl6);
1369 	if (!dst->error)
1370 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1371 	dst_release(dst);
1372 }
1373 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1374 
1375 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1376 {
1377 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1378 			sk->sk_bound_dev_if, sk->sk_mark);
1379 }
1380 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1381 
1382 /* Handle redirects */
1383 struct ip6rd_flowi {
1384 	struct flowi6 fl6;
1385 	struct in6_addr gateway;
1386 };
1387 
1388 static struct rt6_info *__ip6_route_redirect(struct net *net,
1389 					     struct fib6_table *table,
1390 					     struct flowi6 *fl6,
1391 					     int flags)
1392 {
1393 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1394 	struct rt6_info *rt;
1395 	struct fib6_node *fn;
1396 
1397 	/* Get the "current" route for this destination and
1398 	 * check if the redirect has come from approriate router.
1399 	 *
1400 	 * RFC 4861 specifies that redirects should only be
1401 	 * accepted if they come from the nexthop to the target.
1402 	 * Due to the way the routes are chosen, this notion
1403 	 * is a bit fuzzy and one might need to check all possible
1404 	 * routes.
1405 	 */
1406 
1407 	read_lock_bh(&table->tb6_lock);
1408 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1409 restart:
1410 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1411 		if (rt6_check_expired(rt))
1412 			continue;
1413 		if (rt->dst.error)
1414 			break;
1415 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1416 			continue;
1417 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1418 			continue;
1419 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1420 			continue;
1421 		break;
1422 	}
1423 
1424 	if (!rt)
1425 		rt = net->ipv6.ip6_null_entry;
1426 	else if (rt->dst.error) {
1427 		rt = net->ipv6.ip6_null_entry;
1428 		goto out;
1429 	}
1430 
1431 	if (rt == net->ipv6.ip6_null_entry) {
1432 		fn = fib6_backtrack(fn, &fl6->saddr);
1433 		if (fn)
1434 			goto restart;
1435 	}
1436 
1437 out:
1438 	dst_hold(&rt->dst);
1439 
1440 	read_unlock_bh(&table->tb6_lock);
1441 
1442 	return rt;
1443 };
1444 
1445 static struct dst_entry *ip6_route_redirect(struct net *net,
1446 					const struct flowi6 *fl6,
1447 					const struct in6_addr *gateway)
1448 {
1449 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1450 	struct ip6rd_flowi rdfl;
1451 
1452 	rdfl.fl6 = *fl6;
1453 	rdfl.gateway = *gateway;
1454 
1455 	return fib6_rule_lookup(net, &rdfl.fl6,
1456 				flags, __ip6_route_redirect);
1457 }
1458 
1459 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1460 {
1461 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1462 	struct dst_entry *dst;
1463 	struct flowi6 fl6;
1464 
1465 	memset(&fl6, 0, sizeof(fl6));
1466 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1467 	fl6.flowi6_oif = oif;
1468 	fl6.flowi6_mark = mark;
1469 	fl6.daddr = iph->daddr;
1470 	fl6.saddr = iph->saddr;
1471 	fl6.flowlabel = ip6_flowinfo(iph);
1472 
1473 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1474 	rt6_do_redirect(dst, NULL, skb);
1475 	dst_release(dst);
1476 }
1477 EXPORT_SYMBOL_GPL(ip6_redirect);
1478 
1479 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1480 			    u32 mark)
1481 {
1482 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1483 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1484 	struct dst_entry *dst;
1485 	struct flowi6 fl6;
1486 
1487 	memset(&fl6, 0, sizeof(fl6));
1488 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1489 	fl6.flowi6_oif = oif;
1490 	fl6.flowi6_mark = mark;
1491 	fl6.daddr = msg->dest;
1492 	fl6.saddr = iph->daddr;
1493 
1494 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1495 	rt6_do_redirect(dst, NULL, skb);
1496 	dst_release(dst);
1497 }
1498 
1499 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1500 {
1501 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1502 }
1503 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1504 
1505 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1506 {
1507 	struct net_device *dev = dst->dev;
1508 	unsigned int mtu = dst_mtu(dst);
1509 	struct net *net = dev_net(dev);
1510 
1511 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1512 
1513 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1514 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1515 
1516 	/*
1517 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1518 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1519 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1520 	 * rely only on pmtu discovery"
1521 	 */
1522 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1523 		mtu = IPV6_MAXPLEN;
1524 	return mtu;
1525 }
1526 
1527 static unsigned int ip6_mtu(const struct dst_entry *dst)
1528 {
1529 	const struct rt6_info *rt = (const struct rt6_info *)dst;
1530 	unsigned int mtu = rt->rt6i_pmtu;
1531 	struct inet6_dev *idev;
1532 
1533 	if (mtu)
1534 		goto out;
1535 
1536 	mtu = dst_metric_raw(dst, RTAX_MTU);
1537 	if (mtu)
1538 		goto out;
1539 
1540 	mtu = IPV6_MIN_MTU;
1541 
1542 	rcu_read_lock();
1543 	idev = __in6_dev_get(dst->dev);
1544 	if (idev)
1545 		mtu = idev->cnf.mtu6;
1546 	rcu_read_unlock();
1547 
1548 out:
1549 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1550 }
1551 
1552 static struct dst_entry *icmp6_dst_gc_list;
1553 static DEFINE_SPINLOCK(icmp6_dst_lock);
1554 
1555 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1556 				  struct flowi6 *fl6)
1557 {
1558 	struct dst_entry *dst;
1559 	struct rt6_info *rt;
1560 	struct inet6_dev *idev = in6_dev_get(dev);
1561 	struct net *net = dev_net(dev);
1562 
1563 	if (unlikely(!idev))
1564 		return ERR_PTR(-ENODEV);
1565 
1566 	rt = ip6_dst_alloc(net, dev, 0, NULL);
1567 	if (unlikely(!rt)) {
1568 		in6_dev_put(idev);
1569 		dst = ERR_PTR(-ENOMEM);
1570 		goto out;
1571 	}
1572 
1573 	rt->dst.flags |= DST_HOST;
1574 	rt->dst.output  = ip6_output;
1575 	atomic_set(&rt->dst.__refcnt, 1);
1576 	rt->rt6i_gateway  = fl6->daddr;
1577 	rt->rt6i_dst.addr = fl6->daddr;
1578 	rt->rt6i_dst.plen = 128;
1579 	rt->rt6i_idev     = idev;
1580 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1581 
1582 	spin_lock_bh(&icmp6_dst_lock);
1583 	rt->dst.next = icmp6_dst_gc_list;
1584 	icmp6_dst_gc_list = &rt->dst;
1585 	spin_unlock_bh(&icmp6_dst_lock);
1586 
1587 	fib6_force_start_gc(net);
1588 
1589 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1590 
1591 out:
1592 	return dst;
1593 }
1594 
1595 int icmp6_dst_gc(void)
1596 {
1597 	struct dst_entry *dst, **pprev;
1598 	int more = 0;
1599 
1600 	spin_lock_bh(&icmp6_dst_lock);
1601 	pprev = &icmp6_dst_gc_list;
1602 
1603 	while ((dst = *pprev) != NULL) {
1604 		if (!atomic_read(&dst->__refcnt)) {
1605 			*pprev = dst->next;
1606 			dst_free(dst);
1607 		} else {
1608 			pprev = &dst->next;
1609 			++more;
1610 		}
1611 	}
1612 
1613 	spin_unlock_bh(&icmp6_dst_lock);
1614 
1615 	return more;
1616 }
1617 
1618 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1619 			    void *arg)
1620 {
1621 	struct dst_entry *dst, **pprev;
1622 
1623 	spin_lock_bh(&icmp6_dst_lock);
1624 	pprev = &icmp6_dst_gc_list;
1625 	while ((dst = *pprev) != NULL) {
1626 		struct rt6_info *rt = (struct rt6_info *) dst;
1627 		if (func(rt, arg)) {
1628 			*pprev = dst->next;
1629 			dst_free(dst);
1630 		} else {
1631 			pprev = &dst->next;
1632 		}
1633 	}
1634 	spin_unlock_bh(&icmp6_dst_lock);
1635 }
1636 
1637 static int ip6_dst_gc(struct dst_ops *ops)
1638 {
1639 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1640 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1641 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1642 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1643 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1644 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1645 	int entries;
1646 
1647 	entries = dst_entries_get_fast(ops);
1648 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1649 	    entries <= rt_max_size)
1650 		goto out;
1651 
1652 	net->ipv6.ip6_rt_gc_expire++;
1653 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1654 	entries = dst_entries_get_slow(ops);
1655 	if (entries < ops->gc_thresh)
1656 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1657 out:
1658 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1659 	return entries > rt_max_size;
1660 }
1661 
1662 static int ip6_convert_metrics(struct mx6_config *mxc,
1663 			       const struct fib6_config *cfg)
1664 {
1665 	struct nlattr *nla;
1666 	int remaining;
1667 	u32 *mp;
1668 
1669 	if (!cfg->fc_mx)
1670 		return 0;
1671 
1672 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1673 	if (unlikely(!mp))
1674 		return -ENOMEM;
1675 
1676 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1677 		int type = nla_type(nla);
1678 
1679 		if (type) {
1680 			u32 val;
1681 
1682 			if (unlikely(type > RTAX_MAX))
1683 				goto err;
1684 			if (type == RTAX_CC_ALGO) {
1685 				char tmp[TCP_CA_NAME_MAX];
1686 
1687 				nla_strlcpy(tmp, nla, sizeof(tmp));
1688 				val = tcp_ca_get_key_by_name(tmp);
1689 				if (val == TCP_CA_UNSPEC)
1690 					goto err;
1691 			} else {
1692 				val = nla_get_u32(nla);
1693 			}
1694 
1695 			mp[type - 1] = val;
1696 			__set_bit(type - 1, mxc->mx_valid);
1697 		}
1698 	}
1699 
1700 	mxc->mx = mp;
1701 
1702 	return 0;
1703  err:
1704 	kfree(mp);
1705 	return -EINVAL;
1706 }
1707 
1708 int ip6_route_add(struct fib6_config *cfg)
1709 {
1710 	int err;
1711 	struct net *net = cfg->fc_nlinfo.nl_net;
1712 	struct rt6_info *rt = NULL;
1713 	struct net_device *dev = NULL;
1714 	struct inet6_dev *idev = NULL;
1715 	struct fib6_table *table;
1716 	struct mx6_config mxc = { .mx = NULL, };
1717 	int addr_type;
1718 
1719 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1720 		return -EINVAL;
1721 #ifndef CONFIG_IPV6_SUBTREES
1722 	if (cfg->fc_src_len)
1723 		return -EINVAL;
1724 #endif
1725 	if (cfg->fc_ifindex) {
1726 		err = -ENODEV;
1727 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1728 		if (!dev)
1729 			goto out;
1730 		idev = in6_dev_get(dev);
1731 		if (!idev)
1732 			goto out;
1733 	}
1734 
1735 	if (cfg->fc_metric == 0)
1736 		cfg->fc_metric = IP6_RT_PRIO_USER;
1737 
1738 	err = -ENOBUFS;
1739 	if (cfg->fc_nlinfo.nlh &&
1740 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1741 		table = fib6_get_table(net, cfg->fc_table);
1742 		if (!table) {
1743 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1744 			table = fib6_new_table(net, cfg->fc_table);
1745 		}
1746 	} else {
1747 		table = fib6_new_table(net, cfg->fc_table);
1748 	}
1749 
1750 	if (!table)
1751 		goto out;
1752 
1753 	rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1754 
1755 	if (!rt) {
1756 		err = -ENOMEM;
1757 		goto out;
1758 	}
1759 
1760 	if (cfg->fc_flags & RTF_EXPIRES)
1761 		rt6_set_expires(rt, jiffies +
1762 				clock_t_to_jiffies(cfg->fc_expires));
1763 	else
1764 		rt6_clean_expires(rt);
1765 
1766 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1767 		cfg->fc_protocol = RTPROT_BOOT;
1768 	rt->rt6i_protocol = cfg->fc_protocol;
1769 
1770 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1771 
1772 	if (addr_type & IPV6_ADDR_MULTICAST)
1773 		rt->dst.input = ip6_mc_input;
1774 	else if (cfg->fc_flags & RTF_LOCAL)
1775 		rt->dst.input = ip6_input;
1776 	else
1777 		rt->dst.input = ip6_forward;
1778 
1779 	rt->dst.output = ip6_output;
1780 
1781 	if (cfg->fc_encap) {
1782 		struct lwtunnel_state *lwtstate;
1783 
1784 		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1785 					   cfg->fc_encap, &lwtstate);
1786 		if (err)
1787 			goto out;
1788 		rt->dst.lwtstate = lwtstate_get(lwtstate);
1789 		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1790 			rt->dst.lwtstate->orig_output = rt->dst.output;
1791 			rt->dst.output = lwtunnel_output;
1792 		}
1793 		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1794 			rt->dst.lwtstate->orig_input = rt->dst.input;
1795 			rt->dst.input = lwtunnel_input;
1796 		}
1797 	}
1798 
1799 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1800 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1801 	if (rt->rt6i_dst.plen == 128)
1802 		rt->dst.flags |= DST_HOST;
1803 
1804 #ifdef CONFIG_IPV6_SUBTREES
1805 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1806 	rt->rt6i_src.plen = cfg->fc_src_len;
1807 #endif
1808 
1809 	rt->rt6i_metric = cfg->fc_metric;
1810 
1811 	/* We cannot add true routes via loopback here,
1812 	   they would result in kernel looping; promote them to reject routes
1813 	 */
1814 	if ((cfg->fc_flags & RTF_REJECT) ||
1815 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1816 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1817 	     !(cfg->fc_flags & RTF_LOCAL))) {
1818 		/* hold loopback dev/idev if we haven't done so. */
1819 		if (dev != net->loopback_dev) {
1820 			if (dev) {
1821 				dev_put(dev);
1822 				in6_dev_put(idev);
1823 			}
1824 			dev = net->loopback_dev;
1825 			dev_hold(dev);
1826 			idev = in6_dev_get(dev);
1827 			if (!idev) {
1828 				err = -ENODEV;
1829 				goto out;
1830 			}
1831 		}
1832 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1833 		switch (cfg->fc_type) {
1834 		case RTN_BLACKHOLE:
1835 			rt->dst.error = -EINVAL;
1836 			rt->dst.output = dst_discard_sk;
1837 			rt->dst.input = dst_discard;
1838 			break;
1839 		case RTN_PROHIBIT:
1840 			rt->dst.error = -EACCES;
1841 			rt->dst.output = ip6_pkt_prohibit_out;
1842 			rt->dst.input = ip6_pkt_prohibit;
1843 			break;
1844 		case RTN_THROW:
1845 		default:
1846 			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1847 					: -ENETUNREACH;
1848 			rt->dst.output = ip6_pkt_discard_out;
1849 			rt->dst.input = ip6_pkt_discard;
1850 			break;
1851 		}
1852 		goto install_route;
1853 	}
1854 
1855 	if (cfg->fc_flags & RTF_GATEWAY) {
1856 		const struct in6_addr *gw_addr;
1857 		int gwa_type;
1858 
1859 		gw_addr = &cfg->fc_gateway;
1860 		gwa_type = ipv6_addr_type(gw_addr);
1861 
1862 		/* if gw_addr is local we will fail to detect this in case
1863 		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1864 		 * will return already-added prefix route via interface that
1865 		 * prefix route was assigned to, which might be non-loopback.
1866 		 */
1867 		err = -EINVAL;
1868 		if (ipv6_chk_addr_and_flags(net, gw_addr,
1869 					    gwa_type & IPV6_ADDR_LINKLOCAL ?
1870 					    dev : NULL, 0, 0))
1871 			goto out;
1872 
1873 		rt->rt6i_gateway = *gw_addr;
1874 
1875 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1876 			struct rt6_info *grt;
1877 
1878 			/* IPv6 strictly inhibits using not link-local
1879 			   addresses as nexthop address.
1880 			   Otherwise, router will not able to send redirects.
1881 			   It is very good, but in some (rare!) circumstances
1882 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1883 			   some exceptions. --ANK
1884 			 */
1885 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1886 				goto out;
1887 
1888 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1889 
1890 			err = -EHOSTUNREACH;
1891 			if (!grt)
1892 				goto out;
1893 			if (dev) {
1894 				if (dev != grt->dst.dev) {
1895 					ip6_rt_put(grt);
1896 					goto out;
1897 				}
1898 			} else {
1899 				dev = grt->dst.dev;
1900 				idev = grt->rt6i_idev;
1901 				dev_hold(dev);
1902 				in6_dev_hold(grt->rt6i_idev);
1903 			}
1904 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1905 				err = 0;
1906 			ip6_rt_put(grt);
1907 
1908 			if (err)
1909 				goto out;
1910 		}
1911 		err = -EINVAL;
1912 		if (!dev || (dev->flags & IFF_LOOPBACK))
1913 			goto out;
1914 	}
1915 
1916 	err = -ENODEV;
1917 	if (!dev)
1918 		goto out;
1919 
1920 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1921 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1922 			err = -EINVAL;
1923 			goto out;
1924 		}
1925 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1926 		rt->rt6i_prefsrc.plen = 128;
1927 	} else
1928 		rt->rt6i_prefsrc.plen = 0;
1929 
1930 	rt->rt6i_flags = cfg->fc_flags;
1931 
1932 install_route:
1933 	rt->dst.dev = dev;
1934 	rt->rt6i_idev = idev;
1935 	rt->rt6i_table = table;
1936 
1937 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1938 
1939 	err = ip6_convert_metrics(&mxc, cfg);
1940 	if (err)
1941 		goto out;
1942 
1943 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1944 
1945 	kfree(mxc.mx);
1946 	return err;
1947 out:
1948 	if (dev)
1949 		dev_put(dev);
1950 	if (idev)
1951 		in6_dev_put(idev);
1952 	if (rt)
1953 		dst_free(&rt->dst);
1954 	return err;
1955 }
1956 
1957 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1958 {
1959 	int err;
1960 	struct fib6_table *table;
1961 	struct net *net = dev_net(rt->dst.dev);
1962 
1963 	if (rt == net->ipv6.ip6_null_entry) {
1964 		err = -ENOENT;
1965 		goto out;
1966 	}
1967 
1968 	table = rt->rt6i_table;
1969 	write_lock_bh(&table->tb6_lock);
1970 	err = fib6_del(rt, info);
1971 	write_unlock_bh(&table->tb6_lock);
1972 
1973 out:
1974 	ip6_rt_put(rt);
1975 	return err;
1976 }
1977 
1978 int ip6_del_rt(struct rt6_info *rt)
1979 {
1980 	struct nl_info info = {
1981 		.nl_net = dev_net(rt->dst.dev),
1982 	};
1983 	return __ip6_del_rt(rt, &info);
1984 }
1985 
1986 static int ip6_route_del(struct fib6_config *cfg)
1987 {
1988 	struct fib6_table *table;
1989 	struct fib6_node *fn;
1990 	struct rt6_info *rt;
1991 	int err = -ESRCH;
1992 
1993 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1994 	if (!table)
1995 		return err;
1996 
1997 	read_lock_bh(&table->tb6_lock);
1998 
1999 	fn = fib6_locate(&table->tb6_root,
2000 			 &cfg->fc_dst, cfg->fc_dst_len,
2001 			 &cfg->fc_src, cfg->fc_src_len);
2002 
2003 	if (fn) {
2004 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2005 			if ((rt->rt6i_flags & RTF_CACHE) &&
2006 			    !(cfg->fc_flags & RTF_CACHE))
2007 				continue;
2008 			if (cfg->fc_ifindex &&
2009 			    (!rt->dst.dev ||
2010 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
2011 				continue;
2012 			if (cfg->fc_flags & RTF_GATEWAY &&
2013 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2014 				continue;
2015 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2016 				continue;
2017 			dst_hold(&rt->dst);
2018 			read_unlock_bh(&table->tb6_lock);
2019 
2020 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2021 		}
2022 	}
2023 	read_unlock_bh(&table->tb6_lock);
2024 
2025 	return err;
2026 }
2027 
2028 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2029 {
2030 	struct net *net = dev_net(skb->dev);
2031 	struct netevent_redirect netevent;
2032 	struct rt6_info *rt, *nrt = NULL;
2033 	struct ndisc_options ndopts;
2034 	struct inet6_dev *in6_dev;
2035 	struct neighbour *neigh;
2036 	struct rd_msg *msg;
2037 	int optlen, on_link;
2038 	u8 *lladdr;
2039 
2040 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2041 	optlen -= sizeof(*msg);
2042 
2043 	if (optlen < 0) {
2044 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2045 		return;
2046 	}
2047 
2048 	msg = (struct rd_msg *)icmp6_hdr(skb);
2049 
2050 	if (ipv6_addr_is_multicast(&msg->dest)) {
2051 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2052 		return;
2053 	}
2054 
2055 	on_link = 0;
2056 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2057 		on_link = 1;
2058 	} else if (ipv6_addr_type(&msg->target) !=
2059 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2060 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2061 		return;
2062 	}
2063 
2064 	in6_dev = __in6_dev_get(skb->dev);
2065 	if (!in6_dev)
2066 		return;
2067 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2068 		return;
2069 
2070 	/* RFC2461 8.1:
2071 	 *	The IP source address of the Redirect MUST be the same as the current
2072 	 *	first-hop router for the specified ICMP Destination Address.
2073 	 */
2074 
2075 	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
2076 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2077 		return;
2078 	}
2079 
2080 	lladdr = NULL;
2081 	if (ndopts.nd_opts_tgt_lladdr) {
2082 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2083 					     skb->dev);
2084 		if (!lladdr) {
2085 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2086 			return;
2087 		}
2088 	}
2089 
2090 	rt = (struct rt6_info *) dst;
2091 	if (rt == net->ipv6.ip6_null_entry) {
2092 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2093 		return;
2094 	}
2095 
2096 	/* Redirect received -> path was valid.
2097 	 * Look, redirects are sent only in response to data packets,
2098 	 * so that this nexthop apparently is reachable. --ANK
2099 	 */
2100 	dst_confirm(&rt->dst);
2101 
2102 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2103 	if (!neigh)
2104 		return;
2105 
2106 	/*
2107 	 *	We have finally decided to accept it.
2108 	 */
2109 
2110 	neigh_update(neigh, lladdr, NUD_STALE,
2111 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
2112 		     NEIGH_UPDATE_F_OVERRIDE|
2113 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2114 				     NEIGH_UPDATE_F_ISROUTER))
2115 		     );
2116 
2117 	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2118 	if (!nrt)
2119 		goto out;
2120 
2121 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2122 	if (on_link)
2123 		nrt->rt6i_flags &= ~RTF_GATEWAY;
2124 
2125 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2126 
2127 	if (ip6_ins_rt(nrt))
2128 		goto out;
2129 
2130 	netevent.old = &rt->dst;
2131 	netevent.new = &nrt->dst;
2132 	netevent.daddr = &msg->dest;
2133 	netevent.neigh = neigh;
2134 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2135 
2136 	if (rt->rt6i_flags & RTF_CACHE) {
2137 		rt = (struct rt6_info *) dst_clone(&rt->dst);
2138 		ip6_del_rt(rt);
2139 	}
2140 
2141 out:
2142 	neigh_release(neigh);
2143 }
2144 
2145 /*
2146  *	Misc support functions
2147  */
2148 
2149 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2150 {
2151 	BUG_ON(from->dst.from);
2152 
2153 	rt->rt6i_flags &= ~RTF_EXPIRES;
2154 	dst_hold(&from->dst);
2155 	rt->dst.from = &from->dst;
2156 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2157 }
2158 
2159 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2160 {
2161 	rt->dst.input = ort->dst.input;
2162 	rt->dst.output = ort->dst.output;
2163 	rt->rt6i_dst = ort->rt6i_dst;
2164 	rt->dst.error = ort->dst.error;
2165 	rt->rt6i_idev = ort->rt6i_idev;
2166 	if (rt->rt6i_idev)
2167 		in6_dev_hold(rt->rt6i_idev);
2168 	rt->dst.lastuse = jiffies;
2169 	rt->rt6i_gateway = ort->rt6i_gateway;
2170 	rt->rt6i_flags = ort->rt6i_flags;
2171 	rt6_set_from(rt, ort);
2172 	rt->rt6i_metric = ort->rt6i_metric;
2173 #ifdef CONFIG_IPV6_SUBTREES
2174 	rt->rt6i_src = ort->rt6i_src;
2175 #endif
2176 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2177 	rt->rt6i_table = ort->rt6i_table;
2178 	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2179 }
2180 
2181 #ifdef CONFIG_IPV6_ROUTE_INFO
2182 static struct rt6_info *rt6_get_route_info(struct net *net,
2183 					   const struct in6_addr *prefix, int prefixlen,
2184 					   const struct in6_addr *gwaddr, int ifindex)
2185 {
2186 	struct fib6_node *fn;
2187 	struct rt6_info *rt = NULL;
2188 	struct fib6_table *table;
2189 
2190 	table = fib6_get_table(net, RT6_TABLE_INFO);
2191 	if (!table)
2192 		return NULL;
2193 
2194 	read_lock_bh(&table->tb6_lock);
2195 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2196 	if (!fn)
2197 		goto out;
2198 
2199 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2200 		if (rt->dst.dev->ifindex != ifindex)
2201 			continue;
2202 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2203 			continue;
2204 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2205 			continue;
2206 		dst_hold(&rt->dst);
2207 		break;
2208 	}
2209 out:
2210 	read_unlock_bh(&table->tb6_lock);
2211 	return rt;
2212 }
2213 
2214 static struct rt6_info *rt6_add_route_info(struct net *net,
2215 					   const struct in6_addr *prefix, int prefixlen,
2216 					   const struct in6_addr *gwaddr, int ifindex,
2217 					   unsigned int pref)
2218 {
2219 	struct fib6_config cfg = {
2220 		.fc_table	= RT6_TABLE_INFO,
2221 		.fc_metric	= IP6_RT_PRIO_USER,
2222 		.fc_ifindex	= ifindex,
2223 		.fc_dst_len	= prefixlen,
2224 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2225 				  RTF_UP | RTF_PREF(pref),
2226 		.fc_nlinfo.portid = 0,
2227 		.fc_nlinfo.nlh = NULL,
2228 		.fc_nlinfo.nl_net = net,
2229 	};
2230 
2231 	cfg.fc_dst = *prefix;
2232 	cfg.fc_gateway = *gwaddr;
2233 
2234 	/* We should treat it as a default route if prefix length is 0. */
2235 	if (!prefixlen)
2236 		cfg.fc_flags |= RTF_DEFAULT;
2237 
2238 	ip6_route_add(&cfg);
2239 
2240 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2241 }
2242 #endif
2243 
2244 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2245 {
2246 	struct rt6_info *rt;
2247 	struct fib6_table *table;
2248 
2249 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2250 	if (!table)
2251 		return NULL;
2252 
2253 	read_lock_bh(&table->tb6_lock);
2254 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2255 		if (dev == rt->dst.dev &&
2256 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2257 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2258 			break;
2259 	}
2260 	if (rt)
2261 		dst_hold(&rt->dst);
2262 	read_unlock_bh(&table->tb6_lock);
2263 	return rt;
2264 }
2265 
2266 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2267 				     struct net_device *dev,
2268 				     unsigned int pref)
2269 {
2270 	struct fib6_config cfg = {
2271 		.fc_table	= RT6_TABLE_DFLT,
2272 		.fc_metric	= IP6_RT_PRIO_USER,
2273 		.fc_ifindex	= dev->ifindex,
2274 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2275 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2276 		.fc_nlinfo.portid = 0,
2277 		.fc_nlinfo.nlh = NULL,
2278 		.fc_nlinfo.nl_net = dev_net(dev),
2279 	};
2280 
2281 	cfg.fc_gateway = *gwaddr;
2282 
2283 	ip6_route_add(&cfg);
2284 
2285 	return rt6_get_dflt_router(gwaddr, dev);
2286 }
2287 
2288 void rt6_purge_dflt_routers(struct net *net)
2289 {
2290 	struct rt6_info *rt;
2291 	struct fib6_table *table;
2292 
2293 	/* NOTE: Keep consistent with rt6_get_dflt_router */
2294 	table = fib6_get_table(net, RT6_TABLE_DFLT);
2295 	if (!table)
2296 		return;
2297 
2298 restart:
2299 	read_lock_bh(&table->tb6_lock);
2300 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2301 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2302 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2303 			dst_hold(&rt->dst);
2304 			read_unlock_bh(&table->tb6_lock);
2305 			ip6_del_rt(rt);
2306 			goto restart;
2307 		}
2308 	}
2309 	read_unlock_bh(&table->tb6_lock);
2310 }
2311 
2312 static void rtmsg_to_fib6_config(struct net *net,
2313 				 struct in6_rtmsg *rtmsg,
2314 				 struct fib6_config *cfg)
2315 {
2316 	memset(cfg, 0, sizeof(*cfg));
2317 
2318 	cfg->fc_table = RT6_TABLE_MAIN;
2319 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2320 	cfg->fc_metric = rtmsg->rtmsg_metric;
2321 	cfg->fc_expires = rtmsg->rtmsg_info;
2322 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2323 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2324 	cfg->fc_flags = rtmsg->rtmsg_flags;
2325 
2326 	cfg->fc_nlinfo.nl_net = net;
2327 
2328 	cfg->fc_dst = rtmsg->rtmsg_dst;
2329 	cfg->fc_src = rtmsg->rtmsg_src;
2330 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2331 }
2332 
2333 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2334 {
2335 	struct fib6_config cfg;
2336 	struct in6_rtmsg rtmsg;
2337 	int err;
2338 
2339 	switch (cmd) {
2340 	case SIOCADDRT:		/* Add a route */
2341 	case SIOCDELRT:		/* Delete a route */
2342 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2343 			return -EPERM;
2344 		err = copy_from_user(&rtmsg, arg,
2345 				     sizeof(struct in6_rtmsg));
2346 		if (err)
2347 			return -EFAULT;
2348 
2349 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2350 
2351 		rtnl_lock();
2352 		switch (cmd) {
2353 		case SIOCADDRT:
2354 			err = ip6_route_add(&cfg);
2355 			break;
2356 		case SIOCDELRT:
2357 			err = ip6_route_del(&cfg);
2358 			break;
2359 		default:
2360 			err = -EINVAL;
2361 		}
2362 		rtnl_unlock();
2363 
2364 		return err;
2365 	}
2366 
2367 	return -EINVAL;
2368 }
2369 
2370 /*
2371  *	Drop the packet on the floor
2372  */
2373 
2374 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2375 {
2376 	int type;
2377 	struct dst_entry *dst = skb_dst(skb);
2378 	switch (ipstats_mib_noroutes) {
2379 	case IPSTATS_MIB_INNOROUTES:
2380 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2381 		if (type == IPV6_ADDR_ANY) {
2382 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2383 				      IPSTATS_MIB_INADDRERRORS);
2384 			break;
2385 		}
2386 		/* FALLTHROUGH */
2387 	case IPSTATS_MIB_OUTNOROUTES:
2388 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2389 			      ipstats_mib_noroutes);
2390 		break;
2391 	}
2392 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2393 	kfree_skb(skb);
2394 	return 0;
2395 }
2396 
2397 static int ip6_pkt_discard(struct sk_buff *skb)
2398 {
2399 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2400 }
2401 
2402 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2403 {
2404 	skb->dev = skb_dst(skb)->dev;
2405 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2406 }
2407 
2408 static int ip6_pkt_prohibit(struct sk_buff *skb)
2409 {
2410 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2411 }
2412 
2413 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2414 {
2415 	skb->dev = skb_dst(skb)->dev;
2416 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2417 }
2418 
2419 /*
2420  *	Allocate a dst for local (unicast / anycast) address.
2421  */
2422 
2423 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2424 				    const struct in6_addr *addr,
2425 				    bool anycast)
2426 {
2427 	struct net *net = dev_net(idev->dev);
2428 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2429 					    DST_NOCOUNT, NULL);
2430 	if (!rt)
2431 		return ERR_PTR(-ENOMEM);
2432 
2433 	in6_dev_hold(idev);
2434 
2435 	rt->dst.flags |= DST_HOST;
2436 	rt->dst.input = ip6_input;
2437 	rt->dst.output = ip6_output;
2438 	rt->rt6i_idev = idev;
2439 
2440 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2441 	if (anycast)
2442 		rt->rt6i_flags |= RTF_ANYCAST;
2443 	else
2444 		rt->rt6i_flags |= RTF_LOCAL;
2445 
2446 	rt->rt6i_gateway  = *addr;
2447 	rt->rt6i_dst.addr = *addr;
2448 	rt->rt6i_dst.plen = 128;
2449 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2450 
2451 	atomic_set(&rt->dst.__refcnt, 1);
2452 
2453 	return rt;
2454 }
2455 
2456 int ip6_route_get_saddr(struct net *net,
2457 			struct rt6_info *rt,
2458 			const struct in6_addr *daddr,
2459 			unsigned int prefs,
2460 			struct in6_addr *saddr)
2461 {
2462 	struct inet6_dev *idev =
2463 		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2464 	int err = 0;
2465 	if (rt && rt->rt6i_prefsrc.plen)
2466 		*saddr = rt->rt6i_prefsrc.addr;
2467 	else
2468 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2469 					 daddr, prefs, saddr);
2470 	return err;
2471 }
2472 
/*
 * remove deleted ip from prefsrc entries
 *
 * Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all().
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to restrict to; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address compared with each route's prefsrc */
};
2479 
2480 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2481 {
2482 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2483 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2484 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2485 
2486 	if (((void *)rt->dst.dev == dev || !dev) &&
2487 	    rt != net->ipv6.ip6_null_entry &&
2488 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2489 		/* remove prefsrc entry */
2490 		rt->rt6i_prefsrc.plen = 0;
2491 	}
2492 	return 0;
2493 }
2494 
2495 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2496 {
2497 	struct net *net = dev_net(ifp->idev->dev);
2498 	struct arg_dev_net_ip adni = {
2499 		.dev = ifp->idev->dev,
2500 		.net = net,
2501 		.addr = &ifp->addr,
2502 	};
2503 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2504 }
2505 
2506 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2507 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2508 
2509 /* Remove routers and update dst entries when gateway turn into host. */
2510 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2511 {
2512 	struct in6_addr *gateway = (struct in6_addr *)arg;
2513 
2514 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2515 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2516 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2517 		return -1;
2518 	}
2519 	return 0;
2520 }
2521 
2522 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2523 {
2524 	fib6_clean_all(net, fib6_clean_tohost, gateway);
2525 }
2526 
/* Argument bundle handed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches any device */
	struct net *net;	/* namespace whose null entry is spared */
};
2531 
2532 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2533 {
2534 	const struct arg_dev_net *adn = arg;
2535 	const struct net_device *dev = adn->dev;
2536 
2537 	if ((rt->dst.dev == dev || !dev) &&
2538 	    rt != adn->net->ipv6.ip6_null_entry)
2539 		return -1;
2540 
2541 	return 0;
2542 }
2543 
2544 void rt6_ifdown(struct net *net, struct net_device *dev)
2545 {
2546 	struct arg_dev_net adn = {
2547 		.dev = dev,
2548 		.net = net,
2549 	};
2550 
2551 	fib6_clean_all(net, fib6_ifdown, &adn);
2552 	icmp6_clean_all(fib6_ifdown, &adn);
2553 	rt6_uncached_list_flush_dev(net, dev);
2554 }
2555 
/* Argument bundle handed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2560 
2561 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2562 {
2563 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2564 	struct inet6_dev *idev;
2565 
2566 	/* In IPv6 pmtu discovery is not optional,
2567 	   so that RTAX_MTU lock cannot disable it.
2568 	   We still use this lock to block changes
2569 	   caused by addrconf/ndisc.
2570 	*/
2571 
2572 	idev = __in6_dev_get(arg->dev);
2573 	if (!idev)
2574 		return 0;
2575 
2576 	/* For administrative MTU increase, there is no way to discover
2577 	   IPv6 PMTU increase, so PMTU increase should be updated here.
2578 	   Since RFC 1981 doesn't include administrative MTU increase
2579 	   update PMTU increase is a MUST. (i.e. jumbo frame)
2580 	 */
2581 	/*
2582 	   If new MTU is less than route PMTU, this new MTU will be the
2583 	   lowest MTU in the path, update the route PMTU to reflect PMTU
2584 	   decreases; if new MTU is greater than route PMTU, and the
2585 	   old MTU is the lowest MTU in the path, update the route PMTU
2586 	   to reflect the increase. In this case if the other nodes' MTU
2587 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2588 	   PMTU discouvery.
2589 	 */
2590 	if (rt->dst.dev == arg->dev &&
2591 	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2592 		if (rt->rt6i_flags & RTF_CACHE) {
2593 			/* For RTF_CACHE with rt6i_pmtu == 0
2594 			 * (i.e. a redirected route),
2595 			 * the metrics of its rt->dst.from has already
2596 			 * been updated.
2597 			 */
2598 			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2599 				rt->rt6i_pmtu = arg->mtu;
2600 		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
2601 			   (dst_mtu(&rt->dst) < arg->mtu &&
2602 			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2603 			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2604 		}
2605 	}
2606 	return 0;
2607 }
2608 
2609 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2610 {
2611 	struct rt6_mtu_change_arg arg = {
2612 		.dev = dev,
2613 		.mtu = mtu,
2614 	};
2615 
2616 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2617 }
2618 
2619 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2620 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2621 	[RTA_OIF]               = { .type = NLA_U32 },
2622 	[RTA_IIF]		= { .type = NLA_U32 },
2623 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2624 	[RTA_METRICS]           = { .type = NLA_NESTED },
2625 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2626 	[RTA_PREF]              = { .type = NLA_U8 },
2627 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
2628 	[RTA_ENCAP]		= { .type = NLA_NESTED },
2629 };
2630 
2631 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2632 			      struct fib6_config *cfg)
2633 {
2634 	struct rtmsg *rtm;
2635 	struct nlattr *tb[RTA_MAX+1];
2636 	unsigned int pref;
2637 	int err;
2638 
2639 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2640 	if (err < 0)
2641 		goto errout;
2642 
2643 	err = -EINVAL;
2644 	rtm = nlmsg_data(nlh);
2645 	memset(cfg, 0, sizeof(*cfg));
2646 
2647 	cfg->fc_table = rtm->rtm_table;
2648 	cfg->fc_dst_len = rtm->rtm_dst_len;
2649 	cfg->fc_src_len = rtm->rtm_src_len;
2650 	cfg->fc_flags = RTF_UP;
2651 	cfg->fc_protocol = rtm->rtm_protocol;
2652 	cfg->fc_type = rtm->rtm_type;
2653 
2654 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2655 	    rtm->rtm_type == RTN_BLACKHOLE ||
2656 	    rtm->rtm_type == RTN_PROHIBIT ||
2657 	    rtm->rtm_type == RTN_THROW)
2658 		cfg->fc_flags |= RTF_REJECT;
2659 
2660 	if (rtm->rtm_type == RTN_LOCAL)
2661 		cfg->fc_flags |= RTF_LOCAL;
2662 
2663 	if (rtm->rtm_flags & RTM_F_CLONED)
2664 		cfg->fc_flags |= RTF_CACHE;
2665 
2666 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2667 	cfg->fc_nlinfo.nlh = nlh;
2668 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2669 
2670 	if (tb[RTA_GATEWAY]) {
2671 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2672 		cfg->fc_flags |= RTF_GATEWAY;
2673 	}
2674 
2675 	if (tb[RTA_DST]) {
2676 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2677 
2678 		if (nla_len(tb[RTA_DST]) < plen)
2679 			goto errout;
2680 
2681 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2682 	}
2683 
2684 	if (tb[RTA_SRC]) {
2685 		int plen = (rtm->rtm_src_len + 7) >> 3;
2686 
2687 		if (nla_len(tb[RTA_SRC]) < plen)
2688 			goto errout;
2689 
2690 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2691 	}
2692 
2693 	if (tb[RTA_PREFSRC])
2694 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2695 
2696 	if (tb[RTA_OIF])
2697 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2698 
2699 	if (tb[RTA_PRIORITY])
2700 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2701 
2702 	if (tb[RTA_METRICS]) {
2703 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2704 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2705 	}
2706 
2707 	if (tb[RTA_TABLE])
2708 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2709 
2710 	if (tb[RTA_MULTIPATH]) {
2711 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2712 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2713 	}
2714 
2715 	if (tb[RTA_PREF]) {
2716 		pref = nla_get_u8(tb[RTA_PREF]);
2717 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
2718 		    pref != ICMPV6_ROUTER_PREF_HIGH)
2719 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
2720 		cfg->fc_flags |= RTF_PREF(pref);
2721 	}
2722 
2723 	if (tb[RTA_ENCAP])
2724 		cfg->fc_encap = tb[RTA_ENCAP];
2725 
2726 	if (tb[RTA_ENCAP_TYPE])
2727 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2728 
2729 	err = 0;
2730 errout:
2731 	return err;
2732 }
2733 
2734 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2735 {
2736 	struct fib6_config r_cfg;
2737 	struct rtnexthop *rtnh;
2738 	int remaining;
2739 	int attrlen;
2740 	int err = 0, last_err = 0;
2741 
2742 	remaining = cfg->fc_mp_len;
2743 beginning:
2744 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2745 
2746 	/* Parse a Multipath Entry */
2747 	while (rtnh_ok(rtnh, remaining)) {
2748 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2749 		if (rtnh->rtnh_ifindex)
2750 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2751 
2752 		attrlen = rtnh_attrlen(rtnh);
2753 		if (attrlen > 0) {
2754 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2755 
2756 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2757 			if (nla) {
2758 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
2759 				r_cfg.fc_flags |= RTF_GATEWAY;
2760 			}
2761 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2762 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2763 			if (nla)
2764 				r_cfg.fc_encap_type = nla_get_u16(nla);
2765 		}
2766 		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2767 		if (err) {
2768 			last_err = err;
2769 			/* If we are trying to remove a route, do not stop the
2770 			 * loop when ip6_route_del() fails (because next hop is
2771 			 * already gone), we should try to remove all next hops.
2772 			 */
2773 			if (add) {
2774 				/* If add fails, we should try to delete all
2775 				 * next hops that have been already added.
2776 				 */
2777 				add = 0;
2778 				remaining = cfg->fc_mp_len - remaining;
2779 				goto beginning;
2780 			}
2781 		}
2782 		/* Because each route is added like a single route we remove
2783 		 * these flags after the first nexthop: if there is a collision,
2784 		 * we have already failed to add the first nexthop:
2785 		 * fib6_add_rt2node() has rejected it; when replacing, old
2786 		 * nexthops have been replaced by first new, the rest should
2787 		 * be added to it.
2788 		 */
2789 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2790 						     NLM_F_REPLACE);
2791 		rtnh = rtnh_next(rtnh, &remaining);
2792 	}
2793 
2794 	return last_err;
2795 }
2796 
2797 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2798 {
2799 	struct fib6_config cfg;
2800 	int err;
2801 
2802 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2803 	if (err < 0)
2804 		return err;
2805 
2806 	if (cfg.fc_mp)
2807 		return ip6_route_multipath(&cfg, 0);
2808 	else
2809 		return ip6_route_del(&cfg);
2810 }
2811 
2812 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2813 {
2814 	struct fib6_config cfg;
2815 	int err;
2816 
2817 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2818 	if (err < 0)
2819 		return err;
2820 
2821 	if (cfg.fc_mp)
2822 		return ip6_route_multipath(&cfg, 1);
2823 	else
2824 		return ip6_route_add(&cfg);
2825 }
2826 
2827 static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
2828 {
2829 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2830 	       + nla_total_size(16) /* RTA_SRC */
2831 	       + nla_total_size(16) /* RTA_DST */
2832 	       + nla_total_size(16) /* RTA_GATEWAY */
2833 	       + nla_total_size(16) /* RTA_PREFSRC */
2834 	       + nla_total_size(4) /* RTA_TABLE */
2835 	       + nla_total_size(4) /* RTA_IIF */
2836 	       + nla_total_size(4) /* RTA_OIF */
2837 	       + nla_total_size(4) /* RTA_PRIORITY */
2838 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2839 	       + nla_total_size(sizeof(struct rta_cacheinfo))
2840 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
2841 	       + nla_total_size(1) /* RTA_PREF */
2842 	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
2843 }
2844 
2845 static int rt6_fill_node(struct net *net,
2846 			 struct sk_buff *skb, struct rt6_info *rt,
2847 			 struct in6_addr *dst, struct in6_addr *src,
2848 			 int iif, int type, u32 portid, u32 seq,
2849 			 int prefix, int nowait, unsigned int flags)
2850 {
2851 	u32 metrics[RTAX_MAX];
2852 	struct rtmsg *rtm;
2853 	struct nlmsghdr *nlh;
2854 	long expires;
2855 	u32 table;
2856 
2857 	if (prefix) {	/* user wants prefix routes only */
2858 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2859 			/* success since this is not a prefix route */
2860 			return 1;
2861 		}
2862 	}
2863 
2864 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2865 	if (!nlh)
2866 		return -EMSGSIZE;
2867 
2868 	rtm = nlmsg_data(nlh);
2869 	rtm->rtm_family = AF_INET6;
2870 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2871 	rtm->rtm_src_len = rt->rt6i_src.plen;
2872 	rtm->rtm_tos = 0;
2873 	if (rt->rt6i_table)
2874 		table = rt->rt6i_table->tb6_id;
2875 	else
2876 		table = RT6_TABLE_UNSPEC;
2877 	rtm->rtm_table = table;
2878 	if (nla_put_u32(skb, RTA_TABLE, table))
2879 		goto nla_put_failure;
2880 	if (rt->rt6i_flags & RTF_REJECT) {
2881 		switch (rt->dst.error) {
2882 		case -EINVAL:
2883 			rtm->rtm_type = RTN_BLACKHOLE;
2884 			break;
2885 		case -EACCES:
2886 			rtm->rtm_type = RTN_PROHIBIT;
2887 			break;
2888 		case -EAGAIN:
2889 			rtm->rtm_type = RTN_THROW;
2890 			break;
2891 		default:
2892 			rtm->rtm_type = RTN_UNREACHABLE;
2893 			break;
2894 		}
2895 	}
2896 	else if (rt->rt6i_flags & RTF_LOCAL)
2897 		rtm->rtm_type = RTN_LOCAL;
2898 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2899 		rtm->rtm_type = RTN_LOCAL;
2900 	else
2901 		rtm->rtm_type = RTN_UNICAST;
2902 	rtm->rtm_flags = 0;
2903 	if (!netif_carrier_ok(rt->dst.dev)) {
2904 		rtm->rtm_flags |= RTNH_F_LINKDOWN;
2905 		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
2906 			rtm->rtm_flags |= RTNH_F_DEAD;
2907 	}
2908 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2909 	rtm->rtm_protocol = rt->rt6i_protocol;
2910 	if (rt->rt6i_flags & RTF_DYNAMIC)
2911 		rtm->rtm_protocol = RTPROT_REDIRECT;
2912 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
2913 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2914 			rtm->rtm_protocol = RTPROT_RA;
2915 		else
2916 			rtm->rtm_protocol = RTPROT_KERNEL;
2917 	}
2918 
2919 	if (rt->rt6i_flags & RTF_CACHE)
2920 		rtm->rtm_flags |= RTM_F_CLONED;
2921 
2922 	if (dst) {
2923 		if (nla_put_in6_addr(skb, RTA_DST, dst))
2924 			goto nla_put_failure;
2925 		rtm->rtm_dst_len = 128;
2926 	} else if (rtm->rtm_dst_len)
2927 		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
2928 			goto nla_put_failure;
2929 #ifdef CONFIG_IPV6_SUBTREES
2930 	if (src) {
2931 		if (nla_put_in6_addr(skb, RTA_SRC, src))
2932 			goto nla_put_failure;
2933 		rtm->rtm_src_len = 128;
2934 	} else if (rtm->rtm_src_len &&
2935 		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
2936 		goto nla_put_failure;
2937 #endif
2938 	if (iif) {
2939 #ifdef CONFIG_IPV6_MROUTE
2940 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2941 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2942 			if (err <= 0) {
2943 				if (!nowait) {
2944 					if (err == 0)
2945 						return 0;
2946 					goto nla_put_failure;
2947 				} else {
2948 					if (err == -EMSGSIZE)
2949 						goto nla_put_failure;
2950 				}
2951 			}
2952 		} else
2953 #endif
2954 			if (nla_put_u32(skb, RTA_IIF, iif))
2955 				goto nla_put_failure;
2956 	} else if (dst) {
2957 		struct in6_addr saddr_buf;
2958 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2959 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2960 			goto nla_put_failure;
2961 	}
2962 
2963 	if (rt->rt6i_prefsrc.plen) {
2964 		struct in6_addr saddr_buf;
2965 		saddr_buf = rt->rt6i_prefsrc.addr;
2966 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2967 			goto nla_put_failure;
2968 	}
2969 
2970 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
2971 	if (rt->rt6i_pmtu)
2972 		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
2973 	if (rtnetlink_put_metrics(skb, metrics) < 0)
2974 		goto nla_put_failure;
2975 
2976 	if (rt->rt6i_flags & RTF_GATEWAY) {
2977 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
2978 			goto nla_put_failure;
2979 	}
2980 
2981 	if (rt->dst.dev &&
2982 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2983 		goto nla_put_failure;
2984 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2985 		goto nla_put_failure;
2986 
2987 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2988 
2989 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2990 		goto nla_put_failure;
2991 
2992 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
2993 		goto nla_put_failure;
2994 
2995 	lwtunnel_fill_encap(skb, rt->dst.lwtstate);
2996 
2997 	nlmsg_end(skb, nlh);
2998 	return 0;
2999 
3000 nla_put_failure:
3001 	nlmsg_cancel(skb, nlh);
3002 	return -EMSGSIZE;
3003 }
3004 
3005 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3006 {
3007 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3008 	int prefix;
3009 
3010 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3011 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3012 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3013 	} else
3014 		prefix = 0;
3015 
3016 	return rt6_fill_node(arg->net,
3017 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3018 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3019 		     prefix, 0, NLM_F_MULTI);
3020 }
3021 
3022 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3023 {
3024 	struct net *net = sock_net(in_skb->sk);
3025 	struct nlattr *tb[RTA_MAX+1];
3026 	struct rt6_info *rt;
3027 	struct sk_buff *skb;
3028 	struct rtmsg *rtm;
3029 	struct flowi6 fl6;
3030 	int err, iif = 0, oif = 0;
3031 
3032 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3033 	if (err < 0)
3034 		goto errout;
3035 
3036 	err = -EINVAL;
3037 	memset(&fl6, 0, sizeof(fl6));
3038 
3039 	if (tb[RTA_SRC]) {
3040 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3041 			goto errout;
3042 
3043 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3044 	}
3045 
3046 	if (tb[RTA_DST]) {
3047 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3048 			goto errout;
3049 
3050 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3051 	}
3052 
3053 	if (tb[RTA_IIF])
3054 		iif = nla_get_u32(tb[RTA_IIF]);
3055 
3056 	if (tb[RTA_OIF])
3057 		oif = nla_get_u32(tb[RTA_OIF]);
3058 
3059 	if (tb[RTA_MARK])
3060 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3061 
3062 	if (iif) {
3063 		struct net_device *dev;
3064 		int flags = 0;
3065 
3066 		dev = __dev_get_by_index(net, iif);
3067 		if (!dev) {
3068 			err = -ENODEV;
3069 			goto errout;
3070 		}
3071 
3072 		fl6.flowi6_iif = iif;
3073 
3074 		if (!ipv6_addr_any(&fl6.saddr))
3075 			flags |= RT6_LOOKUP_F_HAS_SADDR;
3076 
3077 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3078 							       flags);
3079 	} else {
3080 		fl6.flowi6_oif = oif;
3081 
3082 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3083 	}
3084 
3085 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3086 	if (!skb) {
3087 		ip6_rt_put(rt);
3088 		err = -ENOBUFS;
3089 		goto errout;
3090 	}
3091 
3092 	/* Reserve room for dummy headers, this skb can pass
3093 	   through good chunk of routing engine.
3094 	 */
3095 	skb_reset_mac_header(skb);
3096 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3097 
3098 	skb_dst_set(skb, &rt->dst);
3099 
3100 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3101 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3102 			    nlh->nlmsg_seq, 0, 0, 0);
3103 	if (err < 0) {
3104 		kfree_skb(skb);
3105 		goto errout;
3106 	}
3107 
3108 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3109 errout:
3110 	return err;
3111 }
3112 
3113 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
3114 {
3115 	struct sk_buff *skb;
3116 	struct net *net = info->nl_net;
3117 	u32 seq;
3118 	int err;
3119 
3120 	err = -ENOBUFS;
3121 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3122 
3123 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3124 	if (!skb)
3125 		goto errout;
3126 
3127 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3128 				event, info->portid, seq, 0, 0, 0);
3129 	if (err < 0) {
3130 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3131 		WARN_ON(err == -EMSGSIZE);
3132 		kfree_skb(skb);
3133 		goto errout;
3134 	}
3135 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3136 		    info->nlh, gfp_any());
3137 	return;
3138 errout:
3139 	if (err < 0)
3140 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3141 }
3142 
3143 static int ip6_route_dev_notify(struct notifier_block *this,
3144 				unsigned long event, void *ptr)
3145 {
3146 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3147 	struct net *net = dev_net(dev);
3148 
3149 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3150 		net->ipv6.ip6_null_entry->dst.dev = dev;
3151 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3152 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3153 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3154 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3155 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3156 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3157 #endif
3158 	}
3159 
3160 	return NOTIFY_OK;
3161 }
3162 
3163 /*
3164  *	/proc
3165  */
3166 
3167 #ifdef CONFIG_PROC_FS
3168 
3169 static const struct file_operations ipv6_route_proc_fops = {
3170 	.owner		= THIS_MODULE,
3171 	.open		= ipv6_route_open,
3172 	.read		= seq_read,
3173 	.llseek		= seq_lseek,
3174 	.release	= seq_release_net,
3175 };
3176 
3177 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3178 {
3179 	struct net *net = (struct net *)seq->private;
3180 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3181 		   net->ipv6.rt6_stats->fib_nodes,
3182 		   net->ipv6.rt6_stats->fib_route_nodes,
3183 		   net->ipv6.rt6_stats->fib_rt_alloc,
3184 		   net->ipv6.rt6_stats->fib_rt_entries,
3185 		   net->ipv6.rt6_stats->fib_rt_cache,
3186 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3187 		   net->ipv6.rt6_stats->fib_discarded_routes);
3188 
3189 	return 0;
3190 }
3191 
3192 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3193 {
3194 	return single_open_net(inode, file, rt6_stats_seq_show);
3195 }
3196 
3197 static const struct file_operations rt6_stats_seq_fops = {
3198 	.owner	 = THIS_MODULE,
3199 	.open	 = rt6_stats_seq_open,
3200 	.read	 = seq_read,
3201 	.llseek	 = seq_lseek,
3202 	.release = single_release_net,
3203 };
3204 #endif	/* CONFIG_PROC_FS */
3205 
3206 #ifdef CONFIG_SYSCTL
3207 
3208 static
3209 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3210 			      void __user *buffer, size_t *lenp, loff_t *ppos)
3211 {
3212 	struct net *net;
3213 	int delay;
3214 	if (!write)
3215 		return -EINVAL;
3216 
3217 	net = (struct net *)ctl->extra1;
3218 	delay = net->ipv6.sysctl.flush_delay;
3219 	proc_dointvec(ctl, write, buffer, lenp, ppos);
3220 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3221 	return 0;
3222 }
3223 
3224 struct ctl_table ipv6_route_table_template[] = {
3225 	{
3226 		.procname	=	"flush",
3227 		.data		=	&init_net.ipv6.sysctl.flush_delay,
3228 		.maxlen		=	sizeof(int),
3229 		.mode		=	0200,
3230 		.proc_handler	=	ipv6_sysctl_rtcache_flush
3231 	},
3232 	{
3233 		.procname	=	"gc_thresh",
3234 		.data		=	&ip6_dst_ops_template.gc_thresh,
3235 		.maxlen		=	sizeof(int),
3236 		.mode		=	0644,
3237 		.proc_handler	=	proc_dointvec,
3238 	},
3239 	{
3240 		.procname	=	"max_size",
3241 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
3242 		.maxlen		=	sizeof(int),
3243 		.mode		=	0644,
3244 		.proc_handler	=	proc_dointvec,
3245 	},
3246 	{
3247 		.procname	=	"gc_min_interval",
3248 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3249 		.maxlen		=	sizeof(int),
3250 		.mode		=	0644,
3251 		.proc_handler	=	proc_dointvec_jiffies,
3252 	},
3253 	{
3254 		.procname	=	"gc_timeout",
3255 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3256 		.maxlen		=	sizeof(int),
3257 		.mode		=	0644,
3258 		.proc_handler	=	proc_dointvec_jiffies,
3259 	},
3260 	{
3261 		.procname	=	"gc_interval",
3262 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
3263 		.maxlen		=	sizeof(int),
3264 		.mode		=	0644,
3265 		.proc_handler	=	proc_dointvec_jiffies,
3266 	},
3267 	{
3268 		.procname	=	"gc_elasticity",
3269 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3270 		.maxlen		=	sizeof(int),
3271 		.mode		=	0644,
3272 		.proc_handler	=	proc_dointvec,
3273 	},
3274 	{
3275 		.procname	=	"mtu_expires",
3276 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3277 		.maxlen		=	sizeof(int),
3278 		.mode		=	0644,
3279 		.proc_handler	=	proc_dointvec_jiffies,
3280 	},
3281 	{
3282 		.procname	=	"min_adv_mss",
3283 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
3284 		.maxlen		=	sizeof(int),
3285 		.mode		=	0644,
3286 		.proc_handler	=	proc_dointvec,
3287 	},
3288 	{
3289 		.procname	=	"gc_min_interval_ms",
3290 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3291 		.maxlen		=	sizeof(int),
3292 		.mode		=	0644,
3293 		.proc_handler	=	proc_dointvec_ms_jiffies,
3294 	},
3295 	{ }
3296 };
3297 
3298 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3299 {
3300 	struct ctl_table *table;
3301 
3302 	table = kmemdup(ipv6_route_table_template,
3303 			sizeof(ipv6_route_table_template),
3304 			GFP_KERNEL);
3305 
3306 	if (table) {
3307 		table[0].data = &net->ipv6.sysctl.flush_delay;
3308 		table[0].extra1 = net;
3309 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3310 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3311 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3312 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3313 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3314 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3315 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3316 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3317 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3318 
3319 		/* Don't export sysctls to unprivileged users */
3320 		if (net->user_ns != &init_user_ns)
3321 			table[0].procname = NULL;
3322 	}
3323 
3324 	return table;
3325 }
3326 #endif
3327 
3328 static int __net_init ip6_route_net_init(struct net *net)
3329 {
3330 	int ret = -ENOMEM;
3331 
3332 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3333 	       sizeof(net->ipv6.ip6_dst_ops));
3334 
3335 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3336 		goto out_ip6_dst_ops;
3337 
3338 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3339 					   sizeof(*net->ipv6.ip6_null_entry),
3340 					   GFP_KERNEL);
3341 	if (!net->ipv6.ip6_null_entry)
3342 		goto out_ip6_dst_entries;
3343 	net->ipv6.ip6_null_entry->dst.path =
3344 		(struct dst_entry *)net->ipv6.ip6_null_entry;
3345 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3346 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3347 			 ip6_template_metrics, true);
3348 
3349 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3350 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3351 					       sizeof(*net->ipv6.ip6_prohibit_entry),
3352 					       GFP_KERNEL);
3353 	if (!net->ipv6.ip6_prohibit_entry)
3354 		goto out_ip6_null_entry;
3355 	net->ipv6.ip6_prohibit_entry->dst.path =
3356 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3357 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3358 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3359 			 ip6_template_metrics, true);
3360 
3361 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3362 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3363 					       GFP_KERNEL);
3364 	if (!net->ipv6.ip6_blk_hole_entry)
3365 		goto out_ip6_prohibit_entry;
3366 	net->ipv6.ip6_blk_hole_entry->dst.path =
3367 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3368 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3369 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3370 			 ip6_template_metrics, true);
3371 #endif
3372 
3373 	net->ipv6.sysctl.flush_delay = 0;
3374 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3375 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3376 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3377 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3378 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3379 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3380 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3381 
3382 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3383 
3384 	ret = 0;
3385 out:
3386 	return ret;
3387 
3388 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3389 out_ip6_prohibit_entry:
3390 	kfree(net->ipv6.ip6_prohibit_entry);
3391 out_ip6_null_entry:
3392 	kfree(net->ipv6.ip6_null_entry);
3393 #endif
3394 out_ip6_dst_entries:
3395 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3396 out_ip6_dst_ops:
3397 	goto out;
3398 }
3399 
3400 static void __net_exit ip6_route_net_exit(struct net *net)
3401 {
3402 	kfree(net->ipv6.ip6_null_entry);
3403 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3404 	kfree(net->ipv6.ip6_prohibit_entry);
3405 	kfree(net->ipv6.ip6_blk_hole_entry);
3406 #endif
3407 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3408 }
3409 
3410 static int __net_init ip6_route_net_init_late(struct net *net)
3411 {
3412 #ifdef CONFIG_PROC_FS
3413 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3414 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3415 #endif
3416 	return 0;
3417 }
3418 
3419 static void __net_exit ip6_route_net_exit_late(struct net *net)
3420 {
3421 #ifdef CONFIG_PROC_FS
3422 	remove_proc_entry("ipv6_route", net->proc_net);
3423 	remove_proc_entry("rt6_stats", net->proc_net);
3424 #endif
3425 }
3426 
3427 static struct pernet_operations ip6_route_net_ops = {
3428 	.init = ip6_route_net_init,
3429 	.exit = ip6_route_net_exit,
3430 };
3431 
3432 static int __net_init ipv6_inetpeer_init(struct net *net)
3433 {
3434 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3435 
3436 	if (!bp)
3437 		return -ENOMEM;
3438 	inet_peer_base_init(bp);
3439 	net->ipv6.peers = bp;
3440 	return 0;
3441 }
3442 
3443 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3444 {
3445 	struct inet_peer_base *bp = net->ipv6.peers;
3446 
3447 	net->ipv6.peers = NULL;
3448 	inetpeer_invalidate_tree(bp);
3449 	kfree(bp);
3450 }
3451 
3452 static struct pernet_operations ipv6_inetpeer_ops = {
3453 	.init	=	ipv6_inetpeer_init,
3454 	.exit	=	ipv6_inetpeer_exit,
3455 };
3456 
3457 static struct pernet_operations ip6_route_net_late_ops = {
3458 	.init = ip6_route_net_init_late,
3459 	.exit = ip6_route_net_exit_late,
3460 };
3461 
3462 static struct notifier_block ip6_route_dev_notifier = {
3463 	.notifier_call = ip6_route_dev_notify,
3464 	.priority = 0,
3465 };
3466 
3467 int __init ip6_route_init(void)
3468 {
3469 	int ret;
3470 	int cpu;
3471 
3472 	ret = -ENOMEM;
3473 	ip6_dst_ops_template.kmem_cachep =
3474 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3475 				  SLAB_HWCACHE_ALIGN, NULL);
3476 	if (!ip6_dst_ops_template.kmem_cachep)
3477 		goto out;
3478 
3479 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3480 	if (ret)
3481 		goto out_kmem_cache;
3482 
3483 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3484 	if (ret)
3485 		goto out_dst_entries;
3486 
3487 	ret = register_pernet_subsys(&ip6_route_net_ops);
3488 	if (ret)
3489 		goto out_register_inetpeer;
3490 
3491 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3492 
3493 	/* Registering of the loopback is done before this portion of code,
3494 	 * the loopback reference in rt6_info will not be taken, do it
3495 	 * manually for init_net */
3496 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3497 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3498   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3499 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3500 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3501 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3502 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3503   #endif
3504 	ret = fib6_init();
3505 	if (ret)
3506 		goto out_register_subsys;
3507 
3508 	ret = xfrm6_init();
3509 	if (ret)
3510 		goto out_fib6_init;
3511 
3512 	ret = fib6_rules_init();
3513 	if (ret)
3514 		goto xfrm6_init;
3515 
3516 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3517 	if (ret)
3518 		goto fib6_rules_init;
3519 
3520 	ret = -ENOBUFS;
3521 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3522 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3523 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3524 		goto out_register_late_subsys;
3525 
3526 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3527 	if (ret)
3528 		goto out_register_late_subsys;
3529 
3530 	for_each_possible_cpu(cpu) {
3531 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3532 
3533 		INIT_LIST_HEAD(&ul->head);
3534 		spin_lock_init(&ul->lock);
3535 	}
3536 
3537 out:
3538 	return ret;
3539 
3540 out_register_late_subsys:
3541 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3542 fib6_rules_init:
3543 	fib6_rules_cleanup();
3544 xfrm6_init:
3545 	xfrm6_fini();
3546 out_fib6_init:
3547 	fib6_gc_cleanup();
3548 out_register_subsys:
3549 	unregister_pernet_subsys(&ip6_route_net_ops);
3550 out_register_inetpeer:
3551 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3552 out_dst_entries:
3553 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3554 out_kmem_cache:
3555 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3556 	goto out;
3557 }
3558 
3559 void ip6_route_cleanup(void)
3560 {
3561 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3562 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3563 	fib6_rules_cleanup();
3564 	xfrm6_fini();
3565 	fib6_gc_cleanup();
3566 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3567 	unregister_pernet_subsys(&ip6_route_net_ops);
3568 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3569 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3570 }
3571