xref: /openbmc/linux/net/ipv6/route.c (revision 904af04d30f303d96902584206457128c3051d8d)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 
65 #include <asm/uaccess.h>
66 
67 #ifdef CONFIG_SYSCTL
68 #include <linux/sysctl.h>
69 #endif
70 
/* Result codes for next-hop reachability checks (see rt6_check_neigh()).
 * Negative values are failures of decreasing severity; router selection
 * compares against these to decide whether to skip, probe or round-robin.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* route unusable, skip it entirely */
	RT6_NUD_FAIL_PROBE = -2,	/* neighbour in NUD_FAILED; worth probing */
	RT6_NUD_FAIL_DO_RR = -1,	/* no neighbour entry; rotate round-robin */
	RT6_NUD_SUCCEED = 1		/* neighbour (probably) reachable */
};
77 
/* Forward declarations for dst_ops callbacks and helpers defined later
 * in this file.
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
static int		ip6_pkt_prohibit(struct sk_buff *skb);
static int		ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);

#ifdef CONFIG_IPV6_ROUTE_INFO
/* RFC 4191 Route Information Option support: add/look up RTF_ROUTEINFO
 * routes learned from Router Advertisements.
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex);
#endif
109 
/* Per-cpu list of uncached (DST_NOCACHE) routes, kept so that routes
 * referencing a disappearing device can be walked and re-pointed at the
 * loopback device (see rt6_uncached_list_flush_dev()).
 */
struct uncached_list {
	spinlock_t		lock;	/* protects head */
	struct list_head	head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
116 
/* Put @rt on this cpu's uncached list and mark it DST_NOCACHE so the
 * dst layer knows it is not owned by the fib6 tree.
 */
static void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	rt->dst.flags |= DST_NOCACHE;
	rt->rt6i_uncached_list = ul;	/* remembered for later removal */

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}
128 
/* Unlink @rt from whichever per-cpu uncached list it was added to, if
 * any (the list head test makes this safe for never-listed routes).
 */
static void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		struct uncached_list *ul = rt->rt6i_uncached_list;

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		spin_unlock_bh(&ul->lock);
	}
}
139 
/* Device @dev is going away (or @dev == NULL meaning "all devices"):
 * repoint every uncached route that references it at the namespace's
 * loopback device, transferring the inet6_dev and net_device refcounts,
 * so the route remains safe to use until its last reference drops.
 */
static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			/* move the inet6_dev reference over to loopback */
			if (rt_idev && (rt_idev->dev == dev || !dev) &&
			    rt_idev->dev != loopback_dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			/* move the net_device reference over to loopback */
			if (rt_dev && (rt_dev == dev || !dev) &&
			    rt_dev != loopback_dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}
170 
/* A per-cpu (RTF_PCPU) clone shares metrics with the route it was
 * copied from (dst.from); write through to that parent's metrics.
 */
static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
	return dst_metrics_write_ptr(rt->dst.from);
}
175 
/* dst_ops->cow_metrics hook.  PCPU clones redirect writes to their
 * parent route; RTF_CACHE clones return NULL (no copy-on-write for
 * cached clones); everything else takes the generic COW path.
 */
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (rt->rt6i_flags & RTF_PCPU)
		return rt6_pcpu_cow_metrics(rt);
	else if (rt->rt6i_flags & RTF_CACHE)
		return NULL;
	else
		return dst_cow_metrics_generic(dst, old);
}
187 
188 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
189 					     struct sk_buff *skb,
190 					     const void *daddr)
191 {
192 	struct in6_addr *p = &rt->rt6i_gateway;
193 
194 	if (!ipv6_addr_any(p))
195 		return (const void *) p;
196 	else if (skb)
197 		return &ipv6_hdr(skb)->daddr;
198 	return daddr;
199 }
200 
/* dst_ops->neigh_lookup hook: find — or create in the ndisc table —
 * the neighbour entry for the next hop of @dst, choosing the lookup
 * address via choose_neigh_daddr().
 */
static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
					  struct sk_buff *skb,
					  const void *daddr)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct neighbour *n;

	daddr = choose_neigh_daddr(rt, skb, daddr);
	n = __ipv6_neigh_lookup(dst->dev, daddr);
	if (n)
		return n;
	return neigh_create(&nd_tbl, daddr, dst->dev);
}
214 
/* Template for the per-namespace IPv6 dst_ops; copied into
 * net->ipv6.ip6_dst_ops at namespace init.
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};
232 
233 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
234 {
235 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
236 
237 	return mtu ? : dst->dev->mtu;
238 }
239 
/* Blackhole dsts never adjust path MTU — intentionally a no-op. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}
244 
/* Blackhole dsts ignore ICMPv6 redirects — intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
249 
/* Blackhole dsts never copy-on-write their metrics. */
static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
					 unsigned long old)
{
	return NULL;
}
255 
/* dst_ops for blackhole copies produced by ip6_blackhole_route():
 * PMTU/redirect/COW are inert, lookup/MTU behave normally.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
	.neigh_lookup		=	ip6_neigh_lookup,
};
267 
/* Metrics shared by the reject-route templates below; a hop limit of 0
 * here presumably means "unset, fall back to the device default" —
 * NOTE(review): confirm against ip6_default_advmss()/hop-limit users.
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
271 
/* Template for the per-namespace "null" route: returned when no route
 * matches; rejects traffic with -ENETUNREACH.  Permanently referenced
 * (refcnt 1) and never eligible for replacement (metric ~0).
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
286 
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* "prohibit" reject route: traffic is refused with -EACCES (and an
 * ICMPv6 administratively-prohibited error from ip6_pkt_prohibit*).
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/* "blackhole" reject route: traffic is silently discarded, callers see
 * -EINVAL.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_sk,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
320 
/* Allocate a rt6_info via the namespace's ip6_dst_ops.  The rt6-specific
 * tail of the structure is zeroed and its list heads initialised; the
 * per-cpu clone array is NOT set up here (see ip6_dst_alloc()).
 */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
					struct net_device *dev,
					int flags,
					struct fib6_table *table)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					0, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt) {
		struct dst_entry *dst = &rt->dst;

		/* zero everything after the embedded dst_entry */
		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
		INIT_LIST_HEAD(&rt->rt6i_siblings);
		INIT_LIST_HEAD(&rt->rt6i_uncached);
	}
	return rt;
}
339 
/* Allocate a rt6_info plus its per-cpu clone pointer array.  Returns
 * NULL (and frees the partially-built route) when the per-cpu
 * allocation fails.
 */
static struct rt6_info *ip6_dst_alloc(struct net *net,
				      struct net_device *dev,
				      int flags,
				      struct fib6_table *table)
{
	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags, table);

	if (rt) {
		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
		if (rt->rt6i_pcpu) {
			int cpu;

			for_each_possible_cpu(cpu) {
				struct rt6_info **p;

				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
				/* no one shares rt */
				*p =  NULL;
			}
		} else {
			dst_destroy((struct dst_entry *)rt);
			return NULL;
		}
	}

	return rt;
}
367 
/* dst_ops->destroy hook: release everything a rt6_info owns — metrics,
 * the per-cpu clone array, its uncached-list membership, the inet6_dev
 * reference and the reference on the route it was cloned from.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	/* drop the reference on the parent route (clone chains) */
	dst->from = NULL;
	dst_release(from);
}
387 
/* dst_ops->ifdown hook: when @dev goes down, migrate this route's
 * inet6_dev reference to the namespace loopback device so the route
 * stays valid until released.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev) {
		if (idev && idev->dev == dev) {
			struct inet6_dev *loopback_idev =
				in6_dev_get(loopback_dev);
			if (loopback_idev) {
				rt->rt6i_idev = loopback_idev;
				in6_dev_put(idev);
			}
		}
	}
}
407 
408 static bool rt6_check_expired(const struct rt6_info *rt)
409 {
410 	if (rt->rt6i_flags & RTF_EXPIRES) {
411 		if (time_after(jiffies, rt->dst.expires))
412 			return true;
413 	} else if (rt->dst.from) {
414 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
415 	}
416 	return false;
417 }
418 
/* Multipath route selection:
 *   Hash based function using packet header and flowlabel.
 * Adapted from fib_info_hashfn()
 *
 * Returns an index in [0, candidate_count) choosing among the
 * equal-cost next hops for this flow.
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	unsigned int val = fl6->flowi6_proto;

	val ^= ipv6_addr_hash(&fl6->daddr);
	val ^= ipv6_addr_hash(&fl6->saddr);

	/* Ports/ICMP fields are only meaningful if this is not encapsulated */
	switch (fl6->flowi6_proto) {
	case IPPROTO_UDP:
	case IPPROTO_TCP:
	case IPPROTO_SCTP:
		val ^= (__force u16)fl6->fl6_sport;
		val ^= (__force u16)fl6->fl6_dport;
		break;

	case IPPROTO_ICMPV6:
		val ^= (__force u16)fl6->fl6_icmp_type;
		val ^= (__force u16)fl6->fl6_icmp_code;
		break;
	}
	/* RFC 6438 recommends using the flow label for ECMP hashing */
	val ^= (__force u32)fl6->flowlabel;

	/* Final mixing; the shift amounts are historical and untuned */
	val = val ^ (val >> 7) ^ (val >> 12);
	return val % candidate_count;
}
452 
453 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
454 					     struct flowi6 *fl6, int oif,
455 					     int strict)
456 {
457 	struct rt6_info *sibling, *next_sibling;
458 	int route_choosen;
459 
460 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
461 	/* Don't change the route, if route_choosen == 0
462 	 * (siblings does not include ourself)
463 	 */
464 	if (route_choosen)
465 		list_for_each_entry_safe(sibling, next_sibling,
466 				&match->rt6i_siblings, rt6i_siblings) {
467 			route_choosen--;
468 			if (route_choosen == 0) {
469 				if (rt6_score_route(sibling, oif, strict) < 0)
470 					break;
471 				match = sibling;
472 				break;
473 			}
474 		}
475 	return match;
476 }
477 
/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

/* Walk the leaf chain from @rt and return the first route whose device
 * matches @oif (or, with no @oif, whose device owns @saddr).  A
 * loopback route is remembered as a fallback for the requested
 * interface.  Returns @rt unchanged when nothing constrains the match,
 * or ip6_null_entry when a strict (RT6_LOOKUP_F_IFACE) match fails.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* nothing to match against */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE && oif)
						continue;
					/* prefer a loopback route already tied
					 * to the requested interface
					 */
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
528 
#ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred-probe context: the neighbour solicitation for @target is
 * sent from process context because rt6_probe() runs in contexts that
 * must not sleep.  Holds a net_device reference until the work runs.
 */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};

/* Workqueue handler: send the NS to the solicited-node multicast
 * address of the target, then drop the device reference and free.
 */
static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL);
	dev_put(work->dev);
	kfree(work);
}

/* Schedule a Router Reachability Probe (RFC 4191 terminology) for the
 * gateway of @rt when its neighbour entry is absent or not NUD_VALID.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		/* rate limit: only probe when the neighbour has not been
		 * updated within rtr_probe_interval
		 */
		work = NULL;
		write_lock(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		/* no neighbour entry yet: always worth probing */
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);	/* released by rt6_probe_deferred() */
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
599 
600 /*
601  * Default Router Selection (RFC 2461 6.3.6)
602  */
603 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
604 {
605 	struct net_device *dev = rt->dst.dev;
606 	if (!oif || dev->ifindex == oif)
607 		return 2;
608 	if ((dev->flags & IFF_LOOPBACK) &&
609 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
610 		return 1;
611 	return 0;
612 }
613 
/* Classify next-hop reachability for router selection.  Routes without
 * a gateway (or flagged RTF_NONEXTHOP) trivially succeed; otherwise the
 * state of the gateway's neighbour cache entry decides.
 */
static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;	/* unresolved but not failed */
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		/* no entry: with router-pref we accept and probe later,
		 * otherwise ask the caller to round-robin
		 */
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
644 
/* Score @rt for router selection: device match (rt6_check_dev()) in the
 * low bits, RFC 4191 route preference shifted above them.  Returns a
 * negative rt6_nud_state when the route must be rejected or deferred.
 */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}
663 
/* Compare @rt against the best candidate so far.  *mpri holds the best
 * score seen; *do_rr is set when the winning route had no neighbour
 * entry, telling rt6_select() to rotate round-robin.  Returns the
 * (possibly unchanged) best match.
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *dev = rt->dst.dev;

	/* NOTE(review): idev is dereferenced without a NULL check;
	 * presumably every fib6 route has rt6i_idev set — confirm.
	 */
	if (dev && !netif_carrier_ok(dev) &&
	    idev->cnf.ignore_routes_with_linkdown)
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}
700 
/* Scan the node's leaf chain for the best route at @metric, starting at
 * the round-robin pointer @rr_head and wrapping around via fn->leaf.
 * Routes at a different metric terminate each scan; if nothing at
 * @metric matched, fall back to scanning from the first such route.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	/* first pass: rr_head to the end of the same-metric run */
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	/* second pass: wrap around from the head of the leaf chain */
	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	/* nothing at @metric matched: consider the worse-metric routes */
	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}
737 
/* Select the best route at fib6 node @fn, maintaining the node's
 * round-robin pointer (fn->rr_ptr) so equally-good routes are rotated.
 * Falls back to ip6_null_entry when nothing matches.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
	bool do_rr = false;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
765 
766 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
767 {
768 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
769 }
770 
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Process an RFC 4191 Route Information Option received in a Router
 * Advertisement on @dev from router @gwaddr: validate it, then add,
 * update, refresh or delete the corresponding RTF_ROUTEINFO route.
 * Returns 0 on success or -EINVAL on a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length: the option length (in
	 * units of 8 octets) must be large enough to carry the prefix.
	 */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	/* prefix_len 0 means this describes a default router */
	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex);

	/* zero lifetime withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif
846 
/* Walk back up the fib trie from @fn until a node carrying route info
 * (RTN_RTINFO) is found, descending into a parent's source-address
 * subtree when one exists.  Returns NULL once the tree root is reached.
 */
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = fn->parent;
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}
863 
/* Simple (non-caching) policy lookup in one table: find the matching
 * fib6 node, filter by device/source, optionally spread over ECMP
 * siblings, and backtrack on a null match.  Returns the route with its
 * use count bumped; never NULL (falls back to ip6_null_entry).
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}
888 
/* Public entry point for a policy-routed simple lookup (no caching). */
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				    int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
895 
/* Convenience wrapper around the simple lookup: build the flow from
 * @daddr/@saddr/@oif and return the route, or NULL when the lookup
 * resolved to an error route.  The returned route is referenced; the
 * caller must release it with ip6_rt_put().
 */
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
{
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}
EXPORT_SYMBOL(rt6_lookup);
920 
/* ip6_ins_rt is called with FREE table->tb6_lock.
 * It takes a new route entry; if the addition fails for any reason the
 * route is freed.  In any case, if the caller does not hold a
 * reference, the route may be destroyed.
 */

/* Insert @rt into its fib6 table under the table write lock. */
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
			struct mx6_config *mxc)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, mxc);
	write_unlock_bh(&table->tb6_lock);

	return err;
}
940 
/* Insert @rt with default netlink info and no explicit metrics. */
int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
	struct mx6_config mxc = { .mx = NULL, };

	return __ip6_ins_rt(rt, &info, &mxc);
}
948 
/* Create an RTF_CACHE host-route (plen 128) clone of @ort for
 * @daddr/@saddr.  If @ort is itself a clone, clone from its parent
 * instead so clone chains stay one level deep.
 */
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;

	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev,
			     0, ort->rt6i_table);

	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		/* daddr equal to a non-host prefix address is the
		 * subnet-router anycast address
		 */
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}
989 
/* Allocate an RTF_PCPU per-cpu clone of @rt (metrics and expiry are
 * shared with the parent via dst.from, set up in ip6_rt_copy_init()).
 */
static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt;

	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
				  rt->dst.dev, rt->dst.flags,
				  rt->rt6i_table);

	if (!pcpu_rt)
		return NULL;
	ip6_rt_copy_init(pcpu_rt, rt);
	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
	pcpu_rt->rt6i_flags |= RTF_PCPU;
	return pcpu_rt;
}
1005 
/* It should be called with read_lock_bh(&tb6_lock) acquired */

/* Return (creating on demand) this cpu's clone of @rt, with a hold
 * taken for the caller.  cmpxchg resolves the race where another
 * context installed a clone first; allocation failure falls back to
 * ip6_null_entry.
 */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, *prev, **p;

	p = this_cpu_ptr(rt->rt6i_pcpu);
	pcpu_rt = *p;

	if (pcpu_rt)
		goto done;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		pcpu_rt = net->ipv6.ip6_null_entry;
		goto done;
	}

	prev = cmpxchg(p, NULL, pcpu_rt);
	if (prev) {
		/* If someone did it before us, return prev instead */
		dst_destroy(&pcpu_rt->dst);
		pcpu_rt = prev;
	}

done:
	dst_hold(&pcpu_rt->dst);
	rt6_dst_from_metrics_check(pcpu_rt);
	return pcpu_rt;
}
1037 
/* Full policy route lookup in one table.  Selects the best route
 * (retrying without the reachability requirement if needed), then
 * returns either the route itself, an uncached RTF_CACHE clone (for
 * FLOWI_FLAG_KNOWN_NH without a gateway), or this cpu's per-cpu clone.
 * The returned dst is always referenced.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	/* hosts (non-forwarding) prefer reachable routers */
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}


	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt)
			rt6_uncached_list_add(uncached_rt);
		else
			uncached_rt = net->ipv6.ip6_null_entry;

		dst_hold(&uncached_rt->dst);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);
		read_unlock_bh(&table->tb6_lock);

		return pcpu_rt;
	}
}
1114 
/* Input-path policy lookup: constrain by the incoming interface. */
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}
1120 
/* Input-path lookup entry: force a strict interface match for
 * scope-sensitive destinations (link-local/multicast), except on
 * PIM register devices.
 */
static struct dst_entry *ip6_route_input_lookup(struct net *net,
						struct net_device *dev,
						struct flowi6 *fl6, int flags)
{
	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}
1130 
/* Route an incoming packet: build the flow key from the IPv6 header
 * (plus any RX tunnel id) and attach the resulting dst to the skb.
 */
void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip_tunnel_info *tun_info;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	tun_info = skb_tunnel_info(skb);
	if (tun_info && tun_info->mode == IP_TUNNEL_INFO_RX)
		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
	skb_dst_drop(skb);
	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
1152 
1153 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1154 					     struct flowi6 *fl6, int flags)
1155 {
1156 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1157 }
1158 
1159 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1160 				    struct flowi6 *fl6)
1161 {
1162 	int flags = 0;
1163 
1164 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1165 
1166 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1167 		flags |= RT6_LOOKUP_F_IFACE;
1168 
1169 	if (!ipv6_addr_any(&fl6->saddr))
1170 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1171 	else if (sk)
1172 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1173 
1174 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1175 }
1176 EXPORT_SYMBOL(ip6_route_output);
1177 
/* Clone @dst_orig into a blackhole dst (used e.g. by xfrm when a route
 * must be held but not used for transmission).  The clone discards all
 * packets but carries the original's metrics, idev, gateway and keys so
 * it still answers dst queries sensibly.
 *
 * Consumes a reference on @dst_orig.  Returns the new dst, or
 * ERR_PTR(-ENOMEM) on allocation failure.
 */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		new = &rt->dst;

		/* Zero the rt6_info part beyond the embedded dst_entry
		 * before selectively copying fields from the original.
		 */
		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));

		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_sk;

		if (dst_metrics_read_only(&ort->dst))
			new->_metrics = ort->dst._metrics;
		else
			dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		/* NOTE(review): dst_free() here appears to start lazy
		 * destruction while the caller's reference keeps the
		 * entry alive until released — confirm against dst GC
		 * semantics before changing.
		 */
		dst_free(new);
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
1216 
1217 /*
1218  *	Destination cache support functions
1219  */
1220 
1221 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1222 {
1223 	if (rt->dst.from &&
1224 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1225 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1226 }
1227 
1228 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1229 {
1230 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1231 		return NULL;
1232 
1233 	if (rt6_check_expired(rt))
1234 		return NULL;
1235 
1236 	return &rt->dst;
1237 }
1238 
1239 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1240 {
1241 	if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1242 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1243 		return &rt->dst;
1244 	else
1245 		return NULL;
1246 }
1247 
1248 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1249 {
1250 	struct rt6_info *rt;
1251 
1252 	rt = (struct rt6_info *) dst;
1253 
1254 	/* All IPV6 dsts are created with ->obsolete set to the value
1255 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1256 	 * into this function always.
1257 	 */
1258 
1259 	rt6_dst_from_metrics_check(rt);
1260 
1261 	if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
1262 		return rt6_dst_from_check(rt, cookie);
1263 	else
1264 		return rt6_check(rt, cookie);
1265 }
1266 
1267 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1268 {
1269 	struct rt6_info *rt = (struct rt6_info *) dst;
1270 
1271 	if (rt) {
1272 		if (rt->rt6i_flags & RTF_CACHE) {
1273 			if (rt6_check_expired(rt)) {
1274 				ip6_del_rt(rt);
1275 				dst = NULL;
1276 			}
1277 		} else {
1278 			dst_release(dst);
1279 			dst = NULL;
1280 		}
1281 	}
1282 	return dst;
1283 }
1284 
/* dst_ops->link_failure callback: the neighbour layer gave up on this
 * route's next hop.  Report unreachability to the sender, then remove a
 * cached clone (or invalidate the fib node of a default route so
 * cached lookups are re-resolved).
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* Hold across ip6_del_rt(); free ourselves if the
			 * delete failed (entry was already gone from the tree).
			 */
			dst_hold(&rt->dst);
			if (ip6_del_rt(rt))
				dst_free(&rt->dst);
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
			/* Bump-proof invalidation: -1 never matches a cookie. */
			rt->rt6i_node->fn_sernum = -1;
		}
	}
}
1302 
1303 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1304 {
1305 	struct net *net = dev_net(rt->dst.dev);
1306 
1307 	rt->rt6i_flags |= RTF_MODIFIED;
1308 	rt->rt6i_pmtu = mtu;
1309 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1310 }
1311 
/* Core PMTU update.  Applies @mtu (clamped to IPV6_MIN_MTU) to the
 * route behind @dst when it shrinks the path MTU.  Cached clones are
 * updated in place; otherwise a new RTF_CACHE clone is created for the
 * destination taken from @iph or, failing that, from @sk, and inserted
 * into the tree.  Local routes are ignored.
 */
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	if (rt6->rt6i_flags & RTF_LOCAL)
		return;

	dst_confirm(dst);
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	/* Only a smaller MTU is worth recording. */
	if (mtu >= dst_mtu(dst))
		return;

	if (rt6->rt6i_flags & RTF_CACHE) {
		rt6_do_update_pmtu(rt6, mtu);
	} else {
		const struct in6_addr *daddr, *saddr;
		struct rt6_info *nrt6;

		if (iph) {
			daddr = &iph->daddr;
			saddr = &iph->saddr;
		} else if (sk) {
			daddr = &sk->sk_v6_daddr;
			saddr = &inet6_sk(sk)->saddr;
		} else {
			/* No way to identify the flow; nothing to clone. */
			return;
		}
		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);

			/* ip6_ins_rt(nrt6) will bump the
			 * rt6->rt6i_node->fn_sernum
			 * which will fail the next rt6_check() and
			 * invalidate the sk->sk_dst_cache.
			 */
			ip6_ins_rt(nrt6);
		}
	}
}
1353 
1354 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1355 			       struct sk_buff *skb, u32 mtu)
1356 {
1357 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1358 }
1359 
1360 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1361 		     int oif, u32 mark)
1362 {
1363 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1364 	struct dst_entry *dst;
1365 	struct flowi6 fl6;
1366 
1367 	memset(&fl6, 0, sizeof(fl6));
1368 	fl6.flowi6_oif = oif;
1369 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1370 	fl6.daddr = iph->daddr;
1371 	fl6.saddr = iph->saddr;
1372 	fl6.flowlabel = ip6_flowinfo(iph);
1373 
1374 	dst = ip6_route_output(net, NULL, &fl6);
1375 	if (!dst->error)
1376 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1377 	dst_release(dst);
1378 }
1379 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1380 
1381 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1382 {
1383 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1384 			sk->sk_bound_dev_if, sk->sk_mark);
1385 }
1386 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1387 
/* Redirect handling.
 *
 * Flow key for redirect processing: the ordinary flowi6 is extended
 * with the redirecting router's address so __ip6_route_redirect() can
 * verify the redirect came from the current next hop.  Passed through
 * fib6_rule_lookup() via the embedded fl6, then cast back.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
1393 
/* Find the route a redirect applies to: the current route toward the
 * destination whose gateway matches the redirecting router (and whose
 * device matches the interface the redirect arrived on).  Returns the
 * null entry if no acceptable route exists; always returns a held dst.
 */
static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/* Get the "current" route for this destination and
	 * check if the redirect has come from approriate router.
	 *
	 * RFC 4861 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt6_check_expired(rt))
			continue;
		if (rt->dst.error)
			break;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	else if (rt->dst.error) {
		/* Hit an error route: give up without backtracking. */
		rt = net->ipv6.ip6_null_entry;
		goto out;
	}

	if (rt == net->ipv6.ip6_null_entry) {
		/* Nothing matched at this node; retry at a less specific one. */
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	return rt;
};
1450 
1451 static struct dst_entry *ip6_route_redirect(struct net *net,
1452 					const struct flowi6 *fl6,
1453 					const struct in6_addr *gateway)
1454 {
1455 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1456 	struct ip6rd_flowi rdfl;
1457 
1458 	rdfl.fl6 = *fl6;
1459 	rdfl.gateway = *gateway;
1460 
1461 	return fib6_rule_lookup(net, &rdfl.fl6,
1462 				flags, __ip6_route_redirect);
1463 }
1464 
1465 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1466 {
1467 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1468 	struct dst_entry *dst;
1469 	struct flowi6 fl6;
1470 
1471 	memset(&fl6, 0, sizeof(fl6));
1472 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1473 	fl6.flowi6_oif = oif;
1474 	fl6.flowi6_mark = mark;
1475 	fl6.daddr = iph->daddr;
1476 	fl6.saddr = iph->saddr;
1477 	fl6.flowlabel = ip6_flowinfo(iph);
1478 
1479 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1480 	rt6_do_redirect(dst, NULL, skb);
1481 	dst_release(dst);
1482 }
1483 EXPORT_SYMBOL_GPL(ip6_redirect);
1484 
1485 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1486 			    u32 mark)
1487 {
1488 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1489 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1490 	struct dst_entry *dst;
1491 	struct flowi6 fl6;
1492 
1493 	memset(&fl6, 0, sizeof(fl6));
1494 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1495 	fl6.flowi6_oif = oif;
1496 	fl6.flowi6_mark = mark;
1497 	fl6.daddr = msg->dest;
1498 	fl6.saddr = iph->daddr;
1499 
1500 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1501 	rt6_do_redirect(dst, NULL, skb);
1502 	dst_release(dst);
1503 }
1504 
1505 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1506 {
1507 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1508 }
1509 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1510 
1511 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1512 {
1513 	struct net_device *dev = dst->dev;
1514 	unsigned int mtu = dst_mtu(dst);
1515 	struct net *net = dev_net(dev);
1516 
1517 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1518 
1519 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1520 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1521 
1522 	/*
1523 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1524 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1525 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1526 	 * rely only on pmtu discovery"
1527 	 */
1528 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1529 		mtu = IPV6_MAXPLEN;
1530 	return mtu;
1531 }
1532 
1533 static unsigned int ip6_mtu(const struct dst_entry *dst)
1534 {
1535 	const struct rt6_info *rt = (const struct rt6_info *)dst;
1536 	unsigned int mtu = rt->rt6i_pmtu;
1537 	struct inet6_dev *idev;
1538 
1539 	if (mtu)
1540 		goto out;
1541 
1542 	mtu = dst_metric_raw(dst, RTAX_MTU);
1543 	if (mtu)
1544 		goto out;
1545 
1546 	mtu = IPV6_MIN_MTU;
1547 
1548 	rcu_read_lock();
1549 	idev = __in6_dev_get(dst->dev);
1550 	if (idev)
1551 		mtu = idev->cnf.mtu6;
1552 	rcu_read_unlock();
1553 
1554 out:
1555 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1556 }
1557 
/* Singly-linked list (via dst.next) of dsts handed out by
 * icmp6_dst_alloc(), reaped by icmp6_dst_gc(); guarded by
 * icmp6_dst_lock.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1560 
/* Allocate a throw-away host route for sending an ICMPv6 message to
 * fl6->daddr via @dev, outside the fib tree.  The entry is chained on
 * icmp6_dst_gc_list and reclaimed by icmp6_dst_gc() once released.
 *
 * Returns the (xfrm-resolved) dst, or an ERR_PTR on failure.
 */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0, NULL);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output  = ip6_output;
	atomic_set(&rt->dst.__refcnt, 1);
	rt->rt6i_gateway  = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev     = idev;
	/* Hop limit 0: use the device/socket default for ICMP replies. */
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	/* Chain onto the global gc list so the entry can be reaped. */
	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	fib6_force_start_gc(net);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}
1600 
/* Reap unreferenced entries from icmp6_dst_gc_list.
 *
 * Returns nonzero while entries remain (i.e. another gc pass is still
 * needed), zero when the list is empty.
 */
int icmp6_dst_gc(void)
{
	struct dst_entry *dst, **pprev;
	int more = 0;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;

	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			/* Unreferenced: unlink and free. */
			*pprev = dst->next;
			dst_free(dst);
		} else {
			/* Still in use: keep and count it. */
			pprev = &dst->next;
			++more;
		}
	}

	spin_unlock_bh(&icmp6_dst_lock);

	return more;
}
1623 
/* Walk icmp6_dst_gc_list and free every entry for which @func returns
 * nonzero (used e.g. on device teardown to purge routes tied to an
 * interface).
 */
static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
			    void *arg)
{
	struct dst_entry *dst, **pprev;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;
	while ((dst = *pprev) != NULL) {
		struct rt6_info *rt = (struct rt6_info *) dst;
		if (func(rt, arg)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
		}
	}
	spin_unlock_bh(&icmp6_dst_lock);
}
1642 
/* dst_ops->gc callback: run fib6 garbage collection when the entry
 * count or elapsed time warrants it.
 *
 * ip6_rt_gc_expire grows on each pass (making gc progressively more
 * aggressive) and decays by 1/2^elasticity afterwards.  Returns nonzero
 * when the table is still over rt_max_size, telling the dst layer that
 * allocation pressure remains.
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	/* Skip the pass if we ran recently and aren't over the limit. */
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	/* Exponential decay of gc aggressiveness. */
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}
1667 
/* Convert the netlink RTAX_* metric attributes in @cfg into the array
 * form used by the fib (@mxc takes ownership of the allocation on
 * success; nothing to free on failure or when no metrics were given).
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -EINVAL on an
 * out-of-range metric type or unknown congestion-control algorithm.
 */
static int ip6_convert_metrics(struct mx6_config *mxc,
			       const struct fib6_config *cfg)
{
	struct nlattr *nla;
	int remaining;
	u32 *mp;

	if (!cfg->fc_mx)
		return 0;

	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
	if (unlikely(!mp))
		return -ENOMEM;

	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
		int type = nla_type(nla);

		if (type) {
			u32 val;

			if (unlikely(type > RTAX_MAX))
				goto err;
			if (type == RTAX_CC_ALGO) {
				/* CC algorithm arrives as a name string;
				 * store its numeric key instead.
				 */
				char tmp[TCP_CA_NAME_MAX];

				nla_strlcpy(tmp, nla, sizeof(tmp));
				val = tcp_ca_get_key_by_name(tmp);
				if (val == TCP_CA_UNSPEC)
					goto err;
			} else {
				val = nla_get_u32(nla);
			}

			mp[type - 1] = val;
			__set_bit(type - 1, mxc->mx_valid);
		}
	}

	mxc->mx = mp;

	return 0;
 err:
	kfree(mp);
	return -EINVAL;
}
1713 
/* Add an IPv6 route described by @cfg (from netlink or ioctl).
 *
 * Validates the config, resolves the device/idev and fib table,
 * allocates and fills an rt6_info (including lwtunnel state, reject
 * promotion for loopback routes, and gateway reachability checks), then
 * inserts it.  On success the rt6_info is owned by the fib tree; on any
 * failure all acquired references (dev, idev, rt) are dropped.
 *
 * Returns 0 on success or a negative errno.
 */
int ip6_route_add(struct fib6_config *cfg)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	struct mx6_config mxc = { .mx = NULL, };
	int addr_type;

	/* Prefix lengths beyond 128 bits are nonsensical for IPv6. */
	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-routing prefixes need subtree support. */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	/* Without NLM_F_CREATE only an existing table should be used,
	 * but fall back to creating one for backward compatibility.
	 */
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler by destination class. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		/* Lightweight tunnel encapsulation: build the state and
		 * interpose the lwtunnel input/output handlers.
		 */
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
					   cfg->fc_encap, &lwtstate);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* Error code and handlers depend on the reject flavour. */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_sk;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0))
			goto out;

		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* Inherit the device of the gateway's route. */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* The gateway itself must be directly reachable. */
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		/* The preferred source must be configured on the device. */
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	err = ip6_convert_metrics(&mxc, cfg);
	if (err)
		goto out;

	/* On success the tree takes over the dev/idev references. */
	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);

	kfree(mxc.mx);
	return err;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);
	return err;
}
1962 
/* Remove @rt from its fib table under the table write lock, notifying
 * via @info.  Always consumes the caller's reference on @rt.  Deleting
 * the null entry is refused with -ENOENT.
 */
static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->dst.dev);

	if (rt == net->ipv6.ip6_null_entry) {
		err = -ENOENT;
		goto out;
	}

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_del(rt, info);
	write_unlock_bh(&table->tb6_lock);

out:
	/* Drop the reference the caller transferred to us. */
	ip6_rt_put(rt);
	return err;
}
1983 
1984 int ip6_del_rt(struct rt6_info *rt)
1985 {
1986 	struct nl_info info = {
1987 		.nl_net = dev_net(rt->dst.dev),
1988 	};
1989 	return __ip6_del_rt(rt, &info);
1990 }
1991 
/* Delete the route matching @cfg (prefix, and optionally ifindex,
 * gateway and metric).  Cached clones are only matched when the request
 * itself carries RTF_CACHE.  Returns 0 on success, -ESRCH when no
 * matching route exists.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			if ((rt->rt6i_flags & RTF_CACHE) &&
			    !(cfg->fc_flags & RTF_CACHE))
				continue;
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			/* Hold before unlocking; __ip6_del_rt() retakes the
			 * lock in write mode and consumes this reference.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
2033 
/* Process a validated-by-lookup ICMPv6 Redirect (RFC 4861 section 8)
 * carried in @skb against route @dst.
 *
 * Validates the message (length, multicast destination, link-local
 * target, ND options), confirms/updates the neighbour entry for the new
 * first hop, installs an RTF_CACHE clone pointing at it, fires the
 * netevent notifier, and removes any superseded cached route.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* dest == target means the destination is on-link; otherwise the
	 * target must be a link-local unicast router address.
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* Routers and interfaces configured to ignore redirects drop them. */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt == net->ipv6.ip6_null_entry) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/* Install a cache entry for the redirected destination. */
	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* The old cached route is superseded; remove it. */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}
2150 
2151 /*
2152  *	Misc support functions
2153  */
2154 
2155 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2156 {
2157 	BUG_ON(from->dst.from);
2158 
2159 	rt->rt6i_flags &= ~RTF_EXPIRES;
2160 	dst_hold(&from->dst);
2161 	rt->dst.from = &from->dst;
2162 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2163 }
2164 
/* Initialize clone @rt from original route @ort: copy handlers, keys
 * and flags, take references on the idev and lwtunnel state, and link
 * the clone to @ort via rt6_set_from() (which also shares metrics).
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
{
	rt->dst.input = ort->dst.input;
	rt->dst.output = ort->dst.output;
	rt->rt6i_dst = ort->rt6i_dst;
	rt->dst.error = ort->dst.error;
	rt->rt6i_idev = ort->rt6i_idev;
	if (rt->rt6i_idev)
		in6_dev_hold(rt->rt6i_idev);
	rt->dst.lastuse = jiffies;
	rt->rt6i_gateway = ort->rt6i_gateway;
	rt->rt6i_flags = ort->rt6i_flags;
	rt6_set_from(rt, ort);
	rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->rt6i_src;
#endif
	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
	rt->rt6i_table = ort->rt6i_table;
	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}
2186 
2187 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Find an RA-learned (RTF_ROUTEINFO) route in the INFO table matching
 * prefix, gateway and interface.  Returns the route with a held
 * reference, or NULL if absent.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		/* Found: hand a reference back to the caller. */
		dst_hold(&rt->dst);
		break;
	}
out:
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
2219 
2220 static struct rt6_info *rt6_add_route_info(struct net *net,
2221 					   const struct in6_addr *prefix, int prefixlen,
2222 					   const struct in6_addr *gwaddr, int ifindex,
2223 					   unsigned int pref)
2224 {
2225 	struct fib6_config cfg = {
2226 		.fc_table	= RT6_TABLE_INFO,
2227 		.fc_metric	= IP6_RT_PRIO_USER,
2228 		.fc_ifindex	= ifindex,
2229 		.fc_dst_len	= prefixlen,
2230 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2231 				  RTF_UP | RTF_PREF(pref),
2232 		.fc_nlinfo.portid = 0,
2233 		.fc_nlinfo.nlh = NULL,
2234 		.fc_nlinfo.nl_net = net,
2235 	};
2236 
2237 	cfg.fc_dst = *prefix;
2238 	cfg.fc_gateway = *gwaddr;
2239 
2240 	/* We should treat it as a default route if prefix length is 0. */
2241 	if (!prefixlen)
2242 		cfg.fc_flags |= RTF_DEFAULT;
2243 
2244 	ip6_route_add(&cfg);
2245 
2246 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2247 }
2248 #endif
2249 
2250 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2251 {
2252 	struct rt6_info *rt;
2253 	struct fib6_table *table;
2254 
2255 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2256 	if (!table)
2257 		return NULL;
2258 
2259 	read_lock_bh(&table->tb6_lock);
2260 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2261 		if (dev == rt->dst.dev &&
2262 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2263 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2264 			break;
2265 	}
2266 	if (rt)
2267 		dst_hold(&rt->dst);
2268 	read_unlock_bh(&table->tb6_lock);
2269 	return rt;
2270 }
2271 
2272 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2273 				     struct net_device *dev,
2274 				     unsigned int pref)
2275 {
2276 	struct fib6_config cfg = {
2277 		.fc_table	= RT6_TABLE_DFLT,
2278 		.fc_metric	= IP6_RT_PRIO_USER,
2279 		.fc_ifindex	= dev->ifindex,
2280 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2281 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2282 		.fc_nlinfo.portid = 0,
2283 		.fc_nlinfo.nlh = NULL,
2284 		.fc_nlinfo.nl_net = dev_net(dev),
2285 	};
2286 
2287 	cfg.fc_gateway = *gwaddr;
2288 
2289 	ip6_route_add(&cfg);
2290 
2291 	return rt6_get_dflt_router(gwaddr, dev);
2292 }
2293 
/* Delete every RA-learned route from the default-router table, except on
 * interfaces with accept_ra == 2 (which accept RAs even when forwarding).
 * ip6_del_rt() needs the table lock dropped, so after each deletion the
 * walk restarts from the head of the leaf list.
 */
void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (!table)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
			/* Hold a ref so the entry survives the unlock, then
			 * delete it and rescan from the start.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}
2317 
2318 static void rtmsg_to_fib6_config(struct net *net,
2319 				 struct in6_rtmsg *rtmsg,
2320 				 struct fib6_config *cfg)
2321 {
2322 	memset(cfg, 0, sizeof(*cfg));
2323 
2324 	cfg->fc_table = RT6_TABLE_MAIN;
2325 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2326 	cfg->fc_metric = rtmsg->rtmsg_metric;
2327 	cfg->fc_expires = rtmsg->rtmsg_info;
2328 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2329 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2330 	cfg->fc_flags = rtmsg->rtmsg_flags;
2331 
2332 	cfg->fc_nlinfo.nl_net = net;
2333 
2334 	cfg->fc_dst = rtmsg->rtmsg_dst;
2335 	cfg->fc_src = rtmsg->rtmsg_src;
2336 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2337 }
2338 
2339 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2340 {
2341 	struct fib6_config cfg;
2342 	struct in6_rtmsg rtmsg;
2343 	int err;
2344 
2345 	switch (cmd) {
2346 	case SIOCADDRT:		/* Add a route */
2347 	case SIOCDELRT:		/* Delete a route */
2348 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2349 			return -EPERM;
2350 		err = copy_from_user(&rtmsg, arg,
2351 				     sizeof(struct in6_rtmsg));
2352 		if (err)
2353 			return -EFAULT;
2354 
2355 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2356 
2357 		rtnl_lock();
2358 		switch (cmd) {
2359 		case SIOCADDRT:
2360 			err = ip6_route_add(&cfg);
2361 			break;
2362 		case SIOCDELRT:
2363 			err = ip6_route_del(&cfg);
2364 			break;
2365 		default:
2366 			err = -EINVAL;
2367 		}
2368 		rtnl_unlock();
2369 
2370 		return err;
2371 	}
2372 
2373 	return -EINVAL;
2374 }
2375 
2376 /*
2377  *	Drop the packet on the floor
2378  */
2379 
2380 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2381 {
2382 	int type;
2383 	struct dst_entry *dst = skb_dst(skb);
2384 	switch (ipstats_mib_noroutes) {
2385 	case IPSTATS_MIB_INNOROUTES:
2386 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2387 		if (type == IPV6_ADDR_ANY) {
2388 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2389 				      IPSTATS_MIB_INADDRERRORS);
2390 			break;
2391 		}
2392 		/* FALLTHROUGH */
2393 	case IPSTATS_MIB_OUTNOROUTES:
2394 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2395 			      ipstats_mib_noroutes);
2396 		break;
2397 	}
2398 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2399 	kfree_skb(skb);
2400 	return 0;
2401 }
2402 
/* dst input handler for blackhole/unreachable routes. */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2407 
/* dst output handler for blackhole/unreachable routes. */
static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
2413 
/* dst input handler for administratively prohibited routes. */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2418 
/* dst output handler for administratively prohibited routes. */
static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
2424 
2425 /*
2426  *	Allocate a dst for local (unicast / anycast) address.
2427  */
2428 
2429 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2430 				    const struct in6_addr *addr,
2431 				    bool anycast)
2432 {
2433 	struct net *net = dev_net(idev->dev);
2434 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2435 					    DST_NOCOUNT, NULL);
2436 	if (!rt)
2437 		return ERR_PTR(-ENOMEM);
2438 
2439 	in6_dev_hold(idev);
2440 
2441 	rt->dst.flags |= DST_HOST;
2442 	rt->dst.input = ip6_input;
2443 	rt->dst.output = ip6_output;
2444 	rt->rt6i_idev = idev;
2445 
2446 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2447 	if (anycast)
2448 		rt->rt6i_flags |= RTF_ANYCAST;
2449 	else
2450 		rt->rt6i_flags |= RTF_LOCAL;
2451 
2452 	rt->rt6i_gateway  = *addr;
2453 	rt->rt6i_dst.addr = *addr;
2454 	rt->rt6i_dst.plen = 128;
2455 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2456 
2457 	atomic_set(&rt->dst.__refcnt, 1);
2458 
2459 	return rt;
2460 }
2461 
2462 int ip6_route_get_saddr(struct net *net,
2463 			struct rt6_info *rt,
2464 			const struct in6_addr *daddr,
2465 			unsigned int prefs,
2466 			struct in6_addr *saddr)
2467 {
2468 	struct inet6_dev *idev =
2469 		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2470 	int err = 0;
2471 	if (rt && rt->rt6i_prefsrc.plen)
2472 		*saddr = rt->rt6i_prefsrc.addr;
2473 	else
2474 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2475 					 daddr, prefs, saddr);
2476 	return err;
2477 }
2478 
/* Walker argument for fib6_remove_prefsrc(): strips a deleted address
 * from any route that names it as preferred source.
 */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict to this device; NULL = any */
	struct net *net;	/* namespace being walked */
	struct in6_addr *addr;	/* the address being removed */
};
2485 
2486 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2487 {
2488 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2489 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2490 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2491 
2492 	if (((void *)rt->dst.dev == dev || !dev) &&
2493 	    rt != net->ipv6.ip6_null_entry &&
2494 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2495 		/* remove prefsrc entry */
2496 		rt->rt6i_prefsrc.plen = 0;
2497 	}
2498 	return 0;
2499 }
2500 
/* Called when address @ifp goes away: scrub it from the prefsrc field of
 * every route on its device.
 */
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net *net = dev_net(ifp->idev->dev);
	struct arg_dev_net_ip adni = {
		.dev = ifp->idev->dev,
		.net = net,
		.addr = &ifp->addr,
	};
	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}
2511 
/* Flag combinations identifying routes that depend on a neighbor acting
 * as a router: RA-learned default routers and cached gateway routes.
 */
#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)

/* Remove routers and update dst entries when gateway turn into host. */
static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
{
	struct in6_addr *gateway = (struct in6_addr *)arg;

	/* Returning -1 tells fib6_clean_all() to delete this entry. */
	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
		return -1;
	}
	return 0;
}
2527 
/* Drop every route that still treats @gateway as a router; used when a
 * neighbor stops advertising itself as one.
 */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
2532 
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL = all devices */
	struct net *net;	/* namespace being cleaned */
};
2537 
2538 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2539 {
2540 	const struct arg_dev_net *adn = arg;
2541 	const struct net_device *dev = adn->dev;
2542 
2543 	if ((rt->dst.dev == dev || !dev) &&
2544 	    rt != adn->net->ipv6.ip6_null_entry)
2545 		return -1;
2546 
2547 	return 0;
2548 }
2549 
/* Purge all routing state tied to @dev when it goes down: FIB entries,
 * ICMP rate-limit dsts, and uncached route list entries.
 */
void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, &adn);
	icmp6_clean_all(fib6_ifdown, &adn);
	rt6_uncached_list_flush_dev(net, dev);
}
2561 
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2566 
/* fib6_clean_all() callback: propagate a device MTU change into each
 * route's (P)MTU metric.  Always returns 0 (never deletes entries).
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discouvery.
	 */
	if (rt->dst.dev == arg->dev &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* For RTF_CACHE with rt6i_pmtu == 0
			 * (i.e. a redirected route),
			 * the metrics of its rt->dst.from has already
			 * been updated.
			 */
			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
				rt->rt6i_pmtu = arg->mtu;
		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
			   (dst_mtu(&rt->dst) < arg->mtu &&
			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		}
	}
	return 0;
}
2614 
/* Walk the FIB and update route MTUs after @dev's MTU changed to @mtu. */
void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
{
	struct rt6_mtu_change_arg arg = {
		.dev = dev,
		.mtu = mtu,
	};

	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
}
2624 
/* Netlink attribute validation policy for IPv6 RTM_* route messages.
 * Attributes not listed here (e.g. RTA_DST/RTA_SRC, validated against
 * the prefix length in rtm_to_fib6_config()) are accepted unchecked.
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]              = { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
};
2636 
/* Parse an RTM_NEWROUTE/RTM_DELROUTE netlink message into a fib6_config.
 * Returns 0 on success or a negative errno.  Note that cfg->fc_mx,
 * fc_mp and fc_encap keep pointers into the original message, so the
 * skb must stay alive while cfg is used.
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	unsigned int pref;
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_type = rtm->rtm_type;

	/* Reject-type routes have no nexthop; they only differ in the
	 * error they report (mapped back in rt6_fill_node()).
	 */
	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	if (rtm->rtm_flags & RTM_F_CLONED)
		cfg->fc_flags |= RTF_CACHE;

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		/* The attribute must carry at least the bytes covered by
		 * the advertised prefix length.
		 */
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	/* RTA_TABLE overrides the table id from the rtmsg header. */
	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
	}

	if (tb[RTA_PREF]) {
		/* Unknown RFC 4191 preference values fall back to medium. */
		pref = nla_get_u8(tb[RTA_PREF]);
		if (pref != ICMPV6_ROUTER_PREF_LOW &&
		    pref != ICMPV6_ROUTER_PREF_HIGH)
			pref = ICMPV6_ROUTER_PREF_MEDIUM;
		cfg->fc_flags |= RTF_PREF(pref);
	}

	if (tb[RTA_ENCAP])
		cfg->fc_encap = tb[RTA_ENCAP];

	if (tb[RTA_ENCAP_TYPE])
		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);

	err = 0;
errout:
	return err;
}
2739 
/* Add (@add != 0) or delete every nexthop of an RTA_MULTIPATH route.
 * On a failed add, already-inserted nexthops are rolled back by
 * re-walking the consumed prefix of the nexthop list in delete mode.
 * Returns the last error seen, or 0.
 * NOTE(review): cfg->fc_nlinfo.nlh is dereferenced unconditionally near
 * the bottom; this assumes all callers come from netlink with a valid
 * nlh (true for inet6_rtm_newroute/delroute) — confirm before reusing.
 */
static int ip6_route_multipath(struct fib6_config *cfg, int add)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	int remaining;
	int attrlen;
	int err = 0, last_err = 0;

	remaining = cfg->fc_mp_len;
beginning:
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry */
	while (rtnh_ok(rtnh, remaining)) {
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				r_cfg.fc_gateway = nla_get_in6_addr(nla);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
			if (nla)
				r_cfg.fc_encap_type = nla_get_u16(nla);
		}
		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
		if (err) {
			last_err = err;
			/* If we are trying to remove a route, do not stop the
			 * loop when ip6_route_del() fails (because next hop is
			 * already gone), we should try to remove all next hops.
			 */
			if (add) {
				/* If add fails, we should try to delete all
				 * next hops that have been already added.
				 */
				add = 0;
				/* Restrict the rollback walk to the part of
				 * the list already consumed.
				 */
				remaining = cfg->fc_mp_len - remaining;
				goto beginning;
			}
		}
		/* Because each route is added like a single route we remove
		 * these flags after the first nexthop: if there is a collision,
		 * we have already failed to add the first nexthop:
		 * fib6_add_rt2node() has rejected it; when replacing, old
		 * nexthops have been replaced by first new, the rest should
		 * be added to it.
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
						     NLM_F_REPLACE);
		rtnh = rtnh_next(rtnh, &remaining);
	}

	return last_err;
}
2802 
2803 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2804 {
2805 	struct fib6_config cfg;
2806 	int err;
2807 
2808 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2809 	if (err < 0)
2810 		return err;
2811 
2812 	if (cfg.fc_mp)
2813 		return ip6_route_multipath(&cfg, 0);
2814 	else
2815 		return ip6_route_del(&cfg);
2816 }
2817 
2818 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2819 {
2820 	struct fib6_config cfg;
2821 	int err;
2822 
2823 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2824 	if (err < 0)
2825 		return err;
2826 
2827 	if (cfg.fc_mp)
2828 		return ip6_route_multipath(&cfg, 1);
2829 	else
2830 		return ip6_route_add(&cfg);
2831 }
2832 
/* Upper bound on the netlink message size rt6_fill_node() can emit for
 * @rt; must be kept in sync with the attributes written there.
 */
static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
{
	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
	       + nla_total_size(sizeof(struct rta_cacheinfo))
	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
	       + nla_total_size(1) /* RTA_PREF */
	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
}
2850 
2851 static int rt6_fill_node(struct net *net,
2852 			 struct sk_buff *skb, struct rt6_info *rt,
2853 			 struct in6_addr *dst, struct in6_addr *src,
2854 			 int iif, int type, u32 portid, u32 seq,
2855 			 int prefix, int nowait, unsigned int flags)
2856 {
2857 	u32 metrics[RTAX_MAX];
2858 	struct rtmsg *rtm;
2859 	struct nlmsghdr *nlh;
2860 	long expires;
2861 	u32 table;
2862 
2863 	if (prefix) {	/* user wants prefix routes only */
2864 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2865 			/* success since this is not a prefix route */
2866 			return 1;
2867 		}
2868 	}
2869 
2870 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2871 	if (!nlh)
2872 		return -EMSGSIZE;
2873 
2874 	rtm = nlmsg_data(nlh);
2875 	rtm->rtm_family = AF_INET6;
2876 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2877 	rtm->rtm_src_len = rt->rt6i_src.plen;
2878 	rtm->rtm_tos = 0;
2879 	if (rt->rt6i_table)
2880 		table = rt->rt6i_table->tb6_id;
2881 	else
2882 		table = RT6_TABLE_UNSPEC;
2883 	rtm->rtm_table = table;
2884 	if (nla_put_u32(skb, RTA_TABLE, table))
2885 		goto nla_put_failure;
2886 	if (rt->rt6i_flags & RTF_REJECT) {
2887 		switch (rt->dst.error) {
2888 		case -EINVAL:
2889 			rtm->rtm_type = RTN_BLACKHOLE;
2890 			break;
2891 		case -EACCES:
2892 			rtm->rtm_type = RTN_PROHIBIT;
2893 			break;
2894 		case -EAGAIN:
2895 			rtm->rtm_type = RTN_THROW;
2896 			break;
2897 		default:
2898 			rtm->rtm_type = RTN_UNREACHABLE;
2899 			break;
2900 		}
2901 	}
2902 	else if (rt->rt6i_flags & RTF_LOCAL)
2903 		rtm->rtm_type = RTN_LOCAL;
2904 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2905 		rtm->rtm_type = RTN_LOCAL;
2906 	else
2907 		rtm->rtm_type = RTN_UNICAST;
2908 	rtm->rtm_flags = 0;
2909 	if (!netif_carrier_ok(rt->dst.dev)) {
2910 		rtm->rtm_flags |= RTNH_F_LINKDOWN;
2911 		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
2912 			rtm->rtm_flags |= RTNH_F_DEAD;
2913 	}
2914 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2915 	rtm->rtm_protocol = rt->rt6i_protocol;
2916 	if (rt->rt6i_flags & RTF_DYNAMIC)
2917 		rtm->rtm_protocol = RTPROT_REDIRECT;
2918 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
2919 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2920 			rtm->rtm_protocol = RTPROT_RA;
2921 		else
2922 			rtm->rtm_protocol = RTPROT_KERNEL;
2923 	}
2924 
2925 	if (rt->rt6i_flags & RTF_CACHE)
2926 		rtm->rtm_flags |= RTM_F_CLONED;
2927 
2928 	if (dst) {
2929 		if (nla_put_in6_addr(skb, RTA_DST, dst))
2930 			goto nla_put_failure;
2931 		rtm->rtm_dst_len = 128;
2932 	} else if (rtm->rtm_dst_len)
2933 		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
2934 			goto nla_put_failure;
2935 #ifdef CONFIG_IPV6_SUBTREES
2936 	if (src) {
2937 		if (nla_put_in6_addr(skb, RTA_SRC, src))
2938 			goto nla_put_failure;
2939 		rtm->rtm_src_len = 128;
2940 	} else if (rtm->rtm_src_len &&
2941 		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
2942 		goto nla_put_failure;
2943 #endif
2944 	if (iif) {
2945 #ifdef CONFIG_IPV6_MROUTE
2946 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2947 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2948 			if (err <= 0) {
2949 				if (!nowait) {
2950 					if (err == 0)
2951 						return 0;
2952 					goto nla_put_failure;
2953 				} else {
2954 					if (err == -EMSGSIZE)
2955 						goto nla_put_failure;
2956 				}
2957 			}
2958 		} else
2959 #endif
2960 			if (nla_put_u32(skb, RTA_IIF, iif))
2961 				goto nla_put_failure;
2962 	} else if (dst) {
2963 		struct in6_addr saddr_buf;
2964 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2965 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2966 			goto nla_put_failure;
2967 	}
2968 
2969 	if (rt->rt6i_prefsrc.plen) {
2970 		struct in6_addr saddr_buf;
2971 		saddr_buf = rt->rt6i_prefsrc.addr;
2972 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2973 			goto nla_put_failure;
2974 	}
2975 
2976 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
2977 	if (rt->rt6i_pmtu)
2978 		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
2979 	if (rtnetlink_put_metrics(skb, metrics) < 0)
2980 		goto nla_put_failure;
2981 
2982 	if (rt->rt6i_flags & RTF_GATEWAY) {
2983 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
2984 			goto nla_put_failure;
2985 	}
2986 
2987 	if (rt->dst.dev &&
2988 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2989 		goto nla_put_failure;
2990 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2991 		goto nla_put_failure;
2992 
2993 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2994 
2995 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2996 		goto nla_put_failure;
2997 
2998 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
2999 		goto nla_put_failure;
3000 
3001 	lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3002 
3003 	nlmsg_end(skb, nlh);
3004 	return 0;
3005 
3006 nla_put_failure:
3007 	nlmsg_cancel(skb, nlh);
3008 	return -EMSGSIZE;
3009 }
3010 
3011 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3012 {
3013 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3014 	int prefix;
3015 
3016 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3017 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3018 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3019 	} else
3020 		prefix = 0;
3021 
3022 	return rt6_fill_node(arg->net,
3023 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3024 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3025 		     prefix, 0, NLM_F_MULTI);
3026 }
3027 
3028 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3029 {
3030 	struct net *net = sock_net(in_skb->sk);
3031 	struct nlattr *tb[RTA_MAX+1];
3032 	struct rt6_info *rt;
3033 	struct sk_buff *skb;
3034 	struct rtmsg *rtm;
3035 	struct flowi6 fl6;
3036 	int err, iif = 0, oif = 0;
3037 
3038 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3039 	if (err < 0)
3040 		goto errout;
3041 
3042 	err = -EINVAL;
3043 	memset(&fl6, 0, sizeof(fl6));
3044 
3045 	if (tb[RTA_SRC]) {
3046 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3047 			goto errout;
3048 
3049 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3050 	}
3051 
3052 	if (tb[RTA_DST]) {
3053 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3054 			goto errout;
3055 
3056 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3057 	}
3058 
3059 	if (tb[RTA_IIF])
3060 		iif = nla_get_u32(tb[RTA_IIF]);
3061 
3062 	if (tb[RTA_OIF])
3063 		oif = nla_get_u32(tb[RTA_OIF]);
3064 
3065 	if (tb[RTA_MARK])
3066 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3067 
3068 	if (iif) {
3069 		struct net_device *dev;
3070 		int flags = 0;
3071 
3072 		dev = __dev_get_by_index(net, iif);
3073 		if (!dev) {
3074 			err = -ENODEV;
3075 			goto errout;
3076 		}
3077 
3078 		fl6.flowi6_iif = iif;
3079 
3080 		if (!ipv6_addr_any(&fl6.saddr))
3081 			flags |= RT6_LOOKUP_F_HAS_SADDR;
3082 
3083 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3084 							       flags);
3085 	} else {
3086 		fl6.flowi6_oif = oif;
3087 
3088 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3089 	}
3090 
3091 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3092 	if (!skb) {
3093 		ip6_rt_put(rt);
3094 		err = -ENOBUFS;
3095 		goto errout;
3096 	}
3097 
3098 	/* Reserve room for dummy headers, this skb can pass
3099 	   through good chunk of routing engine.
3100 	 */
3101 	skb_reset_mac_header(skb);
3102 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3103 
3104 	skb_dst_set(skb, &rt->dst);
3105 
3106 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3107 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3108 			    nlh->nlmsg_seq, 0, 0, 0);
3109 	if (err < 0) {
3110 		kfree_skb(skb);
3111 		goto errout;
3112 	}
3113 
3114 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3115 errout:
3116 	return err;
3117 }
3118 
/* Broadcast an RTM_NEWROUTE/RTM_DELROUTE notification for @rt to the
 * RTNLGRP_IPV6_ROUTE multicast group; on failure the error is recorded
 * on the group so listeners can detect the lost event.
 */
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	seq = info->nlh ? info->nlh->nlmsg_seq : 0;

	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
	if (!skb)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
				event, info->portid, seq, 0, 0, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
3148 
/* Netdevice notifier: once the per-namespace loopback device registers,
 * point the special FIB entries (null / prohibit / blackhole) at it.
 */
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	}

	return NOTIFY_OK;
}
3168 
3169 /*
3170  *	/proc
3171  */
3172 
3173 #ifdef CONFIG_PROC_FS
3174 
/* /proc/net/ipv6_route: seq_file ops (open handler defined elsewhere). */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
3182 
/* /proc/net/rt6_stats: one line of hex FIB statistics for the netns. */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}
3197 
/* open() handler for /proc/net/rt6_stats. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
3202 
/* /proc/net/rt6_stats file operations. */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
3210 #endif	/* CONFIG_PROC_FS */
3211 
3212 #ifdef CONFIG_SYSCTL
3213 
3214 static
3215 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3216 			      void __user *buffer, size_t *lenp, loff_t *ppos)
3217 {
3218 	struct net *net;
3219 	int delay;
3220 	if (!write)
3221 		return -EINVAL;
3222 
3223 	net = (struct net *)ctl->extra1;
3224 	delay = net->ipv6.sysctl.flush_delay;
3225 	proc_dointvec(ctl, write, buffer, lenp, ppos);
3226 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3227 	return 0;
3228 }
3229 
/* Template for the per-namespace net.ipv6.route.* sysctl table.
 * NOTE: entry order is significant — ipv6_route_sysctl_init() rebinds
 * the .data pointers by index; keep both in sync.
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
3303 
/* Clone the sysctl template for namespace @net and rebind each entry's
 * .data to the per-namespace variable.  The indices below must match
 * the order of ipv6_route_table_template.  Returns NULL on allocation
 * failure.
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		table[0].extra1 = net;
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			table[0].procname = NULL;
	}

	return table;
}
3332 #endif
3333 
/* Per-netns setup for IPv6 routing: copy the dst_ops template,
 * allocate this namespace's private null (and, with multiple tables,
 * prohibit/blackhole) route templates, and seed the GC/MTU sysctl
 * defaults.  Returns 0 on success or -ENOMEM, unwinding any partial
 * allocations via the goto chain at the bottom.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	/* Each namespace gets its own copy of the special route
	 * templates so their dst fields can point at per-netns objects.
	 */
	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* Default values for the routing sysctls; overridable via
	 * /proc/sys/net/ipv6/route/ once the sysctl table is registered.
	 */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

	/* Error unwinding: each label frees what was allocated before
	 * the corresponding failure point, in reverse order.
	 */
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
3405 
/* Per-netns teardown: free the special route templates allocated in
 * ip6_route_net_init() and release the dst entry counter.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
3415 
3416 static int __net_init ip6_route_net_init_late(struct net *net)
3417 {
3418 #ifdef CONFIG_PROC_FS
3419 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3420 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3421 #endif
3422 	return 0;
3423 }
3424 
/* Late per-netns teardown: remove the proc entries created by
 * ip6_route_net_init_late().
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
3432 
/* Core per-netns routing state (dst ops, special routes, sysctls). */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
3437 
3438 static int __net_init ipv6_inetpeer_init(struct net *net)
3439 {
3440 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3441 
3442 	if (!bp)
3443 		return -ENOMEM;
3444 	inet_peer_base_init(bp);
3445 	net->ipv6.peers = bp;
3446 	return 0;
3447 }
3448 
3449 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3450 {
3451 	struct inet_peer_base *bp = net->ipv6.peers;
3452 
3453 	net->ipv6.peers = NULL;
3454 	inetpeer_invalidate_tree(bp);
3455 	kfree(bp);
3456 }
3457 
/* Per-netns inetpeer storage for IPv6. */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
3462 
/* Per-netns proc entries; registered after fib6 rules are in place. */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
3467 
/* Netdevice event notifier; handler is defined earlier in this file. */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
3472 
/* Module init for the IPv6 routing subsystem: create the dst cache,
 * register the per-netns operations, wire up init_net's special
 * routes, bring up fib6/xfrm6/fib6-rules, register the rtnetlink
 * handlers and the netdevice notifier, and initialize the per-cpu
 * uncached route lists.  On any failure, everything registered so far
 * is torn down in reverse order via the goto chain at the bottom.
 */
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	/* The blackhole ops share the rt6_info slab with the normal ops. */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

	/* Per-cpu lists for routes not attached to a fib node. */
	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

	/* Error unwinding, in reverse order of the setup above. */
out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
3564 
/* Module exit: undo ip6_route_init() in exact reverse order. */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
3577