xref: /openbmc/linux/net/ipv6/route.c (revision b8d3e4163a3562d7cba486687904383e78e7dd6a)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 
65 #include <asm/uaccess.h>
66 
67 #ifdef CONFIG_SYSCTL
68 #include <linux/sysctl.h>
69 #endif
70 
/*
 * Result codes produced by rt6_check_neigh() and passed on by
 * rt6_score_route().  Every failure value is negative, which is what lets
 * rt6_score_route() test "n < 0"; find_match() gives special treatment to
 * RT6_NUD_FAIL_DO_RR and RT6_NUD_FAIL_HARD.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* find_match() skips the candidate */
	RT6_NUD_FAIL_PROBE = -2,	/* gateway neighbour is in NUD_FAILED */
	RT6_NUD_FAIL_DO_RR = -1,	/* find_match() asks for round-robin */
	RT6_NUD_SUCCEED = 1
};
77 
/*
 * Forward declarations for functions that are referenced before their
 * definitions, notably by the dst_ops tables and the route templates
 * declared further down in this file.
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
static int		ip6_pkt_prohibit(struct sk_buff *skb);
static int		ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);

/* Route Information option helpers, only built with CONFIG_IPV6_ROUTE_INFO. */
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex);
#endif
109 
/*
 * One list of rt6_info entries per CPU.  Entries are linked through
 * rt6_info::rt6i_uncached by rt6_uncached_list_add(); every access to
 * @head in this file is made with @lock held.
 */
struct uncached_list {
	spinlock_t		lock;
	struct list_head	head;
};

/* Per-CPU instance used by the rt6_uncached_list_*() helpers. */
static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
116 
117 static void rt6_uncached_list_add(struct rt6_info *rt)
118 {
119 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
120 
121 	rt->dst.flags |= DST_NOCACHE;
122 	rt->rt6i_uncached_list = ul;
123 
124 	spin_lock_bh(&ul->lock);
125 	list_add_tail(&rt->rt6i_uncached, &ul->head);
126 	spin_unlock_bh(&ul->lock);
127 }
128 
129 static void rt6_uncached_list_del(struct rt6_info *rt)
130 {
131 	if (!list_empty(&rt->rt6i_uncached)) {
132 		struct uncached_list *ul = rt->rt6i_uncached_list;
133 
134 		spin_lock_bh(&ul->lock);
135 		list_del(&rt->rt6i_uncached);
136 		spin_unlock_bh(&ul->lock);
137 	}
138 }
139 
140 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
141 {
142 	struct net_device *loopback_dev = net->loopback_dev;
143 	int cpu;
144 
145 	for_each_possible_cpu(cpu) {
146 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
147 		struct rt6_info *rt;
148 
149 		spin_lock_bh(&ul->lock);
150 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
151 			struct inet6_dev *rt_idev = rt->rt6i_idev;
152 			struct net_device *rt_dev = rt->dst.dev;
153 
154 			if (rt_idev && (rt_idev->dev == dev || !dev) &&
155 			    rt_idev->dev != loopback_dev) {
156 				rt->rt6i_idev = in6_dev_get(loopback_dev);
157 				in6_dev_put(rt_idev);
158 			}
159 
160 			if (rt_dev && (rt_dev == dev || !dev) &&
161 			    rt_dev != loopback_dev) {
162 				rt->dst.dev = loopback_dev;
163 				dev_hold(rt->dst.dev);
164 				dev_put(rt_dev);
165 			}
166 		}
167 		spin_unlock_bh(&ul->lock);
168 	}
169 }
170 
171 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
172 {
173 	return dst_metrics_write_ptr(rt->dst.from);
174 }
175 
176 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
177 {
178 	struct rt6_info *rt = (struct rt6_info *)dst;
179 
180 	if (rt->rt6i_flags & RTF_PCPU)
181 		return rt6_pcpu_cow_metrics(rt);
182 	else if (rt->rt6i_flags & RTF_CACHE)
183 		return NULL;
184 	else
185 		return dst_cow_metrics_generic(dst, old);
186 }
187 
188 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
189 					     struct sk_buff *skb,
190 					     const void *daddr)
191 {
192 	struct in6_addr *p = &rt->rt6i_gateway;
193 
194 	if (!ipv6_addr_any(p))
195 		return (const void *) p;
196 	else if (skb)
197 		return &ipv6_hdr(skb)->daddr;
198 	return daddr;
199 }
200 
201 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
202 					  struct sk_buff *skb,
203 					  const void *daddr)
204 {
205 	struct rt6_info *rt = (struct rt6_info *) dst;
206 	struct neighbour *n;
207 
208 	daddr = choose_neigh_daddr(rt, skb, daddr);
209 	n = __ipv6_neigh_lookup(dst->dev, daddr);
210 	if (n)
211 		return n;
212 	return neigh_create(&nd_tbl, daddr, dst->dev);
213 }
214 
/*
 * dst_ops callbacks for regular IPv6 routes.
 * NOTE(review): __ip6_dst_alloc() allocates against
 * net->ipv6.ip6_dst_ops, presumably a per-namespace copy of this
 * template; the copy site is outside this excerpt, so confirm there.
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};
232 
233 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
234 {
235 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
236 
237 	return mtu ? : dst->dev->mtu;
238 }
239 
/* update_pmtu handler for ip6_dst_blackhole_ops: intentionally does nothing. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}

/* redirect handler for ip6_dst_blackhole_ops: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}

/*
 * cow_metrics handler for ip6_dst_blackhole_ops: always returns NULL, so
 * no writable metrics array is ever handed out for these dsts.
 */
static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
					 unsigned long old)
{
	return NULL;
}
255 
/*
 * dst_ops table for blackhole dsts (its users are outside this excerpt).
 * It shares destroy/check/default_advmss/neigh_lookup with the regular
 * IPv6 ops, but plugs in the no-op update_pmtu/redirect handlers, a
 * cow_metrics handler that always returns NULL, and ip6_blackhole_mtu().
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
	.neigh_lookup		=	ip6_neigh_lookup,
};
267 
/*
 * Read-only metrics array of RTAX_MAX entries.  The only explicit
 * initialiser puts 0 in the RTAX_HOPLIMIT slot (slots are indexed by
 * RTAX_* - 1); all other slots are implicitly zero as well.
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
271 
/*
 * Template for the "null" route entry: a reject route (RTF_REJECT |
 * RTF_NONEXTHOP) whose dst carries -ENETUNREACH and whose input/output
 * handlers are ip6_pkt_discard()/ip6_pkt_discard_out().  It starts with a
 * reference count of 1 and the largest possible metric.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
286 
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Template for the "prohibit" route entry: same shape as the null entry
 * template, but the dst error is -EACCES and packets go through
 * ip6_pkt_prohibit()/ip6_pkt_prohibit_out().
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route entry: the dst error is -EINVAL and
 * packets are handed to the generic dst_discard()/dst_discard_sk()
 * handlers.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_sk,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
320 
321 /* allocate dst with ip6_dst_ops */
322 static struct rt6_info *__ip6_dst_alloc(struct net *net,
323 					struct net_device *dev,
324 					int flags)
325 {
326 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
327 					0, DST_OBSOLETE_FORCE_CHK, flags);
328 
329 	if (rt) {
330 		struct dst_entry *dst = &rt->dst;
331 
332 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
333 		INIT_LIST_HEAD(&rt->rt6i_siblings);
334 		INIT_LIST_HEAD(&rt->rt6i_uncached);
335 	}
336 	return rt;
337 }
338 
339 static struct rt6_info *ip6_dst_alloc(struct net *net,
340 				      struct net_device *dev,
341 				      int flags)
342 {
343 	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
344 
345 	if (rt) {
346 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
347 		if (rt->rt6i_pcpu) {
348 			int cpu;
349 
350 			for_each_possible_cpu(cpu) {
351 				struct rt6_info **p;
352 
353 				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
354 				/* no one shares rt */
355 				*p =  NULL;
356 			}
357 		} else {
358 			dst_destroy((struct dst_entry *)rt);
359 			return NULL;
360 		}
361 	}
362 
363 	return rt;
364 }
365 
366 static void ip6_dst_destroy(struct dst_entry *dst)
367 {
368 	struct rt6_info *rt = (struct rt6_info *)dst;
369 	struct dst_entry *from = dst->from;
370 	struct inet6_dev *idev;
371 
372 	dst_destroy_metrics_generic(dst);
373 	free_percpu(rt->rt6i_pcpu);
374 	rt6_uncached_list_del(rt);
375 
376 	idev = rt->rt6i_idev;
377 	if (idev) {
378 		rt->rt6i_idev = NULL;
379 		in6_dev_put(idev);
380 	}
381 
382 	dst->from = NULL;
383 	dst_release(from);
384 }
385 
386 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
387 			   int how)
388 {
389 	struct rt6_info *rt = (struct rt6_info *)dst;
390 	struct inet6_dev *idev = rt->rt6i_idev;
391 	struct net_device *loopback_dev =
392 		dev_net(dev)->loopback_dev;
393 
394 	if (dev != loopback_dev) {
395 		if (idev && idev->dev == dev) {
396 			struct inet6_dev *loopback_idev =
397 				in6_dev_get(loopback_dev);
398 			if (loopback_idev) {
399 				rt->rt6i_idev = loopback_idev;
400 				in6_dev_put(idev);
401 			}
402 		}
403 	}
404 }
405 
406 static bool rt6_check_expired(const struct rt6_info *rt)
407 {
408 	if (rt->rt6i_flags & RTF_EXPIRES) {
409 		if (time_after(jiffies, rt->dst.expires))
410 			return true;
411 	} else if (rt->dst.from) {
412 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
413 	}
414 	return false;
415 }
416 
417 /* Multipath route selection:
418  *   Hash based function using packet header and flowlabel.
419  * Adapted from fib_info_hashfn()
420  */
421 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
422 			       const struct flowi6 *fl6)
423 {
424 	unsigned int val = fl6->flowi6_proto;
425 
426 	val ^= ipv6_addr_hash(&fl6->daddr);
427 	val ^= ipv6_addr_hash(&fl6->saddr);
428 
429 	/* Work only if this not encapsulated */
430 	switch (fl6->flowi6_proto) {
431 	case IPPROTO_UDP:
432 	case IPPROTO_TCP:
433 	case IPPROTO_SCTP:
434 		val ^= (__force u16)fl6->fl6_sport;
435 		val ^= (__force u16)fl6->fl6_dport;
436 		break;
437 
438 	case IPPROTO_ICMPV6:
439 		val ^= (__force u16)fl6->fl6_icmp_type;
440 		val ^= (__force u16)fl6->fl6_icmp_code;
441 		break;
442 	}
443 	/* RFC6438 recommands to use flowlabel */
444 	val ^= (__force u32)fl6->flowlabel;
445 
446 	/* Perhaps, we need to tune, this function? */
447 	val = val ^ (val >> 7) ^ (val >> 12);
448 	return val % candidate_count;
449 }
450 
451 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
452 					     struct flowi6 *fl6, int oif,
453 					     int strict)
454 {
455 	struct rt6_info *sibling, *next_sibling;
456 	int route_choosen;
457 
458 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
459 	/* Don't change the route, if route_choosen == 0
460 	 * (siblings does not include ourself)
461 	 */
462 	if (route_choosen)
463 		list_for_each_entry_safe(sibling, next_sibling,
464 				&match->rt6i_siblings, rt6i_siblings) {
465 			route_choosen--;
466 			if (route_choosen == 0) {
467 				if (rt6_score_route(sibling, oif, strict) < 0)
468 					break;
469 				match = sibling;
470 				break;
471 			}
472 		}
473 	return match;
474 }
475 
476 /*
477  *	Route lookup. Any table->tb6_lock is implied.
478  */
479 
480 static inline struct rt6_info *rt6_device_match(struct net *net,
481 						    struct rt6_info *rt,
482 						    const struct in6_addr *saddr,
483 						    int oif,
484 						    int flags)
485 {
486 	struct rt6_info *local = NULL;
487 	struct rt6_info *sprt;
488 
489 	if (!oif && ipv6_addr_any(saddr))
490 		goto out;
491 
492 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
493 		struct net_device *dev = sprt->dst.dev;
494 
495 		if (oif) {
496 			if (dev->ifindex == oif)
497 				return sprt;
498 			if (dev->flags & IFF_LOOPBACK) {
499 				if (!sprt->rt6i_idev ||
500 				    sprt->rt6i_idev->dev->ifindex != oif) {
501 					if (flags & RT6_LOOKUP_F_IFACE && oif)
502 						continue;
503 					if (local && (!oif ||
504 						      local->rt6i_idev->dev->ifindex == oif))
505 						continue;
506 				}
507 				local = sprt;
508 			}
509 		} else {
510 			if (ipv6_chk_addr(net, saddr, dev,
511 					  flags & RT6_LOOKUP_F_IFACE))
512 				return sprt;
513 		}
514 	}
515 
516 	if (oif) {
517 		if (local)
518 			return local;
519 
520 		if (flags & RT6_LOOKUP_F_IFACE)
521 			return net->ipv6.ip6_null_entry;
522 	}
523 out:
524 	return rt;
525 }
526 
527 #ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Deferred router probe request.  Allocated by rt6_probe(), consumed and
 * freed by rt6_probe_deferred().
 */
struct __rt6_probe_work {
	struct work_struct work;	/* queued via schedule_work() */
	struct in6_addr target;		/* gateway address to solicit */
	struct net_device *dev;		/* held by rt6_probe(), put by the worker */
};
533 
534 static void rt6_probe_deferred(struct work_struct *w)
535 {
536 	struct in6_addr mcaddr;
537 	struct __rt6_probe_work *work =
538 		container_of(w, struct __rt6_probe_work, work);
539 
540 	addrconf_addr_solict_mult(&work->target, &mcaddr);
541 	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL);
542 	dev_put(work->dev);
543 	kfree(work);
544 }
545 
/*
 * Possibly schedule a reachability probe for the gateway of @rt.
 *
 * Does nothing for a NULL route or a route without RTF_GATEWAY.  The
 * gateway's neighbour entry is looked up under rcu_read_lock_bh():
 *  - entry in a NUD_VALID state: no probe;
 *  - entry not valid: a probe is queued only if the entry was last
 *    updated more than rtr_probe_interval ago (checked under neigh->lock);
 *  - no entry at all: a probe is always queued.
 * The actual solicitation is sent later by rt6_probe_deferred().
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		/* lockless fast path; 'work' is not read after this goto */
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		/* re-check the state now that the neighbour lock is held */
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			/* only mark the neighbour if a probe will really be sent */
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		/* reference is dropped by rt6_probe_deferred() */
		dev_hold(rt->dst.dev);
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
592 #else
/* Without CONFIG_IPV6_ROUTER_PREF router probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
596 #endif
597 
598 /*
599  * Default Router Selection (RFC 2461 6.3.6)
600  */
601 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
602 {
603 	struct net_device *dev = rt->dst.dev;
604 	if (!oif || dev->ifindex == oif)
605 		return 2;
606 	if ((dev->flags & IFF_LOOPBACK) &&
607 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
608 		return 1;
609 	return 0;
610 }
611 
612 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
613 {
614 	struct neighbour *neigh;
615 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
616 
617 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
618 	    !(rt->rt6i_flags & RTF_GATEWAY))
619 		return RT6_NUD_SUCCEED;
620 
621 	rcu_read_lock_bh();
622 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
623 	if (neigh) {
624 		read_lock(&neigh->lock);
625 		if (neigh->nud_state & NUD_VALID)
626 			ret = RT6_NUD_SUCCEED;
627 #ifdef CONFIG_IPV6_ROUTER_PREF
628 		else if (!(neigh->nud_state & NUD_FAILED))
629 			ret = RT6_NUD_SUCCEED;
630 		else
631 			ret = RT6_NUD_FAIL_PROBE;
632 #endif
633 		read_unlock(&neigh->lock);
634 	} else {
635 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
636 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
637 	}
638 	rcu_read_unlock_bh();
639 
640 	return ret;
641 }
642 
643 static int rt6_score_route(struct rt6_info *rt, int oif,
644 			   int strict)
645 {
646 	int m;
647 
648 	m = rt6_check_dev(rt, oif);
649 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
650 		return RT6_NUD_FAIL_HARD;
651 #ifdef CONFIG_IPV6_ROUTER_PREF
652 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
653 #endif
654 	if (strict & RT6_LOOKUP_F_REACHABLE) {
655 		int n = rt6_check_neigh(rt);
656 		if (n < 0)
657 			return n;
658 	}
659 	return m;
660 }
661 
662 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
663 				   int *mpri, struct rt6_info *match,
664 				   bool *do_rr)
665 {
666 	int m;
667 	bool match_do_rr = false;
668 	struct inet6_dev *idev = rt->rt6i_idev;
669 	struct net_device *dev = rt->dst.dev;
670 
671 	if (dev && !netif_carrier_ok(dev) &&
672 	    idev->cnf.ignore_routes_with_linkdown)
673 		goto out;
674 
675 	if (rt6_check_expired(rt))
676 		goto out;
677 
678 	m = rt6_score_route(rt, oif, strict);
679 	if (m == RT6_NUD_FAIL_DO_RR) {
680 		match_do_rr = true;
681 		m = 0; /* lowest valid score */
682 	} else if (m == RT6_NUD_FAIL_HARD) {
683 		goto out;
684 	}
685 
686 	if (strict & RT6_LOOKUP_F_REACHABLE)
687 		rt6_probe(rt);
688 
689 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
690 	if (m > *mpri) {
691 		*do_rr = match_do_rr;
692 		*mpri = m;
693 		match = rt;
694 	}
695 out:
696 	return match;
697 }
698 
699 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
700 				     struct rt6_info *rr_head,
701 				     u32 metric, int oif, int strict,
702 				     bool *do_rr)
703 {
704 	struct rt6_info *rt, *match, *cont;
705 	int mpri = -1;
706 
707 	match = NULL;
708 	cont = NULL;
709 	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
710 		if (rt->rt6i_metric != metric) {
711 			cont = rt;
712 			break;
713 		}
714 
715 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
716 	}
717 
718 	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
719 		if (rt->rt6i_metric != metric) {
720 			cont = rt;
721 			break;
722 		}
723 
724 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
725 	}
726 
727 	if (match || !cont)
728 		return match;
729 
730 	for (rt = cont; rt; rt = rt->dst.rt6_next)
731 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
732 
733 	return match;
734 }
735 
736 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
737 {
738 	struct rt6_info *match, *rt0;
739 	struct net *net;
740 	bool do_rr = false;
741 
742 	rt0 = fn->rr_ptr;
743 	if (!rt0)
744 		fn->rr_ptr = rt0 = fn->leaf;
745 
746 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
747 			     &do_rr);
748 
749 	if (do_rr) {
750 		struct rt6_info *next = rt0->dst.rt6_next;
751 
752 		/* no entries matched; do round-robin */
753 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
754 			next = fn->leaf;
755 
756 		if (next != rt0)
757 			fn->rr_ptr = next;
758 	}
759 
760 	net = dev_net(rt0->dst.dev);
761 	return match ? match : net->ipv6.ip6_null_entry;
762 }
763 
764 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
765 {
766 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
767 }
768 
769 #ifdef CONFIG_IPV6_ROUTE_INFO
770 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
771 		  const struct in6_addr *gwaddr)
772 {
773 	struct net *net = dev_net(dev);
774 	struct route_info *rinfo = (struct route_info *) opt;
775 	struct in6_addr prefix_buf, *prefix;
776 	unsigned int pref;
777 	unsigned long lifetime;
778 	struct rt6_info *rt;
779 
780 	if (len < sizeof(struct route_info)) {
781 		return -EINVAL;
782 	}
783 
784 	/* Sanity check for prefix_len and length */
785 	if (rinfo->length > 3) {
786 		return -EINVAL;
787 	} else if (rinfo->prefix_len > 128) {
788 		return -EINVAL;
789 	} else if (rinfo->prefix_len > 64) {
790 		if (rinfo->length < 2) {
791 			return -EINVAL;
792 		}
793 	} else if (rinfo->prefix_len > 0) {
794 		if (rinfo->length < 1) {
795 			return -EINVAL;
796 		}
797 	}
798 
799 	pref = rinfo->route_pref;
800 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
801 		return -EINVAL;
802 
803 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
804 
805 	if (rinfo->length == 3)
806 		prefix = (struct in6_addr *)rinfo->prefix;
807 	else {
808 		/* this function is safe */
809 		ipv6_addr_prefix(&prefix_buf,
810 				 (struct in6_addr *)rinfo->prefix,
811 				 rinfo->prefix_len);
812 		prefix = &prefix_buf;
813 	}
814 
815 	if (rinfo->prefix_len == 0)
816 		rt = rt6_get_dflt_router(gwaddr, dev);
817 	else
818 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
819 					gwaddr, dev->ifindex);
820 
821 	if (rt && !lifetime) {
822 		ip6_del_rt(rt);
823 		rt = NULL;
824 	}
825 
826 	if (!rt && lifetime)
827 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
828 					pref);
829 	else if (rt)
830 		rt->rt6i_flags = RTF_ROUTEINFO |
831 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
832 
833 	if (rt) {
834 		if (!addrconf_finite_timeout(lifetime))
835 			rt6_clean_expires(rt);
836 		else
837 			rt6_set_expires(rt, jiffies + HZ * lifetime);
838 
839 		ip6_rt_put(rt);
840 	}
841 	return 0;
842 }
843 #endif
844 
845 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
846 					struct in6_addr *saddr)
847 {
848 	struct fib6_node *pn;
849 	while (1) {
850 		if (fn->fn_flags & RTN_TL_ROOT)
851 			return NULL;
852 		pn = fn->parent;
853 		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
854 			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
855 		else
856 			fn = pn;
857 		if (fn->fn_flags & RTN_RTINFO)
858 			return fn;
859 	}
860 }
861 
862 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
863 					     struct fib6_table *table,
864 					     struct flowi6 *fl6, int flags)
865 {
866 	struct fib6_node *fn;
867 	struct rt6_info *rt;
868 
869 	read_lock_bh(&table->tb6_lock);
870 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
871 restart:
872 	rt = fn->leaf;
873 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
874 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
875 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
876 	if (rt == net->ipv6.ip6_null_entry) {
877 		fn = fib6_backtrack(fn, &fl6->saddr);
878 		if (fn)
879 			goto restart;
880 	}
881 	dst_use(&rt->dst, jiffies);
882 	read_unlock_bh(&table->tb6_lock);
883 	return rt;
884 
885 }
886 
887 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
888 				    int flags)
889 {
890 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
891 }
892 EXPORT_SYMBOL_GPL(ip6_route_lookup);
893 
894 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
895 			    const struct in6_addr *saddr, int oif, int strict)
896 {
897 	struct flowi6 fl6 = {
898 		.flowi6_oif = oif,
899 		.daddr = *daddr,
900 	};
901 	struct dst_entry *dst;
902 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
903 
904 	if (saddr) {
905 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
906 		flags |= RT6_LOOKUP_F_HAS_SADDR;
907 	}
908 
909 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
910 	if (dst->error == 0)
911 		return (struct rt6_info *) dst;
912 
913 	dst_release(dst);
914 
915 	return NULL;
916 }
917 EXPORT_SYMBOL(rt6_lookup);
918 
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to the route, it may be destroyed.
 */
924 
925 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
926 			struct mx6_config *mxc)
927 {
928 	int err;
929 	struct fib6_table *table;
930 
931 	table = rt->rt6i_table;
932 	write_lock_bh(&table->tb6_lock);
933 	err = fib6_add(&table->tb6_root, rt, info, mxc);
934 	write_unlock_bh(&table->tb6_lock);
935 
936 	return err;
937 }
938 
939 int ip6_ins_rt(struct rt6_info *rt)
940 {
941 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
942 	struct mx6_config mxc = { .mx = NULL, };
943 
944 	return __ip6_ins_rt(rt, &info, &mxc);
945 }
946 
947 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
948 					   const struct in6_addr *daddr,
949 					   const struct in6_addr *saddr)
950 {
951 	struct rt6_info *rt;
952 
953 	/*
954 	 *	Clone the route.
955 	 */
956 
957 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
958 		ort = (struct rt6_info *)ort->dst.from;
959 
960 	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
961 
962 	if (!rt)
963 		return NULL;
964 
965 	ip6_rt_copy_init(rt, ort);
966 	rt->rt6i_flags |= RTF_CACHE;
967 	rt->rt6i_metric = 0;
968 	rt->dst.flags |= DST_HOST;
969 	rt->rt6i_dst.addr = *daddr;
970 	rt->rt6i_dst.plen = 128;
971 
972 	if (!rt6_is_gw_or_nonexthop(ort)) {
973 		if (ort->rt6i_dst.plen != 128 &&
974 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
975 			rt->rt6i_flags |= RTF_ANYCAST;
976 #ifdef CONFIG_IPV6_SUBTREES
977 		if (rt->rt6i_src.plen && saddr) {
978 			rt->rt6i_src.addr = *saddr;
979 			rt->rt6i_src.plen = 128;
980 		}
981 #endif
982 	}
983 
984 	return rt;
985 }
986 
987 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
988 {
989 	struct rt6_info *pcpu_rt;
990 
991 	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
992 				  rt->dst.dev, rt->dst.flags);
993 
994 	if (!pcpu_rt)
995 		return NULL;
996 	ip6_rt_copy_init(pcpu_rt, rt);
997 	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
998 	pcpu_rt->rt6i_flags |= RTF_PCPU;
999 	return pcpu_rt;
1000 }
1001 
1002 /* It should be called with read_lock_bh(&tb6_lock) acquired */
1003 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1004 {
1005 	struct rt6_info *pcpu_rt, **p;
1006 
1007 	p = this_cpu_ptr(rt->rt6i_pcpu);
1008 	pcpu_rt = *p;
1009 
1010 	if (pcpu_rt) {
1011 		dst_hold(&pcpu_rt->dst);
1012 		rt6_dst_from_metrics_check(pcpu_rt);
1013 	}
1014 	return pcpu_rt;
1015 }
1016 
/*
 * Create (or pick up) this CPU's copy of @rt and return it with a
 * reference held.
 *
 * The copy is allocated before the table lock is taken.  On allocation
 * failure the namespace's null entry is returned (with a reference).
 * If another context installed a copy first, the new one is destroyed
 * and the existing copy is used.  If @rt no longer has a per-cpu array,
 * @rt itself is returned.
 */
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct fib6_table *table = rt->rt6i_table;
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	read_lock_bh(&table->tb6_lock);
	if (rt->rt6i_pcpu) {
		p = this_cpu_ptr(rt->rt6i_pcpu);
		/* install our copy only if the slot is still empty */
		prev = cmpxchg(p, NULL, pcpu_rt);
		if (prev) {
			/* If someone did it before us, return prev instead */
			dst_destroy(&pcpu_rt->dst);
			pcpu_rt = prev;
		}
	} else {
		/* rt has been removed from the fib6 tree
		 * before we have a chance to acquire the read_lock.
		 * In this case, don't bother to create a pcpu rt
		 * since rt is going away anyway.  The next
		 * dst_check() will trigger a re-lookup.
		 */
		dst_destroy(&pcpu_rt->dst);
		pcpu_rt = rt;
	}
	/* reference handed to the caller */
	dst_hold(&pcpu_rt->dst);
	rt6_dst_from_metrics_check(pcpu_rt);
	read_unlock_bh(&table->tb6_lock);
	return pcpu_rt;
}
1054 
/*
 * Core policy-routing lookup shared by the input and output paths.
 *
 * @oif is the interface index to match (the callers in this file pass
 * flowi6_iif for input and flowi6_oif for output).  The lookup is strict
 * on the interface when RT6_LOOKUP_F_IFACE is set in @flags, and it
 * prefers reachable next hops when forwarding is disabled for the
 * namespace.
 *
 * Selection runs rt6_select() on the best matching node and backtracks
 * to less specific nodes while the result is the null entry.  If
 * nothing is found with the reachability requirement, the whole search
 * is repeated once without it.
 *
 * The result is one of:
 *  - the null entry or an RTF_CACHE route, returned directly;
 *  - a fresh RTF_CACHE clone on the uncached list, for
 *    FLOWI_FLAG_KNOWN_NH lookups that hit a non-gateway route;
 *  - the per-cpu copy of the selected route, created on demand.
 * The uncached and per-cpu paths return with a reference taken by
 * dst_hold().  NOTE(review): the first path relies on dst_use() doing
 * the same; dst_use() is defined outside this excerpt, so confirm.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	/* remembered so the search can restart without F_REACHABLE */
	saved_fn = fn;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}


	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		/* keep rt usable after the table lock is dropped */
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt)
			rt6_uncached_list_add(uncached_rt);
		else
			uncached_rt = net->ipv6.ip6_null_entry;

		dst_hold(&uncached_rt->dst);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		return pcpu_rt;

	}
}
1144 
1145 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1146 					    struct flowi6 *fl6, int flags)
1147 {
1148 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1149 }
1150 
1151 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1152 						struct net_device *dev,
1153 						struct flowi6 *fl6, int flags)
1154 {
1155 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1156 		flags |= RT6_LOOKUP_F_IFACE;
1157 
1158 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1159 }
1160 
1161 void ip6_route_input(struct sk_buff *skb)
1162 {
1163 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1164 	struct net *net = dev_net(skb->dev);
1165 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1166 	struct ip_tunnel_info *tun_info;
1167 	struct flowi6 fl6 = {
1168 		.flowi6_iif = skb->dev->ifindex,
1169 		.daddr = iph->daddr,
1170 		.saddr = iph->saddr,
1171 		.flowlabel = ip6_flowinfo(iph),
1172 		.flowi6_mark = skb->mark,
1173 		.flowi6_proto = iph->nexthdr,
1174 	};
1175 
1176 	tun_info = skb_tunnel_info(skb);
1177 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1178 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1179 	skb_dst_drop(skb);
1180 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1181 }
1182 
1183 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1184 					     struct flowi6 *fl6, int flags)
1185 {
1186 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1187 }
1188 
1189 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1190 				    struct flowi6 *fl6)
1191 {
1192 	int flags = 0;
1193 
1194 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1195 
1196 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1197 		flags |= RT6_LOOKUP_F_IFACE;
1198 
1199 	if (!ipv6_addr_any(&fl6->saddr))
1200 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1201 	else if (sk)
1202 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1203 
1204 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1205 }
1206 EXPORT_SYMBOL(ip6_route_output);
1207 
1208 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1209 {
1210 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1211 	struct dst_entry *new = NULL;
1212 
1213 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1214 	if (rt) {
1215 		new = &rt->dst;
1216 
1217 		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1218 
1219 		new->__use = 1;
1220 		new->input = dst_discard;
1221 		new->output = dst_discard_sk;
1222 
1223 		if (dst_metrics_read_only(&ort->dst))
1224 			new->_metrics = ort->dst._metrics;
1225 		else
1226 			dst_copy_metrics(new, &ort->dst);
1227 		rt->rt6i_idev = ort->rt6i_idev;
1228 		if (rt->rt6i_idev)
1229 			in6_dev_hold(rt->rt6i_idev);
1230 
1231 		rt->rt6i_gateway = ort->rt6i_gateway;
1232 		rt->rt6i_flags = ort->rt6i_flags;
1233 		rt->rt6i_metric = 0;
1234 
1235 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1236 #ifdef CONFIG_IPV6_SUBTREES
1237 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1238 #endif
1239 
1240 		dst_free(new);
1241 	}
1242 
1243 	dst_release(dst_orig);
1244 	return new ? new : ERR_PTR(-ENOMEM);
1245 }
1246 
1247 /*
1248  *	Destination cache support functions
1249  */
1250 
1251 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1252 {
1253 	if (rt->dst.from &&
1254 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1255 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1256 }
1257 
1258 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1259 {
1260 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1261 		return NULL;
1262 
1263 	if (rt6_check_expired(rt))
1264 		return NULL;
1265 
1266 	return &rt->dst;
1267 }
1268 
1269 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1270 {
1271 	if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1272 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1273 		return &rt->dst;
1274 	else
1275 		return NULL;
1276 }
1277 
1278 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1279 {
1280 	struct rt6_info *rt;
1281 
1282 	rt = (struct rt6_info *) dst;
1283 
1284 	/* All IPV6 dsts are created with ->obsolete set to the value
1285 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1286 	 * into this function always.
1287 	 */
1288 
1289 	rt6_dst_from_metrics_check(rt);
1290 
1291 	if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
1292 		return rt6_dst_from_check(rt, cookie);
1293 	else
1294 		return rt6_check(rt, cookie);
1295 }
1296 
1297 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1298 {
1299 	struct rt6_info *rt = (struct rt6_info *) dst;
1300 
1301 	if (rt) {
1302 		if (rt->rt6i_flags & RTF_CACHE) {
1303 			if (rt6_check_expired(rt)) {
1304 				ip6_del_rt(rt);
1305 				dst = NULL;
1306 			}
1307 		} else {
1308 			dst_release(dst);
1309 			dst = NULL;
1310 		}
1311 	}
1312 	return dst;
1313 }
1314 
1315 static void ip6_link_failure(struct sk_buff *skb)
1316 {
1317 	struct rt6_info *rt;
1318 
1319 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1320 
1321 	rt = (struct rt6_info *) skb_dst(skb);
1322 	if (rt) {
1323 		if (rt->rt6i_flags & RTF_CACHE) {
1324 			dst_hold(&rt->dst);
1325 			if (ip6_del_rt(rt))
1326 				dst_free(&rt->dst);
1327 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1328 			rt->rt6i_node->fn_sernum = -1;
1329 		}
1330 	}
1331 }
1332 
1333 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1334 {
1335 	struct net *net = dev_net(rt->dst.dev);
1336 
1337 	rt->rt6i_flags |= RTF_MODIFIED;
1338 	rt->rt6i_pmtu = mtu;
1339 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1340 }
1341 
1342 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1343 				 const struct ipv6hdr *iph, u32 mtu)
1344 {
1345 	struct rt6_info *rt6 = (struct rt6_info *)dst;
1346 
1347 	if (rt6->rt6i_flags & RTF_LOCAL)
1348 		return;
1349 
1350 	dst_confirm(dst);
1351 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1352 	if (mtu >= dst_mtu(dst))
1353 		return;
1354 
1355 	if (rt6->rt6i_flags & RTF_CACHE) {
1356 		rt6_do_update_pmtu(rt6, mtu);
1357 	} else {
1358 		const struct in6_addr *daddr, *saddr;
1359 		struct rt6_info *nrt6;
1360 
1361 		if (iph) {
1362 			daddr = &iph->daddr;
1363 			saddr = &iph->saddr;
1364 		} else if (sk) {
1365 			daddr = &sk->sk_v6_daddr;
1366 			saddr = &inet6_sk(sk)->saddr;
1367 		} else {
1368 			return;
1369 		}
1370 		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1371 		if (nrt6) {
1372 			rt6_do_update_pmtu(nrt6, mtu);
1373 
1374 			/* ip6_ins_rt(nrt6) will bump the
1375 			 * rt6->rt6i_node->fn_sernum
1376 			 * which will fail the next rt6_check() and
1377 			 * invalidate the sk->sk_dst_cache.
1378 			 */
1379 			ip6_ins_rt(nrt6);
1380 		}
1381 	}
1382 }
1383 
1384 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1385 			       struct sk_buff *skb, u32 mtu)
1386 {
1387 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1388 }
1389 
1390 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1391 		     int oif, u32 mark)
1392 {
1393 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1394 	struct dst_entry *dst;
1395 	struct flowi6 fl6;
1396 
1397 	memset(&fl6, 0, sizeof(fl6));
1398 	fl6.flowi6_oif = oif;
1399 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1400 	fl6.daddr = iph->daddr;
1401 	fl6.saddr = iph->saddr;
1402 	fl6.flowlabel = ip6_flowinfo(iph);
1403 
1404 	dst = ip6_route_output(net, NULL, &fl6);
1405 	if (!dst->error)
1406 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1407 	dst_release(dst);
1408 }
1409 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1410 
1411 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1412 {
1413 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1414 			sk->sk_bound_dev_if, sk->sk_mark);
1415 }
1416 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1417 
1418 /* Handle redirects */
/* Flow key extended with the address of the router that sent a redirect.
 * @fl6 must stay the first member: __ip6_route_redirect() receives a
 * struct flowi6 pointer and casts it back to this structure.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;	/* compared against rt6i_gateway */
};
1423 
1424 static struct rt6_info *__ip6_route_redirect(struct net *net,
1425 					     struct fib6_table *table,
1426 					     struct flowi6 *fl6,
1427 					     int flags)
1428 {
1429 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1430 	struct rt6_info *rt;
1431 	struct fib6_node *fn;
1432 
1433 	/* Get the "current" route for this destination and
1434 	 * check if the redirect has come from approriate router.
1435 	 *
1436 	 * RFC 4861 specifies that redirects should only be
1437 	 * accepted if they come from the nexthop to the target.
1438 	 * Due to the way the routes are chosen, this notion
1439 	 * is a bit fuzzy and one might need to check all possible
1440 	 * routes.
1441 	 */
1442 
1443 	read_lock_bh(&table->tb6_lock);
1444 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1445 restart:
1446 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1447 		if (rt6_check_expired(rt))
1448 			continue;
1449 		if (rt->dst.error)
1450 			break;
1451 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1452 			continue;
1453 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1454 			continue;
1455 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1456 			continue;
1457 		break;
1458 	}
1459 
1460 	if (!rt)
1461 		rt = net->ipv6.ip6_null_entry;
1462 	else if (rt->dst.error) {
1463 		rt = net->ipv6.ip6_null_entry;
1464 		goto out;
1465 	}
1466 
1467 	if (rt == net->ipv6.ip6_null_entry) {
1468 		fn = fib6_backtrack(fn, &fl6->saddr);
1469 		if (fn)
1470 			goto restart;
1471 	}
1472 
1473 out:
1474 	dst_hold(&rt->dst);
1475 
1476 	read_unlock_bh(&table->tb6_lock);
1477 
1478 	return rt;
1479 };
1480 
1481 static struct dst_entry *ip6_route_redirect(struct net *net,
1482 					const struct flowi6 *fl6,
1483 					const struct in6_addr *gateway)
1484 {
1485 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1486 	struct ip6rd_flowi rdfl;
1487 
1488 	rdfl.fl6 = *fl6;
1489 	rdfl.gateway = *gateway;
1490 
1491 	return fib6_rule_lookup(net, &rdfl.fl6,
1492 				flags, __ip6_route_redirect);
1493 }
1494 
1495 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1496 {
1497 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1498 	struct dst_entry *dst;
1499 	struct flowi6 fl6;
1500 
1501 	memset(&fl6, 0, sizeof(fl6));
1502 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1503 	fl6.flowi6_oif = oif;
1504 	fl6.flowi6_mark = mark;
1505 	fl6.daddr = iph->daddr;
1506 	fl6.saddr = iph->saddr;
1507 	fl6.flowlabel = ip6_flowinfo(iph);
1508 
1509 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1510 	rt6_do_redirect(dst, NULL, skb);
1511 	dst_release(dst);
1512 }
1513 EXPORT_SYMBOL_GPL(ip6_redirect);
1514 
1515 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1516 			    u32 mark)
1517 {
1518 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1519 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1520 	struct dst_entry *dst;
1521 	struct flowi6 fl6;
1522 
1523 	memset(&fl6, 0, sizeof(fl6));
1524 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1525 	fl6.flowi6_oif = oif;
1526 	fl6.flowi6_mark = mark;
1527 	fl6.daddr = msg->dest;
1528 	fl6.saddr = iph->daddr;
1529 
1530 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1531 	rt6_do_redirect(dst, NULL, skb);
1532 	dst_release(dst);
1533 }
1534 
1535 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1536 {
1537 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1538 }
1539 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1540 
1541 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1542 {
1543 	struct net_device *dev = dst->dev;
1544 	unsigned int mtu = dst_mtu(dst);
1545 	struct net *net = dev_net(dev);
1546 
1547 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1548 
1549 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1550 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1551 
1552 	/*
1553 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1554 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1555 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1556 	 * rely only on pmtu discovery"
1557 	 */
1558 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1559 		mtu = IPV6_MAXPLEN;
1560 	return mtu;
1561 }
1562 
1563 static unsigned int ip6_mtu(const struct dst_entry *dst)
1564 {
1565 	const struct rt6_info *rt = (const struct rt6_info *)dst;
1566 	unsigned int mtu = rt->rt6i_pmtu;
1567 	struct inet6_dev *idev;
1568 
1569 	if (mtu)
1570 		goto out;
1571 
1572 	mtu = dst_metric_raw(dst, RTAX_MTU);
1573 	if (mtu)
1574 		goto out;
1575 
1576 	mtu = IPV6_MIN_MTU;
1577 
1578 	rcu_read_lock();
1579 	idev = __in6_dev_get(dst->dev);
1580 	if (idev)
1581 		mtu = idev->cnf.mtu6;
1582 	rcu_read_unlock();
1583 
1584 out:
1585 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1586 }
1587 
/* Singly linked list (chained through dst->next) of the dsts created by
 * icmp6_dst_alloc().  icmp6_dst_lock is taken around every access to it.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1590 
1591 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1592 				  struct flowi6 *fl6)
1593 {
1594 	struct dst_entry *dst;
1595 	struct rt6_info *rt;
1596 	struct inet6_dev *idev = in6_dev_get(dev);
1597 	struct net *net = dev_net(dev);
1598 
1599 	if (unlikely(!idev))
1600 		return ERR_PTR(-ENODEV);
1601 
1602 	rt = ip6_dst_alloc(net, dev, 0);
1603 	if (unlikely(!rt)) {
1604 		in6_dev_put(idev);
1605 		dst = ERR_PTR(-ENOMEM);
1606 		goto out;
1607 	}
1608 
1609 	rt->dst.flags |= DST_HOST;
1610 	rt->dst.output  = ip6_output;
1611 	atomic_set(&rt->dst.__refcnt, 1);
1612 	rt->rt6i_gateway  = fl6->daddr;
1613 	rt->rt6i_dst.addr = fl6->daddr;
1614 	rt->rt6i_dst.plen = 128;
1615 	rt->rt6i_idev     = idev;
1616 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1617 
1618 	spin_lock_bh(&icmp6_dst_lock);
1619 	rt->dst.next = icmp6_dst_gc_list;
1620 	icmp6_dst_gc_list = &rt->dst;
1621 	spin_unlock_bh(&icmp6_dst_lock);
1622 
1623 	fib6_force_start_gc(net);
1624 
1625 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1626 
1627 out:
1628 	return dst;
1629 }
1630 
1631 int icmp6_dst_gc(void)
1632 {
1633 	struct dst_entry *dst, **pprev;
1634 	int more = 0;
1635 
1636 	spin_lock_bh(&icmp6_dst_lock);
1637 	pprev = &icmp6_dst_gc_list;
1638 
1639 	while ((dst = *pprev) != NULL) {
1640 		if (!atomic_read(&dst->__refcnt)) {
1641 			*pprev = dst->next;
1642 			dst_free(dst);
1643 		} else {
1644 			pprev = &dst->next;
1645 			++more;
1646 		}
1647 	}
1648 
1649 	spin_unlock_bh(&icmp6_dst_lock);
1650 
1651 	return more;
1652 }
1653 
1654 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1655 			    void *arg)
1656 {
1657 	struct dst_entry *dst, **pprev;
1658 
1659 	spin_lock_bh(&icmp6_dst_lock);
1660 	pprev = &icmp6_dst_gc_list;
1661 	while ((dst = *pprev) != NULL) {
1662 		struct rt6_info *rt = (struct rt6_info *) dst;
1663 		if (func(rt, arg)) {
1664 			*pprev = dst->next;
1665 			dst_free(dst);
1666 		} else {
1667 			pprev = &dst->next;
1668 		}
1669 	}
1670 	spin_unlock_bh(&icmp6_dst_lock);
1671 }
1672 
1673 static int ip6_dst_gc(struct dst_ops *ops)
1674 {
1675 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1676 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1677 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1678 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1679 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1680 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1681 	int entries;
1682 
1683 	entries = dst_entries_get_fast(ops);
1684 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1685 	    entries <= rt_max_size)
1686 		goto out;
1687 
1688 	net->ipv6.ip6_rt_gc_expire++;
1689 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1690 	entries = dst_entries_get_slow(ops);
1691 	if (entries < ops->gc_thresh)
1692 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1693 out:
1694 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1695 	return entries > rt_max_size;
1696 }
1697 
1698 static int ip6_convert_metrics(struct mx6_config *mxc,
1699 			       const struct fib6_config *cfg)
1700 {
1701 	struct nlattr *nla;
1702 	int remaining;
1703 	u32 *mp;
1704 
1705 	if (!cfg->fc_mx)
1706 		return 0;
1707 
1708 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1709 	if (unlikely(!mp))
1710 		return -ENOMEM;
1711 
1712 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1713 		int type = nla_type(nla);
1714 		u32 val;
1715 
1716 		if (!type)
1717 			continue;
1718 		if (unlikely(type > RTAX_MAX))
1719 			goto err;
1720 
1721 		if (type == RTAX_CC_ALGO) {
1722 			char tmp[TCP_CA_NAME_MAX];
1723 
1724 			nla_strlcpy(tmp, nla, sizeof(tmp));
1725 			val = tcp_ca_get_key_by_name(tmp);
1726 			if (val == TCP_CA_UNSPEC)
1727 				goto err;
1728 		} else {
1729 			val = nla_get_u32(nla);
1730 		}
1731 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1732 			goto err;
1733 
1734 		mp[type - 1] = val;
1735 		__set_bit(type - 1, mxc->mx_valid);
1736 	}
1737 
1738 	mxc->mx = mp;
1739 
1740 	return 0;
1741  err:
1742 	kfree(mp);
1743 	return -EINVAL;
1744 }
1745 
1746 int ip6_route_add(struct fib6_config *cfg)
1747 {
1748 	int err;
1749 	struct net *net = cfg->fc_nlinfo.nl_net;
1750 	struct rt6_info *rt = NULL;
1751 	struct net_device *dev = NULL;
1752 	struct inet6_dev *idev = NULL;
1753 	struct fib6_table *table;
1754 	struct mx6_config mxc = { .mx = NULL, };
1755 	int addr_type;
1756 
1757 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1758 		return -EINVAL;
1759 #ifndef CONFIG_IPV6_SUBTREES
1760 	if (cfg->fc_src_len)
1761 		return -EINVAL;
1762 #endif
1763 	if (cfg->fc_ifindex) {
1764 		err = -ENODEV;
1765 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1766 		if (!dev)
1767 			goto out;
1768 		idev = in6_dev_get(dev);
1769 		if (!idev)
1770 			goto out;
1771 	}
1772 
1773 	if (cfg->fc_metric == 0)
1774 		cfg->fc_metric = IP6_RT_PRIO_USER;
1775 
1776 	err = -ENOBUFS;
1777 	if (cfg->fc_nlinfo.nlh &&
1778 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1779 		table = fib6_get_table(net, cfg->fc_table);
1780 		if (!table) {
1781 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1782 			table = fib6_new_table(net, cfg->fc_table);
1783 		}
1784 	} else {
1785 		table = fib6_new_table(net, cfg->fc_table);
1786 	}
1787 
1788 	if (!table)
1789 		goto out;
1790 
1791 	rt = ip6_dst_alloc(net, NULL,
1792 			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1793 
1794 	if (!rt) {
1795 		err = -ENOMEM;
1796 		goto out;
1797 	}
1798 
1799 	if (cfg->fc_flags & RTF_EXPIRES)
1800 		rt6_set_expires(rt, jiffies +
1801 				clock_t_to_jiffies(cfg->fc_expires));
1802 	else
1803 		rt6_clean_expires(rt);
1804 
1805 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1806 		cfg->fc_protocol = RTPROT_BOOT;
1807 	rt->rt6i_protocol = cfg->fc_protocol;
1808 
1809 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1810 
1811 	if (addr_type & IPV6_ADDR_MULTICAST)
1812 		rt->dst.input = ip6_mc_input;
1813 	else if (cfg->fc_flags & RTF_LOCAL)
1814 		rt->dst.input = ip6_input;
1815 	else
1816 		rt->dst.input = ip6_forward;
1817 
1818 	rt->dst.output = ip6_output;
1819 
1820 	if (cfg->fc_encap) {
1821 		struct lwtunnel_state *lwtstate;
1822 
1823 		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1824 					   cfg->fc_encap, AF_INET6, cfg,
1825 					   &lwtstate);
1826 		if (err)
1827 			goto out;
1828 		rt->dst.lwtstate = lwtstate_get(lwtstate);
1829 		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1830 			rt->dst.lwtstate->orig_output = rt->dst.output;
1831 			rt->dst.output = lwtunnel_output;
1832 		}
1833 		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1834 			rt->dst.lwtstate->orig_input = rt->dst.input;
1835 			rt->dst.input = lwtunnel_input;
1836 		}
1837 	}
1838 
1839 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1840 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1841 	if (rt->rt6i_dst.plen == 128)
1842 		rt->dst.flags |= DST_HOST;
1843 
1844 #ifdef CONFIG_IPV6_SUBTREES
1845 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1846 	rt->rt6i_src.plen = cfg->fc_src_len;
1847 #endif
1848 
1849 	rt->rt6i_metric = cfg->fc_metric;
1850 
1851 	/* We cannot add true routes via loopback here,
1852 	   they would result in kernel looping; promote them to reject routes
1853 	 */
1854 	if ((cfg->fc_flags & RTF_REJECT) ||
1855 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1856 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1857 	     !(cfg->fc_flags & RTF_LOCAL))) {
1858 		/* hold loopback dev/idev if we haven't done so. */
1859 		if (dev != net->loopback_dev) {
1860 			if (dev) {
1861 				dev_put(dev);
1862 				in6_dev_put(idev);
1863 			}
1864 			dev = net->loopback_dev;
1865 			dev_hold(dev);
1866 			idev = in6_dev_get(dev);
1867 			if (!idev) {
1868 				err = -ENODEV;
1869 				goto out;
1870 			}
1871 		}
1872 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1873 		switch (cfg->fc_type) {
1874 		case RTN_BLACKHOLE:
1875 			rt->dst.error = -EINVAL;
1876 			rt->dst.output = dst_discard_sk;
1877 			rt->dst.input = dst_discard;
1878 			break;
1879 		case RTN_PROHIBIT:
1880 			rt->dst.error = -EACCES;
1881 			rt->dst.output = ip6_pkt_prohibit_out;
1882 			rt->dst.input = ip6_pkt_prohibit;
1883 			break;
1884 		case RTN_THROW:
1885 		default:
1886 			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1887 					: -ENETUNREACH;
1888 			rt->dst.output = ip6_pkt_discard_out;
1889 			rt->dst.input = ip6_pkt_discard;
1890 			break;
1891 		}
1892 		goto install_route;
1893 	}
1894 
1895 	if (cfg->fc_flags & RTF_GATEWAY) {
1896 		const struct in6_addr *gw_addr;
1897 		int gwa_type;
1898 
1899 		gw_addr = &cfg->fc_gateway;
1900 		gwa_type = ipv6_addr_type(gw_addr);
1901 
1902 		/* if gw_addr is local we will fail to detect this in case
1903 		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1904 		 * will return already-added prefix route via interface that
1905 		 * prefix route was assigned to, which might be non-loopback.
1906 		 */
1907 		err = -EINVAL;
1908 		if (ipv6_chk_addr_and_flags(net, gw_addr,
1909 					    gwa_type & IPV6_ADDR_LINKLOCAL ?
1910 					    dev : NULL, 0, 0))
1911 			goto out;
1912 
1913 		rt->rt6i_gateway = *gw_addr;
1914 
1915 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1916 			struct rt6_info *grt;
1917 
1918 			/* IPv6 strictly inhibits using not link-local
1919 			   addresses as nexthop address.
1920 			   Otherwise, router will not able to send redirects.
1921 			   It is very good, but in some (rare!) circumstances
1922 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1923 			   some exceptions. --ANK
1924 			 */
1925 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1926 				goto out;
1927 
1928 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1929 
1930 			err = -EHOSTUNREACH;
1931 			if (!grt)
1932 				goto out;
1933 			if (dev) {
1934 				if (dev != grt->dst.dev) {
1935 					ip6_rt_put(grt);
1936 					goto out;
1937 				}
1938 			} else {
1939 				dev = grt->dst.dev;
1940 				idev = grt->rt6i_idev;
1941 				dev_hold(dev);
1942 				in6_dev_hold(grt->rt6i_idev);
1943 			}
1944 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1945 				err = 0;
1946 			ip6_rt_put(grt);
1947 
1948 			if (err)
1949 				goto out;
1950 		}
1951 		err = -EINVAL;
1952 		if (!dev || (dev->flags & IFF_LOOPBACK))
1953 			goto out;
1954 	}
1955 
1956 	err = -ENODEV;
1957 	if (!dev)
1958 		goto out;
1959 
1960 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1961 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1962 			err = -EINVAL;
1963 			goto out;
1964 		}
1965 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1966 		rt->rt6i_prefsrc.plen = 128;
1967 	} else
1968 		rt->rt6i_prefsrc.plen = 0;
1969 
1970 	rt->rt6i_flags = cfg->fc_flags;
1971 
1972 install_route:
1973 	rt->dst.dev = dev;
1974 	rt->rt6i_idev = idev;
1975 	rt->rt6i_table = table;
1976 
1977 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1978 
1979 	err = ip6_convert_metrics(&mxc, cfg);
1980 	if (err)
1981 		goto out;
1982 
1983 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1984 
1985 	kfree(mxc.mx);
1986 	return err;
1987 out:
1988 	if (dev)
1989 		dev_put(dev);
1990 	if (idev)
1991 		in6_dev_put(idev);
1992 	if (rt)
1993 		dst_free(&rt->dst);
1994 	return err;
1995 }
1996 
1997 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1998 {
1999 	int err;
2000 	struct fib6_table *table;
2001 	struct net *net = dev_net(rt->dst.dev);
2002 
2003 	if (rt == net->ipv6.ip6_null_entry) {
2004 		err = -ENOENT;
2005 		goto out;
2006 	}
2007 
2008 	table = rt->rt6i_table;
2009 	write_lock_bh(&table->tb6_lock);
2010 	err = fib6_del(rt, info);
2011 	write_unlock_bh(&table->tb6_lock);
2012 
2013 out:
2014 	ip6_rt_put(rt);
2015 	return err;
2016 }
2017 
2018 int ip6_del_rt(struct rt6_info *rt)
2019 {
2020 	struct nl_info info = {
2021 		.nl_net = dev_net(rt->dst.dev),
2022 	};
2023 	return __ip6_del_rt(rt, &info);
2024 }
2025 
2026 static int ip6_route_del(struct fib6_config *cfg)
2027 {
2028 	struct fib6_table *table;
2029 	struct fib6_node *fn;
2030 	struct rt6_info *rt;
2031 	int err = -ESRCH;
2032 
2033 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2034 	if (!table)
2035 		return err;
2036 
2037 	read_lock_bh(&table->tb6_lock);
2038 
2039 	fn = fib6_locate(&table->tb6_root,
2040 			 &cfg->fc_dst, cfg->fc_dst_len,
2041 			 &cfg->fc_src, cfg->fc_src_len);
2042 
2043 	if (fn) {
2044 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2045 			if ((rt->rt6i_flags & RTF_CACHE) &&
2046 			    !(cfg->fc_flags & RTF_CACHE))
2047 				continue;
2048 			if (cfg->fc_ifindex &&
2049 			    (!rt->dst.dev ||
2050 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
2051 				continue;
2052 			if (cfg->fc_flags & RTF_GATEWAY &&
2053 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2054 				continue;
2055 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2056 				continue;
2057 			dst_hold(&rt->dst);
2058 			read_unlock_bh(&table->tb6_lock);
2059 
2060 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2061 		}
2062 	}
2063 	read_unlock_bh(&table->tb6_lock);
2064 
2065 	return err;
2066 }
2067 
2068 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2069 {
2070 	struct net *net = dev_net(skb->dev);
2071 	struct netevent_redirect netevent;
2072 	struct rt6_info *rt, *nrt = NULL;
2073 	struct ndisc_options ndopts;
2074 	struct inet6_dev *in6_dev;
2075 	struct neighbour *neigh;
2076 	struct rd_msg *msg;
2077 	int optlen, on_link;
2078 	u8 *lladdr;
2079 
2080 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2081 	optlen -= sizeof(*msg);
2082 
2083 	if (optlen < 0) {
2084 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2085 		return;
2086 	}
2087 
2088 	msg = (struct rd_msg *)icmp6_hdr(skb);
2089 
2090 	if (ipv6_addr_is_multicast(&msg->dest)) {
2091 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2092 		return;
2093 	}
2094 
2095 	on_link = 0;
2096 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2097 		on_link = 1;
2098 	} else if (ipv6_addr_type(&msg->target) !=
2099 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2100 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2101 		return;
2102 	}
2103 
2104 	in6_dev = __in6_dev_get(skb->dev);
2105 	if (!in6_dev)
2106 		return;
2107 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2108 		return;
2109 
2110 	/* RFC2461 8.1:
2111 	 *	The IP source address of the Redirect MUST be the same as the current
2112 	 *	first-hop router for the specified ICMP Destination Address.
2113 	 */
2114 
2115 	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
2116 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2117 		return;
2118 	}
2119 
2120 	lladdr = NULL;
2121 	if (ndopts.nd_opts_tgt_lladdr) {
2122 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2123 					     skb->dev);
2124 		if (!lladdr) {
2125 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2126 			return;
2127 		}
2128 	}
2129 
2130 	rt = (struct rt6_info *) dst;
2131 	if (rt == net->ipv6.ip6_null_entry) {
2132 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2133 		return;
2134 	}
2135 
2136 	/* Redirect received -> path was valid.
2137 	 * Look, redirects are sent only in response to data packets,
2138 	 * so that this nexthop apparently is reachable. --ANK
2139 	 */
2140 	dst_confirm(&rt->dst);
2141 
2142 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2143 	if (!neigh)
2144 		return;
2145 
2146 	/*
2147 	 *	We have finally decided to accept it.
2148 	 */
2149 
2150 	neigh_update(neigh, lladdr, NUD_STALE,
2151 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
2152 		     NEIGH_UPDATE_F_OVERRIDE|
2153 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2154 				     NEIGH_UPDATE_F_ISROUTER))
2155 		     );
2156 
2157 	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2158 	if (!nrt)
2159 		goto out;
2160 
2161 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2162 	if (on_link)
2163 		nrt->rt6i_flags &= ~RTF_GATEWAY;
2164 
2165 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2166 
2167 	if (ip6_ins_rt(nrt))
2168 		goto out;
2169 
2170 	netevent.old = &rt->dst;
2171 	netevent.new = &nrt->dst;
2172 	netevent.daddr = &msg->dest;
2173 	netevent.neigh = neigh;
2174 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2175 
2176 	if (rt->rt6i_flags & RTF_CACHE) {
2177 		rt = (struct rt6_info *) dst_clone(&rt->dst);
2178 		ip6_del_rt(rt);
2179 	}
2180 
2181 out:
2182 	neigh_release(neigh);
2183 }
2184 
2185 /*
2186  *	Misc support functions
2187  */
2188 
2189 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2190 {
2191 	BUG_ON(from->dst.from);
2192 
2193 	rt->rt6i_flags &= ~RTF_EXPIRES;
2194 	dst_hold(&from->dst);
2195 	rt->dst.from = &from->dst;
2196 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2197 }
2198 
2199 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2200 {
2201 	rt->dst.input = ort->dst.input;
2202 	rt->dst.output = ort->dst.output;
2203 	rt->rt6i_dst = ort->rt6i_dst;
2204 	rt->dst.error = ort->dst.error;
2205 	rt->rt6i_idev = ort->rt6i_idev;
2206 	if (rt->rt6i_idev)
2207 		in6_dev_hold(rt->rt6i_idev);
2208 	rt->dst.lastuse = jiffies;
2209 	rt->rt6i_gateway = ort->rt6i_gateway;
2210 	rt->rt6i_flags = ort->rt6i_flags;
2211 	rt6_set_from(rt, ort);
2212 	rt->rt6i_metric = ort->rt6i_metric;
2213 #ifdef CONFIG_IPV6_SUBTREES
2214 	rt->rt6i_src = ort->rt6i_src;
2215 #endif
2216 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2217 	rt->rt6i_table = ort->rt6i_table;
2218 	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2219 }
2220 
2221 #ifdef CONFIG_IPV6_ROUTE_INFO
2222 static struct rt6_info *rt6_get_route_info(struct net *net,
2223 					   const struct in6_addr *prefix, int prefixlen,
2224 					   const struct in6_addr *gwaddr, int ifindex)
2225 {
2226 	struct fib6_node *fn;
2227 	struct rt6_info *rt = NULL;
2228 	struct fib6_table *table;
2229 
2230 	table = fib6_get_table(net, RT6_TABLE_INFO);
2231 	if (!table)
2232 		return NULL;
2233 
2234 	read_lock_bh(&table->tb6_lock);
2235 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2236 	if (!fn)
2237 		goto out;
2238 
2239 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2240 		if (rt->dst.dev->ifindex != ifindex)
2241 			continue;
2242 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2243 			continue;
2244 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2245 			continue;
2246 		dst_hold(&rt->dst);
2247 		break;
2248 	}
2249 out:
2250 	read_unlock_bh(&table->tb6_lock);
2251 	return rt;
2252 }
2253 
2254 static struct rt6_info *rt6_add_route_info(struct net *net,
2255 					   const struct in6_addr *prefix, int prefixlen,
2256 					   const struct in6_addr *gwaddr, int ifindex,
2257 					   unsigned int pref)
2258 {
2259 	struct fib6_config cfg = {
2260 		.fc_table	= RT6_TABLE_INFO,
2261 		.fc_metric	= IP6_RT_PRIO_USER,
2262 		.fc_ifindex	= ifindex,
2263 		.fc_dst_len	= prefixlen,
2264 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2265 				  RTF_UP | RTF_PREF(pref),
2266 		.fc_nlinfo.portid = 0,
2267 		.fc_nlinfo.nlh = NULL,
2268 		.fc_nlinfo.nl_net = net,
2269 	};
2270 
2271 	cfg.fc_dst = *prefix;
2272 	cfg.fc_gateway = *gwaddr;
2273 
2274 	/* We should treat it as a default route if prefix length is 0. */
2275 	if (!prefixlen)
2276 		cfg.fc_flags |= RTF_DEFAULT;
2277 
2278 	ip6_route_add(&cfg);
2279 
2280 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2281 }
2282 #endif
2283 
2284 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2285 {
2286 	struct rt6_info *rt;
2287 	struct fib6_table *table;
2288 
2289 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2290 	if (!table)
2291 		return NULL;
2292 
2293 	read_lock_bh(&table->tb6_lock);
2294 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2295 		if (dev == rt->dst.dev &&
2296 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2297 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2298 			break;
2299 	}
2300 	if (rt)
2301 		dst_hold(&rt->dst);
2302 	read_unlock_bh(&table->tb6_lock);
2303 	return rt;
2304 }
2305 
2306 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2307 				     struct net_device *dev,
2308 				     unsigned int pref)
2309 {
2310 	struct fib6_config cfg = {
2311 		.fc_table	= RT6_TABLE_DFLT,
2312 		.fc_metric	= IP6_RT_PRIO_USER,
2313 		.fc_ifindex	= dev->ifindex,
2314 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2315 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2316 		.fc_nlinfo.portid = 0,
2317 		.fc_nlinfo.nlh = NULL,
2318 		.fc_nlinfo.nl_net = dev_net(dev),
2319 	};
2320 
2321 	cfg.fc_gateway = *gwaddr;
2322 
2323 	ip6_route_add(&cfg);
2324 
2325 	return rt6_get_dflt_router(gwaddr, dev);
2326 }
2327 
2328 void rt6_purge_dflt_routers(struct net *net)
2329 {
2330 	struct rt6_info *rt;
2331 	struct fib6_table *table;
2332 
2333 	/* NOTE: Keep consistent with rt6_get_dflt_router */
2334 	table = fib6_get_table(net, RT6_TABLE_DFLT);
2335 	if (!table)
2336 		return;
2337 
2338 restart:
2339 	read_lock_bh(&table->tb6_lock);
2340 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2341 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2342 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2343 			dst_hold(&rt->dst);
2344 			read_unlock_bh(&table->tb6_lock);
2345 			ip6_del_rt(rt);
2346 			goto restart;
2347 		}
2348 	}
2349 	read_unlock_bh(&table->tb6_lock);
2350 }
2351 
2352 static void rtmsg_to_fib6_config(struct net *net,
2353 				 struct in6_rtmsg *rtmsg,
2354 				 struct fib6_config *cfg)
2355 {
2356 	memset(cfg, 0, sizeof(*cfg));
2357 
2358 	cfg->fc_table = RT6_TABLE_MAIN;
2359 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2360 	cfg->fc_metric = rtmsg->rtmsg_metric;
2361 	cfg->fc_expires = rtmsg->rtmsg_info;
2362 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2363 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2364 	cfg->fc_flags = rtmsg->rtmsg_flags;
2365 
2366 	cfg->fc_nlinfo.nl_net = net;
2367 
2368 	cfg->fc_dst = rtmsg->rtmsg_dst;
2369 	cfg->fc_src = rtmsg->rtmsg_src;
2370 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2371 }
2372 
2373 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2374 {
2375 	struct fib6_config cfg;
2376 	struct in6_rtmsg rtmsg;
2377 	int err;
2378 
2379 	switch (cmd) {
2380 	case SIOCADDRT:		/* Add a route */
2381 	case SIOCDELRT:		/* Delete a route */
2382 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2383 			return -EPERM;
2384 		err = copy_from_user(&rtmsg, arg,
2385 				     sizeof(struct in6_rtmsg));
2386 		if (err)
2387 			return -EFAULT;
2388 
2389 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2390 
2391 		rtnl_lock();
2392 		switch (cmd) {
2393 		case SIOCADDRT:
2394 			err = ip6_route_add(&cfg);
2395 			break;
2396 		case SIOCDELRT:
2397 			err = ip6_route_del(&cfg);
2398 			break;
2399 		default:
2400 			err = -EINVAL;
2401 		}
2402 		rtnl_unlock();
2403 
2404 		return err;
2405 	}
2406 
2407 	return -EINVAL;
2408 }
2409 
2410 /*
2411  *	Drop the packet on the floor
2412  */
2413 
2414 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2415 {
2416 	int type;
2417 	struct dst_entry *dst = skb_dst(skb);
2418 	switch (ipstats_mib_noroutes) {
2419 	case IPSTATS_MIB_INNOROUTES:
2420 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2421 		if (type == IPV6_ADDR_ANY) {
2422 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2423 				      IPSTATS_MIB_INADDRERRORS);
2424 			break;
2425 		}
2426 		/* FALLTHROUGH */
2427 	case IPSTATS_MIB_OUTNOROUTES:
2428 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2429 			      ipstats_mib_noroutes);
2430 		break;
2431 	}
2432 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2433 	kfree_skb(skb);
2434 	return 0;
2435 }
2436 
2437 static int ip6_pkt_discard(struct sk_buff *skb)
2438 {
2439 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2440 }
2441 
2442 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2443 {
2444 	skb->dev = skb_dst(skb)->dev;
2445 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2446 }
2447 
2448 static int ip6_pkt_prohibit(struct sk_buff *skb)
2449 {
2450 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2451 }
2452 
2453 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2454 {
2455 	skb->dev = skb_dst(skb)->dev;
2456 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2457 }
2458 
2459 /*
2460  *	Allocate a dst for local (unicast / anycast) address.
2461  */
2462 
2463 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2464 				    const struct in6_addr *addr,
2465 				    bool anycast)
2466 {
2467 	struct net *net = dev_net(idev->dev);
2468 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2469 					    DST_NOCOUNT);
2470 	if (!rt)
2471 		return ERR_PTR(-ENOMEM);
2472 
2473 	in6_dev_hold(idev);
2474 
2475 	rt->dst.flags |= DST_HOST;
2476 	rt->dst.input = ip6_input;
2477 	rt->dst.output = ip6_output;
2478 	rt->rt6i_idev = idev;
2479 
2480 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2481 	if (anycast)
2482 		rt->rt6i_flags |= RTF_ANYCAST;
2483 	else
2484 		rt->rt6i_flags |= RTF_LOCAL;
2485 
2486 	rt->rt6i_gateway  = *addr;
2487 	rt->rt6i_dst.addr = *addr;
2488 	rt->rt6i_dst.plen = 128;
2489 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2490 
2491 	atomic_set(&rt->dst.__refcnt, 1);
2492 
2493 	return rt;
2494 }
2495 
2496 int ip6_route_get_saddr(struct net *net,
2497 			struct rt6_info *rt,
2498 			const struct in6_addr *daddr,
2499 			unsigned int prefs,
2500 			struct in6_addr *saddr)
2501 {
2502 	struct inet6_dev *idev =
2503 		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2504 	int err = 0;
2505 	if (rt && rt->rt6i_prefsrc.plen)
2506 		*saddr = rt->rt6i_prefsrc.addr;
2507 	else
2508 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2509 					 daddr, prefs, saddr);
2510 	return err;
2511 }
2512 
2513 /* remove deleted ip from prefsrc entries */
2514 struct arg_dev_net_ip {
2515 	struct net_device *dev;
2516 	struct net *net;
2517 	struct in6_addr *addr;
2518 };
2519 
2520 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2521 {
2522 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2523 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2524 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2525 
2526 	if (((void *)rt->dst.dev == dev || !dev) &&
2527 	    rt != net->ipv6.ip6_null_entry &&
2528 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2529 		/* remove prefsrc entry */
2530 		rt->rt6i_prefsrc.plen = 0;
2531 	}
2532 	return 0;
2533 }
2534 
2535 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2536 {
2537 	struct net *net = dev_net(ifp->idev->dev);
2538 	struct arg_dev_net_ip adni = {
2539 		.dev = ifp->idev->dev,
2540 		.net = net,
2541 		.addr = &ifp->addr,
2542 	};
2543 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2544 }
2545 
2546 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2547 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2548 
2549 /* Remove routers and update dst entries when gateway turn into host. */
2550 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2551 {
2552 	struct in6_addr *gateway = (struct in6_addr *)arg;
2553 
2554 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2555 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2556 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2557 		return -1;
2558 	}
2559 	return 0;
2560 }
2561 
2562 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2563 {
2564 	fib6_clean_all(net, fib6_clean_tohost, gateway);
2565 }
2566 
2567 struct arg_dev_net {
2568 	struct net_device *dev;
2569 	struct net *net;
2570 };
2571 
2572 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2573 {
2574 	const struct arg_dev_net *adn = arg;
2575 	const struct net_device *dev = adn->dev;
2576 
2577 	if ((rt->dst.dev == dev || !dev) &&
2578 	    rt != adn->net->ipv6.ip6_null_entry)
2579 		return -1;
2580 
2581 	return 0;
2582 }
2583 
2584 void rt6_ifdown(struct net *net, struct net_device *dev)
2585 {
2586 	struct arg_dev_net adn = {
2587 		.dev = dev,
2588 		.net = net,
2589 	};
2590 
2591 	fib6_clean_all(net, fib6_ifdown, &adn);
2592 	icmp6_clean_all(fib6_ifdown, &adn);
2593 	rt6_uncached_list_flush_dev(net, dev);
2594 }
2595 
2596 struct rt6_mtu_change_arg {
2597 	struct net_device *dev;
2598 	unsigned int mtu;
2599 };
2600 
2601 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2602 {
2603 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2604 	struct inet6_dev *idev;
2605 
2606 	/* In IPv6 pmtu discovery is not optional,
2607 	   so that RTAX_MTU lock cannot disable it.
2608 	   We still use this lock to block changes
2609 	   caused by addrconf/ndisc.
2610 	*/
2611 
2612 	idev = __in6_dev_get(arg->dev);
2613 	if (!idev)
2614 		return 0;
2615 
2616 	/* For administrative MTU increase, there is no way to discover
2617 	   IPv6 PMTU increase, so PMTU increase should be updated here.
2618 	   Since RFC 1981 doesn't include administrative MTU increase
2619 	   update PMTU increase is a MUST. (i.e. jumbo frame)
2620 	 */
2621 	/*
2622 	   If new MTU is less than route PMTU, this new MTU will be the
2623 	   lowest MTU in the path, update the route PMTU to reflect PMTU
2624 	   decreases; if new MTU is greater than route PMTU, and the
2625 	   old MTU is the lowest MTU in the path, update the route PMTU
2626 	   to reflect the increase. In this case if the other nodes' MTU
2627 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2628 	   PMTU discouvery.
2629 	 */
2630 	if (rt->dst.dev == arg->dev &&
2631 	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2632 		if (rt->rt6i_flags & RTF_CACHE) {
2633 			/* For RTF_CACHE with rt6i_pmtu == 0
2634 			 * (i.e. a redirected route),
2635 			 * the metrics of its rt->dst.from has already
2636 			 * been updated.
2637 			 */
2638 			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2639 				rt->rt6i_pmtu = arg->mtu;
2640 		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
2641 			   (dst_mtu(&rt->dst) < arg->mtu &&
2642 			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2643 			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2644 		}
2645 	}
2646 	return 0;
2647 }
2648 
2649 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2650 {
2651 	struct rt6_mtu_change_arg arg = {
2652 		.dev = dev,
2653 		.mtu = mtu,
2654 	};
2655 
2656 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2657 }
2658 
2659 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2660 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2661 	[RTA_OIF]               = { .type = NLA_U32 },
2662 	[RTA_IIF]		= { .type = NLA_U32 },
2663 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2664 	[RTA_METRICS]           = { .type = NLA_NESTED },
2665 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2666 	[RTA_PREF]              = { .type = NLA_U8 },
2667 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
2668 	[RTA_ENCAP]		= { .type = NLA_NESTED },
2669 };
2670 
2671 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2672 			      struct fib6_config *cfg)
2673 {
2674 	struct rtmsg *rtm;
2675 	struct nlattr *tb[RTA_MAX+1];
2676 	unsigned int pref;
2677 	int err;
2678 
2679 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2680 	if (err < 0)
2681 		goto errout;
2682 
2683 	err = -EINVAL;
2684 	rtm = nlmsg_data(nlh);
2685 	memset(cfg, 0, sizeof(*cfg));
2686 
2687 	cfg->fc_table = rtm->rtm_table;
2688 	cfg->fc_dst_len = rtm->rtm_dst_len;
2689 	cfg->fc_src_len = rtm->rtm_src_len;
2690 	cfg->fc_flags = RTF_UP;
2691 	cfg->fc_protocol = rtm->rtm_protocol;
2692 	cfg->fc_type = rtm->rtm_type;
2693 
2694 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2695 	    rtm->rtm_type == RTN_BLACKHOLE ||
2696 	    rtm->rtm_type == RTN_PROHIBIT ||
2697 	    rtm->rtm_type == RTN_THROW)
2698 		cfg->fc_flags |= RTF_REJECT;
2699 
2700 	if (rtm->rtm_type == RTN_LOCAL)
2701 		cfg->fc_flags |= RTF_LOCAL;
2702 
2703 	if (rtm->rtm_flags & RTM_F_CLONED)
2704 		cfg->fc_flags |= RTF_CACHE;
2705 
2706 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2707 	cfg->fc_nlinfo.nlh = nlh;
2708 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2709 
2710 	if (tb[RTA_GATEWAY]) {
2711 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2712 		cfg->fc_flags |= RTF_GATEWAY;
2713 	}
2714 
2715 	if (tb[RTA_DST]) {
2716 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2717 
2718 		if (nla_len(tb[RTA_DST]) < plen)
2719 			goto errout;
2720 
2721 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2722 	}
2723 
2724 	if (tb[RTA_SRC]) {
2725 		int plen = (rtm->rtm_src_len + 7) >> 3;
2726 
2727 		if (nla_len(tb[RTA_SRC]) < plen)
2728 			goto errout;
2729 
2730 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2731 	}
2732 
2733 	if (tb[RTA_PREFSRC])
2734 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2735 
2736 	if (tb[RTA_OIF])
2737 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2738 
2739 	if (tb[RTA_PRIORITY])
2740 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2741 
2742 	if (tb[RTA_METRICS]) {
2743 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2744 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2745 	}
2746 
2747 	if (tb[RTA_TABLE])
2748 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2749 
2750 	if (tb[RTA_MULTIPATH]) {
2751 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2752 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2753 	}
2754 
2755 	if (tb[RTA_PREF]) {
2756 		pref = nla_get_u8(tb[RTA_PREF]);
2757 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
2758 		    pref != ICMPV6_ROUTER_PREF_HIGH)
2759 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
2760 		cfg->fc_flags |= RTF_PREF(pref);
2761 	}
2762 
2763 	if (tb[RTA_ENCAP])
2764 		cfg->fc_encap = tb[RTA_ENCAP];
2765 
2766 	if (tb[RTA_ENCAP_TYPE])
2767 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2768 
2769 	err = 0;
2770 errout:
2771 	return err;
2772 }
2773 
2774 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2775 {
2776 	struct fib6_config r_cfg;
2777 	struct rtnexthop *rtnh;
2778 	int remaining;
2779 	int attrlen;
2780 	int err = 0, last_err = 0;
2781 
2782 	remaining = cfg->fc_mp_len;
2783 beginning:
2784 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2785 
2786 	/* Parse a Multipath Entry */
2787 	while (rtnh_ok(rtnh, remaining)) {
2788 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2789 		if (rtnh->rtnh_ifindex)
2790 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2791 
2792 		attrlen = rtnh_attrlen(rtnh);
2793 		if (attrlen > 0) {
2794 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2795 
2796 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2797 			if (nla) {
2798 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
2799 				r_cfg.fc_flags |= RTF_GATEWAY;
2800 			}
2801 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2802 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2803 			if (nla)
2804 				r_cfg.fc_encap_type = nla_get_u16(nla);
2805 		}
2806 		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2807 		if (err) {
2808 			last_err = err;
2809 			/* If we are trying to remove a route, do not stop the
2810 			 * loop when ip6_route_del() fails (because next hop is
2811 			 * already gone), we should try to remove all next hops.
2812 			 */
2813 			if (add) {
2814 				/* If add fails, we should try to delete all
2815 				 * next hops that have been already added.
2816 				 */
2817 				add = 0;
2818 				remaining = cfg->fc_mp_len - remaining;
2819 				goto beginning;
2820 			}
2821 		}
2822 		/* Because each route is added like a single route we remove
2823 		 * these flags after the first nexthop: if there is a collision,
2824 		 * we have already failed to add the first nexthop:
2825 		 * fib6_add_rt2node() has rejected it; when replacing, old
2826 		 * nexthops have been replaced by first new, the rest should
2827 		 * be added to it.
2828 		 */
2829 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2830 						     NLM_F_REPLACE);
2831 		rtnh = rtnh_next(rtnh, &remaining);
2832 	}
2833 
2834 	return last_err;
2835 }
2836 
2837 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2838 {
2839 	struct fib6_config cfg;
2840 	int err;
2841 
2842 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2843 	if (err < 0)
2844 		return err;
2845 
2846 	if (cfg.fc_mp)
2847 		return ip6_route_multipath(&cfg, 0);
2848 	else
2849 		return ip6_route_del(&cfg);
2850 }
2851 
2852 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2853 {
2854 	struct fib6_config cfg;
2855 	int err;
2856 
2857 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2858 	if (err < 0)
2859 		return err;
2860 
2861 	if (cfg.fc_mp)
2862 		return ip6_route_multipath(&cfg, 1);
2863 	else
2864 		return ip6_route_add(&cfg);
2865 }
2866 
2867 static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
2868 {
2869 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2870 	       + nla_total_size(16) /* RTA_SRC */
2871 	       + nla_total_size(16) /* RTA_DST */
2872 	       + nla_total_size(16) /* RTA_GATEWAY */
2873 	       + nla_total_size(16) /* RTA_PREFSRC */
2874 	       + nla_total_size(4) /* RTA_TABLE */
2875 	       + nla_total_size(4) /* RTA_IIF */
2876 	       + nla_total_size(4) /* RTA_OIF */
2877 	       + nla_total_size(4) /* RTA_PRIORITY */
2878 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2879 	       + nla_total_size(sizeof(struct rta_cacheinfo))
2880 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
2881 	       + nla_total_size(1) /* RTA_PREF */
2882 	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
2883 }
2884 
2885 static int rt6_fill_node(struct net *net,
2886 			 struct sk_buff *skb, struct rt6_info *rt,
2887 			 struct in6_addr *dst, struct in6_addr *src,
2888 			 int iif, int type, u32 portid, u32 seq,
2889 			 int prefix, int nowait, unsigned int flags)
2890 {
2891 	u32 metrics[RTAX_MAX];
2892 	struct rtmsg *rtm;
2893 	struct nlmsghdr *nlh;
2894 	long expires;
2895 	u32 table;
2896 
2897 	if (prefix) {	/* user wants prefix routes only */
2898 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2899 			/* success since this is not a prefix route */
2900 			return 1;
2901 		}
2902 	}
2903 
2904 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2905 	if (!nlh)
2906 		return -EMSGSIZE;
2907 
2908 	rtm = nlmsg_data(nlh);
2909 	rtm->rtm_family = AF_INET6;
2910 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2911 	rtm->rtm_src_len = rt->rt6i_src.plen;
2912 	rtm->rtm_tos = 0;
2913 	if (rt->rt6i_table)
2914 		table = rt->rt6i_table->tb6_id;
2915 	else
2916 		table = RT6_TABLE_UNSPEC;
2917 	rtm->rtm_table = table;
2918 	if (nla_put_u32(skb, RTA_TABLE, table))
2919 		goto nla_put_failure;
2920 	if (rt->rt6i_flags & RTF_REJECT) {
2921 		switch (rt->dst.error) {
2922 		case -EINVAL:
2923 			rtm->rtm_type = RTN_BLACKHOLE;
2924 			break;
2925 		case -EACCES:
2926 			rtm->rtm_type = RTN_PROHIBIT;
2927 			break;
2928 		case -EAGAIN:
2929 			rtm->rtm_type = RTN_THROW;
2930 			break;
2931 		default:
2932 			rtm->rtm_type = RTN_UNREACHABLE;
2933 			break;
2934 		}
2935 	}
2936 	else if (rt->rt6i_flags & RTF_LOCAL)
2937 		rtm->rtm_type = RTN_LOCAL;
2938 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2939 		rtm->rtm_type = RTN_LOCAL;
2940 	else
2941 		rtm->rtm_type = RTN_UNICAST;
2942 	rtm->rtm_flags = 0;
2943 	if (!netif_carrier_ok(rt->dst.dev)) {
2944 		rtm->rtm_flags |= RTNH_F_LINKDOWN;
2945 		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
2946 			rtm->rtm_flags |= RTNH_F_DEAD;
2947 	}
2948 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2949 	rtm->rtm_protocol = rt->rt6i_protocol;
2950 	if (rt->rt6i_flags & RTF_DYNAMIC)
2951 		rtm->rtm_protocol = RTPROT_REDIRECT;
2952 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
2953 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2954 			rtm->rtm_protocol = RTPROT_RA;
2955 		else
2956 			rtm->rtm_protocol = RTPROT_KERNEL;
2957 	}
2958 
2959 	if (rt->rt6i_flags & RTF_CACHE)
2960 		rtm->rtm_flags |= RTM_F_CLONED;
2961 
2962 	if (dst) {
2963 		if (nla_put_in6_addr(skb, RTA_DST, dst))
2964 			goto nla_put_failure;
2965 		rtm->rtm_dst_len = 128;
2966 	} else if (rtm->rtm_dst_len)
2967 		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
2968 			goto nla_put_failure;
2969 #ifdef CONFIG_IPV6_SUBTREES
2970 	if (src) {
2971 		if (nla_put_in6_addr(skb, RTA_SRC, src))
2972 			goto nla_put_failure;
2973 		rtm->rtm_src_len = 128;
2974 	} else if (rtm->rtm_src_len &&
2975 		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
2976 		goto nla_put_failure;
2977 #endif
2978 	if (iif) {
2979 #ifdef CONFIG_IPV6_MROUTE
2980 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2981 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2982 			if (err <= 0) {
2983 				if (!nowait) {
2984 					if (err == 0)
2985 						return 0;
2986 					goto nla_put_failure;
2987 				} else {
2988 					if (err == -EMSGSIZE)
2989 						goto nla_put_failure;
2990 				}
2991 			}
2992 		} else
2993 #endif
2994 			if (nla_put_u32(skb, RTA_IIF, iif))
2995 				goto nla_put_failure;
2996 	} else if (dst) {
2997 		struct in6_addr saddr_buf;
2998 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2999 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3000 			goto nla_put_failure;
3001 	}
3002 
3003 	if (rt->rt6i_prefsrc.plen) {
3004 		struct in6_addr saddr_buf;
3005 		saddr_buf = rt->rt6i_prefsrc.addr;
3006 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3007 			goto nla_put_failure;
3008 	}
3009 
3010 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3011 	if (rt->rt6i_pmtu)
3012 		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3013 	if (rtnetlink_put_metrics(skb, metrics) < 0)
3014 		goto nla_put_failure;
3015 
3016 	if (rt->rt6i_flags & RTF_GATEWAY) {
3017 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3018 			goto nla_put_failure;
3019 	}
3020 
3021 	if (rt->dst.dev &&
3022 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3023 		goto nla_put_failure;
3024 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3025 		goto nla_put_failure;
3026 
3027 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3028 
3029 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3030 		goto nla_put_failure;
3031 
3032 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3033 		goto nla_put_failure;
3034 
3035 	lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3036 
3037 	nlmsg_end(skb, nlh);
3038 	return 0;
3039 
3040 nla_put_failure:
3041 	nlmsg_cancel(skb, nlh);
3042 	return -EMSGSIZE;
3043 }
3044 
3045 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3046 {
3047 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3048 	int prefix;
3049 
3050 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3051 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3052 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3053 	} else
3054 		prefix = 0;
3055 
3056 	return rt6_fill_node(arg->net,
3057 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3058 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3059 		     prefix, 0, NLM_F_MULTI);
3060 }
3061 
3062 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3063 {
3064 	struct net *net = sock_net(in_skb->sk);
3065 	struct nlattr *tb[RTA_MAX+1];
3066 	struct rt6_info *rt;
3067 	struct sk_buff *skb;
3068 	struct rtmsg *rtm;
3069 	struct flowi6 fl6;
3070 	int err, iif = 0, oif = 0;
3071 
3072 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3073 	if (err < 0)
3074 		goto errout;
3075 
3076 	err = -EINVAL;
3077 	memset(&fl6, 0, sizeof(fl6));
3078 
3079 	if (tb[RTA_SRC]) {
3080 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3081 			goto errout;
3082 
3083 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3084 	}
3085 
3086 	if (tb[RTA_DST]) {
3087 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3088 			goto errout;
3089 
3090 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3091 	}
3092 
3093 	if (tb[RTA_IIF])
3094 		iif = nla_get_u32(tb[RTA_IIF]);
3095 
3096 	if (tb[RTA_OIF])
3097 		oif = nla_get_u32(tb[RTA_OIF]);
3098 
3099 	if (tb[RTA_MARK])
3100 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3101 
3102 	if (iif) {
3103 		struct net_device *dev;
3104 		int flags = 0;
3105 
3106 		dev = __dev_get_by_index(net, iif);
3107 		if (!dev) {
3108 			err = -ENODEV;
3109 			goto errout;
3110 		}
3111 
3112 		fl6.flowi6_iif = iif;
3113 
3114 		if (!ipv6_addr_any(&fl6.saddr))
3115 			flags |= RT6_LOOKUP_F_HAS_SADDR;
3116 
3117 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3118 							       flags);
3119 	} else {
3120 		fl6.flowi6_oif = oif;
3121 
3122 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3123 	}
3124 
3125 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3126 	if (!skb) {
3127 		ip6_rt_put(rt);
3128 		err = -ENOBUFS;
3129 		goto errout;
3130 	}
3131 
3132 	/* Reserve room for dummy headers, this skb can pass
3133 	   through good chunk of routing engine.
3134 	 */
3135 	skb_reset_mac_header(skb);
3136 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3137 
3138 	skb_dst_set(skb, &rt->dst);
3139 
3140 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3141 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3142 			    nlh->nlmsg_seq, 0, 0, 0);
3143 	if (err < 0) {
3144 		kfree_skb(skb);
3145 		goto errout;
3146 	}
3147 
3148 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3149 errout:
3150 	return err;
3151 }
3152 
3153 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
3154 {
3155 	struct sk_buff *skb;
3156 	struct net *net = info->nl_net;
3157 	u32 seq;
3158 	int err;
3159 
3160 	err = -ENOBUFS;
3161 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3162 
3163 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3164 	if (!skb)
3165 		goto errout;
3166 
3167 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3168 				event, info->portid, seq, 0, 0, 0);
3169 	if (err < 0) {
3170 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3171 		WARN_ON(err == -EMSGSIZE);
3172 		kfree_skb(skb);
3173 		goto errout;
3174 	}
3175 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3176 		    info->nlh, gfp_any());
3177 	return;
3178 errout:
3179 	if (err < 0)
3180 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3181 }
3182 
3183 static int ip6_route_dev_notify(struct notifier_block *this,
3184 				unsigned long event, void *ptr)
3185 {
3186 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3187 	struct net *net = dev_net(dev);
3188 
3189 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3190 		net->ipv6.ip6_null_entry->dst.dev = dev;
3191 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3192 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3193 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3194 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3195 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3196 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3197 #endif
3198 	}
3199 
3200 	return NOTIFY_OK;
3201 }
3202 
3203 /*
3204  *	/proc
3205  */
3206 
3207 #ifdef CONFIG_PROC_FS
3208 
3209 static const struct file_operations ipv6_route_proc_fops = {
3210 	.owner		= THIS_MODULE,
3211 	.open		= ipv6_route_open,
3212 	.read		= seq_read,
3213 	.llseek		= seq_lseek,
3214 	.release	= seq_release_net,
3215 };
3216 
3217 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3218 {
3219 	struct net *net = (struct net *)seq->private;
3220 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3221 		   net->ipv6.rt6_stats->fib_nodes,
3222 		   net->ipv6.rt6_stats->fib_route_nodes,
3223 		   net->ipv6.rt6_stats->fib_rt_alloc,
3224 		   net->ipv6.rt6_stats->fib_rt_entries,
3225 		   net->ipv6.rt6_stats->fib_rt_cache,
3226 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3227 		   net->ipv6.rt6_stats->fib_discarded_routes);
3228 
3229 	return 0;
3230 }
3231 
3232 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3233 {
3234 	return single_open_net(inode, file, rt6_stats_seq_show);
3235 }
3236 
3237 static const struct file_operations rt6_stats_seq_fops = {
3238 	.owner	 = THIS_MODULE,
3239 	.open	 = rt6_stats_seq_open,
3240 	.read	 = seq_read,
3241 	.llseek	 = seq_lseek,
3242 	.release = single_release_net,
3243 };
3244 #endif	/* CONFIG_PROC_FS */
3245 
3246 #ifdef CONFIG_SYSCTL
3247 
3248 static
3249 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3250 			      void __user *buffer, size_t *lenp, loff_t *ppos)
3251 {
3252 	struct net *net;
3253 	int delay;
3254 	if (!write)
3255 		return -EINVAL;
3256 
3257 	net = (struct net *)ctl->extra1;
3258 	delay = net->ipv6.sysctl.flush_delay;
3259 	proc_dointvec(ctl, write, buffer, lenp, ppos);
3260 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3261 	return 0;
3262 }
3263 
3264 struct ctl_table ipv6_route_table_template[] = {
3265 	{
3266 		.procname	=	"flush",
3267 		.data		=	&init_net.ipv6.sysctl.flush_delay,
3268 		.maxlen		=	sizeof(int),
3269 		.mode		=	0200,
3270 		.proc_handler	=	ipv6_sysctl_rtcache_flush
3271 	},
3272 	{
3273 		.procname	=	"gc_thresh",
3274 		.data		=	&ip6_dst_ops_template.gc_thresh,
3275 		.maxlen		=	sizeof(int),
3276 		.mode		=	0644,
3277 		.proc_handler	=	proc_dointvec,
3278 	},
3279 	{
3280 		.procname	=	"max_size",
3281 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
3282 		.maxlen		=	sizeof(int),
3283 		.mode		=	0644,
3284 		.proc_handler	=	proc_dointvec,
3285 	},
3286 	{
3287 		.procname	=	"gc_min_interval",
3288 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3289 		.maxlen		=	sizeof(int),
3290 		.mode		=	0644,
3291 		.proc_handler	=	proc_dointvec_jiffies,
3292 	},
3293 	{
3294 		.procname	=	"gc_timeout",
3295 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3296 		.maxlen		=	sizeof(int),
3297 		.mode		=	0644,
3298 		.proc_handler	=	proc_dointvec_jiffies,
3299 	},
3300 	{
3301 		.procname	=	"gc_interval",
3302 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
3303 		.maxlen		=	sizeof(int),
3304 		.mode		=	0644,
3305 		.proc_handler	=	proc_dointvec_jiffies,
3306 	},
3307 	{
3308 		.procname	=	"gc_elasticity",
3309 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3310 		.maxlen		=	sizeof(int),
3311 		.mode		=	0644,
3312 		.proc_handler	=	proc_dointvec,
3313 	},
3314 	{
3315 		.procname	=	"mtu_expires",
3316 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3317 		.maxlen		=	sizeof(int),
3318 		.mode		=	0644,
3319 		.proc_handler	=	proc_dointvec_jiffies,
3320 	},
3321 	{
3322 		.procname	=	"min_adv_mss",
3323 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
3324 		.maxlen		=	sizeof(int),
3325 		.mode		=	0644,
3326 		.proc_handler	=	proc_dointvec,
3327 	},
3328 	{
3329 		.procname	=	"gc_min_interval_ms",
3330 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3331 		.maxlen		=	sizeof(int),
3332 		.mode		=	0644,
3333 		.proc_handler	=	proc_dointvec_ms_jiffies,
3334 	},
3335 	{ }
3336 };
3337 
3338 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3339 {
3340 	struct ctl_table *table;
3341 
3342 	table = kmemdup(ipv6_route_table_template,
3343 			sizeof(ipv6_route_table_template),
3344 			GFP_KERNEL);
3345 
3346 	if (table) {
3347 		table[0].data = &net->ipv6.sysctl.flush_delay;
3348 		table[0].extra1 = net;
3349 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3350 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3351 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3352 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3353 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3354 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3355 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3356 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3357 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3358 
3359 		/* Don't export sysctls to unprivileged users */
3360 		if (net->user_ns != &init_user_ns)
3361 			table[0].procname = NULL;
3362 	}
3363 
3364 	return table;
3365 }
3366 #endif
3367 
3368 static int __net_init ip6_route_net_init(struct net *net)
3369 {
3370 	int ret = -ENOMEM;
3371 
3372 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3373 	       sizeof(net->ipv6.ip6_dst_ops));
3374 
3375 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3376 		goto out_ip6_dst_ops;
3377 
3378 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3379 					   sizeof(*net->ipv6.ip6_null_entry),
3380 					   GFP_KERNEL);
3381 	if (!net->ipv6.ip6_null_entry)
3382 		goto out_ip6_dst_entries;
3383 	net->ipv6.ip6_null_entry->dst.path =
3384 		(struct dst_entry *)net->ipv6.ip6_null_entry;
3385 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3386 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3387 			 ip6_template_metrics, true);
3388 
3389 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3390 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3391 					       sizeof(*net->ipv6.ip6_prohibit_entry),
3392 					       GFP_KERNEL);
3393 	if (!net->ipv6.ip6_prohibit_entry)
3394 		goto out_ip6_null_entry;
3395 	net->ipv6.ip6_prohibit_entry->dst.path =
3396 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3397 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3398 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3399 			 ip6_template_metrics, true);
3400 
3401 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3402 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3403 					       GFP_KERNEL);
3404 	if (!net->ipv6.ip6_blk_hole_entry)
3405 		goto out_ip6_prohibit_entry;
3406 	net->ipv6.ip6_blk_hole_entry->dst.path =
3407 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3408 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3409 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3410 			 ip6_template_metrics, true);
3411 #endif
3412 
3413 	net->ipv6.sysctl.flush_delay = 0;
3414 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3415 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3416 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3417 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3418 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3419 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3420 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3421 
3422 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3423 
3424 	ret = 0;
3425 out:
3426 	return ret;
3427 
3428 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3429 out_ip6_prohibit_entry:
3430 	kfree(net->ipv6.ip6_prohibit_entry);
3431 out_ip6_null_entry:
3432 	kfree(net->ipv6.ip6_null_entry);
3433 #endif
3434 out_ip6_dst_entries:
3435 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3436 out_ip6_dst_ops:
3437 	goto out;
3438 }
3439 
3440 static void __net_exit ip6_route_net_exit(struct net *net)
3441 {
3442 	kfree(net->ipv6.ip6_null_entry);
3443 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3444 	kfree(net->ipv6.ip6_prohibit_entry);
3445 	kfree(net->ipv6.ip6_blk_hole_entry);
3446 #endif
3447 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3448 }
3449 
3450 static int __net_init ip6_route_net_init_late(struct net *net)
3451 {
3452 #ifdef CONFIG_PROC_FS
3453 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3454 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3455 #endif
3456 	return 0;
3457 }
3458 
3459 static void __net_exit ip6_route_net_exit_late(struct net *net)
3460 {
3461 #ifdef CONFIG_PROC_FS
3462 	remove_proc_entry("ipv6_route", net->proc_net);
3463 	remove_proc_entry("rt6_stats", net->proc_net);
3464 #endif
3465 }
3466 
3467 static struct pernet_operations ip6_route_net_ops = {
3468 	.init = ip6_route_net_init,
3469 	.exit = ip6_route_net_exit,
3470 };
3471 
3472 static int __net_init ipv6_inetpeer_init(struct net *net)
3473 {
3474 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3475 
3476 	if (!bp)
3477 		return -ENOMEM;
3478 	inet_peer_base_init(bp);
3479 	net->ipv6.peers = bp;
3480 	return 0;
3481 }
3482 
3483 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3484 {
3485 	struct inet_peer_base *bp = net->ipv6.peers;
3486 
3487 	net->ipv6.peers = NULL;
3488 	inetpeer_invalidate_tree(bp);
3489 	kfree(bp);
3490 }
3491 
3492 static struct pernet_operations ipv6_inetpeer_ops = {
3493 	.init	=	ipv6_inetpeer_init,
3494 	.exit	=	ipv6_inetpeer_exit,
3495 };
3496 
3497 static struct pernet_operations ip6_route_net_late_ops = {
3498 	.init = ip6_route_net_init_late,
3499 	.exit = ip6_route_net_exit_late,
3500 };
3501 
3502 static struct notifier_block ip6_route_dev_notifier = {
3503 	.notifier_call = ip6_route_dev_notify,
3504 	.priority = 0,
3505 };
3506 
3507 int __init ip6_route_init(void)
3508 {
3509 	int ret;
3510 	int cpu;
3511 
3512 	ret = -ENOMEM;
3513 	ip6_dst_ops_template.kmem_cachep =
3514 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3515 				  SLAB_HWCACHE_ALIGN, NULL);
3516 	if (!ip6_dst_ops_template.kmem_cachep)
3517 		goto out;
3518 
3519 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3520 	if (ret)
3521 		goto out_kmem_cache;
3522 
3523 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3524 	if (ret)
3525 		goto out_dst_entries;
3526 
3527 	ret = register_pernet_subsys(&ip6_route_net_ops);
3528 	if (ret)
3529 		goto out_register_inetpeer;
3530 
3531 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3532 
3533 	/* Registering of the loopback is done before this portion of code,
3534 	 * the loopback reference in rt6_info will not be taken, do it
3535 	 * manually for init_net */
3536 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3537 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3538   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3539 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3540 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3541 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3542 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3543   #endif
3544 	ret = fib6_init();
3545 	if (ret)
3546 		goto out_register_subsys;
3547 
3548 	ret = xfrm6_init();
3549 	if (ret)
3550 		goto out_fib6_init;
3551 
3552 	ret = fib6_rules_init();
3553 	if (ret)
3554 		goto xfrm6_init;
3555 
3556 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3557 	if (ret)
3558 		goto fib6_rules_init;
3559 
3560 	ret = -ENOBUFS;
3561 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3562 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3563 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3564 		goto out_register_late_subsys;
3565 
3566 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3567 	if (ret)
3568 		goto out_register_late_subsys;
3569 
3570 	for_each_possible_cpu(cpu) {
3571 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3572 
3573 		INIT_LIST_HEAD(&ul->head);
3574 		spin_lock_init(&ul->lock);
3575 	}
3576 
3577 out:
3578 	return ret;
3579 
3580 out_register_late_subsys:
3581 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3582 fib6_rules_init:
3583 	fib6_rules_cleanup();
3584 xfrm6_init:
3585 	xfrm6_fini();
3586 out_fib6_init:
3587 	fib6_gc_cleanup();
3588 out_register_subsys:
3589 	unregister_pernet_subsys(&ip6_route_net_ops);
3590 out_register_inetpeer:
3591 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3592 out_dst_entries:
3593 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3594 out_kmem_cache:
3595 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3596 	goto out;
3597 }
3598 
3599 void ip6_route_cleanup(void)
3600 {
3601 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3602 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3603 	fib6_rules_cleanup();
3604 	xfrm6_fini();
3605 	fib6_gc_cleanup();
3606 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3607 	unregister_pernet_subsys(&ip6_route_net_ops);
3608 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3609 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3610 }
3611