xref: /openbmc/linux/net/ipv6/route.c (revision fee6d4c777a125e56de9370db3b2bf359bf958d6)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 
65 #include <asm/uaccess.h>
66 
67 #ifdef CONFIG_SYSCTL
68 #include <linux/sysctl.h>
69 #endif
70 
/* Neighbour-reachability verdicts produced by rt6_check_neigh() and fed
 * into route scoring; negative values are failure codes handled in
 * rt6_score_route()/find_match().
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* do not use this route at all */
	RT6_NUD_FAIL_PROBE = -2,	/* neighbour is in NUD_FAILED state */
	RT6_NUD_FAIL_DO_RR = -1,	/* no neigh entry: ask for round-robin */
	RT6_NUD_SUCCEED = 1		/* next hop (probably) reachable */
};

/* Forward declarations for the dst_ops callbacks and helpers below. */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
static int		ip6_pkt_prohibit(struct sk_buff *skb);
static int		ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex);
#endif

/* Per-cpu list of uncached (DST_NOCACHE) routes; entries get migrated off
 * a disappearing device in rt6_uncached_list_flush_dev().
 */
struct uncached_list {
	spinlock_t		lock;	/* protects 'head' */
	struct list_head	head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
116 
/* Register an uncached route on this CPU's list so that it can be
 * re-pointed at loopback when its device goes down
 * (see rt6_uncached_list_flush_dev()).
 */
static void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	rt->dst.flags |= DST_NOCACHE;
	rt->rt6i_uncached_list = ul;	/* remembered for rt6_uncached_list_del() */

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}
128 
/* Unlink @rt from the per-cpu uncached list it was added to, if any.
 * An empty rt6i_uncached list head means the route was never added.
 */
static void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		/* use the list recorded at add time - may be another CPU's */
		struct uncached_list *ul = rt->rt6i_uncached_list;

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		spin_unlock_bh(&ul->lock);
	}
}
139 
/* Device teardown for uncached routes: for every CPU's list, re-point
 * each entry that references @dev (or any device, when @dev is NULL) at
 * the namespace's loopback device, swapping the idev and netdev
 * references so the original device can be released.
 */
static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			/* move the inet6_dev reference over to loopback */
			if (rt_idev && (rt_idev->dev == dev || !dev) &&
			    rt_idev->dev != loopback_dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			/* likewise for the net_device reference */
			if (rt_dev && (rt_dev == dev || !dev) &&
			    rt_dev != loopback_dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}
170 
171 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
172 {
173 	return dst_metrics_write_ptr(rt->dst.from);
174 }
175 
176 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
177 {
178 	struct rt6_info *rt = (struct rt6_info *)dst;
179 
180 	if (rt->rt6i_flags & RTF_PCPU)
181 		return rt6_pcpu_cow_metrics(rt);
182 	else if (rt->rt6i_flags & RTF_CACHE)
183 		return NULL;
184 	else
185 		return dst_cow_metrics_generic(dst, old);
186 }
187 
188 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
189 					     struct sk_buff *skb,
190 					     const void *daddr)
191 {
192 	struct in6_addr *p = &rt->rt6i_gateway;
193 
194 	if (!ipv6_addr_any(p))
195 		return (const void *) p;
196 	else if (skb)
197 		return &ipv6_hdr(skb)->daddr;
198 	return daddr;
199 }
200 
201 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
202 					  struct sk_buff *skb,
203 					  const void *daddr)
204 {
205 	struct rt6_info *rt = (struct rt6_info *) dst;
206 	struct neighbour *n;
207 
208 	daddr = choose_neigh_daddr(rt, skb, daddr);
209 	n = __ipv6_neigh_lookup(dst->dev, daddr);
210 	if (n)
211 		return n;
212 	return neigh_create(&nd_tbl, daddr, dst->dev);
213 }
214 
/* dst_ops used for regular IPv6 routes; allocation goes through the
 * per-namespace copy in net->ipv6.ip6_dst_ops (see __ip6_dst_alloc()).
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};
232 
233 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
234 {
235 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
236 
237 	return mtu ? : dst->dev->mtu;
238 }
239 
/* Blackhole dst callbacks: blackhole routes ignore PMTU updates and
 * redirects, and never hand out writable metrics.
 */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}

static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}

static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
					 unsigned long old)
{
	return NULL;
}
255 
/* dst_ops for blackhole routes (see ip6_blackhole_route()): mostly the
 * regular callbacks, but with the no-op pmtu/redirect/metrics stubs.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
	.neigh_lookup		=	ip6_neigh_lookup,
};
267 
/* Metrics template for the special reject entries below; only the
 * hoplimit slot is explicitly initialized (to 0, i.e. "use default").
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};

/* Entry returned when no route matches at all: discards the packet and
 * reports -ENETUNREACH.  Lives at lowest possible priority (metric ~0).
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
286 
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Policy-routing "prohibit" entry: rejects traffic with -EACCES. */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/* Policy-routing "blackhole" entry: silently discards traffic
 * (dst_discard), error code -EINVAL.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_sk,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
320 
/* Allocate a rt6_info via this namespace's ip6_dst_ops.  dst_alloc()
 * only initializes the embedded dst_entry, so the rt6-specific tail is
 * zeroed by hand and the list heads set up here.
 */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
					struct net_device *dev,
					int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					0, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt) {
		struct dst_entry *dst = &rt->dst;

		/* zero everything that follows the embedded dst_entry */
		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
		INIT_LIST_HEAD(&rt->rt6i_siblings);
		INIT_LIST_HEAD(&rt->rt6i_uncached);
	}
	return rt;
}
338 
/* Like __ip6_dst_alloc() but additionally allocates the per-cpu clone
 * pointer array (rt6i_pcpu).  On percpu allocation failure the dst is
 * destroyed and NULL returned.
 */
static struct rt6_info *ip6_dst_alloc(struct net *net,
				      struct net_device *dev,
				      int flags)
{
	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);

	if (rt) {
		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
		if (rt->rt6i_pcpu) {
			int cpu;

			for_each_possible_cpu(cpu) {
				struct rt6_info **p;

				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
				/* no one shares rt */
				*p =  NULL;
			}
		} else {
			dst_destroy((struct dst_entry *)rt);
			return NULL;
		}
	}

	return rt;
}
365 
/* dst_ops->destroy: release everything a rt6_info owns - generic
 * metrics, the percpu clone array, uncached-list membership, the
 * inet6_dev reference and the parent ("from") dst reference.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	/* clear before release so no dangling parent pointer remains */
	dst->from = NULL;
	dst_release(from);
}
385 
/* dst_ops->ifdown: @dev is going down; migrate this route's inet6_dev
 * reference over to the namespace's loopback device so @dev can be
 * released.  Nothing to do when the route already points at loopback.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev) {
		if (idev && idev->dev == dev) {
			struct inet6_dev *loopback_idev =
				in6_dev_get(loopback_dev);
			if (loopback_idev) {
				rt->rt6i_idev = loopback_idev;
				in6_dev_put(idev);
			}
		}
	}
}
405 
406 static bool rt6_check_expired(const struct rt6_info *rt)
407 {
408 	if (rt->rt6i_flags & RTF_EXPIRES) {
409 		if (time_after(jiffies, rt->dst.expires))
410 			return true;
411 	} else if (rt->dst.from) {
412 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
413 	}
414 	return false;
415 }
416 
/* Multipath route selection:
 *   Hash based function using packet header and flowlabel.
 * Adapted from fib_info_hashfn()
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	/* flow-keyed index in [0, candidate_count) */
	return get_hash_from_flowi6(fl6) % candidate_count;
}
426 
/* Pick one route among @match and its ECMP siblings, keyed by a hash of
 * the flow.  Index 0 keeps @match itself; otherwise the sibling list is
 * walked to the chosen entry, which only replaces @match if it scores
 * as usable (rt6_score_route() >= 0).
 */
static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
					     struct flowi6 *fl6, int oif,
					     int strict)
{
	struct rt6_info *sibling, *next_sibling;
	int route_choosen;

	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
	/* Don't change the route, if route_choosen == 0
	 * (siblings does not include ourself)
	 */
	if (route_choosen)
		list_for_each_entry_safe(sibling, next_sibling,
				&match->rt6i_siblings, rt6i_siblings) {
			route_choosen--;
			if (route_choosen == 0) {
				/* unusable sibling: stay on the original */
				if (rt6_score_route(sibling, oif, strict) < 0)
					break;
				match = sibling;
				break;
			}
		}
	return match;
}
451 
/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

/* Walk the leaf chain starting at @rt and return the first route whose
 * device matches @oif; with no @oif, match on ownership of @saddr
 * instead.  A loopback route whose idev is bound to @oif is remembered
 * as a fallback.  When RT6_LOOKUP_F_IFACE demands a device match and
 * none exists, ip6_null_entry is returned; otherwise @rt unchanged.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* nothing to filter on: any route will do */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE)
						continue;
					/* keep an earlier, better-bound local */
					if (local &&
					    local->rt6i_idev->dev->ifindex == oif)
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
502 
#ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred neighbour-solicitation work item queued by rt6_probe(). */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;		/* gateway address to probe */
	struct net_device *dev;		/* held (dev_hold) until the probe runs */
};

/* Workqueue handler: send a neighbour solicitation to the target's
 * solicited-node multicast address, then drop the device reference and
 * free the work item.
 */
static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, NULL);
	dev_put(work->dev);
	kfree(work);
}
521 
/* Router Reachability Probing: if a gateway route's neighbour entry is
 * missing or no longer NUD_VALID, schedule a deferred neighbour
 * solicitation.  Probing is rate-limited per neighbour via
 * rtr_probe_interval (enforced under neigh->lock together with
 * __neigh_set_probe_once()); the NS itself is sent from workqueue
 * context by rt6_probe_deferred().
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		/* re-check state under the lock and enforce the rate limit */
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		/* no neighbour entry at all: always probe */
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);	/* released in rt6_probe_deferred() */
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
/* Router reachability probing needs CONFIG_IPV6_ROUTER_PREF; no-op here. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
573 
574 /*
575  * Default Router Selection (RFC 2461 6.3.6)
576  */
577 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
578 {
579 	struct net_device *dev = rt->dst.dev;
580 	if (!oif || dev->ifindex == oif)
581 		return 2;
582 	if ((dev->flags & IFF_LOOPBACK) &&
583 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
584 		return 1;
585 	return 0;
586 }
587 
/* Classify the reachability of @rt's next hop.  Routes without a
 * gateway always succeed.  With CONFIG_IPV6_ROUTER_PREF, any state
 * other than NUD_FAILED counts as reachable and NUD_FAILED yields
 * RT6_NUD_FAIL_PROBE; without it, a missing neighbour entry asks the
 * caller to round-robin (RT6_NUD_FAIL_DO_RR).
 */
static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
618 
/* Score @rt for route selection: device match (rt6_check_dev), router
 * preference bits when configured, and - under RT6_LOOKUP_F_REACHABLE -
 * next-hop reachability.  Negative returns are rt6_nud_state failure
 * codes; higher non-negative scores are better.
 */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	/* router preference occupies bits above the device score */
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}
637 
/* Compare @rt against the best candidate so far (@match with score
 * *mpri) and return whichever wins.  Expired routes and carrier-down
 * devices (with ignore_routes_with_linkdown set) are skipped.
 * RT6_NUD_FAIL_DO_RR demotes the score to 0 and flags that round-robin
 * rotation is wanted; under REACHABLE lookups a probe is kicked off.
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *dev = rt->dst.dev;

	if (dev && !netif_carrier_ok(dev) &&
	    idev->cnf.ignore_routes_with_linkdown)
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}
674 
/* Find the best route in @fn's leaf list that shares @metric, scanning
 * first from @rr_head to the end of the list and then from the leaf up
 * to @rr_head (round-robin order).  If nothing matched but routes with
 * a different metric exist (@cont marks where they start), scan those
 * as a last resort.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	/* wrap around: the part of the list before rr_head */
	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}
711 
/* Select the best route in @fn, starting the scan at the round-robin
 * pointer (rr_ptr).  When find_match() asked for rotation (no reachable
 * neighbour), advance rr_ptr to the next same-metric sibling so later
 * lookups try another router.  Falls back to ip6_null_entry when
 * nothing matched.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
	bool do_rr = false;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
739 
740 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
741 {
742 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
743 }
744 
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Handle a Route Information option received in a Router Advertisement:
 * validate the option's length/prefix-length/preference fields, then
 * add, refresh or (on zero lifetime) delete the corresponding
 * RTF_ROUTEINFO route.  Returns 0 on success, -EINVAL on a malformed
 * option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	/* length 3 carries the full 128-bit prefix in the option */
	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	/* prefix length 0 means "default route via this router" */
	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex);

	/* zero lifetime withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif
820 
/* Walk back up the fib6 tree from @fn after a failed match, descending
 * into a parent's source-address subtree where one exists, until a node
 * actually carrying routes (RTN_RTINFO) is found.  Returns NULL once
 * the tree root is reached.
 */
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = fn->parent;
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}
837 
/* Policy-table lookup used by ip6_route_lookup()/rt6_lookup(): find the
 * matching fib6 node under the table read lock, filter by device/source
 * address, optionally pick an ECMP sibling, and backtrack up the tree
 * on a null-entry result.  The returned route is held via dst_use().
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}
862 
/* Table-only route lookup (no caching side effects) through the policy
 * rules; the returned dst is held by ip6_pol_route_lookup().
 */
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				    int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
869 
/* Convenience lookup by address pair: build the flow, run the policy
 * lookup, and return the route only when the resulting dst carries no
 * error (else release it and return NULL).  Caller owns the reference.
 */
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
{
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}
EXPORT_SYMBOL(rt6_lookup);
894 
/* ip6_ins_rt is called with FREE table->tb6_lock.
 * It takes a new route entry; if the addition fails for any reason the
 * route is freed.  In any case, if the caller does not hold a
 * reference, the route may be destroyed.
 */

static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
			struct mx6_config *mxc)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, mxc);
	write_unlock_bh(&table->tb6_lock);

	return err;
}
914 
/* Insert @rt into its table with default netlink info and no metrics. */
int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
	struct mx6_config mxc = { .mx = NULL, };

	return __ip6_ins_rt(rt, &info, &mxc);
}
922 
/* Create an RTF_CACHE clone of @ort as a host route (plen 128) for
 * @daddr.  If @ort is itself a cache/pcpu clone, clone from its parent
 * instead.  For non-gateway routes, a clone whose destination equals
 * the prefix address of a non-host route is flagged RTF_ANYCAST, and
 * (with subtrees) the source prefix is pinned to @saddr.
 */
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;

	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);

	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}
962 
/* Allocate a per-cpu copy of @rt, flagged RTF_PCPU so it shares metrics
 * with its parent (see rt6_pcpu_cow_metrics()).  Returns NULL on
 * allocation failure.
 */
static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt;

	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
				  rt->dst.dev, rt->dst.flags);

	if (!pcpu_rt)
		return NULL;
	ip6_rt_copy_init(pcpu_rt, rt);
	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
	pcpu_rt->rt6i_flags |= RTF_PCPU;
	return pcpu_rt;
}
977 
/* It should be called with read_lock_bh(&tb6_lock) acquired */
/* Return this CPU's cached copy of @rt - held and with metrics
 * revalidated - or NULL if none has been installed yet.
 */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, **p;

	p = this_cpu_ptr(rt->rt6i_pcpu);
	pcpu_rt = *p;

	if (pcpu_rt) {
		dst_hold(&pcpu_rt->dst);
		rt6_dst_from_metrics_check(pcpu_rt);
	}
	return pcpu_rt;
}
992 
/* Create and publish a per-cpu copy of @rt.  Publication uses cmpxchg,
 * so a concurrently installed copy wins and ours is destroyed.  If @rt
 * was removed from the fib6 tree meanwhile, the copy is dropped and @rt
 * itself is returned.  A held dst is returned in every case (the null
 * entry on allocation failure).
 */
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct fib6_table *table = rt->rt6i_table;
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	read_lock_bh(&table->tb6_lock);
	if (rt->rt6i_pcpu) {
		p = this_cpu_ptr(rt->rt6i_pcpu);
		prev = cmpxchg(p, NULL, pcpu_rt);
		if (prev) {
			/* If someone did it before us, return prev instead */
			dst_destroy(&pcpu_rt->dst);
			pcpu_rt = prev;
		}
	} else {
		/* rt has been removed from the fib6 tree
		 * before we have a chance to acquire the read_lock.
		 * In this case, don't brother to create a pcpu rt
		 * since rt is going away anyway.  The next
		 * dst_check() will trigger a re-lookup.
		 */
		dst_destroy(&pcpu_rt->dst);
		pcpu_rt = rt;
	}
	dst_hold(&pcpu_rt->dst);
	rt6_dst_from_metrics_check(pcpu_rt);
	read_unlock_bh(&table->tb6_lock);
	return pcpu_rt;
}
1030 
/* Core per-table route resolution shared by the input and output paths.
 * Selects the best route under the table read lock (retrying without
 * the reachability requirement if nothing reachable was found), then
 * returns one of: the tree route itself (null entry or RTF_CACHE), a
 * fresh uncached RTF_CACHE clone (FLOWI_FLAG_KNOWN_NH on a non-gateway
 * route), or a per-cpu copy.  Always returns a held dst.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	/* on non-forwarding hosts, prefer reachable routers */
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}


	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt)
			rt6_uncached_list_add(uncached_rt);
		else
			uncached_rt = net->ipv6.ip6_null_entry;

		dst_hold(&uncached_rt->dst);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		return pcpu_rt;

	}
}
1120 
/* Input-path per-table resolver: route on the incoming interface. */
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}
1126 
/* Run the input-path lookup through the policy rules, forcing strict
 * interface matching for link/host-scoped destinations (except on
 * PIM register devices).
 */
static struct dst_entry *ip6_route_input_lookup(struct net *net,
						struct net_device *dev,
						struct flowi6 *fl6, int flags)
{
	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}
1136 
1137 void ip6_route_input(struct sk_buff *skb)
1138 {
1139 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1140 	struct net *net = dev_net(skb->dev);
1141 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1142 	struct ip_tunnel_info *tun_info;
1143 	struct flowi6 fl6 = {
1144 		.flowi6_iif = skb->dev->ifindex,
1145 		.daddr = iph->daddr,
1146 		.saddr = iph->saddr,
1147 		.flowlabel = ip6_flowinfo(iph),
1148 		.flowi6_mark = skb->mark,
1149 		.flowi6_proto = iph->nexthdr,
1150 	};
1151 
1152 	tun_info = skb_tunnel_info(skb);
1153 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1154 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1155 	skb_dst_drop(skb);
1156 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1157 }
1158 
1159 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1160 					     struct flowi6 *fl6, int flags)
1161 {
1162 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1163 }
1164 
1165 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1166 				    struct flowi6 *fl6)
1167 {
1168 	int flags = 0;
1169 
1170 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1171 
1172 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1173 	    fl6->flowi6_oif)
1174 		flags |= RT6_LOOKUP_F_IFACE;
1175 
1176 	if (!ipv6_addr_any(&fl6->saddr))
1177 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1178 	else if (sk)
1179 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1180 
1181 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1182 }
1183 EXPORT_SYMBOL(ip6_route_output);
1184 
/* Build a non-cached copy of @dst_orig whose input/output handlers
 * silently drop packets, preserving the original's metrics and
 * addressing.  Consumes the caller's reference on @dst_orig and
 * returns the new dst or ERR_PTR(-ENOMEM).
 */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		new = &rt->dst;

		/* Zero everything past the embedded dst_entry. */
		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));

		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_sk;

		if (dst_metrics_read_only(&ort->dst))
			new->_metrics = ort->dst._metrics;
		else
			dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		/* NOTE(review): dst_free() here appears to schedule release
		 * of the dst once its refcount drops, rather than freeing it
		 * immediately — confirm against dst_alloc()'s initial-ref
		 * semantics for DST_OBSOLETE_NONE entries.
		 */
		dst_free(new);
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
1223 
1224 /*
1225  *	Destination cache support functions
1226  */
1227 
1228 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1229 {
1230 	if (rt->dst.from &&
1231 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1232 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1233 }
1234 
1235 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1236 {
1237 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1238 		return NULL;
1239 
1240 	if (rt6_check_expired(rt))
1241 		return NULL;
1242 
1243 	return &rt->dst;
1244 }
1245 
1246 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1247 {
1248 	if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1249 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1250 		return &rt->dst;
1251 	else
1252 		return NULL;
1253 }
1254 
1255 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1256 {
1257 	struct rt6_info *rt;
1258 
1259 	rt = (struct rt6_info *) dst;
1260 
1261 	/* All IPV6 dsts are created with ->obsolete set to the value
1262 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1263 	 * into this function always.
1264 	 */
1265 
1266 	rt6_dst_from_metrics_check(rt);
1267 
1268 	if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
1269 		return rt6_dst_from_check(rt, cookie);
1270 	else
1271 		return rt6_check(rt, cookie);
1272 }
1273 
/* dst_ops ->negative_advice hook: called when a cached route looks
 * stale.  Returning NULL tells the caller to drop its cached entry
 * and perform a fresh lookup.
 */
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				/* ip6_del_rt() consumes the caller's
				 * reference on the route.
				 */
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}
1291 
/* dst_ops ->link_failure hook: report the failure to the sender via
 * ICMPv6 and invalidate the route the packet was using.
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* Take a reference for ip6_del_rt() to consume. */
			dst_hold(&rt->dst);
			ip6_del_rt(rt);
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
			/* Poison the fib node's serial number so cached
			 * dsts referencing it fail their next rt6_check().
			 */
			rt->rt6i_node->fn_sernum = -1;
		}
	}
}
1308 
1309 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1310 {
1311 	struct net *net = dev_net(rt->dst.dev);
1312 
1313 	rt->rt6i_flags |= RTF_MODIFIED;
1314 	rt->rt6i_pmtu = mtu;
1315 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1316 }
1317 
/* Record a smaller path MTU (e.g. from ICMPv6 Packet Too Big) on the
 * route behind @dst.  Addresses for a new cache entry come from @iph
 * if given, else from @sk; with neither, nothing can be keyed and the
 * update is dropped.
 */
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	if (rt6->rt6i_flags & RTF_LOCAL)
		return;

	dst_confirm(dst);
	/* Never accept a path MTU below the IPv6 minimum. */
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	if (mtu >= dst_mtu(dst))
		return;

	if (rt6->rt6i_flags & RTF_CACHE) {
		/* Already a per-destination entry: update it in place. */
		rt6_do_update_pmtu(rt6, mtu);
	} else {
		const struct in6_addr *daddr, *saddr;
		struct rt6_info *nrt6;

		if (iph) {
			daddr = &iph->daddr;
			saddr = &iph->saddr;
		} else if (sk) {
			daddr = &sk->sk_v6_daddr;
			saddr = &inet6_sk(sk)->saddr;
		} else {
			return;
		}
		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);

			/* ip6_ins_rt(nrt6) will bump the
			 * rt6->rt6i_node->fn_sernum
			 * which will fail the next rt6_check() and
			 * invalidate the sk->sk_dst_cache.
			 */
			ip6_ins_rt(nrt6);
		}
	}
}
1359 
1360 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1361 			       struct sk_buff *skb, u32 mtu)
1362 {
1363 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1364 }
1365 
1366 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1367 		     int oif, u32 mark)
1368 {
1369 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1370 	struct dst_entry *dst;
1371 	struct flowi6 fl6;
1372 
1373 	memset(&fl6, 0, sizeof(fl6));
1374 	fl6.flowi6_oif = oif;
1375 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1376 	fl6.daddr = iph->daddr;
1377 	fl6.saddr = iph->saddr;
1378 	fl6.flowlabel = ip6_flowinfo(iph);
1379 
1380 	dst = ip6_route_output(net, NULL, &fl6);
1381 	if (!dst->error)
1382 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1383 	dst_release(dst);
1384 }
1385 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1386 
1387 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1388 {
1389 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1390 			sk->sk_bound_dev_if, sk->sk_mark);
1391 }
1392 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1393 
/* Handle redirects.
 *
 * A flowi6 extended with the gateway the Redirect message came from.
 * __ip6_route_redirect() casts the flowi6 pointer back to this
 * wrapper, so fl6 must remain the first member.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
1399 
static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/* Get the "current" route for this destination and
	 * check if the redirect has come from appropriate router.
	 *
	 * RFC 4861 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt6_check_expired(rt))
			continue;
		if (rt->dst.error)
			break;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
			continue;
		/* The redirect must come from our current nexthop. */
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	else if (rt->dst.error) {
		rt = net->ipv6.ip6_null_entry;
		goto out;
	}

	if (rt == net->ipv6.ip6_null_entry) {
		/* Nothing matched at this node: retry one level up. */
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

out:
	/* Always return a referenced route (possibly the null entry). */
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	return rt;
};
1456 
1457 static struct dst_entry *ip6_route_redirect(struct net *net,
1458 					const struct flowi6 *fl6,
1459 					const struct in6_addr *gateway)
1460 {
1461 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1462 	struct ip6rd_flowi rdfl;
1463 
1464 	rdfl.fl6 = *fl6;
1465 	rdfl.gateway = *gateway;
1466 
1467 	return fib6_rule_lookup(net, &rdfl.fl6,
1468 				flags, __ip6_route_redirect);
1469 }
1470 
1471 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1472 {
1473 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1474 	struct dst_entry *dst;
1475 	struct flowi6 fl6;
1476 
1477 	memset(&fl6, 0, sizeof(fl6));
1478 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1479 	fl6.flowi6_oif = oif;
1480 	fl6.flowi6_mark = mark;
1481 	fl6.daddr = iph->daddr;
1482 	fl6.saddr = iph->saddr;
1483 	fl6.flowlabel = ip6_flowinfo(iph);
1484 
1485 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1486 	rt6_do_redirect(dst, NULL, skb);
1487 	dst_release(dst);
1488 }
1489 EXPORT_SYMBOL_GPL(ip6_redirect);
1490 
1491 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1492 			    u32 mark)
1493 {
1494 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1495 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1496 	struct dst_entry *dst;
1497 	struct flowi6 fl6;
1498 
1499 	memset(&fl6, 0, sizeof(fl6));
1500 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1501 	fl6.flowi6_oif = oif;
1502 	fl6.flowi6_mark = mark;
1503 	fl6.daddr = msg->dest;
1504 	fl6.saddr = iph->daddr;
1505 
1506 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1507 	rt6_do_redirect(dst, NULL, skb);
1508 	dst_release(dst);
1509 }
1510 
1511 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1512 {
1513 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1514 }
1515 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1516 
1517 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1518 {
1519 	struct net_device *dev = dst->dev;
1520 	unsigned int mtu = dst_mtu(dst);
1521 	struct net *net = dev_net(dev);
1522 
1523 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1524 
1525 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1526 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1527 
1528 	/*
1529 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1530 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1531 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1532 	 * rely only on pmtu discovery"
1533 	 */
1534 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1535 		mtu = IPV6_MAXPLEN;
1536 	return mtu;
1537 }
1538 
1539 static unsigned int ip6_mtu(const struct dst_entry *dst)
1540 {
1541 	const struct rt6_info *rt = (const struct rt6_info *)dst;
1542 	unsigned int mtu = rt->rt6i_pmtu;
1543 	struct inet6_dev *idev;
1544 
1545 	if (mtu)
1546 		goto out;
1547 
1548 	mtu = dst_metric_raw(dst, RTAX_MTU);
1549 	if (mtu)
1550 		goto out;
1551 
1552 	mtu = IPV6_MIN_MTU;
1553 
1554 	rcu_read_lock();
1555 	idev = __in6_dev_get(dst->dev);
1556 	if (idev)
1557 		mtu = idev->cnf.mtu6;
1558 	rcu_read_unlock();
1559 
1560 out:
1561 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1562 }
1563 
/* Singly-linked list of dsts handed out by icmp6_dst_alloc(); walked
 * and reaped by icmp6_dst_gc()/icmp6_clean_all() under icmp6_dst_lock.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1566 
/* Allocate a throw-away route for an outgoing ICMPv6 packet.  The dst
 * is linked on icmp6_dst_gc_list rather than inserted into the FIB,
 * and is reclaimed by icmp6_dst_gc() once its refcount drops to zero.
 * Returns the (xfrm-resolved) dst or an ERR_PTR.
 */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output  = ip6_output;
	atomic_set(&rt->dst.__refcnt, 1);
	rt->rt6i_gateway  = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev     = idev;	/* takes over the in6_dev reference */
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	/* Publish the entry on the ICMPv6 dst list for later GC. */
	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	/* Make sure the fib6 GC is running so the entry gets reaped. */
	fib6_force_start_gc(net);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}
1606 
/* Walk the ICMPv6 dst list and free entries no longer referenced.
 * Returns the number of entries still in use, so a non-zero result
 * tells the caller there is more work for a later pass.
 */
int icmp6_dst_gc(void)
{
	struct dst_entry *dst, **pprev;
	int more = 0;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;

	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			/* Unlink and free the unreferenced entry;
			 * pprev keeps pointing at the next slot.
			 */
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
			++more;
		}
	}

	spin_unlock_bh(&icmp6_dst_lock);

	return more;
}
1629 
/* Remove from the ICMPv6 dst list every entry for which @func returns
 * non-zero (e.g. routes referencing a device that is going away).
 */
static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
			    void *arg)
{
	struct dst_entry *dst, **pprev;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;
	while ((dst = *pprev) != NULL) {
		struct rt6_info *rt = (struct rt6_info *) dst;
		if (func(rt, arg)) {
			/* Unlink and free the matching entry. */
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
		}
	}
	spin_unlock_bh(&icmp6_dst_lock);
}
1648 
/* dst_ops ->gc hook for IPv6.  Runs the fib6 GC when the entry count
 * or elapsed time warrants it; returns non-zero while the number of
 * entries still exceeds ip6_rt_max_size (i.e. allocation should fail).
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	/* Each pass raises the expiry pressure; a productive pass
	 * (below gc_thresh) resets it to half the gc timeout.
	 */
	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	/* Pressure decays geometrically with rate 1/2^elasticity. */
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}
1673 
1674 static int ip6_convert_metrics(struct mx6_config *mxc,
1675 			       const struct fib6_config *cfg)
1676 {
1677 	bool ecn_ca = false;
1678 	struct nlattr *nla;
1679 	int remaining;
1680 	u32 *mp;
1681 
1682 	if (!cfg->fc_mx)
1683 		return 0;
1684 
1685 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1686 	if (unlikely(!mp))
1687 		return -ENOMEM;
1688 
1689 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1690 		int type = nla_type(nla);
1691 		u32 val;
1692 
1693 		if (!type)
1694 			continue;
1695 		if (unlikely(type > RTAX_MAX))
1696 			goto err;
1697 
1698 		if (type == RTAX_CC_ALGO) {
1699 			char tmp[TCP_CA_NAME_MAX];
1700 
1701 			nla_strlcpy(tmp, nla, sizeof(tmp));
1702 			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1703 			if (val == TCP_CA_UNSPEC)
1704 				goto err;
1705 		} else {
1706 			val = nla_get_u32(nla);
1707 		}
1708 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1709 			goto err;
1710 
1711 		mp[type - 1] = val;
1712 		__set_bit(type - 1, mxc->mx_valid);
1713 	}
1714 
1715 	if (ecn_ca) {
1716 		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1717 		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1718 	}
1719 
1720 	mxc->mx = mp;
1721 	return 0;
1722  err:
1723 	kfree(mp);
1724 	return -EINVAL;
1725 }
1726 
/* Build (but do not insert) a struct rt6_info described by @cfg.
 *
 * On success, returns 0 and stores the new route in *rt_ret; the
 * caller owns it and must either insert it into a fib6 table or
 * dst_free() it.  On failure, returns a negative errno and sets
 * *rt_ret to NULL.
 */
int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-specific routes require subtree support. */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler based on the destination type. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		/* Attach lightweight-tunnel encapsulation state and
		 * redirect input/output through it where requested.
		 */
		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* Map the reject route type to the error its packet
		 * handlers will report.
		 */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_sk;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0))
			goto out;

		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* No device given: inherit dev/idev from
				 * the route covering the gateway.
				 */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		/* The preferred source must be configured on @dev. */
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* Transfer the dev/idev references taken above to the route. */
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	*rt_ret = rt;

	return 0;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);

	*rt_ret = NULL;

	return err;
}
1976 
/* Build a route from @cfg, convert its netlink metrics and insert it
 * into the FIB via __ip6_ins_rt().  On the error paths before
 * insertion, the half-built route is freed here.
 */
int ip6_route_add(struct fib6_config *cfg)
{
	struct mx6_config mxc = { .mx = NULL, };
	struct rt6_info *rt = NULL;
	int err;

	err = ip6_route_info_create(cfg, &rt);
	if (err)
		goto out;

	err = ip6_convert_metrics(&mxc, cfg);
	if (err)
		goto out;

	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);

	kfree(mxc.mx);

	return err;
out:
	if (rt)
		dst_free(&rt->dst);

	return err;
}
2002 
/* Remove @rt from its fib6 table.  The caller's reference on @rt is
 * always consumed, even on failure.
 */
static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->dst.dev);

	/* The null entry and uncached (DST_NOCACHE) clones are not
	 * part of any table, so there is nothing to delete.
	 */
	if (rt == net->ipv6.ip6_null_entry ||
	    rt->dst.flags & DST_NOCACHE) {
		err = -ENOENT;
		goto out;
	}

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_del(rt, info);
	write_unlock_bh(&table->tb6_lock);

out:
	ip6_rt_put(rt);
	return err;
}
2024 
2025 int ip6_del_rt(struct rt6_info *rt)
2026 {
2027 	struct nl_info info = {
2028 		.nl_net = dev_net(rt->dst.dev),
2029 	};
2030 	return __ip6_del_rt(rt, &info);
2031 }
2032 
/* Delete the first route matching @cfg (prefix plus, if given,
 * ifindex, gateway and metric).  RTF_CACHE clones are skipped unless
 * the request explicitly targets them.  Returns 0 or -ESRCH when no
 * route matched.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			if ((rt->rt6i_flags & RTF_CACHE) &&
			    !(cfg->fc_flags & RTF_CACHE))
				continue;
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			/* Hold a reference for __ip6_del_rt() to consume
			 * after we drop the table lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
2074 
/* Process a received ICMPv6 Redirect (RFC 4861, section 8): validate
 * the message and its ND options, update the neighbour cache for the
 * new first hop, and install an RTF_CACHE route to the destination.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* target == dest means the destination itself is on-link;
	 * otherwise the target must be a link-local router address.
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt == net->ipv6.ip6_null_entry) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/* Install a cached route to @msg->dest via the new nexthop. */
	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	if (rt->rt6i_flags & RTF_CACHE) {
		/* Replace the old cache entry; dst_clone() takes the
		 * reference that ip6_del_rt() consumes.
		 */
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}
2191 
2192 /*
2193  *	Misc support functions
2194  */
2195 
/* Link clone @rt to its parent @from: the clone takes a reference on
 * the parent and shares the parent's metrics.  @from must not itself
 * be a clone.
 */
static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
	BUG_ON(from->dst.from);

	/* Clones do not carry their own RTF_EXPIRES timer state. */
	rt->rt6i_flags &= ~RTF_EXPIRES;
	dst_hold(&from->dst);
	rt->dst.from = &from->dst;
	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}
2205 
/* Initialize clone @rt from its parent @ort, taking references on the
 * shared inet6_dev and lwtunnel state.
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
{
	rt->dst.input = ort->dst.input;
	rt->dst.output = ort->dst.output;
	rt->rt6i_dst = ort->rt6i_dst;
	rt->dst.error = ort->dst.error;
	rt->rt6i_idev = ort->rt6i_idev;
	if (rt->rt6i_idev)
		in6_dev_hold(rt->rt6i_idev);
	rt->dst.lastuse = jiffies;
	rt->rt6i_gateway = ort->rt6i_gateway;
	/* Flags must be copied before rt6_set_from(), which clears
	 * RTF_EXPIRES on the clone.
	 */
	rt->rt6i_flags = ort->rt6i_flags;
	rt6_set_from(rt, ort);
	rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->rt6i_src;
#endif
	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
	rt->rt6i_table = ort->rt6i_table;
	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}
2227 
2228 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Find the RA Route Information route for @prefix/@prefixlen learned
 * via gateway @gwaddr on @ifindex.  Returns a referenced route or
 * NULL when no match exists.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		/* Return the matching entry referenced. */
		dst_hold(&rt->dst);
		break;
	}
out:
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
2260 
2261 static struct rt6_info *rt6_add_route_info(struct net *net,
2262 					   const struct in6_addr *prefix, int prefixlen,
2263 					   const struct in6_addr *gwaddr, int ifindex,
2264 					   unsigned int pref)
2265 {
2266 	struct fib6_config cfg = {
2267 		.fc_table	= RT6_TABLE_INFO,
2268 		.fc_metric	= IP6_RT_PRIO_USER,
2269 		.fc_ifindex	= ifindex,
2270 		.fc_dst_len	= prefixlen,
2271 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2272 				  RTF_UP | RTF_PREF(pref),
2273 		.fc_nlinfo.portid = 0,
2274 		.fc_nlinfo.nlh = NULL,
2275 		.fc_nlinfo.nl_net = net,
2276 	};
2277 
2278 	cfg.fc_dst = *prefix;
2279 	cfg.fc_gateway = *gwaddr;
2280 
2281 	/* We should treat it as a default route if prefix length is 0. */
2282 	if (!prefixlen)
2283 		cfg.fc_flags |= RTF_DEFAULT;
2284 
2285 	ip6_route_add(&cfg);
2286 
2287 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2288 }
2289 #endif
2290 
2291 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2292 {
2293 	struct rt6_info *rt;
2294 	struct fib6_table *table;
2295 
2296 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2297 	if (!table)
2298 		return NULL;
2299 
2300 	read_lock_bh(&table->tb6_lock);
2301 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2302 		if (dev == rt->dst.dev &&
2303 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2304 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2305 			break;
2306 	}
2307 	if (rt)
2308 		dst_hold(&rt->dst);
2309 	read_unlock_bh(&table->tb6_lock);
2310 	return rt;
2311 }
2312 
2313 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2314 				     struct net_device *dev,
2315 				     unsigned int pref)
2316 {
2317 	struct fib6_config cfg = {
2318 		.fc_table	= RT6_TABLE_DFLT,
2319 		.fc_metric	= IP6_RT_PRIO_USER,
2320 		.fc_ifindex	= dev->ifindex,
2321 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2322 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2323 		.fc_nlinfo.portid = 0,
2324 		.fc_nlinfo.nlh = NULL,
2325 		.fc_nlinfo.nl_net = dev_net(dev),
2326 	};
2327 
2328 	cfg.fc_gateway = *gwaddr;
2329 
2330 	ip6_route_add(&cfg);
2331 
2332 	return rt6_get_dflt_router(gwaddr, dev);
2333 }
2334 
/* Delete all RA-learned default/addrconf routes from the DFLT table,
 * except on interfaces with accept_ra == 2 (which keep RA routes even
 * when forwarding is enabled).
 */
void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (!table)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
			/* ip6_del_rt() takes the table write lock, so we
			 * must drop the read lock first; the hold keeps
			 * rt alive across the unlock.  The list may have
			 * changed meanwhile, so restart the walk.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}
2358 
2359 static void rtmsg_to_fib6_config(struct net *net,
2360 				 struct in6_rtmsg *rtmsg,
2361 				 struct fib6_config *cfg)
2362 {
2363 	memset(cfg, 0, sizeof(*cfg));
2364 
2365 	cfg->fc_table = RT6_TABLE_MAIN;
2366 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2367 	cfg->fc_metric = rtmsg->rtmsg_metric;
2368 	cfg->fc_expires = rtmsg->rtmsg_info;
2369 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2370 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2371 	cfg->fc_flags = rtmsg->rtmsg_flags;
2372 
2373 	cfg->fc_nlinfo.nl_net = net;
2374 
2375 	cfg->fc_dst = rtmsg->rtmsg_dst;
2376 	cfg->fc_src = rtmsg->rtmsg_src;
2377 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2378 }
2379 
2380 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2381 {
2382 	struct fib6_config cfg;
2383 	struct in6_rtmsg rtmsg;
2384 	int err;
2385 
2386 	switch (cmd) {
2387 	case SIOCADDRT:		/* Add a route */
2388 	case SIOCDELRT:		/* Delete a route */
2389 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2390 			return -EPERM;
2391 		err = copy_from_user(&rtmsg, arg,
2392 				     sizeof(struct in6_rtmsg));
2393 		if (err)
2394 			return -EFAULT;
2395 
2396 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2397 
2398 		rtnl_lock();
2399 		switch (cmd) {
2400 		case SIOCADDRT:
2401 			err = ip6_route_add(&cfg);
2402 			break;
2403 		case SIOCDELRT:
2404 			err = ip6_route_del(&cfg);
2405 			break;
2406 		default:
2407 			err = -EINVAL;
2408 		}
2409 		rtnl_unlock();
2410 
2411 		return err;
2412 	}
2413 
2414 	return -EINVAL;
2415 }
2416 
2417 /*
2418  *	Drop the packet on the floor
2419  */
2420 
/* Drop @skb, bump the relevant SNMP no-route counter and send an ICMPv6
 * destination-unreachable message with the given @code.
 * @ipstats_mib_noroutes selects the input vs. output counter.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY) {
			/* Unspecified destination counts as an address
			 * error, not a routing failure.
			 */
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
2443 
/* dst input handler for blackhole routes: drop and count as in-no-route. */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2448 
2449 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2450 {
2451 	skb->dev = skb_dst(skb)->dev;
2452 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2453 }
2454 
/* dst input handler for prohibit routes: drop with "administratively
 * prohibited" ICMP code, counted as in-no-route.
 */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2459 
2460 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2461 {
2462 	skb->dev = skb_dst(skb)->dev;
2463 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2464 }
2465 
2466 /*
2467  *	Allocate a dst for local (unicast / anycast) address.
2468  */
2469 
2470 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2471 				    const struct in6_addr *addr,
2472 				    bool anycast)
2473 {
2474 	struct net *net = dev_net(idev->dev);
2475 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2476 					    DST_NOCOUNT);
2477 	if (!rt)
2478 		return ERR_PTR(-ENOMEM);
2479 
2480 	in6_dev_hold(idev);
2481 
2482 	rt->dst.flags |= DST_HOST;
2483 	rt->dst.input = ip6_input;
2484 	rt->dst.output = ip6_output;
2485 	rt->rt6i_idev = idev;
2486 
2487 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2488 	if (anycast)
2489 		rt->rt6i_flags |= RTF_ANYCAST;
2490 	else
2491 		rt->rt6i_flags |= RTF_LOCAL;
2492 
2493 	rt->rt6i_gateway  = *addr;
2494 	rt->rt6i_dst.addr = *addr;
2495 	rt->rt6i_dst.plen = 128;
2496 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2497 	rt->dst.flags |= DST_NOCACHE;
2498 
2499 	atomic_set(&rt->dst.__refcnt, 1);
2500 
2501 	return rt;
2502 }
2503 
2504 int ip6_route_get_saddr(struct net *net,
2505 			struct rt6_info *rt,
2506 			const struct in6_addr *daddr,
2507 			unsigned int prefs,
2508 			struct in6_addr *saddr)
2509 {
2510 	struct inet6_dev *idev =
2511 		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2512 	int err = 0;
2513 	if (rt && rt->rt6i_prefsrc.plen)
2514 		*saddr = rt->rt6i_prefsrc.addr;
2515 	else
2516 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2517 					 daddr, prefs, saddr);
2518 	return err;
2519 }
2520 
/* Argument bundle for fib6_remove_prefsrc(): identifies the address
 * being removed so matching rt6i_prefsrc entries can be cleared.
 */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict to this device; NULL = any */
	struct net *net;	/* netns (used to skip the null entry) */
	struct in6_addr *addr;	/* the address being deleted */
};
2527 
2528 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2529 {
2530 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2531 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2532 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2533 
2534 	if (((void *)rt->dst.dev == dev || !dev) &&
2535 	    rt != net->ipv6.ip6_null_entry &&
2536 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2537 		/* remove prefsrc entry */
2538 		rt->rt6i_prefsrc.plen = 0;
2539 	}
2540 	return 0;
2541 }
2542 
2543 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2544 {
2545 	struct net *net = dev_net(ifp->idev->dev);
2546 	struct arg_dev_net_ip adni = {
2547 		.dev = ifp->idev->dev,
2548 		.net = net,
2549 		.addr = &ifp->addr,
2550 	};
2551 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2552 }
2553 
/* Flag combinations identifying routes learned from a router @gateway. */
#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)

/* Remove routers and update dst entries when gateway turn into host. */
static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
{
	struct in6_addr *gateway = (struct in6_addr *)arg;

	/* fib6_clean_all() callback: returning -1 asks the walker to
	 * delete the route.  Match RA default-router routes and cached
	 * gateway routes that point at @gateway.
	 */
	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
		return -1;
	}
	return 0;
}
2569 
/* Purge all routes through @gateway after it stopped being a router
 * (e.g. an NA with the router flag cleared was received).
 */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
2574 
/* Argument bundle for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches all */
	struct net *net;	/* netns (used to skip the null entry) */
};
2579 
2580 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2581 {
2582 	const struct arg_dev_net *adn = arg;
2583 	const struct net_device *dev = adn->dev;
2584 
2585 	if ((rt->dst.dev == dev || !dev) &&
2586 	    rt != adn->net->ipv6.ip6_null_entry)
2587 		return -1;
2588 
2589 	return 0;
2590 }
2591 
2592 void rt6_ifdown(struct net *net, struct net_device *dev)
2593 {
2594 	struct arg_dev_net adn = {
2595 		.dev = dev,
2596 		.net = net,
2597 	};
2598 
2599 	fib6_clean_all(net, fib6_ifdown, &adn);
2600 	icmp6_clean_all(fib6_ifdown, &adn);
2601 	rt6_uncached_list_flush_dev(net, dev);
2602 }
2603 
/* Argument bundle for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2608 
/* fib6_clean_all() callback: propagate a device MTU change into route
 * MTU metrics / cached PMTU values.  Always returns 0 (never deletes).
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discouvery.
	 */
	if (rt->dst.dev == arg->dev &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* For RTF_CACHE with rt6i_pmtu == 0
			 * (i.e. a redirected route),
			 * the metrics of its rt->dst.from has already
			 * been updated.
			 */
			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
				rt->rt6i_pmtu = arg->mtu;
		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
			   (dst_mtu(&rt->dst) < arg->mtu &&
			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		}
	}
	return 0;
}
2656 
2657 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2658 {
2659 	struct rt6_mtu_change_arg arg = {
2660 		.dev = dev,
2661 		.mtu = mtu,
2662 	};
2663 
2664 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2665 }
2666 
/* Netlink attribute validation policy for RTM_{NEW,DEL,GET}ROUTE.
 * Attributes absent from this table (e.g. RTA_DST/RTA_SRC, whose length
 * depends on the prefix length) are length-checked by the handlers.
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]              = { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
};
2678 
/* Parse an RTM_NEWROUTE/RTM_DELROUTE message into a fib6_config.
 * Returns 0 on success or a negative errno (parse failure, or an
 * RTA_DST/RTA_SRC attribute shorter than the prefix length implies).
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	unsigned int pref;
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_type = rtm->rtm_type;

	/* Reject-style route types all map onto RTF_REJECT; the precise
	 * type is preserved in fc_type and encoded via dst.error later.
	 */
	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	if (rtm->rtm_flags & RTM_F_CLONED)
		cfg->fc_flags |= RTF_CACHE;

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		/* The attribute only needs to carry the significant
		 * prefix bytes, rounded up to whole bytes.
		 */
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		/* Points into the original message; valid only while the
		 * skb is alive (consumed synchronously by the handlers).
		 */
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
	}

	if (tb[RTA_PREF]) {
		/* Unknown router-preference values fall back to medium. */
		pref = nla_get_u8(tb[RTA_PREF]);
		if (pref != ICMPV6_ROUTER_PREF_LOW &&
		    pref != ICMPV6_ROUTER_PREF_HIGH)
			pref = ICMPV6_ROUTER_PREF_MEDIUM;
		cfg->fc_flags |= RTF_PREF(pref);
	}

	if (tb[RTA_ENCAP])
		cfg->fc_encap = tb[RTA_ENCAP];

	if (tb[RTA_ENCAP_TYPE])
		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);

	err = 0;
errout:
	return err;
}
2781 
/* Per-nexthop bookkeeping while building a multipath route. */
struct rt6_nh {
	struct rt6_info *rt6_info;	/* route built for this nexthop */
	struct fib6_config r_cfg;	/* config it was created from */
	struct mx6_config mxc;		/* converted metrics for insertion */
	struct list_head next;		/* link in the rt6_nh_list */
};
2788 
2789 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2790 {
2791 	struct rt6_nh *nh;
2792 
2793 	list_for_each_entry(nh, rt6_nh_list, next) {
2794 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2795 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2796 		        nh->r_cfg.fc_ifindex);
2797 	}
2798 }
2799 
2800 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2801 				 struct rt6_info *rt, struct fib6_config *r_cfg)
2802 {
2803 	struct rt6_nh *nh;
2804 	struct rt6_info *rtnh;
2805 	int err = -EEXIST;
2806 
2807 	list_for_each_entry(nh, rt6_nh_list, next) {
2808 		/* check if rt6_info already exists */
2809 		rtnh = nh->rt6_info;
2810 
2811 		if (rtnh->dst.dev == rt->dst.dev &&
2812 		    rtnh->rt6i_idev == rt->rt6i_idev &&
2813 		    ipv6_addr_equal(&rtnh->rt6i_gateway,
2814 				    &rt->rt6i_gateway))
2815 			return err;
2816 	}
2817 
2818 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2819 	if (!nh)
2820 		return -ENOMEM;
2821 	nh->rt6_info = rt;
2822 	err = ip6_convert_metrics(&nh->mxc, r_cfg);
2823 	if (err) {
2824 		kfree(nh);
2825 		return err;
2826 	}
2827 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2828 	list_add_tail(&nh->next, rt6_nh_list);
2829 
2830 	return 0;
2831 }
2832 
/* Add a multipath route: build one rt6_info per RTA_MULTIPATH nexthop,
 * then insert them one by one.  On a mid-insert failure, the nexthops
 * already installed in this call are deleted again before returning the
 * error.  Returns 0 on success or a negative errno.
 */
static int ip6_route_multipath_add(struct fib6_config *cfg)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	struct rt6_info *rt;
	struct rt6_nh *err_nh;
	struct rt6_nh *nh, *nh_safe;
	int remaining;
	int attrlen;
	int err = 1;
	int nhn = 0;
	int replace = (cfg->fc_nlinfo.nlh &&
		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
	LIST_HEAD(rt6_nh_list);

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
	 * rt6_info structs per nexthop
	 */
	while (rtnh_ok(rtnh, remaining)) {
		/* Each nexthop starts from the shared config and overrides
		 * ifindex/gateway/encap with its own per-hop attributes.
		 */
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				r_cfg.fc_gateway = nla_get_in6_addr(nla);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
			if (nla)
				r_cfg.fc_encap_type = nla_get_u16(nla);
		}

		err = ip6_route_info_create(&r_cfg, &rt);
		if (err)
			goto cleanup;

		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
		if (err) {
			/* List did not take ownership; free the route. */
			dst_free(&rt->dst);
			goto cleanup;
		}

		rtnh = rtnh_next(rtnh, &remaining);
	}

	err_nh = NULL;
	list_for_each_entry(nh, &rt6_nh_list, next) {
		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
		/* nh->rt6_info is used or freed at this point, reset to NULL*/
		nh->rt6_info = NULL;
		if (err) {
			if (replace && nhn)
				ip6_print_replace_route_err(&rt6_nh_list);
			err_nh = nh;
			goto add_errout;
		}

		/* Because each route is added like a single route we remove
		 * these flags after the first nexthop: if there is a collision,
		 * we have already failed to add the first nexthop:
		 * fib6_add_rt2node() has rejected it; when replacing, old
		 * nexthops have been replaced by first new, the rest should
		 * be added to it.
		 */
		/* NOTE(review): dereferences cfg->fc_nlinfo.nlh without a
		 * NULL check, while the 'replace' computation above guards
		 * it.  Presumably fc_mp is only ever set from the rtnetlink
		 * path where nlh is non-NULL — confirm.
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
						     NLM_F_REPLACE);
		nhn++;
	}

	goto cleanup;

add_errout:
	/* Delete routes that were already added */
	list_for_each_entry(nh, &rt6_nh_list, next) {
		if (err_nh == nh)
			break;
		ip6_route_del(&nh->r_cfg);
	}

cleanup:
	/* Free any routes never handed to the FIB, plus the bookkeeping. */
	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
		if (nh->rt6_info)
			dst_free(&nh->rt6_info->dst);
		kfree(nh->mxc.mx);
		list_del(&nh->next);
		kfree(nh);
	}

	return err;
}
2932 
2933 static int ip6_route_multipath_del(struct fib6_config *cfg)
2934 {
2935 	struct fib6_config r_cfg;
2936 	struct rtnexthop *rtnh;
2937 	int remaining;
2938 	int attrlen;
2939 	int err = 1, last_err = 0;
2940 
2941 	remaining = cfg->fc_mp_len;
2942 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2943 
2944 	/* Parse a Multipath Entry */
2945 	while (rtnh_ok(rtnh, remaining)) {
2946 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2947 		if (rtnh->rtnh_ifindex)
2948 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2949 
2950 		attrlen = rtnh_attrlen(rtnh);
2951 		if (attrlen > 0) {
2952 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2953 
2954 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2955 			if (nla) {
2956 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2957 				r_cfg.fc_flags |= RTF_GATEWAY;
2958 			}
2959 		}
2960 		err = ip6_route_del(&r_cfg);
2961 		if (err)
2962 			last_err = err;
2963 
2964 		rtnh = rtnh_next(rtnh, &remaining);
2965 	}
2966 
2967 	return last_err;
2968 }
2969 
2970 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2971 {
2972 	struct fib6_config cfg;
2973 	int err;
2974 
2975 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2976 	if (err < 0)
2977 		return err;
2978 
2979 	if (cfg.fc_mp)
2980 		return ip6_route_multipath_del(&cfg);
2981 	else
2982 		return ip6_route_del(&cfg);
2983 }
2984 
2985 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2986 {
2987 	struct fib6_config cfg;
2988 	int err;
2989 
2990 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2991 	if (err < 0)
2992 		return err;
2993 
2994 	if (cfg.fc_mp)
2995 		return ip6_route_multipath_add(&cfg);
2996 	else
2997 		return ip6_route_add(&cfg);
2998 }
2999 
/* Worst-case payload size of an RTM_NEWROUTE notification for @rt.
 * Must stay in sync with the attributes emitted by rt6_fill_node();
 * undersizing here makes inet6_rt_notify() fail with -EMSGSIZE.
 */
static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
{
	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
	       + nla_total_size(sizeof(struct rta_cacheinfo))
	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
	       + nla_total_size(1) /* RTA_PREF */
	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
}
3017 
/* Fill a netlink route message for @rt into @skb.
 * @dst/@src: specific addresses from a get-route query, or NULL when
 *            dumping (then the route's own prefixes are reported).
 * @iif: input interface for get-route replies; @prefix: when non-zero,
 * skip (return 1 for) non-prefix routes; @nowait: forwarded to
 * ip6mr_get_route().  Returns 0 on success, 1 if skipped, -EMSGSIZE if
 * the skb ran out of room.
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	u32 metrics[RTAX_MAX];
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	if (rt->rt6i_flags & RTF_REJECT) {
		/* Map the stored dst error back to the route type the
		 * user configured (see rtm_to_fib6_config()).
		 */
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	/* NOTE(review): dereferences rt->dst.dev without a NULL check,
	 * although later code guards with 'rt->dst.dev &&' — presumably
	 * dev is always set on routes reaching this point; confirm.
	 */
	if (!netif_carrier_ok(rt->dst.dev)) {
		rtm->rtm_flags |= RTNH_F_LINKDOWN;
		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
			rtm->rtm_flags |= RTNH_F_DEAD;
	}
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		/* get-route reply: report the queried host address. */
		if (nla_put_in6_addr(skb, RTA_DST, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			/* Multicast: let ip6mr fill forwarding info;
			 * err == 0 with !nowait means the reply will be
			 * sent asynchronously by the mroute code.
			 */
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* Report metrics with any cached PMTU overriding the MTU metric. */
	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt6i_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
		goto nla_put_failure;

	lwtunnel_fill_encap(skb, rt->dst.lwtstate);

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
3177 
3178 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3179 {
3180 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3181 	int prefix;
3182 
3183 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3184 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3185 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3186 	} else
3187 		prefix = 0;
3188 
3189 	return rt6_fill_node(arg->net,
3190 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3191 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3192 		     prefix, 0, NLM_F_MULTI);
3193 }
3194 
/* RTM_GETROUTE handler: perform a route lookup for the requested flow
 * and unicast the matching route back to the requester.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	int err, iif = 0, oif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	/* NOTE(review): RTA_MARK has no entry in rtm_ipv6_policy, so its
	 * length is not validated by nlmsg_parse() — presumably callers
	 * always send a u32; confirm against other users of the policy.
	 */
	if (tb[RTA_MARK])
		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);

	if (iif) {
		/* Input-side query: simulate reception on that device. */
		struct net_device *dev;
		int flags = 0;

		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
							       flags);
	} else {
		/* Output-side query.  The lookup never returns NULL; a
		 * failed lookup yields a reject route whose type is
		 * reported to userspace by rt6_fill_node().
		 */
		fl6.flowi6_oif = oif;

		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		ip6_rt_put(rt);
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	/* The skb takes over the route reference from the lookup. */
	skb_dst_set(skb, &rt->dst);

	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
3285 
/* Broadcast an RTM_NEWROUTE/RTM_DELROUTE notification for @rt to the
 * RTNLGRP_IPV6_ROUTE multicast group.  On failure the error is recorded
 * on the netlink socket so listeners notice the lost event.
 */
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
		     unsigned int nlm_flags)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	seq = info->nlh ? info->nlh->nlmsg_seq : 0;

	/* May run in atomic context, hence gfp_any(). */
	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
	if (!skb)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
				event, info->portid, seq, 0, 0, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
3316 
3317 static int ip6_route_dev_notify(struct notifier_block *this,
3318 				unsigned long event, void *ptr)
3319 {
3320 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3321 	struct net *net = dev_net(dev);
3322 
3323 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3324 		net->ipv6.ip6_null_entry->dst.dev = dev;
3325 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3326 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3327 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3328 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3329 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3330 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3331 #endif
3332 	}
3333 
3334 	return NOTIFY_OK;
3335 }
3336 
3337 /*
3338  *	/proc
3339  */
3340 
3341 #ifdef CONFIG_PROC_FS
3342 
/* File operations for /proc/net/ipv6_route; the open handler is
 * ipv6_route_open (defined earlier in this file, outside this chunk). */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
3350 
3351 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3352 {
3353 	struct net *net = (struct net *)seq->private;
3354 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3355 		   net->ipv6.rt6_stats->fib_nodes,
3356 		   net->ipv6.rt6_stats->fib_route_nodes,
3357 		   net->ipv6.rt6_stats->fib_rt_alloc,
3358 		   net->ipv6.rt6_stats->fib_rt_entries,
3359 		   net->ipv6.rt6_stats->fib_rt_cache,
3360 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3361 		   net->ipv6.rt6_stats->fib_discarded_routes);
3362 
3363 	return 0;
3364 }
3365 
/* open() handler for /proc/net/rt6_stats: single-record seq_file bound
 * to the file's network namespace via single_open_net(). */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
3370 
/* File operations for /proc/net/rt6_stats (paired with
 * rt6_stats_seq_open/single_release_net). */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
3378 #endif	/* CONFIG_PROC_FS */
3379 
3380 #ifdef CONFIG_SYSCTL
3381 
3382 static
3383 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3384 			      void __user *buffer, size_t *lenp, loff_t *ppos)
3385 {
3386 	struct net *net;
3387 	int delay;
3388 	if (!write)
3389 		return -EINVAL;
3390 
3391 	net = (struct net *)ctl->extra1;
3392 	delay = net->ipv6.sysctl.flush_delay;
3393 	proc_dointvec(ctl, write, buffer, lenp, ppos);
3394 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3395 	return 0;
3396 }
3397 
/* Template for the per-namespace net.ipv6.route sysctl table.  Entry
 * order is significant: ipv6_route_sysctl_init() below rewrites the
 * .data pointers by index (table[0]..table[9]) when cloning this
 * template, so any new entry must be appended consistently in both
 * places.  The .data values here refer to init_net and are only
 * placeholders until the clone rewrites them. */
struct ctl_table ipv6_route_table_template[] = {
	{
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,	/* write-only trigger */
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* Same variable as gc_min_interval, exposed in milliseconds. */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
3471 
/* Clone ipv6_route_table_template for namespace @net and repoint every
 * entry's .data at the namespace-private copy of the tunable.  Returns
 * the kmemdup()ed table (the caller registers it and eventually frees
 * it) or NULL on allocation failure. */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		/* Indices must track the entry order of
		 * ipv6_route_table_template above. */
		table[0].data = &net->ipv6.sysctl.flush_delay;
		table[0].extra1 = net;	/* ipv6_sysctl_rtcache_flush() reads this */
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;

		/* Don't export sysctls to unprivileged users */
		/* NOTE(review): a NULL procname terminates the table at
		 * registration, so this hides every entry, not just
		 * "flush" — presumably intentional; confirm. */
		if (net->user_ns != &init_user_ns)
			table[0].procname = NULL;
	}

	return table;
}
3500 #endif
3501 
/* Per-namespace setup for the IPv6 routing engine: copy the dst_ops
 * template, allocate the namespace's own writable copies of the
 * template routes (null entry, plus prohibit/blackhole entries when
 * CONFIG_IPV6_MULTIPLE_TABLES is set) and install default values for
 * the route GC tunables.  Unwinds fully via the goto ladder on failure
 * and returns -ENOMEM; the labels free in reverse order of allocation.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	/* Each template copy points at itself as path and at this
	 * namespace's dst_ops; metrics come from the shared read-only
	 * template. */
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* Defaults for the tunables exposed via ipv6_route_sysctl_init(). */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
3573 
/* Per-namespace teardown: free the template route copies allocated by
 * ip6_route_net_init() and release the namespace's dst entry counter. */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
3583 
3584 static int __net_init ip6_route_net_init_late(struct net *net)
3585 {
3586 #ifdef CONFIG_PROC_FS
3587 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3588 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3589 #endif
3590 	return 0;
3591 }
3592 
/* Late per-namespace teardown: remove the proc entries created by
 * ip6_route_net_init_late(). */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
3600 
/* Pernet hooks for the core per-namespace routing state (dst ops,
 * template routes, GC tunables). */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
3605 
3606 static int __net_init ipv6_inetpeer_init(struct net *net)
3607 {
3608 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3609 
3610 	if (!bp)
3611 		return -ENOMEM;
3612 	inet_peer_base_init(bp);
3613 	net->ipv6.peers = bp;
3614 	return 0;
3615 }
3616 
3617 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3618 {
3619 	struct inet_peer_base *bp = net->ipv6.peers;
3620 
3621 	net->ipv6.peers = NULL;
3622 	inetpeer_invalidate_tree(bp);
3623 	kfree(bp);
3624 }
3625 
/* Pernet hooks for the per-namespace inetpeer base. */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
3630 
/* Pernet hooks registered last in ip6_route_init(); currently only the
 * /proc entries, created after the rest of the routing state is up. */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
3635 
/* Netdevice notifier wiring loopback registration to the per-netns
 * template routes (see ip6_route_dev_notify() above); default priority. */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
3640 
/* Module/boot-time initialization of the IPv6 routing subsystem:
 * create the rt6_info slab cache, register the pernet subsystems
 * (inetpeer, core routing state, late /proc hooks), wire init_net's
 * template routes to the already-registered loopback device, bring up
 * fib6/xfrm6/fib6-rules, register the RTM_{NEW,DEL,GET}ROUTE rtnetlink
 * handlers and the netdevice notifier, and initialize the per-cpu
 * uncached-route lists.  On any failure the goto ladder at the bottom
 * unwinds everything already set up, in reverse order, and the error
 * code is returned.
 */
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	/* Blackhole dsts share the same slab cache as regular rt6_info. */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

	/* Error unwind: each label undoes the step registered just
	 * before the one that failed, falling through in reverse order. */
out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
3732 
/* Tear down everything set up by ip6_route_init(), in reverse order of
 * initialization; the ordering must not be changed. */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
3745