xref: /openbmc/linux/net/ipv6/route.c (revision fa7f32422ea1ac276b45b96a540ed5981caaa61f)
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/mroute6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <trace/events/fib6.h>

#include <asm/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,
	RT6_NUD_FAIL_PROBE = -2,
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED = 1
};

static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int		ip6_pkt_prohibit(struct sk_buff *skb);
static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex);
#endif

struct uncached_list {
	spinlock_t		lock;
	struct list_head	head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);

static void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	rt->dst.flags |= DST_NOCACHE;
	rt->rt6i_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}

static void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		struct uncached_list *ul = rt->rt6i_uncached_list;

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		spin_unlock_bh(&ul->lock);
	}
}

static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			if (rt_dev == dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}

static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
	return dst_metrics_write_ptr(rt->dst.from);
}

static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (rt->rt6i_flags & RTF_PCPU)
		return rt6_pcpu_cow_metrics(rt);
	else if (rt->rt6i_flags & RTF_CACHE)
		return NULL;
	else
		return dst_cow_metrics_generic(dst, old);
}

static inline const void *choose_neigh_daddr(struct rt6_info *rt,
					     struct sk_buff *skb,
					     const void *daddr)
{
	struct in6_addr *p = &rt->rt6i_gateway;

	if (!ipv6_addr_any(p))
		return (const void *) p;
	else if (skb)
		return &ipv6_hdr(skb)->daddr;
	return daddr;
}

static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
					  struct sk_buff *skb,
					  const void *daddr)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct neighbour *n;

	daddr = choose_neigh_daddr(rt, skb, daddr);
	n = __ipv6_neigh_lookup(dst->dev, daddr);
	if (n)
		return n;
	return neigh_create(&nd_tbl, daddr, dst->dev);
}

static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};

static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst->dev->mtu;
}

static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}

static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}

static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	dst_cow_metrics_generic,
	.neigh_lookup		=	ip6_neigh_lookup,
};

static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};

static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif

static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_siblings);
	INIT_LIST_HEAD(&rt->rt6i_uncached);
}

/* allocate dst with ip6_dst_ops */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
					struct net_device *dev,
					int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					0, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt)
		rt6_info_init(rt);

	return rt;
}

struct rt6_info *ip6_dst_alloc(struct net *net,
			       struct net_device *dev,
			       int flags)
{
	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);

	if (rt) {
		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
		if (rt->rt6i_pcpu) {
			int cpu;

			for_each_possible_cpu(cpu) {
				struct rt6_info **p;

				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
				/* no one shares rt */
				*p = NULL;
			}
		} else {
			dst_destroy((struct dst_entry *)rt);
			return NULL;
		}
	}

	return rt;
}
EXPORT_SYMBOL(ip6_dst_alloc);

static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	dst->from = NULL;
	dst_release(from);
}

static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev) {
		if (idev && idev->dev == dev) {
			struct inet6_dev *loopback_idev =
				in6_dev_get(loopback_dev);
			if (loopback_idev) {
				rt->rt6i_idev = loopback_idev;
				in6_dev_put(idev);
			}
		}
	}
}

static bool __rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES)
		return time_after(jiffies, rt->dst.expires);
	else
		return false;
}

static bool rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES) {
		if (time_after(jiffies, rt->dst.expires))
			return true;
	} else if (rt->dst.from) {
		return rt6_check_expired((struct rt6_info *) rt->dst.from);
	}
	return false;
}

/* Multipath route selection:
 *   Hash based function using packet header and flowlabel.
 * Adapted from fib_info_hashfn()
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	return get_hash_from_flowi6(fl6) % candidate_count;
}

static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
					     struct flowi6 *fl6, int oif,
					     int strict)
{
	struct rt6_info *sibling, *next_sibling;
	int route_chosen;

	route_chosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
	/* Don't change the route if route_chosen == 0
	 * (the sibling list does not include the matched route itself)
	 */
	if (route_chosen)
		list_for_each_entry_safe(sibling, next_sibling,
				&match->rt6i_siblings, rt6i_siblings) {
			route_chosen--;
			if (route_chosen == 0) {
				if (rt6_score_route(sibling, oif, strict) < 0)
					break;
				match = sibling;
				break;
			}
		}
	return match;
}
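
/* Worked example (illustrative, not from the original file): with two
 * siblings plus the matched route itself, rt6_info_hash_nhsfn()
 * computes get_hash_from_flowi6(fl6) % 3.  A flow hashing to 5 gives
 * route_chosen == 2, so the walk above stops at the second sibling;
 * route_chosen == 0 keeps the already-matched route, which is why the
 * sibling list needs no entry for the route itself.
 */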

/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE)
						continue;
					if (local &&
					    local->rt6i_idev->dev->ifindex == oif)
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}

#ifdef CONFIG_IPV6_ROUTER_PREF
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};

static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
	dev_put(work->dev);
	kfree(work);
}

static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
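
/* Note on the deferral above: rt6_probe() runs in the lookup fast path
 * under rcu_read_lock_bh(), so rather than transmitting the neighbour
 * solicitation inline it queues a __rt6_probe_work item and lets
 * rt6_probe_deferred() send the NS from process context.  The
 * dev_hold()/dev_put() pair keeps the device alive while the work
 * item is pending.
 */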

/*
 * Default Router Selection (RFC 2461 6.3.6)
 */
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
	struct net_device *dev = rt->dst.dev;
	if (!oif || dev->ifindex == oif)
		return 2;
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
}

static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}

static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}
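
/* Worked example (illustrative): a route egressing the requested oif
 * scores m = 2 from rt6_check_dev(); with CONFIG_IPV6_ROUTER_PREF the
 * decoded router preference is or-ed in at bit 2 and above, so
 * preference dominates and the interface match only breaks ties
 * between routes of equal preference.  Under RT6_LOOKUP_F_REACHABLE,
 * a negative RT6_NUD_FAIL_* value from rt6_check_neigh() replaces the
 * score altogether.
 */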

static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *dev = rt->dst.dev;

	if (dev && !netif_carrier_ok(dev) &&
	    idev->cnf.ignore_routes_with_linkdown)
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}

static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}

static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
	bool do_rr = false;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
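
/* Round-robin sketch: find_rr_leaf() scans the entries sharing rt0's
 * metric starting at fn->rr_ptr and wraps around via fn->leaf, so
 * equal-cost routes are evaluated in rotating order.  When a probe
 * result asked for round-robin (RT6_NUD_FAIL_DO_RR), rt6_select()
 * advances rr_ptr to the next same-metric entry so the following
 * lookup starts from there.
 */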

static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
{
	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
}

#ifdef CONFIG_IPV6_ROUTE_INFO
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif
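
/* Option layout recap (RFC 4191 route information option):
 * rinfo->length is in units of 8 octets, and only length 3 carries a
 * full 128-bit prefix field.  That is why the code above can use
 * rinfo->prefix verbatim when length == 3 but must build a truncated
 * copy with ipv6_addr_prefix() for the shorter encodings.
 */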

static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = fn->parent;
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}
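
/* fib6_backtrack() climbs from a failed node toward the tree root: at
 * each parent it re-descends into the parent's source-address subtree
 * when one exists (CONFIG_IPV6_SUBTREES) and otherwise keeps climbing,
 * stopping at the first node that actually carries routes (RTN_RTINFO)
 * or giving up at the root (RTN_TL_ROOT).
 */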

static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);

	return rt;
}

struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				    int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);

struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
{
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}
EXPORT_SYMBOL(rt6_lookup);
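
/* Minimal usage sketch for rt6_lookup() (illustrative; "daddr" is a
 * caller-supplied address, not a symbol from this file):
 *
 *	struct rt6_info *rt = rt6_lookup(net, &daddr, NULL, 0, 0);
 *
 *	if (rt) {
 *		... use rt->dst ...
 *		ip6_rt_put(rt);
 *	}
 *
 * A successful lookup returns a held reference (taken via dst_use()
 * in ip6_pol_route_lookup()), so callers must drop it with
 * ip6_rt_put().
 */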

/* ip6_ins_rt is called without holding table->tb6_lock.
   It takes ownership of the new route entry; if the addition fails
   for any reason, the route is freed. In any case, a caller that does
   not hold its own reference must assume it may be destroyed.
 */

static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
			struct mx6_config *mxc)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, mxc);
	write_unlock_bh(&table->tb6_lock);

	return err;
}

int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
	struct mx6_config mxc = { .mx = NULL, };

	return __ip6_ins_rt(rt, &info, &mxc);
}

static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;

	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);

	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}

static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt;

	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
				  rt->dst.dev, rt->dst.flags);

	if (!pcpu_rt)
		return NULL;
	ip6_rt_copy_init(pcpu_rt, rt);
	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
	pcpu_rt->rt6i_flags |= RTF_PCPU;
	return pcpu_rt;
}

/* It should be called with read_lock_bh(&tb6_lock) acquired */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, **p;

	p = this_cpu_ptr(rt->rt6i_pcpu);
	pcpu_rt = *p;

	if (pcpu_rt) {
		dst_hold(&pcpu_rt->dst);
		rt6_dst_from_metrics_check(pcpu_rt);
	}
	return pcpu_rt;
}

static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct fib6_table *table = rt->rt6i_table;
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	read_lock_bh(&table->tb6_lock);
	if (rt->rt6i_pcpu) {
		p = this_cpu_ptr(rt->rt6i_pcpu);
		prev = cmpxchg(p, NULL, pcpu_rt);
		if (prev) {
			/* If someone did it before us, return prev instead */
			dst_destroy(&pcpu_rt->dst);
			pcpu_rt = prev;
		}
	} else {
		/* rt has been removed from the fib6 tree
		 * before we had a chance to acquire the read_lock.
		 * In this case, don't bother to create a pcpu rt
		 * since rt is going away anyway.  The next
		 * dst_check() will trigger a re-lookup.
		 */
		dst_destroy(&pcpu_rt->dst);
		pcpu_rt = rt;
	}
	dst_hold(&pcpu_rt->dst);
	rt6_dst_from_metrics_check(pcpu_rt);
	read_unlock_bh(&table->tb6_lock);
	return pcpu_rt;
}
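
/* On the cmpxchg() above: the per-cpu slot can be raced for by process
 * and softirq context on the same CPU, so it is claimed atomically;
 * the loser destroys its freshly built clone and adopts the winner's
 * (the "prev" branch).  The rt->rt6i_pcpu test handles a route that
 * was unlinked from the fib6 tree before the read lock was
 * re-acquired.
 */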

struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
			       int oif, struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}

	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);

		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look up the route here.
		 */

		struct rt6_info *uncached_rt;

		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt)
			rt6_uncached_list_add(uncached_rt);
		else
			uncached_rt = net->ipv6.ip6_null_entry;

		dst_hold(&uncached_rt->dst);

		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
		return pcpu_rt;

	}
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
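
/* Summary of the three exit paths above: (1) the null entry and
 * RTF_CACHE entries are returned straight from the tree with a
 * reference taken; (2) FLOWI_FLAG_KNOWN_NH without a gateway gets an
 * uncached RTF_CACHE clone that lives on rt6_uncached_list rather than
 * in the tree; (3) everything else gets a per-cpu copy, created on
 * first use by rt6_make_pcpu_route().
 */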

static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}

struct dst_entry *ip6_route_input_lookup(struct net *net,
					 struct net_device *dev,
					 struct flowi6 *fl6, int flags)
{
	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}
EXPORT_SYMBOL_GPL(ip6_route_input_lookup);

void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip_tunnel_info *tun_info;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
	skb_dst_drop(skb);
	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}

static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}

struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
					 struct flowi6 *fl6, int flags)
{
	bool any_src;

	if (rt6_need_strict(&fl6->daddr)) {
		struct dst_entry *dst;

		dst = l3mdev_link_scope_lookup(net, fl6);
		if (dst)
			return dst;
	}

	fl6->flowi6_iif = LOOPBACK_IFINDEX;

	any_src = ipv6_addr_any(&fl6->saddr);
	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
	    (fl6->flowi6_oif && any_src))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!any_src)
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL_GPL(ip6_route_output_flags);

struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		rt6_info_init(rt);

		new = &rt->dst;
		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_out;

		dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		dst_free(new);
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}

/*
 *	Destination cache support functions
 */

static void rt6_dst_from_metrics_check(struct rt6_info *rt)
{
	if (rt->dst.from &&
	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
}

static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
{
	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
		return NULL;

	if (rt6_check_expired(rt))
		return NULL;

	return &rt->dst;
}

static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
{
	if (!__rt6_check_expired(rt) &&
	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
		return &rt->dst;
	else
		return NULL;
}

static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */

	rt6_dst_from_metrics_check(rt);

	if (rt->rt6i_flags & RTF_PCPU ||
	    (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
		return rt6_dst_from_check(rt, cookie);
	else
		return rt6_check(rt, cookie);
}
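
/* The cookie validated here is the fn_sernum of the fib6 node the
 * route hangs off (callers stash it in places such as
 * inet6_sk(sk)->dst_cookie).  Tree changes along the node's path bump
 * the sernum, so a cached dst whose cookie no longer matches fails
 * ->check() and forces a fresh route lookup; ip6_link_failure()
 * exploits this by writing -1 into fn_sernum.
 */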

static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}

static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			dst_hold(&rt->dst);
			ip6_del_rt(rt);
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
			rt->rt6i_node->fn_sernum = -1;
		}
	}
}

static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
{
	struct net *net = dev_net(rt->dst.dev);

	rt->rt6i_flags |= RTF_MODIFIED;
	rt->rt6i_pmtu = mtu;
	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
}

static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
{
	return !(rt->rt6i_flags & RTF_CACHE) &&
		(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
}

static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	if (rt6->rt6i_flags & RTF_LOCAL)
		return;

	dst_confirm(dst);
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	if (mtu >= dst_mtu(dst))
		return;

	if (!rt6_cache_allowed_for_pmtu(rt6)) {
		rt6_do_update_pmtu(rt6, mtu);
	} else {
		const struct in6_addr *daddr, *saddr;
		struct rt6_info *nrt6;

		if (iph) {
			daddr = &iph->daddr;
			saddr = &iph->saddr;
		} else if (sk) {
			daddr = &sk->sk_v6_daddr;
			saddr = &inet6_sk(sk)->saddr;
		} else {
			return;
		}
		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);

			/* ip6_ins_rt(nrt6) will bump the
			 * rt6->rt6i_node->fn_sernum
			 * which will fail the next rt6_check() and
			 * invalidate the sk->sk_dst_cache.
			 */
			ip6_ins_rt(nrt6);
		}
	}
}

static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu)
{
	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
}

void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);
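
/* Caller sketch (illustrative): an ICMPv6 "packet too big" handler
 * whose skb->data points at the embedded original header could apply
 * the advertised MTU with
 *
 *	ip6_update_pmtu(skb, net, icmp6_hdr(skb)->icmp6_mtu, 0, 0);
 *
 * mtu arrives in network byte order (hence the ntohl() above) and a
 * zero mark falls back to IP6_REPLY_MARK(net, skb->mark).
 */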

void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	struct dst_entry *dst;

	ip6_update_pmtu(skb, sock_net(sk), mtu,
			sk->sk_bound_dev_if, sk->sk_mark);

	dst = __sk_dst_get(sk);
	if (!dst || !dst->obsolete ||
	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
		return;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
		ip6_datagram_dst_update(sk, false);
	bh_unlock_sock(sk);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);

/* Handle redirects */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};

static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;
1457 
1458 	/* Get the "current" route for this destination and
1459 	 * check if the redirect has come from approriate router.
1460 	 *
1461 	 * RFC 4861 specifies that redirects should only be
1462 	 * accepted if they come from the nexthop to the target.
1463 	 * Due to the way the routes are chosen, this notion
1464 	 * is a bit fuzzy and one might need to check all possible
1465 	 * routes.
1466 	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt6_check_expired(rt))
			continue;
		if (rt->dst.error)
			break;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	else if (rt->dst.error) {
		rt = net->ipv6.ip6_null_entry;
		goto out;
	}

	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
	return rt;
};

static struct dst_entry *ip6_route_redirect(struct net *net,
					const struct flowi6 *fl6,
					const struct in6_addr *gateway)
{
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip6rd_flowi rdfl;

	rdfl.fl6 = *fl6;
	rdfl.gateway = *gateway;

	return fib6_rule_lookup(net, &rdfl.fl6,
				flags, __ip6_route_redirect);
}

void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);

	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);

void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
			    u32 mark)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = msg->dest;
	fl6.saddr = iph->daddr;

	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}

void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);

static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	unsigned int mtu = dst_mtu(dst);
	struct net *net = dev_net(dev);

	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;

	/*
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
	 * IPV6_MAXPLEN is also valid and means: "any MSS,
	 * rely only on pmtu discovery"
	 */
	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
		mtu = IPV6_MAXPLEN;
	return mtu;
}
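
/* Worked example: on a 1500-byte MTU link the advertised MSS is
 * 1500 - sizeof(struct ipv6hdr) - sizeof(struct tcphdr)
 * = 1500 - 40 - 20 = 1440 bytes, subject to the ip6_rt_min_advmss
 * floor and the IPV6_MAXPLEN cap applied above.
 */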

static unsigned int ip6_mtu(const struct dst_entry *dst)
{
	const struct rt6_info *rt = (const struct rt6_info *)dst;
	unsigned int mtu = rt->rt6i_pmtu;
	struct inet6_dev *idev;

	if (mtu)
		goto out;

	mtu = dst_metric_raw(dst, RTAX_MTU);
	if (mtu)
		goto out;

	mtu = IPV6_MIN_MTU;

	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

out:
	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);

	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}

static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);

struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output  = ip6_output;
	atomic_set(&rt->dst.__refcnt, 1);
	rt->rt6i_gateway  = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev     = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	fib6_force_start_gc(net);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}

int icmp6_dst_gc(void)
{
	struct dst_entry *dst, **pprev;
	int more = 0;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;

	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
			++more;
		}
	}

	spin_unlock_bh(&icmp6_dst_lock);

	return more;
}

static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
			    void *arg)
{
	struct dst_entry *dst, **pprev;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;
	while ((dst = *pprev) != NULL) {
		struct rt6_info *rt = (struct rt6_info *) dst;
		if (func(rt, arg)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
		}
	}
	spin_unlock_bh(&icmp6_dst_lock);
}

static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout >> 1;
out:
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire >> rt_elasticity;
	return entries > rt_max_size;
}
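
/* Behaviour sketch for ip6_rt_gc_expire: every forced GC pass
 * increments the pressure value, a pass that brings the entry count
 * back under gc_thresh resets it to half of ip6_rt_gc_timeout, and
 * each call decays it by (expire >> rt_elasticity) - with an
 * elasticity of 9 (the usual default), roughly 1/512 of its value per
 * invocation.
 */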

static int ip6_convert_metrics(struct mx6_config *mxc,
			       const struct fib6_config *cfg)
{
	bool ecn_ca = false;
	struct nlattr *nla;
	int remaining;
	u32 *mp;

	if (!cfg->fc_mx)
		return 0;

	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
	if (unlikely(!mp))
		return -ENOMEM;

	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
		int type = nla_type(nla);
		u32 val;

		if (!type)
			continue;
		if (unlikely(type > RTAX_MAX))
			goto err;

		if (type == RTAX_CC_ALGO) {
			char tmp[TCP_CA_NAME_MAX];

			nla_strlcpy(tmp, nla, sizeof(tmp));
			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
			if (val == TCP_CA_UNSPEC)
				goto err;
		} else {
			val = nla_get_u32(nla);
		}
		if (type == RTAX_HOPLIMIT && val > 255)
			val = 255;
		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
			goto err;

		mp[type - 1] = val;
		__set_bit(type - 1, mxc->mx_valid);
	}

	if (ecn_ca) {
		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
	}

	mxc->mx = mp;
	return 0;
 err:
	kfree(mp);
	return -EINVAL;
}
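
/* Example (illustrative): an RTA_METRICS blob carrying RTAX_MTU = 1400
 * and RTAX_HOPLIMIT = 300 leaves this function as
 * mp[RTAX_MTU - 1] = 1400 and mp[RTAX_HOPLIMIT - 1] = 255 (hop limits
 * are clamped to 255), with the corresponding bits set in
 * mxc->mx_valid.
 */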

static struct rt6_info *ip6_nh_lookup_table(struct net *net,
					    struct fib6_config *cfg,
					    const struct in6_addr *gw_addr)
{
	struct flowi6 fl6 = {
		.flowi6_oif = cfg->fc_ifindex,
		.daddr = *gw_addr,
		.saddr = cfg->fc_prefsrc,
	};
	struct fib6_table *table;
	struct rt6_info *rt;
	int flags = RT6_LOOKUP_F_IFACE;

	table = fib6_get_table(net, cfg->fc_table);
	if (!table)
		return NULL;

	if (!ipv6_addr_any(&cfg->fc_prefsrc))
		flags |= RT6_LOOKUP_F_HAS_SADDR;

	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);

	/* if table lookup failed, fall back to full lookup */
	if (rt == net->ipv6.ip6_null_entry) {
		ip6_rt_put(rt);
		rt = NULL;
	}

	return rt;
}

static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;
	int err = -EINVAL;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		goto out;
#ifndef CONFIG_IPV6_SUBTREES
	if (cfg->fc_src_len)
		goto out;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_out;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0))
			goto out;

		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt = NULL;

			/* IPv6 strictly forbids using non-link-local
			   addresses as nexthop addresses; otherwise a
			   router would not be able to send redirects.
			   That is generally good, but in some (rare!)
			   circumstances (SIT, PtP, NBMA NOARP links) it is
			   handy to allow some exceptions. --ANK
			 */
1992 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1993 				goto out;
1994 
1995 			if (cfg->fc_table) {
1996 				grt = ip6_nh_lookup_table(net, cfg, gw_addr);
1997 
1998 				if (grt) {
1999 					if (grt->rt6i_flags & RTF_GATEWAY ||
2000 					    (dev && dev != grt->dst.dev)) {
2001 						ip6_rt_put(grt);
2002 						grt = NULL;
2003 					}
2004 				}
2005 			}
2006 
2007 			if (!grt)
2008 				grt = rt6_lookup(net, gw_addr, NULL,
2009 						 cfg->fc_ifindex, 1);
2010 
2011 			err = -EHOSTUNREACH;
2012 			if (!grt)
2013 				goto out;
2014 			if (dev) {
2015 				if (dev != grt->dst.dev) {
2016 					ip6_rt_put(grt);
2017 					goto out;
2018 				}
2019 			} else {
2020 				dev = grt->dst.dev;
2021 				idev = grt->rt6i_idev;
2022 				dev_hold(dev);
2023 				in6_dev_hold(grt->rt6i_idev);
2024 			}
2025 			if (!(grt->rt6i_flags & RTF_GATEWAY))
2026 				err = 0;
2027 			ip6_rt_put(grt);
2028 
2029 			if (err)
2030 				goto out;
2031 		}
2032 		err = -EINVAL;
2033 		if (!dev || (dev->flags & IFF_LOOPBACK))
2034 			goto out;
2035 	}
2036 
2037 	err = -ENODEV;
2038 	if (!dev)
2039 		goto out;
2040 
2041 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2042 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
2043 			err = -EINVAL;
2044 			goto out;
2045 		}
2046 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
2047 		rt->rt6i_prefsrc.plen = 128;
2048 	} else
2049 		rt->rt6i_prefsrc.plen = 0;
2050 
2051 	rt->rt6i_flags = cfg->fc_flags;
2052 
2053 install_route:
2054 	rt->dst.dev = dev;
2055 	rt->rt6i_idev = idev;
2056 	rt->rt6i_table = table;
2057 
2058 	cfg->fc_nlinfo.nl_net = dev_net(dev);
2059 
2060 	return rt;
2061 out:
2062 	if (dev)
2063 		dev_put(dev);
2064 	if (idev)
2065 		in6_dev_put(idev);
2066 	if (rt)
2067 		dst_free(&rt->dst);
2068 
2069 	return ERR_PTR(err);
2070 }
2071 
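/* Create the route described by @cfg and insert it into the FIB.  Any
 * netlink metrics (cfg->fc_mx) are converted first; if conversion or
 * creation fails, the not-yet-inserted rt6_info is freed here.
 */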
2072 int ip6_route_add(struct fib6_config *cfg)
2073 {
2074 	struct mx6_config mxc = { .mx = NULL, };
2075 	struct rt6_info *rt;
2076 	int err;
2077 
2078 	rt = ip6_route_info_create(cfg);
2079 	if (IS_ERR(rt)) {
2080 		err = PTR_ERR(rt);
2081 		rt = NULL;
2082 		goto out;
2083 	}
2084 
2085 	err = ip6_convert_metrics(&mxc, cfg);
2086 	if (err)
2087 		goto out;
2088 
2089 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2090 
2091 	kfree(mxc.mx);
2092 
2093 	return err;
2094 out:
2095 	if (rt)
2096 		dst_free(&rt->dst);
2097 
2098 	return err;
2099 }
2100 
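/* Unlink @rt from its FIB table under tb6_lock.  The null entry and
 * uncached (DST_NOCACHE) routes are not in the tree, so deleting them
 * yields -ENOENT.  Consumes the caller's reference to @rt.
 */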
2101 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2102 {
2103 	int err;
2104 	struct fib6_table *table;
2105 	struct net *net = dev_net(rt->dst.dev);
2106 
2107 	if (rt == net->ipv6.ip6_null_entry ||
2108 	    rt->dst.flags & DST_NOCACHE) {
2109 		err = -ENOENT;
2110 		goto out;
2111 	}
2112 
2113 	table = rt->rt6i_table;
2114 	write_lock_bh(&table->tb6_lock);
2115 	err = fib6_del(rt, info);
2116 	write_unlock_bh(&table->tb6_lock);
2117 
2118 out:
2119 	ip6_rt_put(rt);
2120 	return err;
2121 }
2122 
2123 int ip6_del_rt(struct rt6_info *rt)
2124 {
2125 	struct nl_info info = {
2126 		.nl_net = dev_net(rt->dst.dev),
2127 	};
2128 	return __ip6_del_rt(rt, &info);
2129 }
2130 
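/* Delete the first route matching @cfg: the prefix must match, and so
 * must the output interface, gateway and metric whenever they are
 * specified.  Cached clones (RTF_CACHE) are skipped unless the caller
 * set RTF_CACHE in cfg->fc_flags.
 */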
2131 static int ip6_route_del(struct fib6_config *cfg)
2132 {
2133 	struct fib6_table *table;
2134 	struct fib6_node *fn;
2135 	struct rt6_info *rt;
2136 	int err = -ESRCH;
2137 
2138 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2139 	if (!table)
2140 		return err;
2141 
2142 	read_lock_bh(&table->tb6_lock);
2143 
2144 	fn = fib6_locate(&table->tb6_root,
2145 			 &cfg->fc_dst, cfg->fc_dst_len,
2146 			 &cfg->fc_src, cfg->fc_src_len);
2147 
2148 	if (fn) {
2149 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2150 			if ((rt->rt6i_flags & RTF_CACHE) &&
2151 			    !(cfg->fc_flags & RTF_CACHE))
2152 				continue;
2153 			if (cfg->fc_ifindex &&
2154 			    (!rt->dst.dev ||
2155 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
2156 				continue;
2157 			if (cfg->fc_flags & RTF_GATEWAY &&
2158 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2159 				continue;
2160 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2161 				continue;
2162 			dst_hold(&rt->dst);
2163 			read_unlock_bh(&table->tb6_lock);
2164 
2165 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2166 		}
2167 	}
2168 	read_unlock_bh(&table->tb6_lock);
2169 
2170 	return err;
2171 }
2172 
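/* Process an ICMPv6 Redirect (RFC 4861): validate the message and its
 * ND options, confirm and update the neighbour entry for the new first
 * hop, then install an RTF_CACHE clone of the route pointing at the
 * new gateway (or directly on-link when target == destination, per the
 * on_link flag below).
 */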
2173 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2174 {
2175 	struct netevent_redirect netevent;
2176 	struct rt6_info *rt, *nrt = NULL;
2177 	struct ndisc_options ndopts;
2178 	struct inet6_dev *in6_dev;
2179 	struct neighbour *neigh;
2180 	struct rd_msg *msg;
2181 	int optlen, on_link;
2182 	u8 *lladdr;
2183 
2184 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2185 	optlen -= sizeof(*msg);
2186 
2187 	if (optlen < 0) {
2188 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2189 		return;
2190 	}
2191 
2192 	msg = (struct rd_msg *)icmp6_hdr(skb);
2193 
2194 	if (ipv6_addr_is_multicast(&msg->dest)) {
2195 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2196 		return;
2197 	}
2198 
2199 	on_link = 0;
2200 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2201 		on_link = 1;
2202 	} else if (ipv6_addr_type(&msg->target) !=
2203 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2204 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2205 		return;
2206 	}
2207 
2208 	in6_dev = __in6_dev_get(skb->dev);
2209 	if (!in6_dev)
2210 		return;
2211 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2212 		return;
2213 
2214 	/* RFC2461 8.1:
2215 	 *	The IP source address of the Redirect MUST be the same as the current
2216 	 *	first-hop router for the specified ICMP Destination Address.
2217 	 */
2218 
2219 	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
2220 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2221 		return;
2222 	}
2223 
2224 	lladdr = NULL;
2225 	if (ndopts.nd_opts_tgt_lladdr) {
2226 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2227 					     skb->dev);
2228 		if (!lladdr) {
2229 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2230 			return;
2231 		}
2232 	}
2233 
2234 	rt = (struct rt6_info *) dst;
2235 	if (rt->rt6i_flags & RTF_REJECT) {
2236 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2237 		return;
2238 	}
2239 
2240 	/* Redirect received -> path was valid.
2241 	 * Look, redirects are sent only in response to data packets,
2242 	 * so this nexthop apparently is reachable. --ANK
2243 	 */
2244 	dst_confirm(&rt->dst);
2245 
2246 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2247 	if (!neigh)
2248 		return;
2249 
2250 	/*
2251 	 *	We have finally decided to accept it.
2252 	 */
2253 
2254 	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
2255 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
2256 		     NEIGH_UPDATE_F_OVERRIDE|
2257 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2258 				     NEIGH_UPDATE_F_ISROUTER)),
2259 		     NDISC_REDIRECT, &ndopts);
2260 
2261 	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2262 	if (!nrt)
2263 		goto out;
2264 
2265 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2266 	if (on_link)
2267 		nrt->rt6i_flags &= ~RTF_GATEWAY;
2268 
2269 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2270 
2271 	if (ip6_ins_rt(nrt))
2272 		goto out;
2273 
2274 	netevent.old = &rt->dst;
2275 	netevent.new = &nrt->dst;
2276 	netevent.daddr = &msg->dest;
2277 	netevent.neigh = neigh;
2278 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2279 
2280 	if (rt->rt6i_flags & RTF_CACHE) {
2281 		rt = (struct rt6_info *) dst_clone(&rt->dst);
2282 		ip6_del_rt(rt);
2283 	}
2284 
2285 out:
2286 	neigh_release(neigh);
2287 }
2288 
2289 /*
2290  *	Misc support functions
2291  */
2292 
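/* Tie a clone to its parent route: @rt shares @from's metrics
 * read-only and pins the parent through dst.from.
 */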
2293 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2294 {
2295 	BUG_ON(from->dst.from);
2296 
2297 	rt->rt6i_flags &= ~RTF_EXPIRES;
2298 	dst_hold(&from->dst);
2299 	rt->dst.from = &from->dst;
2300 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2301 }
2302 
2303 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2304 {
2305 	rt->dst.input = ort->dst.input;
2306 	rt->dst.output = ort->dst.output;
2307 	rt->rt6i_dst = ort->rt6i_dst;
2308 	rt->dst.error = ort->dst.error;
2309 	rt->rt6i_idev = ort->rt6i_idev;
2310 	if (rt->rt6i_idev)
2311 		in6_dev_hold(rt->rt6i_idev);
2312 	rt->dst.lastuse = jiffies;
2313 	rt->rt6i_gateway = ort->rt6i_gateway;
2314 	rt->rt6i_flags = ort->rt6i_flags;
2315 	rt6_set_from(rt, ort);
2316 	rt->rt6i_metric = ort->rt6i_metric;
2317 #ifdef CONFIG_IPV6_SUBTREES
2318 	rt->rt6i_src = ort->rt6i_src;
2319 #endif
2320 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2321 	rt->rt6i_table = ort->rt6i_table;
2322 	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2323 }
2324 
2325 #ifdef CONFIG_IPV6_ROUTE_INFO
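/* Find a route learned from an RA Route Information option in
 * RT6_TABLE_INFO, matched by prefix, gateway and interface.  Returns
 * the route with a reference held, or NULL.
 */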
2326 static struct rt6_info *rt6_get_route_info(struct net *net,
2327 					   const struct in6_addr *prefix, int prefixlen,
2328 					   const struct in6_addr *gwaddr, int ifindex)
2329 {
2330 	struct fib6_node *fn;
2331 	struct rt6_info *rt = NULL;
2332 	struct fib6_table *table;
2333 
2334 	table = fib6_get_table(net, RT6_TABLE_INFO);
2335 	if (!table)
2336 		return NULL;
2337 
2338 	read_lock_bh(&table->tb6_lock);
2339 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2340 	if (!fn)
2341 		goto out;
2342 
2343 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2344 		if (rt->dst.dev->ifindex != ifindex)
2345 			continue;
2346 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2347 			continue;
2348 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2349 			continue;
2350 		dst_hold(&rt->dst);
2351 		break;
2352 	}
2353 out:
2354 	read_unlock_bh(&table->tb6_lock);
2355 	return rt;
2356 }
2357 
2358 static struct rt6_info *rt6_add_route_info(struct net *net,
2359 					   const struct in6_addr *prefix, int prefixlen,
2360 					   const struct in6_addr *gwaddr, int ifindex,
2361 					   unsigned int pref)
2362 {
2363 	struct fib6_config cfg = {
2364 		.fc_metric	= IP6_RT_PRIO_USER,
2365 		.fc_ifindex	= ifindex,
2366 		.fc_dst_len	= prefixlen,
2367 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2368 				  RTF_UP | RTF_PREF(pref),
2369 		.fc_nlinfo.portid = 0,
2370 		.fc_nlinfo.nlh = NULL,
2371 		.fc_nlinfo.nl_net = net,
2372 	};
2373 
2374 	cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
2375 	cfg.fc_dst = *prefix;
2376 	cfg.fc_gateway = *gwaddr;
2377 
2378 	/* We should treat it as a default route if prefix length is 0. */
2379 	if (!prefixlen)
2380 		cfg.fc_flags |= RTF_DEFAULT;
2381 
2382 	ip6_route_add(&cfg);
2383 
2384 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2385 }
2386 #endif
2387 
2388 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2389 {
2390 	struct rt6_info *rt;
2391 	struct fib6_table *table;
2392 
2393 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2394 	if (!table)
2395 		return NULL;
2396 
2397 	read_lock_bh(&table->tb6_lock);
2398 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2399 		if (dev == rt->dst.dev &&
2400 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2401 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2402 			break;
2403 	}
2404 	if (rt)
2405 		dst_hold(&rt->dst);
2406 	read_unlock_bh(&table->tb6_lock);
2407 	return rt;
2408 }
2409 
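/* Install a default router learned from a Router Advertisement.  The
 * route is created with RTF_EXPIRES (its lifetime is managed by ndisc)
 * and carries the router preference encoded via RTF_PREF().
 */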
2410 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2411 				     struct net_device *dev,
2412 				     unsigned int pref)
2413 {
2414 	struct fib6_config cfg = {
2415 		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2416 		.fc_metric	= IP6_RT_PRIO_USER,
2417 		.fc_ifindex	= dev->ifindex,
2418 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2419 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2420 		.fc_nlinfo.portid = 0,
2421 		.fc_nlinfo.nlh = NULL,
2422 		.fc_nlinfo.nl_net = dev_net(dev),
2423 	};
2424 
2425 	cfg.fc_gateway = *gwaddr;
2426 
2427 	ip6_route_add(&cfg);
2428 
2429 	return rt6_get_dflt_router(gwaddr, dev);
2430 }
2431 
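/* Drop all RA-learned default routers, except on interfaces with
 * accept_ra == 2 (RAs accepted even while forwarding).
 */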
2432 void rt6_purge_dflt_routers(struct net *net)
2433 {
2434 	struct rt6_info *rt;
2435 	struct fib6_table *table;
2436 
2437 	/* NOTE: Keep consistent with rt6_get_dflt_router */
2438 	table = fib6_get_table(net, RT6_TABLE_DFLT);
2439 	if (!table)
2440 		return;
2441 
2442 restart:
2443 	read_lock_bh(&table->tb6_lock);
2444 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2445 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2446 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2447 			dst_hold(&rt->dst);
2448 			read_unlock_bh(&table->tb6_lock);
2449 			ip6_del_rt(rt);
2450 			goto restart;
2451 		}
2452 	}
2453 	read_unlock_bh(&table->tb6_lock);
2454 }
2455 
2456 static void rtmsg_to_fib6_config(struct net *net,
2457 				 struct in6_rtmsg *rtmsg,
2458 				 struct fib6_config *cfg)
2459 {
2460 	memset(cfg, 0, sizeof(*cfg));
2461 
2462 	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2463 			 : RT6_TABLE_MAIN;
2464 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2465 	cfg->fc_metric = rtmsg->rtmsg_metric;
2466 	cfg->fc_expires = rtmsg->rtmsg_info;
2467 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2468 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2469 	cfg->fc_flags = rtmsg->rtmsg_flags;
2470 
2471 	cfg->fc_nlinfo.nl_net = net;
2472 
2473 	cfg->fc_dst = rtmsg->rtmsg_dst;
2474 	cfg->fc_src = rtmsg->rtmsg_src;
2475 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2476 }
2477 
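/* Legacy SIOCADDRT/SIOCDELRT entry point: translate the userspace
 * in6_rtmsg into a fib6_config and add or delete the route under
 * rtnl_lock.
 */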
2478 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2479 {
2480 	struct fib6_config cfg;
2481 	struct in6_rtmsg rtmsg;
2482 	int err;
2483 
2484 	switch (cmd) {
2485 	case SIOCADDRT:		/* Add a route */
2486 	case SIOCDELRT:		/* Delete a route */
2487 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2488 			return -EPERM;
2489 		err = copy_from_user(&rtmsg, arg,
2490 				     sizeof(struct in6_rtmsg));
2491 		if (err)
2492 			return -EFAULT;
2493 
2494 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2495 
2496 		rtnl_lock();
2497 		switch (cmd) {
2498 		case SIOCADDRT:
2499 			err = ip6_route_add(&cfg);
2500 			break;
2501 		case SIOCDELRT:
2502 			err = ip6_route_del(&cfg);
2503 			break;
2504 		default:
2505 			err = -EINVAL;
2506 		}
2507 		rtnl_unlock();
2508 
2509 		return err;
2510 	}
2511 
2512 	return -EINVAL;
2513 }
2514 
2515 /*
2516  *	Drop the packet on the floor
2517  */
2518 
2519 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2520 {
2521 	int type;
2522 	struct dst_entry *dst = skb_dst(skb);
2523 	switch (ipstats_mib_noroutes) {
2524 	case IPSTATS_MIB_INNOROUTES:
2525 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2526 		if (type == IPV6_ADDR_ANY) {
2527 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2528 				      IPSTATS_MIB_INADDRERRORS);
2529 			break;
2530 		}
2531 		/* FALLTHROUGH */
2532 	case IPSTATS_MIB_OUTNOROUTES:
2533 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2534 			      ipstats_mib_noroutes);
2535 		break;
2536 	}
2537 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2538 	kfree_skb(skb);
2539 	return 0;
2540 }
2541 
2542 static int ip6_pkt_discard(struct sk_buff *skb)
2543 {
2544 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2545 }
2546 
2547 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2548 {
2549 	skb->dev = skb_dst(skb)->dev;
2550 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2551 }
2552 
2553 static int ip6_pkt_prohibit(struct sk_buff *skb)
2554 {
2555 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2556 }
2557 
2558 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2559 {
2560 	skb->dev = skb_dst(skb)->dev;
2561 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2562 }
2563 
2564 /*
2565  *	Allocate a dst for local (unicast / anycast) address.
2566  */
2567 
2568 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2569 				    const struct in6_addr *addr,
2570 				    bool anycast)
2571 {
2572 	u32 tb_id;
2573 	struct net *net = dev_net(idev->dev);
2574 	struct net_device *dev = net->loopback_dev;
2575 	struct rt6_info *rt;
2576 
2577 	/* use the L3 master device as loopback for host routes if the
2578 	 * device is enslaved and the address is not link-local or multicast
2579 	 */
2580 	if (!rt6_need_strict(addr))
2581 		dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
2582 
2583 	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
2584 	if (!rt)
2585 		return ERR_PTR(-ENOMEM);
2586 
2587 	in6_dev_hold(idev);
2588 
2589 	rt->dst.flags |= DST_HOST;
2590 	rt->dst.input = ip6_input;
2591 	rt->dst.output = ip6_output;
2592 	rt->rt6i_idev = idev;
2593 
2594 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2595 	if (anycast)
2596 		rt->rt6i_flags |= RTF_ANYCAST;
2597 	else
2598 		rt->rt6i_flags |= RTF_LOCAL;
2599 
2600 	rt->rt6i_gateway  = *addr;
2601 	rt->rt6i_dst.addr = *addr;
2602 	rt->rt6i_dst.plen = 128;
2603 	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2604 	rt->rt6i_table = fib6_get_table(net, tb_id);
2605 	rt->dst.flags |= DST_NOCACHE;
2606 
2607 	atomic_set(&rt->dst.__refcnt, 1);
2608 
2609 	return rt;
2610 }
2611 
2612 /* remove a deleted IP address from prefsrc entries */
2613 struct arg_dev_net_ip {
2614 	struct net_device *dev;
2615 	struct net *net;
2616 	struct in6_addr *addr;
2617 };
2618 
2619 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2620 {
2621 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2622 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2623 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2624 
2625 	if (((void *)rt->dst.dev == dev || !dev) &&
2626 	    rt != net->ipv6.ip6_null_entry &&
2627 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2628 		/* remove prefsrc entry */
2629 		rt->rt6i_prefsrc.plen = 0;
2630 	}
2631 	return 0;
2632 }
2633 
2634 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2635 {
2636 	struct net *net = dev_net(ifp->idev->dev);
2637 	struct arg_dev_net_ip adni = {
2638 		.dev = ifp->idev->dev,
2639 		.net = net,
2640 		.addr = &ifp->addr,
2641 	};
2642 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2643 }
2644 
2645 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2646 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2647 
2648 /* Remove routers and update dst entries when a gateway turns into a host. */
2649 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2650 {
2651 	struct in6_addr *gateway = (struct in6_addr *)arg;
2652 
2653 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2654 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2655 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2656 		return -1;
2657 	}
2658 	return 0;
2659 }
2660 
2661 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2662 {
2663 	fib6_clean_all(net, fib6_clean_tohost, gateway);
2664 }
2665 
2666 struct arg_dev_net {
2667 	struct net_device *dev;
2668 	struct net *net;
2669 };
2670 
2671 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2672 {
2673 	const struct arg_dev_net *adn = arg;
2674 	const struct net_device *dev = adn->dev;
2675 
2676 	if ((rt->dst.dev == dev || !dev) &&
2677 	    rt != adn->net->ipv6.ip6_null_entry)
2678 		return -1;
2679 
2680 	return 0;
2681 }
2682 
2683 void rt6_ifdown(struct net *net, struct net_device *dev)
2684 {
2685 	struct arg_dev_net adn = {
2686 		.dev = dev,
2687 		.net = net,
2688 	};
2689 
2690 	fib6_clean_all(net, fib6_ifdown, &adn);
2691 	icmp6_clean_all(fib6_ifdown, &adn);
2692 	if (dev)
2693 		rt6_uncached_list_flush_dev(net, dev);
2694 }
2695 
2696 struct rt6_mtu_change_arg {
2697 	struct net_device *dev;
2698 	unsigned int mtu;
2699 };
2700 
2701 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2702 {
2703 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2704 	struct inet6_dev *idev;
2705 
2706 	/* In IPv6, PMTU discovery is not optional,
2707 	   so the RTAX_MTU lock cannot disable it.
2708 	   We still use this lock to block changes
2709 	   caused by addrconf/ndisc.
2710 	*/
2711 
2712 	idev = __in6_dev_get(arg->dev);
2713 	if (!idev)
2714 		return 0;
2715 
2716 	/* For an administrative MTU increase, there is no way to discover
2717 	   an IPv6 PMTU increase, so the PMTU increase should be updated here.
2718 	   Since RFC 1981 doesn't cover administrative MTU increases,
2719 	   updating on a PMTU increase is a MUST. (e.g. jumbo frames)
2720 	 */
2721 	/*
2722 	   If the new MTU is less than the route PMTU, this new MTU will be
2723 	   the lowest MTU in the path; update the route PMTU to reflect the
2724 	   decrease. If the new MTU is greater than the route PMTU, and the
2725 	   old MTU was the lowest MTU in the path, update the route PMTU
2726 	   to reflect the increase. In this case, if another node along the
2727 	   path has the new lowest MTU, a Packet Too Big message will
2728 	   trigger PMTU discovery again.
2729 	 */
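	/* Example: the device MTU is raised from 1500 to 9000.  If this
	 * route's MTU was 1500 because this link was the path bottleneck
	 * (dst_mtu(&rt->dst) == idev->cnf.mtu6), it is raised to 9000, and
	 * a smaller link further along the path will shrink it again via
	 * Packet Too Big.  A decrease, say to 1280, is always applied,
	 * since this link is now the path minimum.
	 */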
2730 	if (rt->dst.dev == arg->dev &&
2731 	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2732 		if (rt->rt6i_flags & RTF_CACHE) {
2733 			/* For RTF_CACHE with rt6i_pmtu == 0
2734 			 * (i.e. a redirected route),
2735 			 * the metrics of its rt->dst.from have already
2736 			 * been updated.
2737 			 */
2738 			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2739 				rt->rt6i_pmtu = arg->mtu;
2740 		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
2741 			   (dst_mtu(&rt->dst) < arg->mtu &&
2742 			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2743 			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2744 		}
2745 	}
2746 	return 0;
2747 }
2748 
2749 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2750 {
2751 	struct rt6_mtu_change_arg arg = {
2752 		.dev = dev,
2753 		.mtu = mtu,
2754 	};
2755 
2756 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2757 }
2758 
2759 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2760 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2761 	[RTA_OIF]               = { .type = NLA_U32 },
2762 	[RTA_IIF]		= { .type = NLA_U32 },
2763 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2764 	[RTA_METRICS]           = { .type = NLA_NESTED },
2765 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2766 	[RTA_PREF]              = { .type = NLA_U8 },
2767 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
2768 	[RTA_ENCAP]		= { .type = NLA_NESTED },
2769 	[RTA_EXPIRES]		= { .type = NLA_U32 },
2770 };
2771 
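/* Translate an RTM_NEWROUTE/RTM_DELROUTE message into a fib6_config.
 * Reject-type routes (blackhole, prohibit, throw, unreachable) get
 * RTF_REJECT so ip6_route_info_create() wires up the corresponding
 * error code and discard handlers.
 */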
2772 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2773 			      struct fib6_config *cfg)
2774 {
2775 	struct rtmsg *rtm;
2776 	struct nlattr *tb[RTA_MAX+1];
2777 	unsigned int pref;
2778 	int err;
2779 
2780 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2781 	if (err < 0)
2782 		goto errout;
2783 
2784 	err = -EINVAL;
2785 	rtm = nlmsg_data(nlh);
2786 	memset(cfg, 0, sizeof(*cfg));
2787 
2788 	cfg->fc_table = rtm->rtm_table;
2789 	cfg->fc_dst_len = rtm->rtm_dst_len;
2790 	cfg->fc_src_len = rtm->rtm_src_len;
2791 	cfg->fc_flags = RTF_UP;
2792 	cfg->fc_protocol = rtm->rtm_protocol;
2793 	cfg->fc_type = rtm->rtm_type;
2794 
2795 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2796 	    rtm->rtm_type == RTN_BLACKHOLE ||
2797 	    rtm->rtm_type == RTN_PROHIBIT ||
2798 	    rtm->rtm_type == RTN_THROW)
2799 		cfg->fc_flags |= RTF_REJECT;
2800 
2801 	if (rtm->rtm_type == RTN_LOCAL)
2802 		cfg->fc_flags |= RTF_LOCAL;
2803 
2804 	if (rtm->rtm_flags & RTM_F_CLONED)
2805 		cfg->fc_flags |= RTF_CACHE;
2806 
2807 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2808 	cfg->fc_nlinfo.nlh = nlh;
2809 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2810 
2811 	if (tb[RTA_GATEWAY]) {
2812 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2813 		cfg->fc_flags |= RTF_GATEWAY;
2814 	}
2815 
2816 	if (tb[RTA_DST]) {
2817 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2818 
2819 		if (nla_len(tb[RTA_DST]) < plen)
2820 			goto errout;
2821 
2822 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2823 	}
2824 
2825 	if (tb[RTA_SRC]) {
2826 		int plen = (rtm->rtm_src_len + 7) >> 3;
2827 
2828 		if (nla_len(tb[RTA_SRC]) < plen)
2829 			goto errout;
2830 
2831 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2832 	}
2833 
2834 	if (tb[RTA_PREFSRC])
2835 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2836 
2837 	if (tb[RTA_OIF])
2838 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2839 
2840 	if (tb[RTA_PRIORITY])
2841 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2842 
2843 	if (tb[RTA_METRICS]) {
2844 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2845 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2846 	}
2847 
2848 	if (tb[RTA_TABLE])
2849 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2850 
2851 	if (tb[RTA_MULTIPATH]) {
2852 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2853 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2854 	}
2855 
2856 	if (tb[RTA_PREF]) {
2857 		pref = nla_get_u8(tb[RTA_PREF]);
2858 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
2859 		    pref != ICMPV6_ROUTER_PREF_HIGH)
2860 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
2861 		cfg->fc_flags |= RTF_PREF(pref);
2862 	}
2863 
2864 	if (tb[RTA_ENCAP])
2865 		cfg->fc_encap = tb[RTA_ENCAP];
2866 
2867 	if (tb[RTA_ENCAP_TYPE])
2868 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2869 
2870 	if (tb[RTA_EXPIRES]) {
2871 		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2872 
2873 		if (addrconf_finite_timeout(timeout)) {
2874 			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2875 			cfg->fc_flags |= RTF_EXPIRES;
2876 		}
2877 	}
2878 
2879 	err = 0;
2880 errout:
2881 	return err;
2882 }
2883 
2884 struct rt6_nh {
2885 	struct rt6_info *rt6_info;
2886 	struct fib6_config r_cfg;
2887 	struct mx6_config mxc;
2888 	struct list_head next;
2889 };
2890 
2891 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2892 {
2893 	struct rt6_nh *nh;
2894 
2895 	list_for_each_entry(nh, rt6_nh_list, next) {
2896 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2897 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2898 		        nh->r_cfg.fc_ifindex);
2899 	}
2900 }
2901 
2902 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2903 				 struct rt6_info *rt, struct fib6_config *r_cfg)
2904 {
2905 	struct rt6_nh *nh;
2906 	struct rt6_info *rtnh;
2907 	int err = -EEXIST;
2908 
2909 	list_for_each_entry(nh, rt6_nh_list, next) {
2910 		/* check if rt6_info already exists */
2911 		rtnh = nh->rt6_info;
2912 
2913 		if (rtnh->dst.dev == rt->dst.dev &&
2914 		    rtnh->rt6i_idev == rt->rt6i_idev &&
2915 		    ipv6_addr_equal(&rtnh->rt6i_gateway,
2916 				    &rt->rt6i_gateway))
2917 			return err;
2918 	}
2919 
2920 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2921 	if (!nh)
2922 		return -ENOMEM;
2923 	nh->rt6_info = rt;
2924 	err = ip6_convert_metrics(&nh->mxc, r_cfg);
2925 	if (err) {
2926 		kfree(nh);
2927 		return err;
2928 	}
2929 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2930 	list_add_tail(&nh->next, rt6_nh_list);
2931 
2932 	return 0;
2933 }
2934 
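/* Add one rt6_info per nexthop of an RTA_MULTIPATH attribute; an
 * illustrative iproute2 invocation producing such a request:
 *
 *	ip -6 route add 2001:db8::/64 \
 *		nexthop via fe80::1 dev eth0 \
 *		nexthop via fe80::2 dev eth1
 *
 * All nexthops are created before any is inserted; if an insertion
 * fails, the routes already installed are deleted again, making the
 * operation all-or-nothing.
 */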
2935 static int ip6_route_multipath_add(struct fib6_config *cfg)
2936 {
2937 	struct fib6_config r_cfg;
2938 	struct rtnexthop *rtnh;
2939 	struct rt6_info *rt;
2940 	struct rt6_nh *err_nh;
2941 	struct rt6_nh *nh, *nh_safe;
2942 	int remaining;
2943 	int attrlen;
2944 	int err = 1;
2945 	int nhn = 0;
2946 	int replace = (cfg->fc_nlinfo.nlh &&
2947 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2948 	LIST_HEAD(rt6_nh_list);
2949 
2950 	remaining = cfg->fc_mp_len;
2951 	rtnh = (struct rtnexthop *)cfg->fc_mp;
2952 
2953 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
2954 	 * rt6_info structs per nexthop
2955 	 */
2956 	while (rtnh_ok(rtnh, remaining)) {
2957 		memcpy(&r_cfg, cfg, sizeof(*cfg));
2958 		if (rtnh->rtnh_ifindex)
2959 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2960 
2961 		attrlen = rtnh_attrlen(rtnh);
2962 		if (attrlen > 0) {
2963 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2964 
2965 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2966 			if (nla) {
2967 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
2968 				r_cfg.fc_flags |= RTF_GATEWAY;
2969 			}
2970 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2971 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2972 			if (nla)
2973 				r_cfg.fc_encap_type = nla_get_u16(nla);
2974 		}
2975 
2976 		rt = ip6_route_info_create(&r_cfg);
2977 		if (IS_ERR(rt)) {
2978 			err = PTR_ERR(rt);
2979 			rt = NULL;
2980 			goto cleanup;
2981 		}
2982 
2983 		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2984 		if (err) {
2985 			dst_free(&rt->dst);
2986 			goto cleanup;
2987 		}
2988 
2989 		rtnh = rtnh_next(rtnh, &remaining);
2990 	}
2991 
2992 	err_nh = NULL;
2993 	list_for_each_entry(nh, &rt6_nh_list, next) {
2994 		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2995 		/* nh->rt6_info is used or freed at this point, reset to NULL*/
2996 		/* nh->rt6_info is used or freed at this point, reset to NULL */
2997 		if (err) {
2998 			if (replace && nhn)
2999 				ip6_print_replace_route_err(&rt6_nh_list);
3000 			err_nh = nh;
3001 			goto add_errout;
3002 		}
3003 
3004 		/* Because each route is added like a single route, we remove
3005 		 * these flags after the first nexthop: if there is a collision,
3006 		 * we have already failed to add the first nexthop, since
3007 		 * fib6_add_rt2node() has rejected it; when replacing, the old
3008 		 * nexthops have been replaced by the first new one, and the
3009 		 * rest should be appended to it.
3010 		 */
3011 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3012 						     NLM_F_REPLACE);
3013 		nhn++;
3014 	}
3015 
3016 	goto cleanup;
3017 
3018 add_errout:
3019 	/* Delete routes that were already added */
3020 	list_for_each_entry(nh, &rt6_nh_list, next) {
3021 		if (err_nh == nh)
3022 			break;
3023 		ip6_route_del(&nh->r_cfg);
3024 	}
3025 
3026 cleanup:
3027 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
3028 		if (nh->rt6_info)
3029 			dst_free(&nh->rt6_info->dst);
3030 		kfree(nh->mxc.mx);
3031 		list_del(&nh->next);
3032 		kfree(nh);
3033 	}
3034 
3035 	return err;
3036 }
3037 
3038 static int ip6_route_multipath_del(struct fib6_config *cfg)
3039 {
3040 	struct fib6_config r_cfg;
3041 	struct rtnexthop *rtnh;
3042 	int remaining;
3043 	int attrlen;
3044 	int err = 1, last_err = 0;
3045 
3046 	remaining = cfg->fc_mp_len;
3047 	rtnh = (struct rtnexthop *)cfg->fc_mp;
3048 
3049 	/* Parse a Multipath Entry */
3050 	while (rtnh_ok(rtnh, remaining)) {
3051 		memcpy(&r_cfg, cfg, sizeof(*cfg));
3052 		if (rtnh->rtnh_ifindex)
3053 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3054 
3055 		attrlen = rtnh_attrlen(rtnh);
3056 		if (attrlen > 0) {
3057 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3058 
3059 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3060 			if (nla) {
3061 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3062 				r_cfg.fc_flags |= RTF_GATEWAY;
3063 			}
3064 		}
3065 		err = ip6_route_del(&r_cfg);
3066 		if (err)
3067 			last_err = err;
3068 
3069 		rtnh = rtnh_next(rtnh, &remaining);
3070 	}
3071 
3072 	return last_err;
3073 }
3074 
3075 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3076 {
3077 	struct fib6_config cfg;
3078 	int err;
3079 
3080 	err = rtm_to_fib6_config(skb, nlh, &cfg);
3081 	if (err < 0)
3082 		return err;
3083 
3084 	if (cfg.fc_mp)
3085 		return ip6_route_multipath_del(&cfg);
3086 	else
3087 		return ip6_route_del(&cfg);
3088 }
3089 
3090 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3091 {
3092 	struct fib6_config cfg;
3093 	int err;
3094 
3095 	err = rtm_to_fib6_config(skb, nlh, &cfg);
3096 	if (err < 0)
3097 		return err;
3098 
3099 	if (cfg.fc_mp)
3100 		return ip6_route_multipath_add(&cfg);
3101 	else
3102 		return ip6_route_add(&cfg);
3103 }
3104 
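/* Worst-case size of the netlink message rt6_fill_node() can emit for
 * @rt.  inet6_rt_notify() sizes its skb from this, so an attribute
 * missing here surfaces as -EMSGSIZE (see the WARN_ON there).
 */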
3105 static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3106 {
3107 	return NLMSG_ALIGN(sizeof(struct rtmsg))
3108 	       + nla_total_size(16) /* RTA_SRC */
3109 	       + nla_total_size(16) /* RTA_DST */
3110 	       + nla_total_size(16) /* RTA_GATEWAY */
3111 	       + nla_total_size(16) /* RTA_PREFSRC */
3112 	       + nla_total_size(4) /* RTA_TABLE */
3113 	       + nla_total_size(4) /* RTA_IIF */
3114 	       + nla_total_size(4) /* RTA_OIF */
3115 	       + nla_total_size(4) /* RTA_PRIORITY */
3116 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3117 	       + nla_total_size(sizeof(struct rta_cacheinfo))
3118 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3119 	       + nla_total_size(1) /* RTA_PREF */
3120 	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
3121 }
3122 
3123 static int rt6_fill_node(struct net *net,
3124 			 struct sk_buff *skb, struct rt6_info *rt,
3125 			 struct in6_addr *dst, struct in6_addr *src,
3126 			 int iif, int type, u32 portid, u32 seq,
3127 			 int prefix, int nowait, unsigned int flags)
3128 {
3129 	u32 metrics[RTAX_MAX];
3130 	struct rtmsg *rtm;
3131 	struct nlmsghdr *nlh;
3132 	long expires;
3133 	u32 table;
3134 
3135 	if (prefix) {	/* user wants prefix routes only */
3136 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3137 			/* success since this is not a prefix route */
3138 			return 1;
3139 		}
3140 	}
3141 
3142 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3143 	if (!nlh)
3144 		return -EMSGSIZE;
3145 
3146 	rtm = nlmsg_data(nlh);
3147 	rtm->rtm_family = AF_INET6;
3148 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
3149 	rtm->rtm_src_len = rt->rt6i_src.plen;
3150 	rtm->rtm_tos = 0;
3151 	if (rt->rt6i_table)
3152 		table = rt->rt6i_table->tb6_id;
3153 	else
3154 		table = RT6_TABLE_UNSPEC;
3155 	rtm->rtm_table = table;
3156 	if (nla_put_u32(skb, RTA_TABLE, table))
3157 		goto nla_put_failure;
3158 	if (rt->rt6i_flags & RTF_REJECT) {
3159 		switch (rt->dst.error) {
3160 		case -EINVAL:
3161 			rtm->rtm_type = RTN_BLACKHOLE;
3162 			break;
3163 		case -EACCES:
3164 			rtm->rtm_type = RTN_PROHIBIT;
3165 			break;
3166 		case -EAGAIN:
3167 			rtm->rtm_type = RTN_THROW;
3168 			break;
3169 		default:
3170 			rtm->rtm_type = RTN_UNREACHABLE;
3171 			break;
3172 		}
3173 	} else if (rt->rt6i_flags & RTF_LOCAL)
3175 		rtm->rtm_type = RTN_LOCAL;
3176 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3177 		rtm->rtm_type = RTN_LOCAL;
3178 	else
3179 		rtm->rtm_type = RTN_UNICAST;
3180 	rtm->rtm_flags = 0;
3181 	if (!netif_carrier_ok(rt->dst.dev)) {
3182 		rtm->rtm_flags |= RTNH_F_LINKDOWN;
3183 		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3184 			rtm->rtm_flags |= RTNH_F_DEAD;
3185 	}
3186 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3187 	rtm->rtm_protocol = rt->rt6i_protocol;
3188 	if (rt->rt6i_flags & RTF_DYNAMIC)
3189 		rtm->rtm_protocol = RTPROT_REDIRECT;
3190 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
3191 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3192 			rtm->rtm_protocol = RTPROT_RA;
3193 		else
3194 			rtm->rtm_protocol = RTPROT_KERNEL;
3195 	}
3196 
3197 	if (rt->rt6i_flags & RTF_CACHE)
3198 		rtm->rtm_flags |= RTM_F_CLONED;
3199 
3200 	if (dst) {
3201 		if (nla_put_in6_addr(skb, RTA_DST, dst))
3202 			goto nla_put_failure;
3203 		rtm->rtm_dst_len = 128;
3204 	} else if (rtm->rtm_dst_len)
3205 		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3206 			goto nla_put_failure;
3207 #ifdef CONFIG_IPV6_SUBTREES
3208 	if (src) {
3209 		if (nla_put_in6_addr(skb, RTA_SRC, src))
3210 			goto nla_put_failure;
3211 		rtm->rtm_src_len = 128;
3212 	} else if (rtm->rtm_src_len &&
3213 		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3214 		goto nla_put_failure;
3215 #endif
3216 	if (iif) {
3217 #ifdef CONFIG_IPV6_MROUTE
3218 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3219 			int err = ip6mr_get_route(net, skb, rtm, nowait,
3220 						  portid);
3221 
3222 			if (err <= 0) {
3223 				if (!nowait) {
3224 					if (err == 0)
3225 						return 0;
3226 					goto nla_put_failure;
3227 				} else {
3228 					if (err == -EMSGSIZE)
3229 						goto nla_put_failure;
3230 				}
3231 			}
3232 		} else
3233 #endif
3234 			if (nla_put_u32(skb, RTA_IIF, iif))
3235 				goto nla_put_failure;
3236 	} else if (dst) {
3237 		struct in6_addr saddr_buf;
3238 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3239 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3240 			goto nla_put_failure;
3241 	}
3242 
3243 	if (rt->rt6i_prefsrc.plen) {
3244 		struct in6_addr saddr_buf;
3245 		saddr_buf = rt->rt6i_prefsrc.addr;
3246 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3247 			goto nla_put_failure;
3248 	}
3249 
3250 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3251 	if (rt->rt6i_pmtu)
3252 		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3253 	if (rtnetlink_put_metrics(skb, metrics) < 0)
3254 		goto nla_put_failure;
3255 
3256 	if (rt->rt6i_flags & RTF_GATEWAY) {
3257 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3258 			goto nla_put_failure;
3259 	}
3260 
3261 	if (rt->dst.dev &&
3262 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3263 		goto nla_put_failure;
3264 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3265 		goto nla_put_failure;
3266 
3267 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3268 
3269 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3270 		goto nla_put_failure;
3271 
3272 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3273 		goto nla_put_failure;
3274 
3275 	lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3276 
3277 	nlmsg_end(skb, nlh);
3278 	return 0;
3279 
3280 nla_put_failure:
3281 	nlmsg_cancel(skb, nlh);
3282 	return -EMSGSIZE;
3283 }
3284 
3285 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3286 {
3287 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3288 	int prefix;
3289 
3290 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3291 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3292 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3293 	} else
3294 		prefix = 0;
3295 
3296 	return rt6_fill_node(arg->net,
3297 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3298 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3299 		     prefix, 0, NLM_F_MULTI);
3300 }
3301 
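/* RTM_GETROUTE: resolve one route for the given source, destination
 * and interface hints, and answer with an RTM_NEWROUTE message built
 * by rt6_fill_node().
 */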
3302 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3303 {
3304 	struct net *net = sock_net(in_skb->sk);
3305 	struct nlattr *tb[RTA_MAX+1];
3306 	struct rt6_info *rt;
3307 	struct sk_buff *skb;
3308 	struct rtmsg *rtm;
3309 	struct flowi6 fl6;
3310 	int err, iif = 0, oif = 0;
3311 
3312 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3313 	if (err < 0)
3314 		goto errout;
3315 
3316 	err = -EINVAL;
3317 	memset(&fl6, 0, sizeof(fl6));
3318 	rtm = nlmsg_data(nlh);
3319 	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
3320 
3321 	if (tb[RTA_SRC]) {
3322 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3323 			goto errout;
3324 
3325 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3326 	}
3327 
3328 	if (tb[RTA_DST]) {
3329 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3330 			goto errout;
3331 
3332 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3333 	}
3334 
3335 	if (tb[RTA_IIF])
3336 		iif = nla_get_u32(tb[RTA_IIF]);
3337 
3338 	if (tb[RTA_OIF])
3339 		oif = nla_get_u32(tb[RTA_OIF]);
3340 
3341 	if (tb[RTA_MARK])
3342 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3343 
3344 	if (iif) {
3345 		struct net_device *dev;
3346 		int flags = 0;
3347 
3348 		dev = __dev_get_by_index(net, iif);
3349 		if (!dev) {
3350 			err = -ENODEV;
3351 			goto errout;
3352 		}
3353 
3354 		fl6.flowi6_iif = iif;
3355 
3356 		if (!ipv6_addr_any(&fl6.saddr))
3357 			flags |= RT6_LOOKUP_F_HAS_SADDR;
3358 
3359 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3360 							       flags);
3361 	} else {
3362 		fl6.flowi6_oif = oif;
3363 
3364 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3365 	}
3366 
3367 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3368 	if (!skb) {
3369 		ip6_rt_put(rt);
3370 		err = -ENOBUFS;
3371 		goto errout;
3372 	}
3373 
3374 	/* Reserve room for dummy headers; this skb can pass
3375 	   through a good chunk of the routing engine.
3376 	 */
3377 	skb_reset_mac_header(skb);
3378 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3379 
3380 	skb_dst_set(skb, &rt->dst);
3381 
3382 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3383 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3384 			    nlh->nlmsg_seq, 0, 0, 0);
3385 	if (err < 0) {
3386 		kfree_skb(skb);
3387 		goto errout;
3388 	}
3389 
3390 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3391 errout:
3392 	return err;
3393 }
3394 
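/* Notify RTNLGRP_IPV6_ROUTE listeners of a route change.  gfp_any()
 * is used because this runs in both process and softirq context.
 */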
3395 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3396 		     unsigned int nlm_flags)
3397 {
3398 	struct sk_buff *skb;
3399 	struct net *net = info->nl_net;
3400 	u32 seq;
3401 	int err;
3402 
3403 	err = -ENOBUFS;
3404 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3405 
3406 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3407 	if (!skb)
3408 		goto errout;
3409 
3410 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3411 				event, info->portid, seq, 0, 0, nlm_flags);
3412 	if (err < 0) {
3413 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3414 		WARN_ON(err == -EMSGSIZE);
3415 		kfree_skb(skb);
3416 		goto errout;
3417 	}
3418 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3419 		    info->nlh, gfp_any());
3420 	return;
3421 errout:
3422 	if (err < 0)
3423 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3424 }
3425 
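/* When the loopback device registers, point the special template
 * routes (null, and with multiple tables prohibit/blackhole) at it so
 * they have a valid dev/idev.
 */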
3426 static int ip6_route_dev_notify(struct notifier_block *this,
3427 				unsigned long event, void *ptr)
3428 {
3429 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3430 	struct net *net = dev_net(dev);
3431 
3432 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3433 		net->ipv6.ip6_null_entry->dst.dev = dev;
3434 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3435 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3436 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3437 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3438 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3439 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3440 #endif
3441 	}
3442 
3443 	return NOTIFY_OK;
3444 }
3445 
3446 /*
3447  *	/proc
3448  */
3449 
3450 #ifdef CONFIG_PROC_FS
3451 
3452 static const struct file_operations ipv6_route_proc_fops = {
3453 	.owner		= THIS_MODULE,
3454 	.open		= ipv6_route_open,
3455 	.read		= seq_read,
3456 	.llseek		= seq_lseek,
3457 	.release	= seq_release_net,
3458 };
3459 
3460 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3461 {
3462 	struct net *net = (struct net *)seq->private;
3463 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3464 		   net->ipv6.rt6_stats->fib_nodes,
3465 		   net->ipv6.rt6_stats->fib_route_nodes,
3466 		   net->ipv6.rt6_stats->fib_rt_alloc,
3467 		   net->ipv6.rt6_stats->fib_rt_entries,
3468 		   net->ipv6.rt6_stats->fib_rt_cache,
3469 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3470 		   net->ipv6.rt6_stats->fib_discarded_routes);
3471 
3472 	return 0;
3473 }
3474 
3475 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3476 {
3477 	return single_open_net(inode, file, rt6_stats_seq_show);
3478 }
3479 
3480 static const struct file_operations rt6_stats_seq_fops = {
3481 	.owner	 = THIS_MODULE,
3482 	.open	 = rt6_stats_seq_open,
3483 	.read	 = seq_read,
3484 	.llseek	 = seq_lseek,
3485 	.release = single_release_net,
3486 };
3487 #endif	/* CONFIG_PROC_FS */
3488 
3489 #ifdef CONFIG_SYSCTL
3490 
3491 static
3492 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3493 			      void __user *buffer, size_t *lenp, loff_t *ppos)
3494 {
3495 	struct net *net;
3496 	int delay;
3497 	if (!write)
3498 		return -EINVAL;
3499 
3500 	net = (struct net *)ctl->extra1;
3501 	delay = net->ipv6.sysctl.flush_delay;
3502 	proc_dointvec(ctl, write, buffer, lenp, ppos);
3503 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3504 	return 0;
3505 }
3506 
3507 struct ctl_table ipv6_route_table_template[] = {
3508 	{
3509 		.procname	=	"flush",
3510 		.data		=	&init_net.ipv6.sysctl.flush_delay,
3511 		.maxlen		=	sizeof(int),
3512 		.mode		=	0200,
3513 		.proc_handler	=	ipv6_sysctl_rtcache_flush
3514 	},
3515 	{
3516 		.procname	=	"gc_thresh",
3517 		.data		=	&ip6_dst_ops_template.gc_thresh,
3518 		.maxlen		=	sizeof(int),
3519 		.mode		=	0644,
3520 		.proc_handler	=	proc_dointvec,
3521 	},
3522 	{
3523 		.procname	=	"max_size",
3524 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
3525 		.maxlen		=	sizeof(int),
3526 		.mode		=	0644,
3527 		.proc_handler	=	proc_dointvec,
3528 	},
3529 	{
3530 		.procname	=	"gc_min_interval",
3531 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3532 		.maxlen		=	sizeof(int),
3533 		.mode		=	0644,
3534 		.proc_handler	=	proc_dointvec_jiffies,
3535 	},
3536 	{
3537 		.procname	=	"gc_timeout",
3538 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3539 		.maxlen		=	sizeof(int),
3540 		.mode		=	0644,
3541 		.proc_handler	=	proc_dointvec_jiffies,
3542 	},
3543 	{
3544 		.procname	=	"gc_interval",
3545 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
3546 		.maxlen		=	sizeof(int),
3547 		.mode		=	0644,
3548 		.proc_handler	=	proc_dointvec_jiffies,
3549 	},
3550 	{
3551 		.procname	=	"gc_elasticity",
3552 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3553 		.maxlen		=	sizeof(int),
3554 		.mode		=	0644,
3555 		.proc_handler	=	proc_dointvec,
3556 	},
3557 	{
3558 		.procname	=	"mtu_expires",
3559 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3560 		.maxlen		=	sizeof(int),
3561 		.mode		=	0644,
3562 		.proc_handler	=	proc_dointvec_jiffies,
3563 	},
3564 	{
3565 		.procname	=	"min_adv_mss",
3566 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
3567 		.maxlen		=	sizeof(int),
3568 		.mode		=	0644,
3569 		.proc_handler	=	proc_dointvec,
3570 	},
3571 	{
3572 		.procname	=	"gc_min_interval_ms",
3573 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3574 		.maxlen		=	sizeof(int),
3575 		.mode		=	0644,
3576 		.proc_handler	=	proc_dointvec_ms_jiffies,
3577 	},
3578 	{ }
3579 };
3580 
3581 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3582 {
3583 	struct ctl_table *table;
3584 
3585 	table = kmemdup(ipv6_route_table_template,
3586 			sizeof(ipv6_route_table_template),
3587 			GFP_KERNEL);
3588 
3589 	if (table) {
3590 		table[0].data = &net->ipv6.sysctl.flush_delay;
3591 		table[0].extra1 = net;
3592 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3593 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3594 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3595 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3596 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3597 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3598 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3599 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3600 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3601 
3602 		/* Don't export sysctls to unprivileged users */
3603 		if (net->user_ns != &init_user_ns)
3604 			table[0].procname = NULL;
3605 	}
3606 
3607 	return table;
3608 }
3609 #endif
3610 
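/* Per-namespace setup: copy the dst_ops template, allocate the
 * template routes and seed the routing sysctl defaults.
 */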
3611 static int __net_init ip6_route_net_init(struct net *net)
3612 {
3613 	int ret = -ENOMEM;
3614 
3615 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3616 	       sizeof(net->ipv6.ip6_dst_ops));
3617 
3618 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3619 		goto out_ip6_dst_ops;
3620 
3621 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3622 					   sizeof(*net->ipv6.ip6_null_entry),
3623 					   GFP_KERNEL);
3624 	if (!net->ipv6.ip6_null_entry)
3625 		goto out_ip6_dst_entries;
3626 	net->ipv6.ip6_null_entry->dst.path =
3627 		(struct dst_entry *)net->ipv6.ip6_null_entry;
3628 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3629 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3630 			 ip6_template_metrics, true);
3631 
3632 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3633 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3634 					       sizeof(*net->ipv6.ip6_prohibit_entry),
3635 					       GFP_KERNEL);
3636 	if (!net->ipv6.ip6_prohibit_entry)
3637 		goto out_ip6_null_entry;
3638 	net->ipv6.ip6_prohibit_entry->dst.path =
3639 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3640 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3641 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3642 			 ip6_template_metrics, true);
3643 
3644 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3645 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3646 					       GFP_KERNEL);
3647 	if (!net->ipv6.ip6_blk_hole_entry)
3648 		goto out_ip6_prohibit_entry;
3649 	net->ipv6.ip6_blk_hole_entry->dst.path =
3650 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3651 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3652 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3653 			 ip6_template_metrics, true);
3654 #endif
3655 
3656 	net->ipv6.sysctl.flush_delay = 0;
3657 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3658 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3659 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3660 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3661 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3662 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3663 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3664 
3665 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3666 
3667 	ret = 0;
3668 out:
3669 	return ret;
3670 
3671 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3672 out_ip6_prohibit_entry:
3673 	kfree(net->ipv6.ip6_prohibit_entry);
3674 out_ip6_null_entry:
3675 	kfree(net->ipv6.ip6_null_entry);
3676 #endif
3677 out_ip6_dst_entries:
3678 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3679 out_ip6_dst_ops:
3680 	goto out;
3681 }
3682 
3683 static void __net_exit ip6_route_net_exit(struct net *net)
3684 {
3685 	kfree(net->ipv6.ip6_null_entry);
3686 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3687 	kfree(net->ipv6.ip6_prohibit_entry);
3688 	kfree(net->ipv6.ip6_blk_hole_entry);
3689 #endif
3690 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3691 }
3692 
3693 static int __net_init ip6_route_net_init_late(struct net *net)
3694 {
3695 #ifdef CONFIG_PROC_FS
3696 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3697 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3698 #endif
3699 	return 0;
3700 }
3701 
3702 static void __net_exit ip6_route_net_exit_late(struct net *net)
3703 {
3704 #ifdef CONFIG_PROC_FS
3705 	remove_proc_entry("ipv6_route", net->proc_net);
3706 	remove_proc_entry("rt6_stats", net->proc_net);
3707 #endif
3708 }
3709 
3710 static struct pernet_operations ip6_route_net_ops = {
3711 	.init = ip6_route_net_init,
3712 	.exit = ip6_route_net_exit,
3713 };
3714 
3715 static int __net_init ipv6_inetpeer_init(struct net *net)
3716 {
3717 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3718 
3719 	if (!bp)
3720 		return -ENOMEM;
3721 	inet_peer_base_init(bp);
3722 	net->ipv6.peers = bp;
3723 	return 0;
3724 }
3725 
3726 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3727 {
3728 	struct inet_peer_base *bp = net->ipv6.peers;
3729 
3730 	net->ipv6.peers = NULL;
3731 	inetpeer_invalidate_tree(bp);
3732 	kfree(bp);
3733 }
3734 
3735 static struct pernet_operations ipv6_inetpeer_ops = {
3736 	.init	=	ipv6_inetpeer_init,
3737 	.exit	=	ipv6_inetpeer_exit,
3738 };
3739 
3740 static struct pernet_operations ip6_route_net_late_ops = {
3741 	.init = ip6_route_net_init_late,
3742 	.exit = ip6_route_net_exit_late,
3743 };
3744 
3745 static struct notifier_block ip6_route_dev_notifier = {
3746 	.notifier_call = ip6_route_dev_notify,
3747 	.priority = 0,
3748 };
3749 
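/* Boot-time initialization.  Ordering matters: the dst kmem cache and
 * pernet state must exist before fib6_init() and fib6_rules_init(),
 * and the rtnetlink handlers and netdevice notifier are registered
 * last, once everything they depend on is in place.
 */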
3750 int __init ip6_route_init(void)
3751 {
3752 	int ret;
3753 	int cpu;
3754 
3755 	ret = -ENOMEM;
3756 	ip6_dst_ops_template.kmem_cachep =
3757 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3758 				  SLAB_HWCACHE_ALIGN, NULL);
3759 	if (!ip6_dst_ops_template.kmem_cachep)
3760 		goto out;
3761 
3762 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3763 	if (ret)
3764 		goto out_kmem_cache;
3765 
3766 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3767 	if (ret)
3768 		goto out_dst_entries;
3769 
3770 	ret = register_pernet_subsys(&ip6_route_net_ops);
3771 	if (ret)
3772 		goto out_register_inetpeer;
3773 
3774 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3775 
3776 	/* Registration of the loopback device is done before this portion
3777 	 * of code; the loopback reference in rt6_info will not be taken,
3778 	 * so do it manually for init_net */
3779 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3780 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3781 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3782 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3783 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3784 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3785 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3786 #endif
3787 	ret = fib6_init();
3788 	if (ret)
3789 		goto out_register_subsys;
3790 
3791 	ret = xfrm6_init();
3792 	if (ret)
3793 		goto out_fib6_init;
3794 
3795 	ret = fib6_rules_init();
3796 	if (ret)
3797 		goto xfrm6_init;
3798 
3799 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3800 	if (ret)
3801 		goto fib6_rules_init;
3802 
3803 	ret = -ENOBUFS;
3804 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3805 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3806 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3807 		goto out_register_late_subsys;
3808 
3809 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3810 	if (ret)
3811 		goto out_register_late_subsys;
3812 
3813 	for_each_possible_cpu(cpu) {
3814 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3815 
3816 		INIT_LIST_HEAD(&ul->head);
3817 		spin_lock_init(&ul->lock);
3818 	}
3819 
3820 out:
3821 	return ret;
3822 
3823 out_register_late_subsys:
3824 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3825 fib6_rules_init:
3826 	fib6_rules_cleanup();
3827 xfrm6_init:
3828 	xfrm6_fini();
3829 out_fib6_init:
3830 	fib6_gc_cleanup();
3831 out_register_subsys:
3832 	unregister_pernet_subsys(&ip6_route_net_ops);
3833 out_register_inetpeer:
3834 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3835 out_dst_entries:
3836 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3837 out_kmem_cache:
3838 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3839 	goto out;
3840 }
3841 
3842 void ip6_route_cleanup(void)
3843 {
3844 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3845 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3846 	fib6_rules_cleanup();
3847 	xfrm6_fini();
3848 	fib6_gc_cleanup();
3849 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3850 	unregister_pernet_subsys(&ip6_route_net_ops);
3851 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3852 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3853 }
3854