xref: /openbmc/linux/net/ipv6/route.c (revision efe4a1ac)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
/* Expands a format string so that it starts with the "IPv6: " prefix. */
#define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65 #include <trace/events/fib6.h>
66 
67 #include <linux/uaccess.h>
68 
69 #ifdef CONFIG_SYSCTL
70 #include <linux/sysctl.h>
71 #endif
72 
/*
 * Result codes returned by rt6_check_neigh() and passed on (when negative)
 * by rt6_score_route().  All failure codes are negative.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* find_match() skips the route */
	RT6_NUD_FAIL_PROBE = -2,	/* neighbour in NUD_FAILED (ROUTER_PREF builds) */
	RT6_NUD_FAIL_DO_RR = -1,	/* find_match() scores it 0 and asks for round-robin */
	RT6_NUD_SUCCEED = 1
};
79 
/*
 * Forward declarations for static functions that are referenced (for
 * example by the dst_ops tables and the route templates below) ahead of
 * their definitions.
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int		ip6_pkt_prohibit(struct sk_buff *skb);
static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
static size_t rt6_nlmsg_size(struct rt6_info *rt);
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags);

/* Route Information helpers; rt6_route_rcv() below uses both. */
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev);
#endif
119 
/* A list head together with the spinlock that the list helpers below take. */
struct uncached_list {
	spinlock_t		lock;	/* taken with _bh around every list access below */
	struct list_head	head;	/* rt6_info entries, linked through rt6i_uncached */
};

/* One uncached_list per CPU; filled by rt6_uncached_list_add(). */
static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
126 
127 static void rt6_uncached_list_add(struct rt6_info *rt)
128 {
129 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
130 
131 	rt->dst.flags |= DST_NOCACHE;
132 	rt->rt6i_uncached_list = ul;
133 
134 	spin_lock_bh(&ul->lock);
135 	list_add_tail(&rt->rt6i_uncached, &ul->head);
136 	spin_unlock_bh(&ul->lock);
137 }
138 
139 static void rt6_uncached_list_del(struct rt6_info *rt)
140 {
141 	if (!list_empty(&rt->rt6i_uncached)) {
142 		struct uncached_list *ul = rt->rt6i_uncached_list;
143 
144 		spin_lock_bh(&ul->lock);
145 		list_del(&rt->rt6i_uncached);
146 		spin_unlock_bh(&ul->lock);
147 	}
148 }
149 
150 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
151 {
152 	struct net_device *loopback_dev = net->loopback_dev;
153 	int cpu;
154 
155 	if (dev == loopback_dev)
156 		return;
157 
158 	for_each_possible_cpu(cpu) {
159 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
160 		struct rt6_info *rt;
161 
162 		spin_lock_bh(&ul->lock);
163 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
164 			struct inet6_dev *rt_idev = rt->rt6i_idev;
165 			struct net_device *rt_dev = rt->dst.dev;
166 
167 			if (rt_idev->dev == dev) {
168 				rt->rt6i_idev = in6_dev_get(loopback_dev);
169 				in6_dev_put(rt_idev);
170 			}
171 
172 			if (rt_dev == dev) {
173 				rt->dst.dev = loopback_dev;
174 				dev_hold(rt->dst.dev);
175 				dev_put(rt_dev);
176 			}
177 		}
178 		spin_unlock_bh(&ul->lock);
179 	}
180 }
181 
182 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
183 {
184 	return dst_metrics_write_ptr(rt->dst.from);
185 }
186 
187 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
188 {
189 	struct rt6_info *rt = (struct rt6_info *)dst;
190 
191 	if (rt->rt6i_flags & RTF_PCPU)
192 		return rt6_pcpu_cow_metrics(rt);
193 	else if (rt->rt6i_flags & RTF_CACHE)
194 		return NULL;
195 	else
196 		return dst_cow_metrics_generic(dst, old);
197 }
198 
199 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
200 					     struct sk_buff *skb,
201 					     const void *daddr)
202 {
203 	struct in6_addr *p = &rt->rt6i_gateway;
204 
205 	if (!ipv6_addr_any(p))
206 		return (const void *) p;
207 	else if (skb)
208 		return &ipv6_hdr(skb)->daddr;
209 	return daddr;
210 }
211 
212 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
213 					  struct sk_buff *skb,
214 					  const void *daddr)
215 {
216 	struct rt6_info *rt = (struct rt6_info *) dst;
217 	struct neighbour *n;
218 
219 	daddr = choose_neigh_daddr(rt, skb, daddr);
220 	n = __ipv6_neigh_lookup(dst->dev, daddr);
221 	if (n)
222 		return n;
223 	return neigh_create(&nd_tbl, daddr, dst->dev);
224 }
225 
226 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
227 {
228 	struct net_device *dev = dst->dev;
229 	struct rt6_info *rt = (struct rt6_info *)dst;
230 
231 	daddr = choose_neigh_daddr(rt, NULL, daddr);
232 	if (!daddr)
233 		return;
234 	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
235 		return;
236 	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
237 		return;
238 	__ipv6_confirm_neigh(dev, daddr);
239 }
240 
/*
 * dst_ops callback table for IPv6 routes.
 * NOTE(review): presumably copied into each namespace's
 * net->ipv6.ip6_dst_ops (the table __ip6_dst_alloc() allocates from);
 * the copy itself is not visible here.
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
	.confirm_neigh		=	ip6_confirm_neigh,
};
259 
260 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
261 {
262 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
263 
264 	return mtu ? : dst->dev->mtu;
265 }
266 
/* update_pmtu handler for ip6_dst_blackhole_ops: deliberately does nothing. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}
271 
/* redirect handler for ip6_dst_blackhole_ops: deliberately does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
276 
/*
 * dst_ops table for blackhole dsts.  Shares destroy/check/advmss/neigh
 * handlers with the regular IPv6 ops, but PMTU updates and redirects are
 * no-ops and metrics use the generic copy-on-write helper.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	dst_cow_metrics_generic,
	.neigh_lookup		=	ip6_neigh_lookup,
};
288 
/*
 * Read-only metrics array with every entry zero; the hop-limit slot is
 * spelled out explicitly.
 * NOTE(review): its users are outside the visible part of the file.
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
292 
/*
 * Template for the "no route" entry: a reject route without nexthop whose
 * input/output handlers discard packets and whose error is -ENETUNREACH.
 * The metric is the maximum u32 value.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
307 
/* The two templates below exist only with CONFIG_IPV6_MULTIPLE_TABLES. */
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Template for the "prohibit" entry: same shape as the null entry, but the
 * error is -EACCES and the handlers are the ip6_pkt_prohibit variants.
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" entry: error is -EINVAL and packets go to
 * the generic dst_discard handlers.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
341 
342 static void rt6_info_init(struct rt6_info *rt)
343 {
344 	struct dst_entry *dst = &rt->dst;
345 
346 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
347 	INIT_LIST_HEAD(&rt->rt6i_siblings);
348 	INIT_LIST_HEAD(&rt->rt6i_uncached);
349 }
350 
351 /* allocate dst with ip6_dst_ops */
352 static struct rt6_info *__ip6_dst_alloc(struct net *net,
353 					struct net_device *dev,
354 					int flags)
355 {
356 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
357 					0, DST_OBSOLETE_FORCE_CHK, flags);
358 
359 	if (rt)
360 		rt6_info_init(rt);
361 
362 	return rt;
363 }
364 
365 struct rt6_info *ip6_dst_alloc(struct net *net,
366 			       struct net_device *dev,
367 			       int flags)
368 {
369 	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
370 
371 	if (rt) {
372 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
373 		if (rt->rt6i_pcpu) {
374 			int cpu;
375 
376 			for_each_possible_cpu(cpu) {
377 				struct rt6_info **p;
378 
379 				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
380 				/* no one shares rt */
381 				*p =  NULL;
382 			}
383 		} else {
384 			dst_destroy((struct dst_entry *)rt);
385 			return NULL;
386 		}
387 	}
388 
389 	return rt;
390 }
391 EXPORT_SYMBOL(ip6_dst_alloc);
392 
393 static void ip6_dst_destroy(struct dst_entry *dst)
394 {
395 	struct rt6_info *rt = (struct rt6_info *)dst;
396 	struct dst_entry *from = dst->from;
397 	struct inet6_dev *idev;
398 
399 	dst_destroy_metrics_generic(dst);
400 	free_percpu(rt->rt6i_pcpu);
401 	rt6_uncached_list_del(rt);
402 
403 	idev = rt->rt6i_idev;
404 	if (idev) {
405 		rt->rt6i_idev = NULL;
406 		in6_dev_put(idev);
407 	}
408 
409 	dst->from = NULL;
410 	dst_release(from);
411 }
412 
413 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
414 			   int how)
415 {
416 	struct rt6_info *rt = (struct rt6_info *)dst;
417 	struct inet6_dev *idev = rt->rt6i_idev;
418 	struct net_device *loopback_dev =
419 		dev_net(dev)->loopback_dev;
420 
421 	if (dev != loopback_dev) {
422 		if (idev && idev->dev == dev) {
423 			struct inet6_dev *loopback_idev =
424 				in6_dev_get(loopback_dev);
425 			if (loopback_idev) {
426 				rt->rt6i_idev = loopback_idev;
427 				in6_dev_put(idev);
428 			}
429 		}
430 	}
431 }
432 
433 static bool __rt6_check_expired(const struct rt6_info *rt)
434 {
435 	if (rt->rt6i_flags & RTF_EXPIRES)
436 		return time_after(jiffies, rt->dst.expires);
437 	else
438 		return false;
439 }
440 
441 static bool rt6_check_expired(const struct rt6_info *rt)
442 {
443 	if (rt->rt6i_flags & RTF_EXPIRES) {
444 		if (time_after(jiffies, rt->dst.expires))
445 			return true;
446 	} else if (rt->dst.from) {
447 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
448 	}
449 	return false;
450 }
451 
452 /* Multipath route selection:
453  *   Hash based function using packet header and flowlabel.
454  * Adapted from fib_info_hashfn()
455  */
456 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
457 			       const struct flowi6 *fl6)
458 {
459 	return get_hash_from_flowi6(fl6) % candidate_count;
460 }
461 
462 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
463 					     struct flowi6 *fl6, int oif,
464 					     int strict)
465 {
466 	struct rt6_info *sibling, *next_sibling;
467 	int route_choosen;
468 
469 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
470 	/* Don't change the route, if route_choosen == 0
471 	 * (siblings does not include ourself)
472 	 */
473 	if (route_choosen)
474 		list_for_each_entry_safe(sibling, next_sibling,
475 				&match->rt6i_siblings, rt6i_siblings) {
476 			route_choosen--;
477 			if (route_choosen == 0) {
478 				if (rt6_score_route(sibling, oif, strict) < 0)
479 					break;
480 				match = sibling;
481 				break;
482 			}
483 		}
484 	return match;
485 }
486 
487 /*
488  *	Route lookup. Any table->tb6_lock is implied.
489  */
490 
491 static inline struct rt6_info *rt6_device_match(struct net *net,
492 						    struct rt6_info *rt,
493 						    const struct in6_addr *saddr,
494 						    int oif,
495 						    int flags)
496 {
497 	struct rt6_info *local = NULL;
498 	struct rt6_info *sprt;
499 
500 	if (!oif && ipv6_addr_any(saddr))
501 		goto out;
502 
503 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
504 		struct net_device *dev = sprt->dst.dev;
505 
506 		if (oif) {
507 			if (dev->ifindex == oif)
508 				return sprt;
509 			if (dev->flags & IFF_LOOPBACK) {
510 				if (!sprt->rt6i_idev ||
511 				    sprt->rt6i_idev->dev->ifindex != oif) {
512 					if (flags & RT6_LOOKUP_F_IFACE)
513 						continue;
514 					if (local &&
515 					    local->rt6i_idev->dev->ifindex == oif)
516 						continue;
517 				}
518 				local = sprt;
519 			}
520 		} else {
521 			if (ipv6_chk_addr(net, saddr, dev,
522 					  flags & RT6_LOOKUP_F_IFACE))
523 				return sprt;
524 		}
525 	}
526 
527 	if (oif) {
528 		if (local)
529 			return local;
530 
531 		if (flags & RT6_LOOKUP_F_IFACE)
532 			return net->ipv6.ip6_null_entry;
533 	}
534 out:
535 	return rt;
536 }
537 
#ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred probe request: which address to solicit and on which device. */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;	/* reference held until the work has run */
};

/*
 * Work handler: send one ndisc_send_ns() for the recorded target to the
 * multicast address derived by addrconf_addr_solict_mult(), then drop the
 * device reference and free the request.
 */
static void rt6_probe_deferred(struct work_struct *w)
{
	struct __rt6_probe_work *req =
		container_of(w, struct __rt6_probe_work, work);
	struct in6_addr mcast;

	addrconf_addr_solict_mult(&req->target, &mcast);
	ndisc_send_ns(req->dev, &req->target, &mcast, NULL, 0);
	dev_put(req->dev);
	kfree(req);
}

/*
 * Router Reachability Probing for gateway route @rt.
 *
 * Okay, this does not seem to be appropriate for now, however, we need
 * to check if it is really so.  Router Reachability Probe MUST be
 * rate-limited to no more than one per minute.
 *
 * Nothing happens for a NULL route or a non-gateway route, or when the
 * gateway's neighbour entry is already NUD_VALID.  With an existing but
 * not valid neighbour entry, a probe is queued only once
 * rtr_probe_interval has elapsed since the entry was last updated.  With
 * no neighbour entry at all, a probe is queued unconditionally.  The
 * probe itself is sent from a work item (rt6_probe_deferred).
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *req = NULL;
	struct neighbour *gw_neigh;

	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;

	rcu_read_lock_bh();
	gw_neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (!gw_neigh) {
		req = kmalloc(sizeof(*req), GFP_ATOMIC);
	} else {
		if (gw_neigh->nud_state & NUD_VALID)
			goto unlock;

		write_lock(&gw_neigh->lock);
		/* re-check the state now that the neighbour lock is held */
		if (!(gw_neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       gw_neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			req = kmalloc(sizeof(*req), GFP_ATOMIC);
			if (req)
				__neigh_set_probe_once(gw_neigh);
		}
		write_unlock(&gw_neigh->lock);
	}

	if (req) {
		INIT_WORK(&req->work, rt6_probe_deferred);
		req->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);
		req->dev = rt->dst.dev;
		schedule_work(&req->work);
	}

unlock:
	rcu_read_unlock_bh();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no probing is performed. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
608 
609 /*
610  * Default Router Selection (RFC 2461 6.3.6)
611  */
612 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
613 {
614 	struct net_device *dev = rt->dst.dev;
615 	if (!oif || dev->ifindex == oif)
616 		return 2;
617 	if ((dev->flags & IFF_LOOPBACK) &&
618 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
619 		return 1;
620 	return 0;
621 }
622 
623 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
624 {
625 	struct neighbour *neigh;
626 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
627 
628 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
629 	    !(rt->rt6i_flags & RTF_GATEWAY))
630 		return RT6_NUD_SUCCEED;
631 
632 	rcu_read_lock_bh();
633 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
634 	if (neigh) {
635 		read_lock(&neigh->lock);
636 		if (neigh->nud_state & NUD_VALID)
637 			ret = RT6_NUD_SUCCEED;
638 #ifdef CONFIG_IPV6_ROUTER_PREF
639 		else if (!(neigh->nud_state & NUD_FAILED))
640 			ret = RT6_NUD_SUCCEED;
641 		else
642 			ret = RT6_NUD_FAIL_PROBE;
643 #endif
644 		read_unlock(&neigh->lock);
645 	} else {
646 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
647 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
648 	}
649 	rcu_read_unlock_bh();
650 
651 	return ret;
652 }
653 
654 static int rt6_score_route(struct rt6_info *rt, int oif,
655 			   int strict)
656 {
657 	int m;
658 
659 	m = rt6_check_dev(rt, oif);
660 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
661 		return RT6_NUD_FAIL_HARD;
662 #ifdef CONFIG_IPV6_ROUTER_PREF
663 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
664 #endif
665 	if (strict & RT6_LOOKUP_F_REACHABLE) {
666 		int n = rt6_check_neigh(rt);
667 		if (n < 0)
668 			return n;
669 	}
670 	return m;
671 }
672 
673 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
674 				   int *mpri, struct rt6_info *match,
675 				   bool *do_rr)
676 {
677 	int m;
678 	bool match_do_rr = false;
679 	struct inet6_dev *idev = rt->rt6i_idev;
680 	struct net_device *dev = rt->dst.dev;
681 
682 	if (dev && !netif_carrier_ok(dev) &&
683 	    idev->cnf.ignore_routes_with_linkdown &&
684 	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
685 		goto out;
686 
687 	if (rt6_check_expired(rt))
688 		goto out;
689 
690 	m = rt6_score_route(rt, oif, strict);
691 	if (m == RT6_NUD_FAIL_DO_RR) {
692 		match_do_rr = true;
693 		m = 0; /* lowest valid score */
694 	} else if (m == RT6_NUD_FAIL_HARD) {
695 		goto out;
696 	}
697 
698 	if (strict & RT6_LOOKUP_F_REACHABLE)
699 		rt6_probe(rt);
700 
701 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
702 	if (m > *mpri) {
703 		*do_rr = match_do_rr;
704 		*mpri = m;
705 		match = rt;
706 	}
707 out:
708 	return match;
709 }
710 
711 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
712 				     struct rt6_info *rr_head,
713 				     u32 metric, int oif, int strict,
714 				     bool *do_rr)
715 {
716 	struct rt6_info *rt, *match, *cont;
717 	int mpri = -1;
718 
719 	match = NULL;
720 	cont = NULL;
721 	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
722 		if (rt->rt6i_metric != metric) {
723 			cont = rt;
724 			break;
725 		}
726 
727 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
728 	}
729 
730 	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
731 		if (rt->rt6i_metric != metric) {
732 			cont = rt;
733 			break;
734 		}
735 
736 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
737 	}
738 
739 	if (match || !cont)
740 		return match;
741 
742 	for (rt = cont; rt; rt = rt->dst.rt6_next)
743 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
744 
745 	return match;
746 }
747 
748 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
749 {
750 	struct rt6_info *match, *rt0;
751 	struct net *net;
752 	bool do_rr = false;
753 
754 	rt0 = fn->rr_ptr;
755 	if (!rt0)
756 		fn->rr_ptr = rt0 = fn->leaf;
757 
758 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
759 			     &do_rr);
760 
761 	if (do_rr) {
762 		struct rt6_info *next = rt0->dst.rt6_next;
763 
764 		/* no entries matched; do round-robin */
765 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
766 			next = fn->leaf;
767 
768 		if (next != rt0)
769 			fn->rr_ptr = next;
770 	}
771 
772 	net = dev_net(rt0->dst.dev);
773 	return match ? match : net->ipv6.ip6_null_entry;
774 }
775 
776 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
777 {
778 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
779 }
780 
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process one received route information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Returns -EINVAL for a truncated option, inconsistent length/prefix_len
 * fields or an invalid preference; 0 otherwise.
 *
 * A zero lifetime deletes a matching existing route.  A non-zero lifetime
 * adds the route if missing, or refreshes the flags/preference of the
 * existing one; the expiry is then set from the lifetime, or cleared when
 * the lifetime is not finite.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct net *net = dev_net(dev);
	struct in6_addr masked_prefix, *prefix;
	unsigned long lifetime;
	unsigned int pref;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;

	if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2)
			return -EINVAL;
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1)
			return -EINVAL;
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* full-size option: use the prefix in place */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* shorter option: build the prefix in a local buffer */
		ipv6_addr_prefix(&masked_prefix,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &masked_prefix;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	ip6_rt_put(rt);
	return 0;
}
#endif
856 
857 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
858 					struct in6_addr *saddr)
859 {
860 	struct fib6_node *pn;
861 	while (1) {
862 		if (fn->fn_flags & RTN_TL_ROOT)
863 			return NULL;
864 		pn = fn->parent;
865 		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
866 			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
867 		else
868 			fn = pn;
869 		if (fn->fn_flags & RTN_RTINFO)
870 			return fn;
871 	}
872 }
873 
874 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
875 					     struct fib6_table *table,
876 					     struct flowi6 *fl6, int flags)
877 {
878 	struct fib6_node *fn;
879 	struct rt6_info *rt;
880 
881 	read_lock_bh(&table->tb6_lock);
882 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
883 restart:
884 	rt = fn->leaf;
885 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
886 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
887 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
888 	if (rt == net->ipv6.ip6_null_entry) {
889 		fn = fib6_backtrack(fn, &fl6->saddr);
890 		if (fn)
891 			goto restart;
892 	}
893 	dst_use(&rt->dst, jiffies);
894 	read_unlock_bh(&table->tb6_lock);
895 
896 	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
897 
898 	return rt;
899 
900 }
901 
902 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
903 				    int flags)
904 {
905 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
906 }
907 EXPORT_SYMBOL_GPL(ip6_route_lookup);
908 
909 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
910 			    const struct in6_addr *saddr, int oif, int strict)
911 {
912 	struct flowi6 fl6 = {
913 		.flowi6_oif = oif,
914 		.daddr = *daddr,
915 	};
916 	struct dst_entry *dst;
917 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
918 
919 	if (saddr) {
920 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
921 		flags |= RT6_LOOKUP_F_HAS_SADDR;
922 	}
923 
924 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
925 	if (dst->error == 0)
926 		return (struct rt6_info *) dst;
927 
928 	dst_release(dst);
929 
930 	return NULL;
931 }
932 EXPORT_SYMBOL(rt6_lookup);
933 
/* ip6_ins_rt() must be called WITHOUT table->tb6_lock held; the lock is
 * taken here.  It takes a new route entry; if the addition fails for any
 * reason, the route is freed.  In any case, if the caller does not hold a
 * reference on the route, it may be destroyed.
 */
939 
940 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
941 			struct mx6_config *mxc)
942 {
943 	int err;
944 	struct fib6_table *table;
945 
946 	table = rt->rt6i_table;
947 	write_lock_bh(&table->tb6_lock);
948 	err = fib6_add(&table->tb6_root, rt, info, mxc);
949 	write_unlock_bh(&table->tb6_lock);
950 
951 	return err;
952 }
953 
954 int ip6_ins_rt(struct rt6_info *rt)
955 {
956 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
957 	struct mx6_config mxc = { .mx = NULL, };
958 
959 	return __ip6_ins_rt(rt, &info, &mxc);
960 }
961 
962 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
963 					   const struct in6_addr *daddr,
964 					   const struct in6_addr *saddr)
965 {
966 	struct rt6_info *rt;
967 
968 	/*
969 	 *	Clone the route.
970 	 */
971 
972 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
973 		ort = (struct rt6_info *)ort->dst.from;
974 
975 	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
976 
977 	if (!rt)
978 		return NULL;
979 
980 	ip6_rt_copy_init(rt, ort);
981 	rt->rt6i_flags |= RTF_CACHE;
982 	rt->rt6i_metric = 0;
983 	rt->dst.flags |= DST_HOST;
984 	rt->rt6i_dst.addr = *daddr;
985 	rt->rt6i_dst.plen = 128;
986 
987 	if (!rt6_is_gw_or_nonexthop(ort)) {
988 		if (ort->rt6i_dst.plen != 128 &&
989 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
990 			rt->rt6i_flags |= RTF_ANYCAST;
991 #ifdef CONFIG_IPV6_SUBTREES
992 		if (rt->rt6i_src.plen && saddr) {
993 			rt->rt6i_src.addr = *saddr;
994 			rt->rt6i_src.plen = 128;
995 		}
996 #endif
997 	}
998 
999 	return rt;
1000 }
1001 
1002 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1003 {
1004 	struct rt6_info *pcpu_rt;
1005 
1006 	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
1007 				  rt->dst.dev, rt->dst.flags);
1008 
1009 	if (!pcpu_rt)
1010 		return NULL;
1011 	ip6_rt_copy_init(pcpu_rt, rt);
1012 	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1013 	pcpu_rt->rt6i_flags |= RTF_PCPU;
1014 	return pcpu_rt;
1015 }
1016 
1017 /* It should be called with read_lock_bh(&tb6_lock) acquired */
1018 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1019 {
1020 	struct rt6_info *pcpu_rt, **p;
1021 
1022 	p = this_cpu_ptr(rt->rt6i_pcpu);
1023 	pcpu_rt = *p;
1024 
1025 	if (pcpu_rt) {
1026 		dst_hold(&pcpu_rt->dst);
1027 		rt6_dst_from_metrics_check(pcpu_rt);
1028 	}
1029 	return pcpu_rt;
1030 }
1031 
1032 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1033 {
1034 	struct fib6_table *table = rt->rt6i_table;
1035 	struct rt6_info *pcpu_rt, *prev, **p;
1036 
1037 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1038 	if (!pcpu_rt) {
1039 		struct net *net = dev_net(rt->dst.dev);
1040 
1041 		dst_hold(&net->ipv6.ip6_null_entry->dst);
1042 		return net->ipv6.ip6_null_entry;
1043 	}
1044 
1045 	read_lock_bh(&table->tb6_lock);
1046 	if (rt->rt6i_pcpu) {
1047 		p = this_cpu_ptr(rt->rt6i_pcpu);
1048 		prev = cmpxchg(p, NULL, pcpu_rt);
1049 		if (prev) {
1050 			/* If someone did it before us, return prev instead */
1051 			dst_destroy(&pcpu_rt->dst);
1052 			pcpu_rt = prev;
1053 		}
1054 	} else {
1055 		/* rt has been removed from the fib6 tree
1056 		 * before we have a chance to acquire the read_lock.
1057 		 * In this case, don't brother to create a pcpu rt
1058 		 * since rt is going away anyway.  The next
1059 		 * dst_check() will trigger a re-lookup.
1060 		 */
1061 		dst_destroy(&pcpu_rt->dst);
1062 		pcpu_rt = rt;
1063 	}
1064 	dst_hold(&pcpu_rt->dst);
1065 	rt6_dst_from_metrics_check(pcpu_rt);
1066 	read_unlock_bh(&table->tb6_lock);
1067 	return pcpu_rt;
1068 }
1069 
/*
 * Main policy route lookup.
 *
 * @net:   network namespace
 * @table: FIB table to search
 * @oif:   interface index to match (forced to 0 when the flow carries
 *         FLOWI_FLAG_SKIP_NH_OIF)
 * @fl6:   flow being routed
 * @flags: RT6_LOOKUP_F_* flags; only IFACE and IGNORE_LINKSTATE are
 *         carried into the selection
 *
 * Returns one of the following:
 *   - the null entry or an RTF_CACHE route found in the tree,
 *   - an uncached RTF_CACHE clone (FLOWI_FLAG_KNOWN_NH on a non-gateway
 *     route), or the null entry if that clone cannot be allocated,
 *   - otherwise the per-CPU copy of the selected route.
 */
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
			       int oif, struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
	/* with forwarding disabled, prefer routers that look reachable */
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	/* remembered so the search can restart without the REACHABLE flag */
	saved_fn = fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		/* nothing at this node: walk up, then relax reachability */
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}


	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		/* null entry or cached route: hand it back directly */
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);

		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		/* keep rt referenced while cloning outside the table lock */
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt)
			rt6_uncached_list_add(uncached_rt);
		else
			uncached_rt = net->ipv6.ip6_null_entry;

		dst_hold(&uncached_rt->dst);

		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		/* usage accounting on the tree route, no reference taken */
		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
		return pcpu_rt;

	}
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
1169 
1170 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1171 					    struct flowi6 *fl6, int flags)
1172 {
1173 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1174 }
1175 
1176 struct dst_entry *ip6_route_input_lookup(struct net *net,
1177 					 struct net_device *dev,
1178 					 struct flowi6 *fl6, int flags)
1179 {
1180 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1181 		flags |= RT6_LOOKUP_F_IFACE;
1182 
1183 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1184 }
1185 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1186 
1187 void ip6_route_input(struct sk_buff *skb)
1188 {
1189 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1190 	struct net *net = dev_net(skb->dev);
1191 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1192 	struct ip_tunnel_info *tun_info;
1193 	struct flowi6 fl6 = {
1194 		.flowi6_iif = skb->dev->ifindex,
1195 		.daddr = iph->daddr,
1196 		.saddr = iph->saddr,
1197 		.flowlabel = ip6_flowinfo(iph),
1198 		.flowi6_mark = skb->mark,
1199 		.flowi6_proto = iph->nexthdr,
1200 	};
1201 
1202 	tun_info = skb_tunnel_info(skb);
1203 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1204 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1205 	skb_dst_drop(skb);
1206 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1207 }
1208 
1209 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1210 					     struct flowi6 *fl6, int flags)
1211 {
1212 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1213 }
1214 
1215 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1216 					 struct flowi6 *fl6, int flags)
1217 {
1218 	bool any_src;
1219 
1220 	if (rt6_need_strict(&fl6->daddr)) {
1221 		struct dst_entry *dst;
1222 
1223 		dst = l3mdev_link_scope_lookup(net, fl6);
1224 		if (dst)
1225 			return dst;
1226 	}
1227 
1228 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1229 
1230 	any_src = ipv6_addr_any(&fl6->saddr);
1231 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1232 	    (fl6->flowi6_oif && any_src))
1233 		flags |= RT6_LOOKUP_F_IFACE;
1234 
1235 	if (!any_src)
1236 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1237 	else if (sk)
1238 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1239 
1240 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1241 }
1242 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1243 
1244 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1245 {
1246 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1247 	struct dst_entry *new = NULL;
1248 
1249 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1250 	if (rt) {
1251 		rt6_info_init(rt);
1252 
1253 		new = &rt->dst;
1254 		new->__use = 1;
1255 		new->input = dst_discard;
1256 		new->output = dst_discard_out;
1257 
1258 		dst_copy_metrics(new, &ort->dst);
1259 		rt->rt6i_idev = ort->rt6i_idev;
1260 		if (rt->rt6i_idev)
1261 			in6_dev_hold(rt->rt6i_idev);
1262 
1263 		rt->rt6i_gateway = ort->rt6i_gateway;
1264 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1265 		rt->rt6i_metric = 0;
1266 
1267 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1268 #ifdef CONFIG_IPV6_SUBTREES
1269 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1270 #endif
1271 
1272 		dst_free(new);
1273 	}
1274 
1275 	dst_release(dst_orig);
1276 	return new ? new : ERR_PTR(-ENOMEM);
1277 }
1278 
1279 /*
1280  *	Destination cache support functions
1281  */
1282 
1283 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1284 {
1285 	if (rt->dst.from &&
1286 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1287 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1288 }
1289 
1290 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1291 {
1292 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1293 		return NULL;
1294 
1295 	if (rt6_check_expired(rt))
1296 		return NULL;
1297 
1298 	return &rt->dst;
1299 }
1300 
1301 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1302 {
1303 	if (!__rt6_check_expired(rt) &&
1304 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1305 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1306 		return &rt->dst;
1307 	else
1308 		return NULL;
1309 }
1310 
1311 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1312 {
1313 	struct rt6_info *rt;
1314 
1315 	rt = (struct rt6_info *) dst;
1316 
1317 	/* All IPV6 dsts are created with ->obsolete set to the value
1318 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1319 	 * into this function always.
1320 	 */
1321 
1322 	rt6_dst_from_metrics_check(rt);
1323 
1324 	if (rt->rt6i_flags & RTF_PCPU ||
1325 	    (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1326 		return rt6_dst_from_check(rt, cookie);
1327 	else
1328 		return rt6_check(rt, cookie);
1329 }
1330 
1331 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1332 {
1333 	struct rt6_info *rt = (struct rt6_info *) dst;
1334 
1335 	if (rt) {
1336 		if (rt->rt6i_flags & RTF_CACHE) {
1337 			if (rt6_check_expired(rt)) {
1338 				ip6_del_rt(rt);
1339 				dst = NULL;
1340 			}
1341 		} else {
1342 			dst_release(dst);
1343 			dst = NULL;
1344 		}
1345 	}
1346 	return dst;
1347 }
1348 
1349 static void ip6_link_failure(struct sk_buff *skb)
1350 {
1351 	struct rt6_info *rt;
1352 
1353 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1354 
1355 	rt = (struct rt6_info *) skb_dst(skb);
1356 	if (rt) {
1357 		if (rt->rt6i_flags & RTF_CACHE) {
1358 			dst_hold(&rt->dst);
1359 			ip6_del_rt(rt);
1360 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1361 			rt->rt6i_node->fn_sernum = -1;
1362 		}
1363 	}
1364 }
1365 
1366 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1367 {
1368 	struct net *net = dev_net(rt->dst.dev);
1369 
1370 	rt->rt6i_flags |= RTF_MODIFIED;
1371 	rt->rt6i_pmtu = mtu;
1372 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1373 }
1374 
1375 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1376 {
1377 	return !(rt->rt6i_flags & RTF_CACHE) &&
1378 		(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1379 }
1380 
1381 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1382 				 const struct ipv6hdr *iph, u32 mtu)
1383 {
1384 	const struct in6_addr *daddr, *saddr;
1385 	struct rt6_info *rt6 = (struct rt6_info *)dst;
1386 
1387 	if (rt6->rt6i_flags & RTF_LOCAL)
1388 		return;
1389 
1390 	if (dst_metric_locked(dst, RTAX_MTU))
1391 		return;
1392 
1393 	if (iph) {
1394 		daddr = &iph->daddr;
1395 		saddr = &iph->saddr;
1396 	} else if (sk) {
1397 		daddr = &sk->sk_v6_daddr;
1398 		saddr = &inet6_sk(sk)->saddr;
1399 	} else {
1400 		daddr = NULL;
1401 		saddr = NULL;
1402 	}
1403 	dst_confirm_neigh(dst, daddr);
1404 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1405 	if (mtu >= dst_mtu(dst))
1406 		return;
1407 
1408 	if (!rt6_cache_allowed_for_pmtu(rt6)) {
1409 		rt6_do_update_pmtu(rt6, mtu);
1410 	} else if (daddr) {
1411 		struct rt6_info *nrt6;
1412 
1413 		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1414 		if (nrt6) {
1415 			rt6_do_update_pmtu(nrt6, mtu);
1416 
1417 			/* ip6_ins_rt(nrt6) will bump the
1418 			 * rt6->rt6i_node->fn_sernum
1419 			 * which will fail the next rt6_check() and
1420 			 * invalidate the sk->sk_dst_cache.
1421 			 */
1422 			ip6_ins_rt(nrt6);
1423 		}
1424 	}
1425 }
1426 
1427 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1428 			       struct sk_buff *skb, u32 mtu)
1429 {
1430 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1431 }
1432 
1433 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1434 		     int oif, u32 mark, kuid_t uid)
1435 {
1436 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1437 	struct dst_entry *dst;
1438 	struct flowi6 fl6;
1439 
1440 	memset(&fl6, 0, sizeof(fl6));
1441 	fl6.flowi6_oif = oif;
1442 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1443 	fl6.daddr = iph->daddr;
1444 	fl6.saddr = iph->saddr;
1445 	fl6.flowlabel = ip6_flowinfo(iph);
1446 	fl6.flowi6_uid = uid;
1447 
1448 	dst = ip6_route_output(net, NULL, &fl6);
1449 	if (!dst->error)
1450 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1451 	dst_release(dst);
1452 }
1453 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1454 
1455 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1456 {
1457 	struct dst_entry *dst;
1458 
1459 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1460 			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
1461 
1462 	dst = __sk_dst_get(sk);
1463 	if (!dst || !dst->obsolete ||
1464 	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1465 		return;
1466 
1467 	bh_lock_sock(sk);
1468 	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1469 		ip6_datagram_dst_update(sk, false);
1470 	bh_unlock_sock(sk);
1471 }
1472 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1473 
1474 /* Handle redirects */
/* Flow key used for redirect lookups: a flowi6 extended with the address
 * of the router that sent the redirect.
 *
 * fl6 must stay the first member: __ip6_route_redirect() receives a
 * pointer to fl6 and casts it back to struct ip6rd_flowi.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;		/* regular flow description */
	struct in6_addr gateway;	/* compared against rt6i_gateway */
};
1479 
1480 static struct rt6_info *__ip6_route_redirect(struct net *net,
1481 					     struct fib6_table *table,
1482 					     struct flowi6 *fl6,
1483 					     int flags)
1484 {
1485 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1486 	struct rt6_info *rt;
1487 	struct fib6_node *fn;
1488 
1489 	/* Get the "current" route for this destination and
1490 	 * check if the redirect has come from appropriate router.
1491 	 *
1492 	 * RFC 4861 specifies that redirects should only be
1493 	 * accepted if they come from the nexthop to the target.
1494 	 * Due to the way the routes are chosen, this notion
1495 	 * is a bit fuzzy and one might need to check all possible
1496 	 * routes.
1497 	 */
1498 
1499 	read_lock_bh(&table->tb6_lock);
1500 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1501 restart:
1502 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1503 		if (rt6_check_expired(rt))
1504 			continue;
1505 		if (rt->dst.error)
1506 			break;
1507 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1508 			continue;
1509 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1510 			continue;
1511 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1512 			continue;
1513 		break;
1514 	}
1515 
1516 	if (!rt)
1517 		rt = net->ipv6.ip6_null_entry;
1518 	else if (rt->dst.error) {
1519 		rt = net->ipv6.ip6_null_entry;
1520 		goto out;
1521 	}
1522 
1523 	if (rt == net->ipv6.ip6_null_entry) {
1524 		fn = fib6_backtrack(fn, &fl6->saddr);
1525 		if (fn)
1526 			goto restart;
1527 	}
1528 
1529 out:
1530 	dst_hold(&rt->dst);
1531 
1532 	read_unlock_bh(&table->tb6_lock);
1533 
1534 	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1535 	return rt;
1536 };
1537 
1538 static struct dst_entry *ip6_route_redirect(struct net *net,
1539 					const struct flowi6 *fl6,
1540 					const struct in6_addr *gateway)
1541 {
1542 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1543 	struct ip6rd_flowi rdfl;
1544 
1545 	rdfl.fl6 = *fl6;
1546 	rdfl.gateway = *gateway;
1547 
1548 	return fib6_rule_lookup(net, &rdfl.fl6,
1549 				flags, __ip6_route_redirect);
1550 }
1551 
1552 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1553 		  kuid_t uid)
1554 {
1555 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1556 	struct dst_entry *dst;
1557 	struct flowi6 fl6;
1558 
1559 	memset(&fl6, 0, sizeof(fl6));
1560 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1561 	fl6.flowi6_oif = oif;
1562 	fl6.flowi6_mark = mark;
1563 	fl6.daddr = iph->daddr;
1564 	fl6.saddr = iph->saddr;
1565 	fl6.flowlabel = ip6_flowinfo(iph);
1566 	fl6.flowi6_uid = uid;
1567 
1568 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1569 	rt6_do_redirect(dst, NULL, skb);
1570 	dst_release(dst);
1571 }
1572 EXPORT_SYMBOL_GPL(ip6_redirect);
1573 
1574 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1575 			    u32 mark)
1576 {
1577 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1578 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1579 	struct dst_entry *dst;
1580 	struct flowi6 fl6;
1581 
1582 	memset(&fl6, 0, sizeof(fl6));
1583 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1584 	fl6.flowi6_oif = oif;
1585 	fl6.flowi6_mark = mark;
1586 	fl6.daddr = msg->dest;
1587 	fl6.saddr = iph->daddr;
1588 	fl6.flowi6_uid = sock_net_uid(net, NULL);
1589 
1590 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1591 	rt6_do_redirect(dst, NULL, skb);
1592 	dst_release(dst);
1593 }
1594 
1595 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1596 {
1597 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1598 		     sk->sk_uid);
1599 }
1600 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1601 
1602 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1603 {
1604 	struct net_device *dev = dst->dev;
1605 	unsigned int mtu = dst_mtu(dst);
1606 	struct net *net = dev_net(dev);
1607 
1608 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1609 
1610 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1611 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1612 
1613 	/*
1614 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1615 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1616 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1617 	 * rely only on pmtu discovery"
1618 	 */
1619 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1620 		mtu = IPV6_MAXPLEN;
1621 	return mtu;
1622 }
1623 
1624 static unsigned int ip6_mtu(const struct dst_entry *dst)
1625 {
1626 	const struct rt6_info *rt = (const struct rt6_info *)dst;
1627 	unsigned int mtu = rt->rt6i_pmtu;
1628 	struct inet6_dev *idev;
1629 
1630 	if (mtu)
1631 		goto out;
1632 
1633 	mtu = dst_metric_raw(dst, RTAX_MTU);
1634 	if (mtu)
1635 		goto out;
1636 
1637 	mtu = IPV6_MIN_MTU;
1638 
1639 	rcu_read_lock();
1640 	idev = __in6_dev_get(dst->dev);
1641 	if (idev)
1642 		mtu = idev->cnf.mtu6;
1643 	rcu_read_unlock();
1644 
1645 out:
1646 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1647 
1648 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
1649 }
1650 
/* Singly linked list (chained through dst->next) of the dst entries
 * created by icmp6_dst_alloc().  icmp6_dst_gc() and icmp6_clean_all()
 * unlink and free entries from it.
 */
static struct dst_entry *icmp6_dst_gc_list;
/* Protects icmp6_dst_gc_list; always taken with the _bh variants. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
1653 
1654 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1655 				  struct flowi6 *fl6)
1656 {
1657 	struct dst_entry *dst;
1658 	struct rt6_info *rt;
1659 	struct inet6_dev *idev = in6_dev_get(dev);
1660 	struct net *net = dev_net(dev);
1661 
1662 	if (unlikely(!idev))
1663 		return ERR_PTR(-ENODEV);
1664 
1665 	rt = ip6_dst_alloc(net, dev, 0);
1666 	if (unlikely(!rt)) {
1667 		in6_dev_put(idev);
1668 		dst = ERR_PTR(-ENOMEM);
1669 		goto out;
1670 	}
1671 
1672 	rt->dst.flags |= DST_HOST;
1673 	rt->dst.output  = ip6_output;
1674 	atomic_set(&rt->dst.__refcnt, 1);
1675 	rt->rt6i_gateway  = fl6->daddr;
1676 	rt->rt6i_dst.addr = fl6->daddr;
1677 	rt->rt6i_dst.plen = 128;
1678 	rt->rt6i_idev     = idev;
1679 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1680 
1681 	spin_lock_bh(&icmp6_dst_lock);
1682 	rt->dst.next = icmp6_dst_gc_list;
1683 	icmp6_dst_gc_list = &rt->dst;
1684 	spin_unlock_bh(&icmp6_dst_lock);
1685 
1686 	fib6_force_start_gc(net);
1687 
1688 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1689 
1690 out:
1691 	return dst;
1692 }
1693 
1694 int icmp6_dst_gc(void)
1695 {
1696 	struct dst_entry *dst, **pprev;
1697 	int more = 0;
1698 
1699 	spin_lock_bh(&icmp6_dst_lock);
1700 	pprev = &icmp6_dst_gc_list;
1701 
1702 	while ((dst = *pprev) != NULL) {
1703 		if (!atomic_read(&dst->__refcnt)) {
1704 			*pprev = dst->next;
1705 			dst_free(dst);
1706 		} else {
1707 			pprev = &dst->next;
1708 			++more;
1709 		}
1710 	}
1711 
1712 	spin_unlock_bh(&icmp6_dst_lock);
1713 
1714 	return more;
1715 }
1716 
1717 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1718 			    void *arg)
1719 {
1720 	struct dst_entry *dst, **pprev;
1721 
1722 	spin_lock_bh(&icmp6_dst_lock);
1723 	pprev = &icmp6_dst_gc_list;
1724 	while ((dst = *pprev) != NULL) {
1725 		struct rt6_info *rt = (struct rt6_info *) dst;
1726 		if (func(rt, arg)) {
1727 			*pprev = dst->next;
1728 			dst_free(dst);
1729 		} else {
1730 			pprev = &dst->next;
1731 		}
1732 	}
1733 	spin_unlock_bh(&icmp6_dst_lock);
1734 }
1735 
1736 static int ip6_dst_gc(struct dst_ops *ops)
1737 {
1738 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1739 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1740 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1741 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1742 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1743 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1744 	int entries;
1745 
1746 	entries = dst_entries_get_fast(ops);
1747 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1748 	    entries <= rt_max_size)
1749 		goto out;
1750 
1751 	net->ipv6.ip6_rt_gc_expire++;
1752 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1753 	entries = dst_entries_get_slow(ops);
1754 	if (entries < ops->gc_thresh)
1755 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1756 out:
1757 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1758 	return entries > rt_max_size;
1759 }
1760 
1761 static int ip6_convert_metrics(struct mx6_config *mxc,
1762 			       const struct fib6_config *cfg)
1763 {
1764 	bool ecn_ca = false;
1765 	struct nlattr *nla;
1766 	int remaining;
1767 	u32 *mp;
1768 
1769 	if (!cfg->fc_mx)
1770 		return 0;
1771 
1772 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1773 	if (unlikely(!mp))
1774 		return -ENOMEM;
1775 
1776 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1777 		int type = nla_type(nla);
1778 		u32 val;
1779 
1780 		if (!type)
1781 			continue;
1782 		if (unlikely(type > RTAX_MAX))
1783 			goto err;
1784 
1785 		if (type == RTAX_CC_ALGO) {
1786 			char tmp[TCP_CA_NAME_MAX];
1787 
1788 			nla_strlcpy(tmp, nla, sizeof(tmp));
1789 			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1790 			if (val == TCP_CA_UNSPEC)
1791 				goto err;
1792 		} else {
1793 			val = nla_get_u32(nla);
1794 		}
1795 		if (type == RTAX_HOPLIMIT && val > 255)
1796 			val = 255;
1797 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1798 			goto err;
1799 
1800 		mp[type - 1] = val;
1801 		__set_bit(type - 1, mxc->mx_valid);
1802 	}
1803 
1804 	if (ecn_ca) {
1805 		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1806 		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1807 	}
1808 
1809 	mxc->mx = mp;
1810 	return 0;
1811  err:
1812 	kfree(mp);
1813 	return -EINVAL;
1814 }
1815 
1816 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1817 					    struct fib6_config *cfg,
1818 					    const struct in6_addr *gw_addr)
1819 {
1820 	struct flowi6 fl6 = {
1821 		.flowi6_oif = cfg->fc_ifindex,
1822 		.daddr = *gw_addr,
1823 		.saddr = cfg->fc_prefsrc,
1824 	};
1825 	struct fib6_table *table;
1826 	struct rt6_info *rt;
1827 	int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
1828 
1829 	table = fib6_get_table(net, cfg->fc_table);
1830 	if (!table)
1831 		return NULL;
1832 
1833 	if (!ipv6_addr_any(&cfg->fc_prefsrc))
1834 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1835 
1836 	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1837 
1838 	/* if table lookup failed, fall back to full lookup */
1839 	if (rt == net->ipv6.ip6_null_entry) {
1840 		ip6_rt_put(rt);
1841 		rt = NULL;
1842 	}
1843 
1844 	return rt;
1845 }
1846 
/* Build (but do not insert) a route from the configuration in @cfg.
 *
 * On success the returned rt6_info has its device, inet6_dev and table
 * filled in, and cfg->fc_nlinfo.nl_net is set to the device's namespace.
 * On failure an ERR_PTR() is returned and every reference taken along
 * the way (device, inet6_dev, the route itself) is dropped.
 *
 * Note that @cfg is modified: a zero fc_metric becomes IP6_RT_PRIO_USER
 * and an RTPROT_UNSPEC fc_protocol becomes RTPROT_BOOT.
 */
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;
	int err = -EINVAL;

	/* RTF_PCPU is an internal flag; can not be set by userspace */
	if (cfg->fc_flags & RTF_PCPU)
		goto out;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		goto out;
#ifndef CONFIG_IPV6_SUBTREES
	/* source prefixes are only accepted with CONFIG_IPV6_SUBTREES */
	if (cfg->fc_src_len)
		goto out;
#endif
	if (cfg->fc_ifindex) {
		/* both lookups take a reference, released at "out" on error */
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		/* Netlink request without NLM_F_CREATE: use the existing
		 * table if there is one, otherwise warn and create it anyway.
		 */
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* pick the input handler from the destination/flag type */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		/* let the tunnel intercept output/input where it asks to,
		 * remembering the handlers it replaces
		 */
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				/* swap the references over to loopback */
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* error code and handlers depend on the reject route type */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_out;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			/* THROW -> -EAGAIN, UNREACHABLE -> -EHOSTUNREACH,
			 * anything else -> -ENETUNREACH
			 */
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0))
			goto out;

		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt = NULL;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			   We allow IPv4-mapped nexthops to support RFC4798-type
			   addressing
			 */
			if (!(gwa_type & (IPV6_ADDR_UNICAST |
					  IPV6_ADDR_MAPPED)))
				goto out;

			if (cfg->fc_table) {
				/* try the route's own table first */
				grt = ip6_nh_lookup_table(net, cfg, gw_addr);

				if (grt) {
					/* discard a result that is itself a
					 * gateway route or sits on another
					 * device than the requested one
					 */
					if (grt->rt6i_flags & RTF_GATEWAY ||
					    (dev && dev != grt->dst.dev)) {
						ip6_rt_put(grt);
						grt = NULL;
					}
				}
			}

			if (!grt)
				grt = rt6_lookup(net, gw_addr, NULL,
						 cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* no device given: adopt the one the gateway
				 * resolves through, with our own references
				 */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* the gateway must be reachable without another
			 * gateway hop; otherwise err stays -EHOSTUNREACH
			 */
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		/* the preferred source must pass ipv6_chk_addr() for dev */
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* the dev/idev references held in the locals move into the route */
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return rt;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);

	return ERR_PTR(err);
}
2113 
2114 int ip6_route_add(struct fib6_config *cfg)
2115 {
2116 	struct mx6_config mxc = { .mx = NULL, };
2117 	struct rt6_info *rt;
2118 	int err;
2119 
2120 	rt = ip6_route_info_create(cfg);
2121 	if (IS_ERR(rt)) {
2122 		err = PTR_ERR(rt);
2123 		rt = NULL;
2124 		goto out;
2125 	}
2126 
2127 	err = ip6_convert_metrics(&mxc, cfg);
2128 	if (err)
2129 		goto out;
2130 
2131 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2132 
2133 	kfree(mxc.mx);
2134 
2135 	return err;
2136 out:
2137 	if (rt)
2138 		dst_free(&rt->dst);
2139 
2140 	return err;
2141 }
2142 
2143 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2144 {
2145 	int err;
2146 	struct fib6_table *table;
2147 	struct net *net = dev_net(rt->dst.dev);
2148 
2149 	if (rt == net->ipv6.ip6_null_entry ||
2150 	    rt->dst.flags & DST_NOCACHE) {
2151 		err = -ENOENT;
2152 		goto out;
2153 	}
2154 
2155 	table = rt->rt6i_table;
2156 	write_lock_bh(&table->tb6_lock);
2157 	err = fib6_del(rt, info);
2158 	write_unlock_bh(&table->tb6_lock);
2159 
2160 out:
2161 	ip6_rt_put(rt);
2162 	return err;
2163 }
2164 
2165 int ip6_del_rt(struct rt6_info *rt)
2166 {
2167 	struct nl_info info = {
2168 		.nl_net = dev_net(rt->dst.dev),
2169 	};
2170 	return __ip6_del_rt(rt, &info);
2171 }
2172 
/* Delete @rt and, when cfg->fc_delete_all_nh is set and @rt has
 * siblings, all of its sibling routes as well.
 *
 * The caller's reference on @rt is dropped in every case.  Returns the
 * result of the last fib6_del() call, or -ENOENT when @rt is the null
 * entry.
 */
static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
{
	struct nl_info *info = &cfg->fc_nlinfo;
	struct net *net = info->nl_net;
	struct sk_buff *skb = NULL;
	struct fib6_table *table;
	int err = -ENOENT;

	if (rt == net->ipv6.ip6_null_entry)
		goto out_put;
	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);

	if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
		struct rt6_info *sibling, *next_sibling;

		/* prefer to send a single notification with all hops */
		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
		if (skb) {
			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;

			if (rt6_fill_node(net, skb, rt,
					  NULL, NULL, 0, RTM_DELROUTE,
					  info->portid, seq, 0) < 0) {
				/* could not build the combined message; with
				 * skip_notify left unset the individual
				 * deletions are not told to skip notifying
				 */
				kfree_skb(skb);
				skb = NULL;
			} else
				info->skip_notify = 1;
		}

		/* the _safe iterator is used because entries are deleted
		 * while walking the sibling list
		 */
		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->rt6i_siblings,
					 rt6i_siblings) {
			err = fib6_del(sibling, info);
			if (err)
				goto out_unlock;
		}
	}

	err = fib6_del(rt, info);
out_unlock:
	write_unlock_bh(&table->tb6_lock);
out_put:
	ip6_rt_put(rt);

	/* the combined notification goes out after the lock is released */
	if (skb) {
		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
			    info->nlh, gfp_any());
	}
	return err;
}
2224 
2225 static int ip6_route_del(struct fib6_config *cfg)
2226 {
2227 	struct fib6_table *table;
2228 	struct fib6_node *fn;
2229 	struct rt6_info *rt;
2230 	int err = -ESRCH;
2231 
2232 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2233 	if (!table)
2234 		return err;
2235 
2236 	read_lock_bh(&table->tb6_lock);
2237 
2238 	fn = fib6_locate(&table->tb6_root,
2239 			 &cfg->fc_dst, cfg->fc_dst_len,
2240 			 &cfg->fc_src, cfg->fc_src_len);
2241 
2242 	if (fn) {
2243 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2244 			if ((rt->rt6i_flags & RTF_CACHE) &&
2245 			    !(cfg->fc_flags & RTF_CACHE))
2246 				continue;
2247 			if (cfg->fc_ifindex &&
2248 			    (!rt->dst.dev ||
2249 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
2250 				continue;
2251 			if (cfg->fc_flags & RTF_GATEWAY &&
2252 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2253 				continue;
2254 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2255 				continue;
2256 			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2257 				continue;
2258 			dst_hold(&rt->dst);
2259 			read_unlock_bh(&table->tb6_lock);
2260 
2261 			/* if gateway was specified only delete the one hop */
2262 			if (cfg->fc_flags & RTF_GATEWAY)
2263 				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2264 
2265 			return __ip6_del_rt_siblings(rt, cfg);
2266 		}
2267 	}
2268 	read_unlock_bh(&table->tb6_lock);
2269 
2270 	return err;
2271 }
2272 
2273 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2274 {
2275 	struct netevent_redirect netevent;
2276 	struct rt6_info *rt, *nrt = NULL;
2277 	struct ndisc_options ndopts;
2278 	struct inet6_dev *in6_dev;
2279 	struct neighbour *neigh;
2280 	struct rd_msg *msg;
2281 	int optlen, on_link;
2282 	u8 *lladdr;
2283 
2284 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2285 	optlen -= sizeof(*msg);
2286 
2287 	if (optlen < 0) {
2288 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2289 		return;
2290 	}
2291 
2292 	msg = (struct rd_msg *)icmp6_hdr(skb);
2293 
2294 	if (ipv6_addr_is_multicast(&msg->dest)) {
2295 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2296 		return;
2297 	}
2298 
2299 	on_link = 0;
2300 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2301 		on_link = 1;
2302 	} else if (ipv6_addr_type(&msg->target) !=
2303 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2304 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2305 		return;
2306 	}
2307 
2308 	in6_dev = __in6_dev_get(skb->dev);
2309 	if (!in6_dev)
2310 		return;
2311 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2312 		return;
2313 
2314 	/* RFC2461 8.1:
2315 	 *	The IP source address of the Redirect MUST be the same as the current
2316 	 *	first-hop router for the specified ICMP Destination Address.
2317 	 */
2318 
2319 	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
2320 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2321 		return;
2322 	}
2323 
2324 	lladdr = NULL;
2325 	if (ndopts.nd_opts_tgt_lladdr) {
2326 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2327 					     skb->dev);
2328 		if (!lladdr) {
2329 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2330 			return;
2331 		}
2332 	}
2333 
2334 	rt = (struct rt6_info *) dst;
2335 	if (rt->rt6i_flags & RTF_REJECT) {
2336 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2337 		return;
2338 	}
2339 
2340 	/* Redirect received -> path was valid.
2341 	 * Look, redirects are sent only in response to data packets,
2342 	 * so that this nexthop apparently is reachable. --ANK
2343 	 */
2344 	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
2345 
2346 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2347 	if (!neigh)
2348 		return;
2349 
2350 	/*
2351 	 *	We have finally decided to accept it.
2352 	 */
2353 
2354 	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
2355 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
2356 		     NEIGH_UPDATE_F_OVERRIDE|
2357 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2358 				     NEIGH_UPDATE_F_ISROUTER)),
2359 		     NDISC_REDIRECT, &ndopts);
2360 
2361 	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2362 	if (!nrt)
2363 		goto out;
2364 
2365 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2366 	if (on_link)
2367 		nrt->rt6i_flags &= ~RTF_GATEWAY;
2368 
2369 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2370 
2371 	if (ip6_ins_rt(nrt))
2372 		goto out;
2373 
2374 	netevent.old = &rt->dst;
2375 	netevent.new = &nrt->dst;
2376 	netevent.daddr = &msg->dest;
2377 	netevent.neigh = neigh;
2378 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2379 
2380 	if (rt->rt6i_flags & RTF_CACHE) {
2381 		rt = (struct rt6_info *) dst_clone(&rt->dst);
2382 		ip6_del_rt(rt);
2383 	}
2384 
2385 out:
2386 	neigh_release(neigh);
2387 }
2388 
2389 /*
2390  *	Misc support functions
2391  */
2392 
2393 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2394 {
2395 	BUG_ON(from->dst.from);
2396 
2397 	rt->rt6i_flags &= ~RTF_EXPIRES;
2398 	dst_hold(&from->dst);
2399 	rt->dst.from = &from->dst;
2400 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2401 }
2402 
2403 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2404 {
2405 	rt->dst.input = ort->dst.input;
2406 	rt->dst.output = ort->dst.output;
2407 	rt->rt6i_dst = ort->rt6i_dst;
2408 	rt->dst.error = ort->dst.error;
2409 	rt->rt6i_idev = ort->rt6i_idev;
2410 	if (rt->rt6i_idev)
2411 		in6_dev_hold(rt->rt6i_idev);
2412 	rt->dst.lastuse = jiffies;
2413 	rt->rt6i_gateway = ort->rt6i_gateway;
2414 	rt->rt6i_flags = ort->rt6i_flags;
2415 	rt6_set_from(rt, ort);
2416 	rt->rt6i_metric = ort->rt6i_metric;
2417 #ifdef CONFIG_IPV6_SUBTREES
2418 	rt->rt6i_src = ort->rt6i_src;
2419 #endif
2420 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2421 	rt->rt6i_table = ort->rt6i_table;
2422 	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2423 }
2424 
2425 #ifdef CONFIG_IPV6_ROUTE_INFO
2426 static struct rt6_info *rt6_get_route_info(struct net *net,
2427 					   const struct in6_addr *prefix, int prefixlen,
2428 					   const struct in6_addr *gwaddr,
2429 					   struct net_device *dev)
2430 {
2431 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
2432 	int ifindex = dev->ifindex;
2433 	struct fib6_node *fn;
2434 	struct rt6_info *rt = NULL;
2435 	struct fib6_table *table;
2436 
2437 	table = fib6_get_table(net, tb_id);
2438 	if (!table)
2439 		return NULL;
2440 
2441 	read_lock_bh(&table->tb6_lock);
2442 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2443 	if (!fn)
2444 		goto out;
2445 
2446 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2447 		if (rt->dst.dev->ifindex != ifindex)
2448 			continue;
2449 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2450 			continue;
2451 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2452 			continue;
2453 		dst_hold(&rt->dst);
2454 		break;
2455 	}
2456 out:
2457 	read_unlock_bh(&table->tb6_lock);
2458 	return rt;
2459 }
2460 
2461 static struct rt6_info *rt6_add_route_info(struct net *net,
2462 					   const struct in6_addr *prefix, int prefixlen,
2463 					   const struct in6_addr *gwaddr,
2464 					   struct net_device *dev,
2465 					   unsigned int pref)
2466 {
2467 	struct fib6_config cfg = {
2468 		.fc_metric	= IP6_RT_PRIO_USER,
2469 		.fc_ifindex	= dev->ifindex,
2470 		.fc_dst_len	= prefixlen,
2471 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2472 				  RTF_UP | RTF_PREF(pref),
2473 		.fc_nlinfo.portid = 0,
2474 		.fc_nlinfo.nlh = NULL,
2475 		.fc_nlinfo.nl_net = net,
2476 	};
2477 
2478 	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
2479 	cfg.fc_dst = *prefix;
2480 	cfg.fc_gateway = *gwaddr;
2481 
2482 	/* We should treat it as a default route if prefix length is 0. */
2483 	if (!prefixlen)
2484 		cfg.fc_flags |= RTF_DEFAULT;
2485 
2486 	ip6_route_add(&cfg);
2487 
2488 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
2489 }
2490 #endif
2491 
2492 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2493 {
2494 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
2495 	struct rt6_info *rt;
2496 	struct fib6_table *table;
2497 
2498 	table = fib6_get_table(dev_net(dev), tb_id);
2499 	if (!table)
2500 		return NULL;
2501 
2502 	read_lock_bh(&table->tb6_lock);
2503 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2504 		if (dev == rt->dst.dev &&
2505 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2506 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2507 			break;
2508 	}
2509 	if (rt)
2510 		dst_hold(&rt->dst);
2511 	read_unlock_bh(&table->tb6_lock);
2512 	return rt;
2513 }
2514 
2515 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2516 				     struct net_device *dev,
2517 				     unsigned int pref)
2518 {
2519 	struct fib6_config cfg = {
2520 		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2521 		.fc_metric	= IP6_RT_PRIO_USER,
2522 		.fc_ifindex	= dev->ifindex,
2523 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2524 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2525 		.fc_nlinfo.portid = 0,
2526 		.fc_nlinfo.nlh = NULL,
2527 		.fc_nlinfo.nl_net = dev_net(dev),
2528 	};
2529 
2530 	cfg.fc_gateway = *gwaddr;
2531 
2532 	if (!ip6_route_add(&cfg)) {
2533 		struct fib6_table *table;
2534 
2535 		table = fib6_get_table(dev_net(dev), cfg.fc_table);
2536 		if (table)
2537 			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
2538 	}
2539 
2540 	return rt6_get_dflt_router(gwaddr, dev);
2541 }
2542 
2543 static void __rt6_purge_dflt_routers(struct fib6_table *table)
2544 {
2545 	struct rt6_info *rt;
2546 
2547 restart:
2548 	read_lock_bh(&table->tb6_lock);
2549 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2550 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2551 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2552 			dst_hold(&rt->dst);
2553 			read_unlock_bh(&table->tb6_lock);
2554 			ip6_del_rt(rt);
2555 			goto restart;
2556 		}
2557 	}
2558 	read_unlock_bh(&table->tb6_lock);
2559 
2560 	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
2561 }
2562 
2563 void rt6_purge_dflt_routers(struct net *net)
2564 {
2565 	struct fib6_table *table;
2566 	struct hlist_head *head;
2567 	unsigned int h;
2568 
2569 	rcu_read_lock();
2570 
2571 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
2572 		head = &net->ipv6.fib_table_hash[h];
2573 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
2574 			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
2575 				__rt6_purge_dflt_routers(table);
2576 		}
2577 	}
2578 
2579 	rcu_read_unlock();
2580 }
2581 
2582 static void rtmsg_to_fib6_config(struct net *net,
2583 				 struct in6_rtmsg *rtmsg,
2584 				 struct fib6_config *cfg)
2585 {
2586 	memset(cfg, 0, sizeof(*cfg));
2587 
2588 	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2589 			 : RT6_TABLE_MAIN;
2590 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2591 	cfg->fc_metric = rtmsg->rtmsg_metric;
2592 	cfg->fc_expires = rtmsg->rtmsg_info;
2593 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2594 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2595 	cfg->fc_flags = rtmsg->rtmsg_flags;
2596 
2597 	cfg->fc_nlinfo.nl_net = net;
2598 
2599 	cfg->fc_dst = rtmsg->rtmsg_dst;
2600 	cfg->fc_src = rtmsg->rtmsg_src;
2601 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2602 }
2603 
2604 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2605 {
2606 	struct fib6_config cfg;
2607 	struct in6_rtmsg rtmsg;
2608 	int err;
2609 
2610 	switch (cmd) {
2611 	case SIOCADDRT:		/* Add a route */
2612 	case SIOCDELRT:		/* Delete a route */
2613 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2614 			return -EPERM;
2615 		err = copy_from_user(&rtmsg, arg,
2616 				     sizeof(struct in6_rtmsg));
2617 		if (err)
2618 			return -EFAULT;
2619 
2620 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2621 
2622 		rtnl_lock();
2623 		switch (cmd) {
2624 		case SIOCADDRT:
2625 			err = ip6_route_add(&cfg);
2626 			break;
2627 		case SIOCDELRT:
2628 			err = ip6_route_del(&cfg);
2629 			break;
2630 		default:
2631 			err = -EINVAL;
2632 		}
2633 		rtnl_unlock();
2634 
2635 		return err;
2636 	}
2637 
2638 	return -EINVAL;
2639 }
2640 
2641 /*
2642  *	Drop the packet on the floor
2643  */
2644 
2645 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2646 {
2647 	int type;
2648 	struct dst_entry *dst = skb_dst(skb);
2649 	switch (ipstats_mib_noroutes) {
2650 	case IPSTATS_MIB_INNOROUTES:
2651 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2652 		if (type == IPV6_ADDR_ANY) {
2653 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2654 				      IPSTATS_MIB_INADDRERRORS);
2655 			break;
2656 		}
2657 		/* FALLTHROUGH */
2658 	case IPSTATS_MIB_OUTNOROUTES:
2659 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2660 			      ipstats_mib_noroutes);
2661 		break;
2662 	}
2663 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2664 	kfree_skb(skb);
2665 	return 0;
2666 }
2667 
2668 static int ip6_pkt_discard(struct sk_buff *skb)
2669 {
2670 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2671 }
2672 
2673 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2674 {
2675 	skb->dev = skb_dst(skb)->dev;
2676 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2677 }
2678 
2679 static int ip6_pkt_prohibit(struct sk_buff *skb)
2680 {
2681 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2682 }
2683 
2684 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2685 {
2686 	skb->dev = skb_dst(skb)->dev;
2687 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2688 }
2689 
2690 /*
2691  *	Allocate a dst for local (unicast / anycast) address.
2692  */
2693 
2694 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2695 				    const struct in6_addr *addr,
2696 				    bool anycast)
2697 {
2698 	u32 tb_id;
2699 	struct net *net = dev_net(idev->dev);
2700 	struct net_device *dev = net->loopback_dev;
2701 	struct rt6_info *rt;
2702 
2703 	/* use L3 Master device as loopback for host routes if device
2704 	 * is enslaved and address is not link local or multicast
2705 	 */
2706 	if (!rt6_need_strict(addr))
2707 		dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
2708 
2709 	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
2710 	if (!rt)
2711 		return ERR_PTR(-ENOMEM);
2712 
2713 	in6_dev_hold(idev);
2714 
2715 	rt->dst.flags |= DST_HOST;
2716 	rt->dst.input = ip6_input;
2717 	rt->dst.output = ip6_output;
2718 	rt->rt6i_idev = idev;
2719 
2720 	rt->rt6i_protocol = RTPROT_KERNEL;
2721 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2722 	if (anycast)
2723 		rt->rt6i_flags |= RTF_ANYCAST;
2724 	else
2725 		rt->rt6i_flags |= RTF_LOCAL;
2726 
2727 	rt->rt6i_gateway  = *addr;
2728 	rt->rt6i_dst.addr = *addr;
2729 	rt->rt6i_dst.plen = 128;
2730 	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2731 	rt->rt6i_table = fib6_get_table(net, tb_id);
2732 	rt->dst.flags |= DST_NOCACHE;
2733 
2734 	atomic_set(&rt->dst.__refcnt, 1);
2735 
2736 	return rt;
2737 }
2738 
/*
 * remove deleted ip from prefsrc entries
 *
 * Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all().
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* only routes on this device; NULL matches any */
	struct net *net;		/* namespace, used to skip its null entry */
	struct in6_addr *addr;		/* address compared with each route's prefsrc */
};
2745 
2746 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2747 {
2748 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2749 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2750 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2751 
2752 	if (((void *)rt->dst.dev == dev || !dev) &&
2753 	    rt != net->ipv6.ip6_null_entry &&
2754 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2755 		/* remove prefsrc entry */
2756 		rt->rt6i_prefsrc.plen = 0;
2757 	}
2758 	return 0;
2759 }
2760 
2761 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2762 {
2763 	struct net *net = dev_net(ifp->idev->dev);
2764 	struct arg_dev_net_ip adni = {
2765 		.dev = ifp->idev->dev,
2766 		.net = net,
2767 		.addr = &ifp->addr,
2768 	};
2769 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2770 }
2771 
2772 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2773 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2774 
2775 /* Remove routers and update dst entries when gateway turn into host. */
2776 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2777 {
2778 	struct in6_addr *gateway = (struct in6_addr *)arg;
2779 
2780 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2781 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2782 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2783 		return -1;
2784 	}
2785 	return 0;
2786 }
2787 
2788 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2789 {
2790 	fib6_clean_all(net, fib6_clean_tohost, gateway);
2791 }
2792 
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace, used to skip its null entry */
};
2797 
2798 /* called with write lock held for table with rt */
2799 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2800 {
2801 	const struct arg_dev_net *adn = arg;
2802 	const struct net_device *dev = adn->dev;
2803 
2804 	if ((rt->dst.dev == dev || !dev) &&
2805 	    rt != adn->net->ipv6.ip6_null_entry &&
2806 	    (rt->rt6i_nsiblings == 0 ||
2807 	     (dev && netdev_unregistering(dev)) ||
2808 	     !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
2809 		return -1;
2810 
2811 	return 0;
2812 }
2813 
2814 void rt6_ifdown(struct net *net, struct net_device *dev)
2815 {
2816 	struct arg_dev_net adn = {
2817 		.dev = dev,
2818 		.net = net,
2819 	};
2820 
2821 	fib6_clean_all(net, fib6_ifdown, &adn);
2822 	icmp6_clean_all(fib6_ifdown, &adn);
2823 	if (dev)
2824 		rt6_uncached_list_flush_dev(net, dev);
2825 }
2826 
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2831 
2832 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2833 {
2834 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2835 	struct inet6_dev *idev;
2836 
2837 	/* In IPv6 pmtu discovery is not optional,
2838 	   so that RTAX_MTU lock cannot disable it.
2839 	   We still use this lock to block changes
2840 	   caused by addrconf/ndisc.
2841 	*/
2842 
2843 	idev = __in6_dev_get(arg->dev);
2844 	if (!idev)
2845 		return 0;
2846 
2847 	/* For administrative MTU increase, there is no way to discover
2848 	   IPv6 PMTU increase, so PMTU increase should be updated here.
2849 	   Since RFC 1981 doesn't include administrative MTU increase
2850 	   update PMTU increase is a MUST. (i.e. jumbo frame)
2851 	 */
2852 	/*
2853 	   If new MTU is less than route PMTU, this new MTU will be the
2854 	   lowest MTU in the path, update the route PMTU to reflect PMTU
2855 	   decreases; if new MTU is greater than route PMTU, and the
2856 	   old MTU is the lowest MTU in the path, update the route PMTU
2857 	   to reflect the increase. In this case if the other nodes' MTU
2858 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2859 	   PMTU discovery.
2860 	 */
2861 	if (rt->dst.dev == arg->dev &&
2862 	    dst_metric_raw(&rt->dst, RTAX_MTU) &&
2863 	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2864 		if (rt->rt6i_flags & RTF_CACHE) {
2865 			/* For RTF_CACHE with rt6i_pmtu == 0
2866 			 * (i.e. a redirected route),
2867 			 * the metrics of its rt->dst.from has already
2868 			 * been updated.
2869 			 */
2870 			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2871 				rt->rt6i_pmtu = arg->mtu;
2872 		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
2873 			   (dst_mtu(&rt->dst) < arg->mtu &&
2874 			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2875 			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2876 		}
2877 	}
2878 	return 0;
2879 }
2880 
2881 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2882 {
2883 	struct rt6_mtu_change_arg arg = {
2884 		.dev = dev,
2885 		.mtu = mtu,
2886 	};
2887 
2888 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2889 }
2890 
2891 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2892 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2893 	[RTA_OIF]               = { .type = NLA_U32 },
2894 	[RTA_IIF]		= { .type = NLA_U32 },
2895 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2896 	[RTA_METRICS]           = { .type = NLA_NESTED },
2897 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2898 	[RTA_PREF]              = { .type = NLA_U8 },
2899 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
2900 	[RTA_ENCAP]		= { .type = NLA_NESTED },
2901 	[RTA_EXPIRES]		= { .type = NLA_U32 },
2902 	[RTA_UID]		= { .type = NLA_U32 },
2903 	[RTA_MARK]		= { .type = NLA_U32 },
2904 };
2905 
2906 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2907 			      struct fib6_config *cfg)
2908 {
2909 	struct rtmsg *rtm;
2910 	struct nlattr *tb[RTA_MAX+1];
2911 	unsigned int pref;
2912 	int err;
2913 
2914 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
2915 			  NULL);
2916 	if (err < 0)
2917 		goto errout;
2918 
2919 	err = -EINVAL;
2920 	rtm = nlmsg_data(nlh);
2921 	memset(cfg, 0, sizeof(*cfg));
2922 
2923 	cfg->fc_table = rtm->rtm_table;
2924 	cfg->fc_dst_len = rtm->rtm_dst_len;
2925 	cfg->fc_src_len = rtm->rtm_src_len;
2926 	cfg->fc_flags = RTF_UP;
2927 	cfg->fc_protocol = rtm->rtm_protocol;
2928 	cfg->fc_type = rtm->rtm_type;
2929 
2930 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2931 	    rtm->rtm_type == RTN_BLACKHOLE ||
2932 	    rtm->rtm_type == RTN_PROHIBIT ||
2933 	    rtm->rtm_type == RTN_THROW)
2934 		cfg->fc_flags |= RTF_REJECT;
2935 
2936 	if (rtm->rtm_type == RTN_LOCAL)
2937 		cfg->fc_flags |= RTF_LOCAL;
2938 
2939 	if (rtm->rtm_flags & RTM_F_CLONED)
2940 		cfg->fc_flags |= RTF_CACHE;
2941 
2942 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2943 	cfg->fc_nlinfo.nlh = nlh;
2944 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2945 
2946 	if (tb[RTA_GATEWAY]) {
2947 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2948 		cfg->fc_flags |= RTF_GATEWAY;
2949 	}
2950 
2951 	if (tb[RTA_DST]) {
2952 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2953 
2954 		if (nla_len(tb[RTA_DST]) < plen)
2955 			goto errout;
2956 
2957 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2958 	}
2959 
2960 	if (tb[RTA_SRC]) {
2961 		int plen = (rtm->rtm_src_len + 7) >> 3;
2962 
2963 		if (nla_len(tb[RTA_SRC]) < plen)
2964 			goto errout;
2965 
2966 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2967 	}
2968 
2969 	if (tb[RTA_PREFSRC])
2970 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2971 
2972 	if (tb[RTA_OIF])
2973 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2974 
2975 	if (tb[RTA_PRIORITY])
2976 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2977 
2978 	if (tb[RTA_METRICS]) {
2979 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2980 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2981 	}
2982 
2983 	if (tb[RTA_TABLE])
2984 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2985 
2986 	if (tb[RTA_MULTIPATH]) {
2987 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2988 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2989 
2990 		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
2991 						     cfg->fc_mp_len);
2992 		if (err < 0)
2993 			goto errout;
2994 	}
2995 
2996 	if (tb[RTA_PREF]) {
2997 		pref = nla_get_u8(tb[RTA_PREF]);
2998 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
2999 		    pref != ICMPV6_ROUTER_PREF_HIGH)
3000 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
3001 		cfg->fc_flags |= RTF_PREF(pref);
3002 	}
3003 
3004 	if (tb[RTA_ENCAP])
3005 		cfg->fc_encap = tb[RTA_ENCAP];
3006 
3007 	if (tb[RTA_ENCAP_TYPE]) {
3008 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
3009 
3010 		err = lwtunnel_valid_encap_type(cfg->fc_encap_type);
3011 		if (err < 0)
3012 			goto errout;
3013 	}
3014 
3015 	if (tb[RTA_EXPIRES]) {
3016 		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
3017 
3018 		if (addrconf_finite_timeout(timeout)) {
3019 			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
3020 			cfg->fc_flags |= RTF_EXPIRES;
3021 		}
3022 	}
3023 
3024 	err = 0;
3025 errout:
3026 	return err;
3027 }
3028 
3029 struct rt6_nh {
3030 	struct rt6_info *rt6_info;
3031 	struct fib6_config r_cfg;
3032 	struct mx6_config mxc;
3033 	struct list_head next;
3034 };
3035 
3036 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3037 {
3038 	struct rt6_nh *nh;
3039 
3040 	list_for_each_entry(nh, rt6_nh_list, next) {
3041 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
3042 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
3043 		        nh->r_cfg.fc_ifindex);
3044 	}
3045 }
3046 
3047 static int ip6_route_info_append(struct list_head *rt6_nh_list,
3048 				 struct rt6_info *rt, struct fib6_config *r_cfg)
3049 {
3050 	struct rt6_nh *nh;
3051 	struct rt6_info *rtnh;
3052 	int err = -EEXIST;
3053 
3054 	list_for_each_entry(nh, rt6_nh_list, next) {
3055 		/* check if rt6_info already exists */
3056 		rtnh = nh->rt6_info;
3057 
3058 		if (rtnh->dst.dev == rt->dst.dev &&
3059 		    rtnh->rt6i_idev == rt->rt6i_idev &&
3060 		    ipv6_addr_equal(&rtnh->rt6i_gateway,
3061 				    &rt->rt6i_gateway))
3062 			return err;
3063 	}
3064 
3065 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
3066 	if (!nh)
3067 		return -ENOMEM;
3068 	nh->rt6_info = rt;
3069 	err = ip6_convert_metrics(&nh->mxc, r_cfg);
3070 	if (err) {
3071 		kfree(nh);
3072 		return err;
3073 	}
3074 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3075 	list_add_tail(&nh->next, rt6_nh_list);
3076 
3077 	return 0;
3078 }
3079 
3080 static void ip6_route_mpath_notify(struct rt6_info *rt,
3081 				   struct rt6_info *rt_last,
3082 				   struct nl_info *info,
3083 				   __u16 nlflags)
3084 {
3085 	/* if this is an APPEND route, then rt points to the first route
3086 	 * inserted and rt_last points to last route inserted. Userspace
3087 	 * wants a consistent dump of the route which starts at the first
3088 	 * nexthop. Since sibling routes are always added at the end of
3089 	 * the list, find the first sibling of the last route appended
3090 	 */
3091 	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
3092 		rt = list_first_entry(&rt_last->rt6i_siblings,
3093 				      struct rt6_info,
3094 				      rt6i_siblings);
3095 	}
3096 
3097 	if (rt)
3098 		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
3099 }
3100 
3101 static int ip6_route_multipath_add(struct fib6_config *cfg)
3102 {
3103 	struct rt6_info *rt_notif = NULL, *rt_last = NULL;
3104 	struct nl_info *info = &cfg->fc_nlinfo;
3105 	struct fib6_config r_cfg;
3106 	struct rtnexthop *rtnh;
3107 	struct rt6_info *rt;
3108 	struct rt6_nh *err_nh;
3109 	struct rt6_nh *nh, *nh_safe;
3110 	__u16 nlflags;
3111 	int remaining;
3112 	int attrlen;
3113 	int err = 1;
3114 	int nhn = 0;
3115 	int replace = (cfg->fc_nlinfo.nlh &&
3116 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3117 	LIST_HEAD(rt6_nh_list);
3118 
3119 	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
3120 	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
3121 		nlflags |= NLM_F_APPEND;
3122 
3123 	remaining = cfg->fc_mp_len;
3124 	rtnh = (struct rtnexthop *)cfg->fc_mp;
3125 
3126 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
3127 	 * rt6_info structs per nexthop
3128 	 */
3129 	while (rtnh_ok(rtnh, remaining)) {
3130 		memcpy(&r_cfg, cfg, sizeof(*cfg));
3131 		if (rtnh->rtnh_ifindex)
3132 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3133 
3134 		attrlen = rtnh_attrlen(rtnh);
3135 		if (attrlen > 0) {
3136 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3137 
3138 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3139 			if (nla) {
3140 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
3141 				r_cfg.fc_flags |= RTF_GATEWAY;
3142 			}
3143 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3144 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3145 			if (nla)
3146 				r_cfg.fc_encap_type = nla_get_u16(nla);
3147 		}
3148 
3149 		rt = ip6_route_info_create(&r_cfg);
3150 		if (IS_ERR(rt)) {
3151 			err = PTR_ERR(rt);
3152 			rt = NULL;
3153 			goto cleanup;
3154 		}
3155 
3156 		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
3157 		if (err) {
3158 			dst_free(&rt->dst);
3159 			goto cleanup;
3160 		}
3161 
3162 		rtnh = rtnh_next(rtnh, &remaining);
3163 	}
3164 
3165 	/* for add and replace send one notification with all nexthops.
3166 	 * Skip the notification in fib6_add_rt2node and send one with
3167 	 * the full route when done
3168 	 */
3169 	info->skip_notify = 1;
3170 
3171 	err_nh = NULL;
3172 	list_for_each_entry(nh, &rt6_nh_list, next) {
3173 		rt_last = nh->rt6_info;
3174 		err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc);
3175 		/* save reference to first route for notification */
3176 		if (!rt_notif && !err)
3177 			rt_notif = nh->rt6_info;
3178 
3179 		/* nh->rt6_info is used or freed at this point, reset to NULL*/
3180 		nh->rt6_info = NULL;
3181 		if (err) {
3182 			if (replace && nhn)
3183 				ip6_print_replace_route_err(&rt6_nh_list);
3184 			err_nh = nh;
3185 			goto add_errout;
3186 		}
3187 
3188 		/* Because each route is added like a single route we remove
3189 		 * these flags after the first nexthop: if there is a collision,
3190 		 * we have already failed to add the first nexthop:
3191 		 * fib6_add_rt2node() has rejected it; when replacing, old
3192 		 * nexthops have been replaced by first new, the rest should
3193 		 * be added to it.
3194 		 */
3195 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3196 						     NLM_F_REPLACE);
3197 		nhn++;
3198 	}
3199 
3200 	/* success ... tell user about new route */
3201 	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3202 	goto cleanup;
3203 
3204 add_errout:
3205 	/* send notification for routes that were added so that
3206 	 * the delete notifications sent by ip6_route_del are
3207 	 * coherent
3208 	 */
3209 	if (rt_notif)
3210 		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3211 
3212 	/* Delete routes that were already added */
3213 	list_for_each_entry(nh, &rt6_nh_list, next) {
3214 		if (err_nh == nh)
3215 			break;
3216 		ip6_route_del(&nh->r_cfg);
3217 	}
3218 
3219 cleanup:
3220 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
3221 		if (nh->rt6_info)
3222 			dst_free(&nh->rt6_info->dst);
3223 		kfree(nh->mxc.mx);
3224 		list_del(&nh->next);
3225 		kfree(nh);
3226 	}
3227 
3228 	return err;
3229 }
3230 
3231 static int ip6_route_multipath_del(struct fib6_config *cfg)
3232 {
3233 	struct fib6_config r_cfg;
3234 	struct rtnexthop *rtnh;
3235 	int remaining;
3236 	int attrlen;
3237 	int err = 1, last_err = 0;
3238 
3239 	remaining = cfg->fc_mp_len;
3240 	rtnh = (struct rtnexthop *)cfg->fc_mp;
3241 
3242 	/* Parse a Multipath Entry */
3243 	while (rtnh_ok(rtnh, remaining)) {
3244 		memcpy(&r_cfg, cfg, sizeof(*cfg));
3245 		if (rtnh->rtnh_ifindex)
3246 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3247 
3248 		attrlen = rtnh_attrlen(rtnh);
3249 		if (attrlen > 0) {
3250 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3251 
3252 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3253 			if (nla) {
3254 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3255 				r_cfg.fc_flags |= RTF_GATEWAY;
3256 			}
3257 		}
3258 		err = ip6_route_del(&r_cfg);
3259 		if (err)
3260 			last_err = err;
3261 
3262 		rtnh = rtnh_next(rtnh, &remaining);
3263 	}
3264 
3265 	return last_err;
3266 }
3267 
3268 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3269 			      struct netlink_ext_ack *extack)
3270 {
3271 	struct fib6_config cfg;
3272 	int err;
3273 
3274 	err = rtm_to_fib6_config(skb, nlh, &cfg);
3275 	if (err < 0)
3276 		return err;
3277 
3278 	if (cfg.fc_mp)
3279 		return ip6_route_multipath_del(&cfg);
3280 	else {
3281 		cfg.fc_delete_all_nh = 1;
3282 		return ip6_route_del(&cfg);
3283 	}
3284 }
3285 
3286 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3287 			      struct netlink_ext_ack *extack)
3288 {
3289 	struct fib6_config cfg;
3290 	int err;
3291 
3292 	err = rtm_to_fib6_config(skb, nlh, &cfg);
3293 	if (err < 0)
3294 		return err;
3295 
3296 	if (cfg.fc_mp)
3297 		return ip6_route_multipath_add(&cfg);
3298 	else
3299 		return ip6_route_add(&cfg);
3300 }
3301 
3302 static size_t rt6_nlmsg_size(struct rt6_info *rt)
3303 {
3304 	int nexthop_len = 0;
3305 
3306 	if (rt->rt6i_nsiblings) {
3307 		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
3308 			    + NLA_ALIGN(sizeof(struct rtnexthop))
3309 			    + nla_total_size(16) /* RTA_GATEWAY */
3310 			    + lwtunnel_get_encap_size(rt->dst.lwtstate);
3311 
3312 		nexthop_len *= rt->rt6i_nsiblings;
3313 	}
3314 
3315 	return NLMSG_ALIGN(sizeof(struct rtmsg))
3316 	       + nla_total_size(16) /* RTA_SRC */
3317 	       + nla_total_size(16) /* RTA_DST */
3318 	       + nla_total_size(16) /* RTA_GATEWAY */
3319 	       + nla_total_size(16) /* RTA_PREFSRC */
3320 	       + nla_total_size(4) /* RTA_TABLE */
3321 	       + nla_total_size(4) /* RTA_IIF */
3322 	       + nla_total_size(4) /* RTA_OIF */
3323 	       + nla_total_size(4) /* RTA_PRIORITY */
3324 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3325 	       + nla_total_size(sizeof(struct rta_cacheinfo))
3326 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3327 	       + nla_total_size(1) /* RTA_PREF */
3328 	       + lwtunnel_get_encap_size(rt->dst.lwtstate)
3329 	       + nexthop_len;
3330 }
3331 
3332 static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
3333 			    unsigned int *flags, bool skip_oif)
3334 {
3335 	if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
3336 		*flags |= RTNH_F_LINKDOWN;
3337 		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3338 			*flags |= RTNH_F_DEAD;
3339 	}
3340 
3341 	if (rt->rt6i_flags & RTF_GATEWAY) {
3342 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3343 			goto nla_put_failure;
3344 	}
3345 
3346 	/* not needed for multipath encoding b/c it has a rtnexthop struct */
3347 	if (!skip_oif && rt->dst.dev &&
3348 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3349 		goto nla_put_failure;
3350 
3351 	if (rt->dst.lwtstate &&
3352 	    lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3353 		goto nla_put_failure;
3354 
3355 	return 0;
3356 
3357 nla_put_failure:
3358 	return -EMSGSIZE;
3359 }
3360 
3361 /* add multipath next hop */
3362 static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
3363 {
3364 	struct rtnexthop *rtnh;
3365 	unsigned int flags = 0;
3366 
3367 	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
3368 	if (!rtnh)
3369 		goto nla_put_failure;
3370 
3371 	rtnh->rtnh_hops = 0;
3372 	rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
3373 
3374 	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
3375 		goto nla_put_failure;
3376 
3377 	rtnh->rtnh_flags = flags;
3378 
3379 	/* length of rtnetlink header + attributes */
3380 	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
3381 
3382 	return 0;
3383 
3384 nla_put_failure:
3385 	return -EMSGSIZE;
3386 }
3387 
3388 static int rt6_fill_node(struct net *net,
3389 			 struct sk_buff *skb, struct rt6_info *rt,
3390 			 struct in6_addr *dst, struct in6_addr *src,
3391 			 int iif, int type, u32 portid, u32 seq,
3392 			 unsigned int flags)
3393 {
3394 	u32 metrics[RTAX_MAX];
3395 	struct rtmsg *rtm;
3396 	struct nlmsghdr *nlh;
3397 	long expires;
3398 	u32 table;
3399 
3400 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3401 	if (!nlh)
3402 		return -EMSGSIZE;
3403 
3404 	rtm = nlmsg_data(nlh);
3405 	rtm->rtm_family = AF_INET6;
3406 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
3407 	rtm->rtm_src_len = rt->rt6i_src.plen;
3408 	rtm->rtm_tos = 0;
3409 	if (rt->rt6i_table)
3410 		table = rt->rt6i_table->tb6_id;
3411 	else
3412 		table = RT6_TABLE_UNSPEC;
3413 	rtm->rtm_table = table;
3414 	if (nla_put_u32(skb, RTA_TABLE, table))
3415 		goto nla_put_failure;
3416 	if (rt->rt6i_flags & RTF_REJECT) {
3417 		switch (rt->dst.error) {
3418 		case -EINVAL:
3419 			rtm->rtm_type = RTN_BLACKHOLE;
3420 			break;
3421 		case -EACCES:
3422 			rtm->rtm_type = RTN_PROHIBIT;
3423 			break;
3424 		case -EAGAIN:
3425 			rtm->rtm_type = RTN_THROW;
3426 			break;
3427 		default:
3428 			rtm->rtm_type = RTN_UNREACHABLE;
3429 			break;
3430 		}
3431 	}
3432 	else if (rt->rt6i_flags & RTF_LOCAL)
3433 		rtm->rtm_type = RTN_LOCAL;
3434 	else if (rt->rt6i_flags & RTF_ANYCAST)
3435 		rtm->rtm_type = RTN_ANYCAST;
3436 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3437 		rtm->rtm_type = RTN_LOCAL;
3438 	else
3439 		rtm->rtm_type = RTN_UNICAST;
3440 	rtm->rtm_flags = 0;
3441 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3442 	rtm->rtm_protocol = rt->rt6i_protocol;
3443 	if (rt->rt6i_flags & RTF_DYNAMIC)
3444 		rtm->rtm_protocol = RTPROT_REDIRECT;
3445 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
3446 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3447 			rtm->rtm_protocol = RTPROT_RA;
3448 		else
3449 			rtm->rtm_protocol = RTPROT_KERNEL;
3450 	}
3451 
3452 	if (rt->rt6i_flags & RTF_CACHE)
3453 		rtm->rtm_flags |= RTM_F_CLONED;
3454 
3455 	if (dst) {
3456 		if (nla_put_in6_addr(skb, RTA_DST, dst))
3457 			goto nla_put_failure;
3458 		rtm->rtm_dst_len = 128;
3459 	} else if (rtm->rtm_dst_len)
3460 		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3461 			goto nla_put_failure;
3462 #ifdef CONFIG_IPV6_SUBTREES
3463 	if (src) {
3464 		if (nla_put_in6_addr(skb, RTA_SRC, src))
3465 			goto nla_put_failure;
3466 		rtm->rtm_src_len = 128;
3467 	} else if (rtm->rtm_src_len &&
3468 		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3469 		goto nla_put_failure;
3470 #endif
3471 	if (iif) {
3472 #ifdef CONFIG_IPV6_MROUTE
3473 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3474 			int err = ip6mr_get_route(net, skb, rtm, portid);
3475 
3476 			if (err == 0)
3477 				return 0;
3478 			if (err < 0)
3479 				goto nla_put_failure;
3480 		} else
3481 #endif
3482 			if (nla_put_u32(skb, RTA_IIF, iif))
3483 				goto nla_put_failure;
3484 	} else if (dst) {
3485 		struct in6_addr saddr_buf;
3486 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3487 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3488 			goto nla_put_failure;
3489 	}
3490 
3491 	if (rt->rt6i_prefsrc.plen) {
3492 		struct in6_addr saddr_buf;
3493 		saddr_buf = rt->rt6i_prefsrc.addr;
3494 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3495 			goto nla_put_failure;
3496 	}
3497 
3498 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3499 	if (rt->rt6i_pmtu)
3500 		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3501 	if (rtnetlink_put_metrics(skb, metrics) < 0)
3502 		goto nla_put_failure;
3503 
3504 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3505 		goto nla_put_failure;
3506 
3507 	/* For multipath routes, walk the siblings list and add
3508 	 * each as a nexthop within RTA_MULTIPATH.
3509 	 */
3510 	if (rt->rt6i_nsiblings) {
3511 		struct rt6_info *sibling, *next_sibling;
3512 		struct nlattr *mp;
3513 
3514 		mp = nla_nest_start(skb, RTA_MULTIPATH);
3515 		if (!mp)
3516 			goto nla_put_failure;
3517 
3518 		if (rt6_add_nexthop(skb, rt) < 0)
3519 			goto nla_put_failure;
3520 
3521 		list_for_each_entry_safe(sibling, next_sibling,
3522 					 &rt->rt6i_siblings, rt6i_siblings) {
3523 			if (rt6_add_nexthop(skb, sibling) < 0)
3524 				goto nla_put_failure;
3525 		}
3526 
3527 		nla_nest_end(skb, mp);
3528 	} else {
3529 		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
3530 			goto nla_put_failure;
3531 	}
3532 
3533 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3534 
3535 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3536 		goto nla_put_failure;
3537 
3538 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3539 		goto nla_put_failure;
3540 
3541 
3542 	nlmsg_end(skb, nlh);
3543 	return 0;
3544 
3545 nla_put_failure:
3546 	nlmsg_cancel(skb, nlh);
3547 	return -EMSGSIZE;
3548 }
3549 
3550 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3551 {
3552 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3553 	struct net *net = arg->net;
3554 
3555 	if (rt == net->ipv6.ip6_null_entry)
3556 		return 0;
3557 
3558 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3559 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3560 
3561 		/* user wants prefix routes only */
3562 		if (rtm->rtm_flags & RTM_F_PREFIX &&
3563 		    !(rt->rt6i_flags & RTF_PREFIX_RT)) {
3564 			/* success since this is not a prefix route */
3565 			return 1;
3566 		}
3567 	}
3568 
3569 	return rt6_fill_node(net,
3570 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3571 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3572 		     NLM_F_MULTI);
3573 }
3574 
3575 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3576 			      struct netlink_ext_ack *extack)
3577 {
3578 	struct net *net = sock_net(in_skb->sk);
3579 	struct nlattr *tb[RTA_MAX+1];
3580 	struct rt6_info *rt;
3581 	struct sk_buff *skb;
3582 	struct rtmsg *rtm;
3583 	struct flowi6 fl6;
3584 	int err, iif = 0, oif = 0;
3585 
3586 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
3587 			  extack);
3588 	if (err < 0)
3589 		goto errout;
3590 
3591 	err = -EINVAL;
3592 	memset(&fl6, 0, sizeof(fl6));
3593 	rtm = nlmsg_data(nlh);
3594 	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
3595 
3596 	if (tb[RTA_SRC]) {
3597 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3598 			goto errout;
3599 
3600 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3601 	}
3602 
3603 	if (tb[RTA_DST]) {
3604 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3605 			goto errout;
3606 
3607 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3608 	}
3609 
3610 	if (tb[RTA_IIF])
3611 		iif = nla_get_u32(tb[RTA_IIF]);
3612 
3613 	if (tb[RTA_OIF])
3614 		oif = nla_get_u32(tb[RTA_OIF]);
3615 
3616 	if (tb[RTA_MARK])
3617 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3618 
3619 	if (tb[RTA_UID])
3620 		fl6.flowi6_uid = make_kuid(current_user_ns(),
3621 					   nla_get_u32(tb[RTA_UID]));
3622 	else
3623 		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
3624 
3625 	if (iif) {
3626 		struct net_device *dev;
3627 		int flags = 0;
3628 
3629 		dev = __dev_get_by_index(net, iif);
3630 		if (!dev) {
3631 			err = -ENODEV;
3632 			goto errout;
3633 		}
3634 
3635 		fl6.flowi6_iif = iif;
3636 
3637 		if (!ipv6_addr_any(&fl6.saddr))
3638 			flags |= RT6_LOOKUP_F_HAS_SADDR;
3639 
3640 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3641 							       flags);
3642 	} else {
3643 		fl6.flowi6_oif = oif;
3644 
3645 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3646 	}
3647 
3648 	if (rt == net->ipv6.ip6_null_entry) {
3649 		err = rt->dst.error;
3650 		ip6_rt_put(rt);
3651 		goto errout;
3652 	}
3653 
3654 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3655 	if (!skb) {
3656 		ip6_rt_put(rt);
3657 		err = -ENOBUFS;
3658 		goto errout;
3659 	}
3660 
3661 	skb_dst_set(skb, &rt->dst);
3662 
3663 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3664 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3665 			    nlh->nlmsg_seq, 0);
3666 	if (err < 0) {
3667 		kfree_skb(skb);
3668 		goto errout;
3669 	}
3670 
3671 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3672 errout:
3673 	return err;
3674 }
3675 
3676 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3677 		     unsigned int nlm_flags)
3678 {
3679 	struct sk_buff *skb;
3680 	struct net *net = info->nl_net;
3681 	u32 seq;
3682 	int err;
3683 
3684 	err = -ENOBUFS;
3685 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3686 
3687 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3688 	if (!skb)
3689 		goto errout;
3690 
3691 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3692 				event, info->portid, seq, nlm_flags);
3693 	if (err < 0) {
3694 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3695 		WARN_ON(err == -EMSGSIZE);
3696 		kfree_skb(skb);
3697 		goto errout;
3698 	}
3699 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3700 		    info->nlh, gfp_any());
3701 	return;
3702 errout:
3703 	if (err < 0)
3704 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3705 }
3706 
3707 static int ip6_route_dev_notify(struct notifier_block *this,
3708 				unsigned long event, void *ptr)
3709 {
3710 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3711 	struct net *net = dev_net(dev);
3712 
3713 	if (!(dev->flags & IFF_LOOPBACK))
3714 		return NOTIFY_OK;
3715 
3716 	if (event == NETDEV_REGISTER) {
3717 		net->ipv6.ip6_null_entry->dst.dev = dev;
3718 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3719 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3720 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3721 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3722 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3723 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3724 #endif
3725 	 } else if (event == NETDEV_UNREGISTER) {
3726 		in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
3727 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3728 		in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
3729 		in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev);
3730 #endif
3731 	}
3732 
3733 	return NOTIFY_OK;
3734 }
3735 
3736 /*
3737  *	/proc
3738  */
3739 
3740 #ifdef CONFIG_PROC_FS
3741 
3742 static const struct file_operations ipv6_route_proc_fops = {
3743 	.owner		= THIS_MODULE,
3744 	.open		= ipv6_route_open,
3745 	.read		= seq_read,
3746 	.llseek		= seq_lseek,
3747 	.release	= seq_release_net,
3748 };
3749 
3750 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3751 {
3752 	struct net *net = (struct net *)seq->private;
3753 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3754 		   net->ipv6.rt6_stats->fib_nodes,
3755 		   net->ipv6.rt6_stats->fib_route_nodes,
3756 		   net->ipv6.rt6_stats->fib_rt_alloc,
3757 		   net->ipv6.rt6_stats->fib_rt_entries,
3758 		   net->ipv6.rt6_stats->fib_rt_cache,
3759 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3760 		   net->ipv6.rt6_stats->fib_discarded_routes);
3761 
3762 	return 0;
3763 }
3764 
3765 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3766 {
3767 	return single_open_net(inode, file, rt6_stats_seq_show);
3768 }
3769 
3770 static const struct file_operations rt6_stats_seq_fops = {
3771 	.owner	 = THIS_MODULE,
3772 	.open	 = rt6_stats_seq_open,
3773 	.read	 = seq_read,
3774 	.llseek	 = seq_lseek,
3775 	.release = single_release_net,
3776 };
3777 #endif	/* CONFIG_PROC_FS */
3778 
3779 #ifdef CONFIG_SYSCTL
3780 
3781 static
3782 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3783 			      void __user *buffer, size_t *lenp, loff_t *ppos)
3784 {
3785 	struct net *net;
3786 	int delay;
3787 	if (!write)
3788 		return -EINVAL;
3789 
3790 	net = (struct net *)ctl->extra1;
3791 	delay = net->ipv6.sysctl.flush_delay;
3792 	proc_dointvec(ctl, write, buffer, lenp, ppos);
3793 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3794 	return 0;
3795 }
3796 
3797 struct ctl_table ipv6_route_table_template[] = {
3798 	{
3799 		.procname	=	"flush",
3800 		.data		=	&init_net.ipv6.sysctl.flush_delay,
3801 		.maxlen		=	sizeof(int),
3802 		.mode		=	0200,
3803 		.proc_handler	=	ipv6_sysctl_rtcache_flush
3804 	},
3805 	{
3806 		.procname	=	"gc_thresh",
3807 		.data		=	&ip6_dst_ops_template.gc_thresh,
3808 		.maxlen		=	sizeof(int),
3809 		.mode		=	0644,
3810 		.proc_handler	=	proc_dointvec,
3811 	},
3812 	{
3813 		.procname	=	"max_size",
3814 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
3815 		.maxlen		=	sizeof(int),
3816 		.mode		=	0644,
3817 		.proc_handler	=	proc_dointvec,
3818 	},
3819 	{
3820 		.procname	=	"gc_min_interval",
3821 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3822 		.maxlen		=	sizeof(int),
3823 		.mode		=	0644,
3824 		.proc_handler	=	proc_dointvec_jiffies,
3825 	},
3826 	{
3827 		.procname	=	"gc_timeout",
3828 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3829 		.maxlen		=	sizeof(int),
3830 		.mode		=	0644,
3831 		.proc_handler	=	proc_dointvec_jiffies,
3832 	},
3833 	{
3834 		.procname	=	"gc_interval",
3835 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
3836 		.maxlen		=	sizeof(int),
3837 		.mode		=	0644,
3838 		.proc_handler	=	proc_dointvec_jiffies,
3839 	},
3840 	{
3841 		.procname	=	"gc_elasticity",
3842 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3843 		.maxlen		=	sizeof(int),
3844 		.mode		=	0644,
3845 		.proc_handler	=	proc_dointvec,
3846 	},
3847 	{
3848 		.procname	=	"mtu_expires",
3849 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3850 		.maxlen		=	sizeof(int),
3851 		.mode		=	0644,
3852 		.proc_handler	=	proc_dointvec_jiffies,
3853 	},
3854 	{
3855 		.procname	=	"min_adv_mss",
3856 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
3857 		.maxlen		=	sizeof(int),
3858 		.mode		=	0644,
3859 		.proc_handler	=	proc_dointvec,
3860 	},
3861 	{
3862 		.procname	=	"gc_min_interval_ms",
3863 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3864 		.maxlen		=	sizeof(int),
3865 		.mode		=	0644,
3866 		.proc_handler	=	proc_dointvec_ms_jiffies,
3867 	},
3868 	{ }
3869 };
3870 
3871 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3872 {
3873 	struct ctl_table *table;
3874 
3875 	table = kmemdup(ipv6_route_table_template,
3876 			sizeof(ipv6_route_table_template),
3877 			GFP_KERNEL);
3878 
3879 	if (table) {
3880 		table[0].data = &net->ipv6.sysctl.flush_delay;
3881 		table[0].extra1 = net;
3882 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3883 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3884 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3885 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3886 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3887 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3888 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3889 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3890 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3891 
3892 		/* Don't export sysctls to unprivileged users */
3893 		if (net->user_ns != &init_user_ns)
3894 			table[0].procname = NULL;
3895 	}
3896 
3897 	return table;
3898 }
3899 #endif
3900 
3901 static int __net_init ip6_route_net_init(struct net *net)
3902 {
3903 	int ret = -ENOMEM;
3904 
3905 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3906 	       sizeof(net->ipv6.ip6_dst_ops));
3907 
3908 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3909 		goto out_ip6_dst_ops;
3910 
3911 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3912 					   sizeof(*net->ipv6.ip6_null_entry),
3913 					   GFP_KERNEL);
3914 	if (!net->ipv6.ip6_null_entry)
3915 		goto out_ip6_dst_entries;
3916 	net->ipv6.ip6_null_entry->dst.path =
3917 		(struct dst_entry *)net->ipv6.ip6_null_entry;
3918 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3919 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3920 			 ip6_template_metrics, true);
3921 
3922 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3923 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3924 					       sizeof(*net->ipv6.ip6_prohibit_entry),
3925 					       GFP_KERNEL);
3926 	if (!net->ipv6.ip6_prohibit_entry)
3927 		goto out_ip6_null_entry;
3928 	net->ipv6.ip6_prohibit_entry->dst.path =
3929 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3930 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3931 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3932 			 ip6_template_metrics, true);
3933 
3934 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3935 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3936 					       GFP_KERNEL);
3937 	if (!net->ipv6.ip6_blk_hole_entry)
3938 		goto out_ip6_prohibit_entry;
3939 	net->ipv6.ip6_blk_hole_entry->dst.path =
3940 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3941 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3942 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3943 			 ip6_template_metrics, true);
3944 #endif
3945 
3946 	net->ipv6.sysctl.flush_delay = 0;
3947 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3948 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3949 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3950 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3951 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3952 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3953 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3954 
3955 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3956 
3957 	ret = 0;
3958 out:
3959 	return ret;
3960 
3961 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3962 out_ip6_prohibit_entry:
3963 	kfree(net->ipv6.ip6_prohibit_entry);
3964 out_ip6_null_entry:
3965 	kfree(net->ipv6.ip6_null_entry);
3966 #endif
3967 out_ip6_dst_entries:
3968 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3969 out_ip6_dst_ops:
3970 	goto out;
3971 }
3972 
3973 static void __net_exit ip6_route_net_exit(struct net *net)
3974 {
3975 	kfree(net->ipv6.ip6_null_entry);
3976 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3977 	kfree(net->ipv6.ip6_prohibit_entry);
3978 	kfree(net->ipv6.ip6_blk_hole_entry);
3979 #endif
3980 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3981 }
3982 
3983 static int __net_init ip6_route_net_init_late(struct net *net)
3984 {
3985 #ifdef CONFIG_PROC_FS
3986 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3987 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3988 #endif
3989 	return 0;
3990 }
3991 
3992 static void __net_exit ip6_route_net_exit_late(struct net *net)
3993 {
3994 #ifdef CONFIG_PROC_FS
3995 	remove_proc_entry("ipv6_route", net->proc_net);
3996 	remove_proc_entry("rt6_stats", net->proc_net);
3997 #endif
3998 }
3999 
4000 static struct pernet_operations ip6_route_net_ops = {
4001 	.init = ip6_route_net_init,
4002 	.exit = ip6_route_net_exit,
4003 };
4004 
4005 static int __net_init ipv6_inetpeer_init(struct net *net)
4006 {
4007 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4008 
4009 	if (!bp)
4010 		return -ENOMEM;
4011 	inet_peer_base_init(bp);
4012 	net->ipv6.peers = bp;
4013 	return 0;
4014 }
4015 
4016 static void __net_exit ipv6_inetpeer_exit(struct net *net)
4017 {
4018 	struct inet_peer_base *bp = net->ipv6.peers;
4019 
4020 	net->ipv6.peers = NULL;
4021 	inetpeer_invalidate_tree(bp);
4022 	kfree(bp);
4023 }
4024 
4025 static struct pernet_operations ipv6_inetpeer_ops = {
4026 	.init	=	ipv6_inetpeer_init,
4027 	.exit	=	ipv6_inetpeer_exit,
4028 };
4029 
4030 static struct pernet_operations ip6_route_net_late_ops = {
4031 	.init = ip6_route_net_init_late,
4032 	.exit = ip6_route_net_exit_late,
4033 };
4034 
4035 static struct notifier_block ip6_route_dev_notifier = {
4036 	.notifier_call = ip6_route_dev_notify,
4037 	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
4038 };
4039 
4040 void __init ip6_route_init_special_entries(void)
4041 {
4042 	/* Registering of the loopback is done before this portion of code,
4043 	 * the loopback reference in rt6_info will not be taken, do it
4044 	 * manually for init_net */
4045 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
4046 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4047   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4048 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
4049 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4050 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
4051 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4052   #endif
4053 }
4054 
4055 int __init ip6_route_init(void)
4056 {
4057 	int ret;
4058 	int cpu;
4059 
4060 	ret = -ENOMEM;
4061 	ip6_dst_ops_template.kmem_cachep =
4062 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
4063 				  SLAB_HWCACHE_ALIGN, NULL);
4064 	if (!ip6_dst_ops_template.kmem_cachep)
4065 		goto out;
4066 
4067 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
4068 	if (ret)
4069 		goto out_kmem_cache;
4070 
4071 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
4072 	if (ret)
4073 		goto out_dst_entries;
4074 
4075 	ret = register_pernet_subsys(&ip6_route_net_ops);
4076 	if (ret)
4077 		goto out_register_inetpeer;
4078 
4079 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
4080 
4081 	ret = fib6_init();
4082 	if (ret)
4083 		goto out_register_subsys;
4084 
4085 	ret = xfrm6_init();
4086 	if (ret)
4087 		goto out_fib6_init;
4088 
4089 	ret = fib6_rules_init();
4090 	if (ret)
4091 		goto xfrm6_init;
4092 
4093 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
4094 	if (ret)
4095 		goto fib6_rules_init;
4096 
4097 	ret = -ENOBUFS;
4098 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
4099 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
4100 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
4101 		goto out_register_late_subsys;
4102 
4103 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
4104 	if (ret)
4105 		goto out_register_late_subsys;
4106 
4107 	for_each_possible_cpu(cpu) {
4108 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
4109 
4110 		INIT_LIST_HEAD(&ul->head);
4111 		spin_lock_init(&ul->lock);
4112 	}
4113 
4114 out:
4115 	return ret;
4116 
4117 out_register_late_subsys:
4118 	unregister_pernet_subsys(&ip6_route_net_late_ops);
4119 fib6_rules_init:
4120 	fib6_rules_cleanup();
4121 xfrm6_init:
4122 	xfrm6_fini();
4123 out_fib6_init:
4124 	fib6_gc_cleanup();
4125 out_register_subsys:
4126 	unregister_pernet_subsys(&ip6_route_net_ops);
4127 out_register_inetpeer:
4128 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
4129 out_dst_entries:
4130 	dst_entries_destroy(&ip6_dst_blackhole_ops);
4131 out_kmem_cache:
4132 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
4133 	goto out;
4134 }
4135 
4136 void ip6_route_cleanup(void)
4137 {
4138 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
4139 	unregister_pernet_subsys(&ip6_route_net_late_ops);
4140 	fib6_rules_cleanup();
4141 	xfrm6_fini();
4142 	fib6_gc_cleanup();
4143 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
4144 	unregister_pernet_subsys(&ip6_route_net_ops);
4145 	dst_entries_destroy(&ip6_dst_blackhole_ops);
4146 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
4147 }
4148