xref: /openbmc/linux/net/ipv6/route.c (revision c8dbaa22)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65 #include <trace/events/fib6.h>
66 
67 #include <linux/uaccess.h>
68 
69 #ifdef CONFIG_SYSCTL
70 #include <linux/sysctl.h>
71 #endif
72 
/*
 * Result codes of the next-hop reachability check.  Every failure code is
 * negative so that callers can distinguish it from a (non-negative) route
 * score with a simple "< 0" test.
 */
enum rt6_nud_state {
	/* route must not be used at all */
	RT6_NUD_FAIL_HARD	= -3,
	/* neighbour is in a failed state; a probe may revive it */
	RT6_NUD_FAIL_PROBE	= -2,
	/* no neighbour entry; caller should fall back to round-robin */
	RT6_NUD_FAIL_DO_RR	= -1,
	/* next hop is considered usable */
	RT6_NUD_SUCCEED		= 1
};
79 
/*
 * Forward declarations of file-local helpers that are referenced before
 * their definitions, e.g. by the dst_ops tables and the route templates
 * further down in this file.
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

/* input/output handlers installed in the reject-route templates */
static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int		ip6_pkt_prohibit(struct sk_buff *skb);
static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
static size_t rt6_nlmsg_size(struct rt6_info *rt);
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags);

/* Helpers used by rt6_route_rcv(); only built with CONFIG_IPV6_ROUTE_INFO. */
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev);
#endif
119 
/*
 * A lock-protected list of routes.  Entries are linked through
 * rt6_info.rt6i_uncached by the rt6_uncached_list_*() helpers below.
 */
struct uncached_list {
	spinlock_t		lock;	/* taken with spin_lock_bh() by the helpers below */
	struct list_head	head;	/* list of rt6_info entries */
};

/* One list instance per CPU. */
static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
126 
127 static void rt6_uncached_list_add(struct rt6_info *rt)
128 {
129 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
130 
131 	rt->rt6i_uncached_list = ul;
132 
133 	spin_lock_bh(&ul->lock);
134 	list_add_tail(&rt->rt6i_uncached, &ul->head);
135 	spin_unlock_bh(&ul->lock);
136 }
137 
138 static void rt6_uncached_list_del(struct rt6_info *rt)
139 {
140 	if (!list_empty(&rt->rt6i_uncached)) {
141 		struct uncached_list *ul = rt->rt6i_uncached_list;
142 
143 		spin_lock_bh(&ul->lock);
144 		list_del(&rt->rt6i_uncached);
145 		spin_unlock_bh(&ul->lock);
146 	}
147 }
148 
149 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
150 {
151 	struct net_device *loopback_dev = net->loopback_dev;
152 	int cpu;
153 
154 	if (dev == loopback_dev)
155 		return;
156 
157 	for_each_possible_cpu(cpu) {
158 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
159 		struct rt6_info *rt;
160 
161 		spin_lock_bh(&ul->lock);
162 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
163 			struct inet6_dev *rt_idev = rt->rt6i_idev;
164 			struct net_device *rt_dev = rt->dst.dev;
165 
166 			if (rt_idev->dev == dev) {
167 				rt->rt6i_idev = in6_dev_get(loopback_dev);
168 				in6_dev_put(rt_idev);
169 			}
170 
171 			if (rt_dev == dev) {
172 				rt->dst.dev = loopback_dev;
173 				dev_hold(rt->dst.dev);
174 				dev_put(rt_dev);
175 			}
176 		}
177 		spin_unlock_bh(&ul->lock);
178 	}
179 }
180 
181 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
182 {
183 	return dst_metrics_write_ptr(rt->dst.from);
184 }
185 
186 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
187 {
188 	struct rt6_info *rt = (struct rt6_info *)dst;
189 
190 	if (rt->rt6i_flags & RTF_PCPU)
191 		return rt6_pcpu_cow_metrics(rt);
192 	else if (rt->rt6i_flags & RTF_CACHE)
193 		return NULL;
194 	else
195 		return dst_cow_metrics_generic(dst, old);
196 }
197 
198 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
199 					     struct sk_buff *skb,
200 					     const void *daddr)
201 {
202 	struct in6_addr *p = &rt->rt6i_gateway;
203 
204 	if (!ipv6_addr_any(p))
205 		return (const void *) p;
206 	else if (skb)
207 		return &ipv6_hdr(skb)->daddr;
208 	return daddr;
209 }
210 
211 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
212 					  struct sk_buff *skb,
213 					  const void *daddr)
214 {
215 	struct rt6_info *rt = (struct rt6_info *) dst;
216 	struct neighbour *n;
217 
218 	daddr = choose_neigh_daddr(rt, skb, daddr);
219 	n = __ipv6_neigh_lookup(dst->dev, daddr);
220 	if (n)
221 		return n;
222 	return neigh_create(&nd_tbl, daddr, dst->dev);
223 }
224 
225 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
226 {
227 	struct net_device *dev = dst->dev;
228 	struct rt6_info *rt = (struct rt6_info *)dst;
229 
230 	daddr = choose_neigh_daddr(rt, NULL, daddr);
231 	if (!daddr)
232 		return;
233 	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
234 		return;
235 	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
236 		return;
237 	__ipv6_confirm_neigh(dev, daddr);
238 }
239 
240 static struct dst_ops ip6_dst_ops_template = {
241 	.family			=	AF_INET6,
242 	.gc			=	ip6_dst_gc,
243 	.gc_thresh		=	1024,
244 	.check			=	ip6_dst_check,
245 	.default_advmss		=	ip6_default_advmss,
246 	.mtu			=	ip6_mtu,
247 	.cow_metrics		=	ipv6_cow_metrics,
248 	.destroy		=	ip6_dst_destroy,
249 	.ifdown			=	ip6_dst_ifdown,
250 	.negative_advice	=	ip6_negative_advice,
251 	.link_failure		=	ip6_link_failure,
252 	.update_pmtu		=	ip6_rt_update_pmtu,
253 	.redirect		=	rt6_do_redirect,
254 	.local_out		=	__ip6_local_out,
255 	.neigh_lookup		=	ip6_neigh_lookup,
256 	.confirm_neigh		=	ip6_confirm_neigh,
257 };
258 
259 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
260 {
261 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
262 
263 	return mtu ? : dst->dev->mtu;
264 }
265 
/*
 * dst_ops->update_pmtu handler for blackhole dsts (see
 * ip6_dst_blackhole_ops): intentionally does nothing.
 */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}
270 
/*
 * dst_ops->redirect handler for blackhole dsts (see
 * ip6_dst_blackhole_ops): intentionally does nothing.
 */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
275 
276 static struct dst_ops ip6_dst_blackhole_ops = {
277 	.family			=	AF_INET6,
278 	.destroy		=	ip6_dst_destroy,
279 	.check			=	ip6_dst_check,
280 	.mtu			=	ip6_blackhole_mtu,
281 	.default_advmss		=	ip6_default_advmss,
282 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
283 	.redirect		=	ip6_rt_blackhole_redirect,
284 	.cow_metrics		=	dst_cow_metrics_generic,
285 	.neigh_lookup		=	ip6_neigh_lookup,
286 };
287 
/*
 * Read-only metrics array with RTAX_MAX slots.  The only explicit
 * initializer sets the hop-limit slot to 0; every other slot is implicitly
 * zero as well.
 * NOTE(review): presumably used as the initial metrics of the template
 * routes -- confirm against the users of this array.
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
291 
292 static const struct rt6_info ip6_null_entry_template = {
293 	.dst = {
294 		.__refcnt	= ATOMIC_INIT(1),
295 		.__use		= 1,
296 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
297 		.error		= -ENETUNREACH,
298 		.input		= ip6_pkt_discard,
299 		.output		= ip6_pkt_discard_out,
300 	},
301 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
302 	.rt6i_protocol  = RTPROT_KERNEL,
303 	.rt6i_metric	= ~(u32) 0,
304 	.rt6i_ref	= ATOMIC_INIT(1),
305 };
306 
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Template for the "prohibit" route: like the null route, but the dst error
 * is -EACCES and packets go through the ip6_pkt_prohibit*() handlers.
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
};

/*
 * Template for the "blackhole" route: the dst error is -EINVAL and packets
 * are handed to the generic dst_discard*() handlers.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
};

#endif
340 
341 static void rt6_info_init(struct rt6_info *rt)
342 {
343 	struct dst_entry *dst = &rt->dst;
344 
345 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
346 	INIT_LIST_HEAD(&rt->rt6i_siblings);
347 	INIT_LIST_HEAD(&rt->rt6i_uncached);
348 }
349 
350 /* allocate dst with ip6_dst_ops */
351 static struct rt6_info *__ip6_dst_alloc(struct net *net,
352 					struct net_device *dev,
353 					int flags)
354 {
355 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
356 					1, DST_OBSOLETE_FORCE_CHK, flags);
357 
358 	if (rt)
359 		rt6_info_init(rt);
360 
361 	return rt;
362 }
363 
364 struct rt6_info *ip6_dst_alloc(struct net *net,
365 			       struct net_device *dev,
366 			       int flags)
367 {
368 	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
369 
370 	if (rt) {
371 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
372 		if (rt->rt6i_pcpu) {
373 			int cpu;
374 
375 			for_each_possible_cpu(cpu) {
376 				struct rt6_info **p;
377 
378 				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
379 				/* no one shares rt */
380 				*p =  NULL;
381 			}
382 		} else {
383 			dst_release_immediate(&rt->dst);
384 			return NULL;
385 		}
386 	}
387 
388 	return rt;
389 }
390 EXPORT_SYMBOL(ip6_dst_alloc);
391 
392 static void ip6_dst_destroy(struct dst_entry *dst)
393 {
394 	struct rt6_info *rt = (struct rt6_info *)dst;
395 	struct dst_entry *from = dst->from;
396 	struct inet6_dev *idev;
397 
398 	dst_destroy_metrics_generic(dst);
399 	free_percpu(rt->rt6i_pcpu);
400 	rt6_uncached_list_del(rt);
401 
402 	idev = rt->rt6i_idev;
403 	if (idev) {
404 		rt->rt6i_idev = NULL;
405 		in6_dev_put(idev);
406 	}
407 
408 	dst->from = NULL;
409 	dst_release(from);
410 }
411 
412 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
413 			   int how)
414 {
415 	struct rt6_info *rt = (struct rt6_info *)dst;
416 	struct inet6_dev *idev = rt->rt6i_idev;
417 	struct net_device *loopback_dev =
418 		dev_net(dev)->loopback_dev;
419 
420 	if (idev && idev->dev != loopback_dev) {
421 		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
422 		if (loopback_idev) {
423 			rt->rt6i_idev = loopback_idev;
424 			in6_dev_put(idev);
425 		}
426 	}
427 }
428 
429 static bool __rt6_check_expired(const struct rt6_info *rt)
430 {
431 	if (rt->rt6i_flags & RTF_EXPIRES)
432 		return time_after(jiffies, rt->dst.expires);
433 	else
434 		return false;
435 }
436 
437 static bool rt6_check_expired(const struct rt6_info *rt)
438 {
439 	if (rt->rt6i_flags & RTF_EXPIRES) {
440 		if (time_after(jiffies, rt->dst.expires))
441 			return true;
442 	} else if (rt->dst.from) {
443 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
444 	}
445 	return false;
446 }
447 
/* Multipath route selection:
 *   Map the flow hash of @fl6 onto an index in [0, @candidate_count).
 * Adapted from fib_info_hashfn()
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	unsigned int slot = get_hash_from_flowi6(fl6) % candidate_count;

	return slot;
}
457 
458 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
459 					     struct flowi6 *fl6, int oif,
460 					     int strict)
461 {
462 	struct rt6_info *sibling, *next_sibling;
463 	int route_choosen;
464 
465 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
466 	/* Don't change the route, if route_choosen == 0
467 	 * (siblings does not include ourself)
468 	 */
469 	if (route_choosen)
470 		list_for_each_entry_safe(sibling, next_sibling,
471 				&match->rt6i_siblings, rt6i_siblings) {
472 			route_choosen--;
473 			if (route_choosen == 0) {
474 				if (rt6_score_route(sibling, oif, strict) < 0)
475 					break;
476 				match = sibling;
477 				break;
478 			}
479 		}
480 	return match;
481 }
482 
/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

/*
 * Walk the route chain starting at @rt (linked through dst.rt6_next) and
 * pick the entry that fits the requested output interface @oif or, when no
 * @oif is given, the source address @saddr.
 *
 * Returns:
 *  - @rt unchanged when neither @oif nor a specific @saddr is given;
 *  - the first route whose device has ifindex @oif;
 *  - without @oif, the first route for which ipv6_chk_addr() accepts @saddr
 *    on the route's device;
 *  - with @oif but no exact match, a remembered loopback-device route;
 *  - the namespace's null entry when RT6_LOOKUP_F_IFACE is set and nothing
 *    matched;
 *  - otherwise @rt.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				/* Loopback route whose inet6_dev is missing
				 * or belongs to another interface: skip it in
				 * strict mode, or when the fallback found so
				 * far already belongs to @oif.
				 */
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE)
						continue;
					if (local &&
					    local->rt6i_idev->dev->ifindex == oif)
						continue;
				}
				/* remember as fallback for the end of the walk */
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
533 
#ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred probe request, allocated in rt6_probe() and freed by the worker. */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;		/* gateway address to probe */
	struct net_device *dev;		/* held by rt6_probe(), put by the worker */
};

/*
 * Work handler: send a neighbour solicitation for work->target on
 * work->dev, then drop the device reference and free the work item.
 */
static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	/* NOTE(review): presumably derives the solicited-node multicast
	 * address of the target -- confirm against addrconf.
	 */
	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
	dev_put(work->dev);
	kfree(work);
}

/*
 * Schedule a reachability probe for the gateway of @rt.  Routes without
 * RTF_GATEWAY (and a NULL @rt) are ignored.  When a neighbour entry exists
 * a probe is only queued if the entry is not NUD_VALID and its last update
 * is older than the device's rtr_probe_interval; when no entry exists a
 * probe is always queued.  Allocation failure silently skips the probe.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		/* unlocked fast path; "work" is not read after this jump */
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		/* re-check the state now that the neighbour lock is held */
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		/* reference is dropped in rt6_probe_deferred() */
		dev_hold(rt->dst.dev);
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no probes are sent. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
604 
605 /*
606  * Default Router Selection (RFC 2461 6.3.6)
607  */
608 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
609 {
610 	struct net_device *dev = rt->dst.dev;
611 	if (!oif || dev->ifindex == oif)
612 		return 2;
613 	if ((dev->flags & IFF_LOOPBACK) &&
614 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
615 		return 1;
616 	return 0;
617 }
618 
619 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
620 {
621 	struct neighbour *neigh;
622 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
623 
624 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
625 	    !(rt->rt6i_flags & RTF_GATEWAY))
626 		return RT6_NUD_SUCCEED;
627 
628 	rcu_read_lock_bh();
629 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
630 	if (neigh) {
631 		read_lock(&neigh->lock);
632 		if (neigh->nud_state & NUD_VALID)
633 			ret = RT6_NUD_SUCCEED;
634 #ifdef CONFIG_IPV6_ROUTER_PREF
635 		else if (!(neigh->nud_state & NUD_FAILED))
636 			ret = RT6_NUD_SUCCEED;
637 		else
638 			ret = RT6_NUD_FAIL_PROBE;
639 #endif
640 		read_unlock(&neigh->lock);
641 	} else {
642 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
643 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
644 	}
645 	rcu_read_unlock_bh();
646 
647 	return ret;
648 }
649 
650 static int rt6_score_route(struct rt6_info *rt, int oif,
651 			   int strict)
652 {
653 	int m;
654 
655 	m = rt6_check_dev(rt, oif);
656 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
657 		return RT6_NUD_FAIL_HARD;
658 #ifdef CONFIG_IPV6_ROUTER_PREF
659 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
660 #endif
661 	if (strict & RT6_LOOKUP_F_REACHABLE) {
662 		int n = rt6_check_neigh(rt);
663 		if (n < 0)
664 			return n;
665 	}
666 	return m;
667 }
668 
669 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
670 				   int *mpri, struct rt6_info *match,
671 				   bool *do_rr)
672 {
673 	int m;
674 	bool match_do_rr = false;
675 	struct inet6_dev *idev = rt->rt6i_idev;
676 	struct net_device *dev = rt->dst.dev;
677 
678 	if (dev && !netif_carrier_ok(dev) &&
679 	    idev->cnf.ignore_routes_with_linkdown &&
680 	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
681 		goto out;
682 
683 	if (rt6_check_expired(rt))
684 		goto out;
685 
686 	m = rt6_score_route(rt, oif, strict);
687 	if (m == RT6_NUD_FAIL_DO_RR) {
688 		match_do_rr = true;
689 		m = 0; /* lowest valid score */
690 	} else if (m == RT6_NUD_FAIL_HARD) {
691 		goto out;
692 	}
693 
694 	if (strict & RT6_LOOKUP_F_REACHABLE)
695 		rt6_probe(rt);
696 
697 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
698 	if (m > *mpri) {
699 		*do_rr = match_do_rr;
700 		*mpri = m;
701 		match = rt;
702 	}
703 out:
704 	return match;
705 }
706 
707 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
708 				     struct rt6_info *rr_head,
709 				     u32 metric, int oif, int strict,
710 				     bool *do_rr)
711 {
712 	struct rt6_info *rt, *match, *cont;
713 	int mpri = -1;
714 
715 	match = NULL;
716 	cont = NULL;
717 	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
718 		if (rt->rt6i_metric != metric) {
719 			cont = rt;
720 			break;
721 		}
722 
723 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
724 	}
725 
726 	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
727 		if (rt->rt6i_metric != metric) {
728 			cont = rt;
729 			break;
730 		}
731 
732 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
733 	}
734 
735 	if (match || !cont)
736 		return match;
737 
738 	for (rt = cont; rt; rt = rt->dst.rt6_next)
739 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
740 
741 	return match;
742 }
743 
744 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
745 {
746 	struct rt6_info *match, *rt0;
747 	struct net *net;
748 	bool do_rr = false;
749 
750 	rt0 = fn->rr_ptr;
751 	if (!rt0)
752 		fn->rr_ptr = rt0 = fn->leaf;
753 
754 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
755 			     &do_rr);
756 
757 	if (do_rr) {
758 		struct rt6_info *next = rt0->dst.rt6_next;
759 
760 		/* no entries matched; do round-robin */
761 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
762 			next = fn->leaf;
763 
764 		if (next != rt0)
765 			fn->rr_ptr = next;
766 	}
767 
768 	net = dev_net(rt0->dst.dev);
769 	return match ? match : net->ipv6.ip6_null_entry;
770 }
771 
772 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
773 {
774 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
775 }
776 
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process one Route Information option.
 *
 * @dev:    device the option was received on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * A zero lifetime deletes an existing matching route; a non-zero lifetime
 * adds the route if it is missing or refreshes preference and expiry of an
 * existing one.  A zero prefix length refers to the default route via
 * @gwaddr.
 *
 * Returns 0 on success or -EINVAL when the option is malformed.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	/* Compare as signed so that a negative length is rejected too. */
	if (len < (int)sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length.
	 *
	 * The length field counts 8-octet units and includes the fixed
	 * 8-octet header, so the option carries (length - 1) * 8 octets of
	 * prefix.  Per RFC 4191 2.3 a prefix longer than 64 bits needs
	 * length 3 and any non-empty prefix needs length 2 or 3; accepting
	 * less would make the prefix copy below read past the option.
	 */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 3)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 2)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* Only prefix_len bits are taken from the option; the
		 * checks above guarantee that they are present.
		 */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev);

	/* lifetime 0 withdraws the route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif
852 
853 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
854 					struct in6_addr *saddr)
855 {
856 	struct fib6_node *pn;
857 	while (1) {
858 		if (fn->fn_flags & RTN_TL_ROOT)
859 			return NULL;
860 		pn = fn->parent;
861 		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
862 			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
863 		else
864 			fn = pn;
865 		if (fn->fn_flags & RTN_RTINFO)
866 			return fn;
867 	}
868 }
869 
870 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
871 					     struct fib6_table *table,
872 					     struct flowi6 *fl6, int flags)
873 {
874 	struct fib6_node *fn;
875 	struct rt6_info *rt;
876 
877 	read_lock_bh(&table->tb6_lock);
878 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
879 restart:
880 	rt = fn->leaf;
881 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
882 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
883 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
884 	if (rt == net->ipv6.ip6_null_entry) {
885 		fn = fib6_backtrack(fn, &fl6->saddr);
886 		if (fn)
887 			goto restart;
888 	}
889 	dst_use(&rt->dst, jiffies);
890 	read_unlock_bh(&table->tb6_lock);
891 
892 	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
893 
894 	return rt;
895 
896 }
897 
898 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
899 				    int flags)
900 {
901 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
902 }
903 EXPORT_SYMBOL_GPL(ip6_route_lookup);
904 
905 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
906 			    const struct in6_addr *saddr, int oif, int strict)
907 {
908 	struct flowi6 fl6 = {
909 		.flowi6_oif = oif,
910 		.daddr = *daddr,
911 	};
912 	struct dst_entry *dst;
913 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
914 
915 	if (saddr) {
916 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
917 		flags |= RT6_LOOKUP_F_HAS_SADDR;
918 	}
919 
920 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
921 	if (dst->error == 0)
922 		return (struct rt6_info *) dst;
923 
924 	dst_release(dst);
925 
926 	return NULL;
927 }
928 EXPORT_SYMBOL(rt6_lookup);
929 
930 /* ip6_ins_rt is called with FREE table->tb6_lock.
931  * It takes new route entry, the addition fails by any reason the
932  * route is released.
933  * Caller must hold dst before calling it.
934  */
935 
936 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
937 			struct mx6_config *mxc,
938 			struct netlink_ext_ack *extack)
939 {
940 	int err;
941 	struct fib6_table *table;
942 
943 	table = rt->rt6i_table;
944 	write_lock_bh(&table->tb6_lock);
945 	err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
946 	write_unlock_bh(&table->tb6_lock);
947 
948 	return err;
949 }
950 
951 int ip6_ins_rt(struct rt6_info *rt)
952 {
953 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
954 	struct mx6_config mxc = { .mx = NULL, };
955 
956 	/* Hold dst to account for the reference from the fib6 tree */
957 	dst_hold(&rt->dst);
958 	return __ip6_ins_rt(rt, &info, &mxc, NULL);
959 }
960 
961 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
962 					   const struct in6_addr *daddr,
963 					   const struct in6_addr *saddr)
964 {
965 	struct rt6_info *rt;
966 
967 	/*
968 	 *	Clone the route.
969 	 */
970 
971 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
972 		ort = (struct rt6_info *)ort->dst.from;
973 
974 	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
975 
976 	if (!rt)
977 		return NULL;
978 
979 	ip6_rt_copy_init(rt, ort);
980 	rt->rt6i_flags |= RTF_CACHE;
981 	rt->rt6i_metric = 0;
982 	rt->dst.flags |= DST_HOST;
983 	rt->rt6i_dst.addr = *daddr;
984 	rt->rt6i_dst.plen = 128;
985 
986 	if (!rt6_is_gw_or_nonexthop(ort)) {
987 		if (ort->rt6i_dst.plen != 128 &&
988 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
989 			rt->rt6i_flags |= RTF_ANYCAST;
990 #ifdef CONFIG_IPV6_SUBTREES
991 		if (rt->rt6i_src.plen && saddr) {
992 			rt->rt6i_src.addr = *saddr;
993 			rt->rt6i_src.plen = 128;
994 		}
995 #endif
996 	}
997 
998 	return rt;
999 }
1000 
1001 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1002 {
1003 	struct rt6_info *pcpu_rt;
1004 
1005 	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
1006 				  rt->dst.dev, rt->dst.flags);
1007 
1008 	if (!pcpu_rt)
1009 		return NULL;
1010 	ip6_rt_copy_init(pcpu_rt, rt);
1011 	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1012 	pcpu_rt->rt6i_flags |= RTF_PCPU;
1013 	return pcpu_rt;
1014 }
1015 
1016 /* It should be called with read_lock_bh(&tb6_lock) acquired */
1017 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1018 {
1019 	struct rt6_info *pcpu_rt, **p;
1020 
1021 	p = this_cpu_ptr(rt->rt6i_pcpu);
1022 	pcpu_rt = *p;
1023 
1024 	if (pcpu_rt) {
1025 		dst_hold(&pcpu_rt->dst);
1026 		rt6_dst_from_metrics_check(pcpu_rt);
1027 	}
1028 	return pcpu_rt;
1029 }
1030 
1031 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1032 {
1033 	struct fib6_table *table = rt->rt6i_table;
1034 	struct rt6_info *pcpu_rt, *prev, **p;
1035 
1036 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1037 	if (!pcpu_rt) {
1038 		struct net *net = dev_net(rt->dst.dev);
1039 
1040 		dst_hold(&net->ipv6.ip6_null_entry->dst);
1041 		return net->ipv6.ip6_null_entry;
1042 	}
1043 
1044 	read_lock_bh(&table->tb6_lock);
1045 	if (rt->rt6i_pcpu) {
1046 		p = this_cpu_ptr(rt->rt6i_pcpu);
1047 		prev = cmpxchg(p, NULL, pcpu_rt);
1048 		if (prev) {
1049 			/* If someone did it before us, return prev instead */
1050 			dst_release_immediate(&pcpu_rt->dst);
1051 			pcpu_rt = prev;
1052 		}
1053 	} else {
1054 		/* rt has been removed from the fib6 tree
1055 		 * before we have a chance to acquire the read_lock.
1056 		 * In this case, don't brother to create a pcpu rt
1057 		 * since rt is going away anyway.  The next
1058 		 * dst_check() will trigger a re-lookup.
1059 		 */
1060 		dst_release_immediate(&pcpu_rt->dst);
1061 		pcpu_rt = rt;
1062 	}
1063 	dst_hold(&pcpu_rt->dst);
1064 	rt6_dst_from_metrics_check(pcpu_rt);
1065 	read_unlock_bh(&table->tb6_lock);
1066 	return pcpu_rt;
1067 }
1068 
/*
 * Main policy routing lookup in @table for flow @fl6.
 *
 * @oif:   interface index to match against (forced to 0 when the flow has
 *         FLOWI_FLAG_SKIP_NH_OIF set)
 * @flags: RT6_LOOKUP_F_* flags; only IFACE and IGNORE_LINKSTATE are passed
 *         on to route selection, and REACHABLE is added when forwarding is
 *         disabled for the namespace.
 *
 * Selection backtracks towards the root while only the null entry is found
 * and, as a last resort, is repeated from the original node without the
 * reachability requirement.
 *
 * Returns, with a reference held, one of:
 *  - the null entry or an RTF_CACHE route as found in the tree;
 *  - a fresh RTF_CACHE clone placed on the uncached list (flows with
 *    FLOWI_FLAG_KNOWN_NH hitting a non-gateway route), or the null entry if
 *    that clone cannot be allocated;
 *  - otherwise the per-CPU copy of the selected route (see
 *    rt6_make_pcpu_route() for its fallbacks).
 */
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
			       int oif, struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	/* remembered for the retry without RT6_LOOKUP_F_REACHABLE */
	saved_fn = fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}


	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		/* usable as is: take the reference under the lock */
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);

		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		/* keep rt alive across the unlocked allocation below */
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt) {
			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
			 * No need for another dst_hold()
			 */
			rt6_uncached_list_add(uncached_rt);
		} else {
			uncached_rt = net->ipv6.ip6_null_entry;
			dst_hold(&uncached_rt->dst);
		}

		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		/* usage accounting on the tree route, without a reference */
		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
		return pcpu_rt;

	}
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
1171 
1172 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1173 					    struct flowi6 *fl6, int flags)
1174 {
1175 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1176 }
1177 
1178 struct dst_entry *ip6_route_input_lookup(struct net *net,
1179 					 struct net_device *dev,
1180 					 struct flowi6 *fl6, int flags)
1181 {
1182 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1183 		flags |= RT6_LOOKUP_F_IFACE;
1184 
1185 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1186 }
1187 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1188 
1189 void ip6_route_input(struct sk_buff *skb)
1190 {
1191 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1192 	struct net *net = dev_net(skb->dev);
1193 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1194 	struct ip_tunnel_info *tun_info;
1195 	struct flowi6 fl6 = {
1196 		.flowi6_iif = skb->dev->ifindex,
1197 		.daddr = iph->daddr,
1198 		.saddr = iph->saddr,
1199 		.flowlabel = ip6_flowinfo(iph),
1200 		.flowi6_mark = skb->mark,
1201 		.flowi6_proto = iph->nexthdr,
1202 	};
1203 
1204 	tun_info = skb_tunnel_info(skb);
1205 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1206 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1207 	skb_dst_drop(skb);
1208 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1209 }
1210 
1211 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1212 					     struct flowi6 *fl6, int flags)
1213 {
1214 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1215 }
1216 
1217 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1218 					 struct flowi6 *fl6, int flags)
1219 {
1220 	bool any_src;
1221 
1222 	if (rt6_need_strict(&fl6->daddr)) {
1223 		struct dst_entry *dst;
1224 
1225 		dst = l3mdev_link_scope_lookup(net, fl6);
1226 		if (dst)
1227 			return dst;
1228 	}
1229 
1230 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1231 
1232 	any_src = ipv6_addr_any(&fl6->saddr);
1233 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1234 	    (fl6->flowi6_oif && any_src))
1235 		flags |= RT6_LOOKUP_F_IFACE;
1236 
1237 	if (!any_src)
1238 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1239 	else if (sk)
1240 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1241 
1242 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1243 }
1244 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1245 
1246 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1247 {
1248 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1249 	struct net_device *loopback_dev = net->loopback_dev;
1250 	struct dst_entry *new = NULL;
1251 
1252 	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
1253 		       DST_OBSOLETE_NONE, 0);
1254 	if (rt) {
1255 		rt6_info_init(rt);
1256 
1257 		new = &rt->dst;
1258 		new->__use = 1;
1259 		new->input = dst_discard;
1260 		new->output = dst_discard_out;
1261 
1262 		dst_copy_metrics(new, &ort->dst);
1263 
1264 		rt->rt6i_idev = in6_dev_get(loopback_dev);
1265 		rt->rt6i_gateway = ort->rt6i_gateway;
1266 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1267 		rt->rt6i_metric = 0;
1268 
1269 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1270 #ifdef CONFIG_IPV6_SUBTREES
1271 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1272 #endif
1273 	}
1274 
1275 	dst_release(dst_orig);
1276 	return new ? new : ERR_PTR(-ENOMEM);
1277 }
1278 
1279 /*
1280  *	Destination cache support functions
1281  */
1282 
1283 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1284 {
1285 	if (rt->dst.from &&
1286 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1287 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1288 }
1289 
1290 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1291 {
1292 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1293 		return NULL;
1294 
1295 	if (rt6_check_expired(rt))
1296 		return NULL;
1297 
1298 	return &rt->dst;
1299 }
1300 
1301 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1302 {
1303 	if (!__rt6_check_expired(rt) &&
1304 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1305 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1306 		return &rt->dst;
1307 	else
1308 		return NULL;
1309 }
1310 
1311 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1312 {
1313 	struct rt6_info *rt;
1314 
1315 	rt = (struct rt6_info *) dst;
1316 
1317 	/* All IPV6 dsts are created with ->obsolete set to the value
1318 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1319 	 * into this function always.
1320 	 */
1321 
1322 	rt6_dst_from_metrics_check(rt);
1323 
1324 	if (rt->rt6i_flags & RTF_PCPU ||
1325 	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
1326 		return rt6_dst_from_check(rt, cookie);
1327 	else
1328 		return rt6_check(rt, cookie);
1329 }
1330 
1331 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1332 {
1333 	struct rt6_info *rt = (struct rt6_info *) dst;
1334 
1335 	if (rt) {
1336 		if (rt->rt6i_flags & RTF_CACHE) {
1337 			if (rt6_check_expired(rt)) {
1338 				ip6_del_rt(rt);
1339 				dst = NULL;
1340 			}
1341 		} else {
1342 			dst_release(dst);
1343 			dst = NULL;
1344 		}
1345 	}
1346 	return dst;
1347 }
1348 
1349 static void ip6_link_failure(struct sk_buff *skb)
1350 {
1351 	struct rt6_info *rt;
1352 
1353 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1354 
1355 	rt = (struct rt6_info *) skb_dst(skb);
1356 	if (rt) {
1357 		if (rt->rt6i_flags & RTF_CACHE) {
1358 			if (dst_hold_safe(&rt->dst))
1359 				ip6_del_rt(rt);
1360 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1361 			rt->rt6i_node->fn_sernum = -1;
1362 		}
1363 	}
1364 }
1365 
1366 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1367 {
1368 	struct net *net = dev_net(rt->dst.dev);
1369 
1370 	rt->rt6i_flags |= RTF_MODIFIED;
1371 	rt->rt6i_pmtu = mtu;
1372 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1373 }
1374 
1375 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1376 {
1377 	return !(rt->rt6i_flags & RTF_CACHE) &&
1378 		(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1379 }
1380 
1381 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1382 				 const struct ipv6hdr *iph, u32 mtu)
1383 {
1384 	const struct in6_addr *daddr, *saddr;
1385 	struct rt6_info *rt6 = (struct rt6_info *)dst;
1386 
1387 	if (rt6->rt6i_flags & RTF_LOCAL)
1388 		return;
1389 
1390 	if (dst_metric_locked(dst, RTAX_MTU))
1391 		return;
1392 
1393 	if (iph) {
1394 		daddr = &iph->daddr;
1395 		saddr = &iph->saddr;
1396 	} else if (sk) {
1397 		daddr = &sk->sk_v6_daddr;
1398 		saddr = &inet6_sk(sk)->saddr;
1399 	} else {
1400 		daddr = NULL;
1401 		saddr = NULL;
1402 	}
1403 	dst_confirm_neigh(dst, daddr);
1404 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1405 	if (mtu >= dst_mtu(dst))
1406 		return;
1407 
1408 	if (!rt6_cache_allowed_for_pmtu(rt6)) {
1409 		rt6_do_update_pmtu(rt6, mtu);
1410 	} else if (daddr) {
1411 		struct rt6_info *nrt6;
1412 
1413 		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1414 		if (nrt6) {
1415 			rt6_do_update_pmtu(nrt6, mtu);
1416 
1417 			/* ip6_ins_rt(nrt6) will bump the
1418 			 * rt6->rt6i_node->fn_sernum
1419 			 * which will fail the next rt6_check() and
1420 			 * invalidate the sk->sk_dst_cache.
1421 			 */
1422 			ip6_ins_rt(nrt6);
1423 			/* Release the reference taken in
1424 			 * ip6_rt_cache_alloc()
1425 			 */
1426 			dst_release(&nrt6->dst);
1427 		}
1428 	}
1429 }
1430 
1431 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1432 			       struct sk_buff *skb, u32 mtu)
1433 {
1434 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1435 }
1436 
1437 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1438 		     int oif, u32 mark, kuid_t uid)
1439 {
1440 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1441 	struct dst_entry *dst;
1442 	struct flowi6 fl6;
1443 
1444 	memset(&fl6, 0, sizeof(fl6));
1445 	fl6.flowi6_oif = oif;
1446 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1447 	fl6.daddr = iph->daddr;
1448 	fl6.saddr = iph->saddr;
1449 	fl6.flowlabel = ip6_flowinfo(iph);
1450 	fl6.flowi6_uid = uid;
1451 
1452 	dst = ip6_route_output(net, NULL, &fl6);
1453 	if (!dst->error)
1454 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1455 	dst_release(dst);
1456 }
1457 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1458 
1459 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1460 {
1461 	struct dst_entry *dst;
1462 
1463 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1464 			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
1465 
1466 	dst = __sk_dst_get(sk);
1467 	if (!dst || !dst->obsolete ||
1468 	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1469 		return;
1470 
1471 	bh_lock_sock(sk);
1472 	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1473 		ip6_datagram_dst_update(sk, false);
1474 	bh_unlock_sock(sk);
1475 }
1476 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1477 
1478 /* Handle redirects */
/*
 * Flow key for redirect lookups: a regular flowi6 followed by the
 * address of the router that sent the redirect.
 *
 * ip6_route_redirect() passes &rdfl.fl6 to fib6_rule_lookup() and
 * __ip6_route_redirect() casts that flowi6 pointer back to this type,
 * so 'fl6' has to remain the first member.
 */
1479 struct ip6rd_flowi {
	/* plain flow key handed to the FIB rule lookup */
1480 	struct flowi6 fl6;
	/* compared against each candidate route's rt6i_gateway */
1481 	struct in6_addr gateway;
1482 };
1483 
1484 static struct rt6_info *__ip6_route_redirect(struct net *net,
1485 					     struct fib6_table *table,
1486 					     struct flowi6 *fl6,
1487 					     int flags)
1488 {
1489 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1490 	struct rt6_info *rt;
1491 	struct fib6_node *fn;
1492 
1493 	/* Get the "current" route for this destination and
1494 	 * check if the redirect has come from appropriate router.
1495 	 *
1496 	 * RFC 4861 specifies that redirects should only be
1497 	 * accepted if they come from the nexthop to the target.
1498 	 * Due to the way the routes are chosen, this notion
1499 	 * is a bit fuzzy and one might need to check all possible
1500 	 * routes.
1501 	 */
1502 
1503 	read_lock_bh(&table->tb6_lock);
1504 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1505 restart:
1506 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1507 		if (rt6_check_expired(rt))
1508 			continue;
1509 		if (rt->dst.error)
1510 			break;
1511 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1512 			continue;
1513 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1514 			continue;
1515 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1516 			continue;
1517 		break;
1518 	}
1519 
1520 	if (!rt)
1521 		rt = net->ipv6.ip6_null_entry;
1522 	else if (rt->dst.error) {
1523 		rt = net->ipv6.ip6_null_entry;
1524 		goto out;
1525 	}
1526 
1527 	if (rt == net->ipv6.ip6_null_entry) {
1528 		fn = fib6_backtrack(fn, &fl6->saddr);
1529 		if (fn)
1530 			goto restart;
1531 	}
1532 
1533 out:
1534 	dst_hold(&rt->dst);
1535 
1536 	read_unlock_bh(&table->tb6_lock);
1537 
1538 	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1539 	return rt;
1540 };
1541 
1542 static struct dst_entry *ip6_route_redirect(struct net *net,
1543 					const struct flowi6 *fl6,
1544 					const struct in6_addr *gateway)
1545 {
1546 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1547 	struct ip6rd_flowi rdfl;
1548 
1549 	rdfl.fl6 = *fl6;
1550 	rdfl.gateway = *gateway;
1551 
1552 	return fib6_rule_lookup(net, &rdfl.fl6,
1553 				flags, __ip6_route_redirect);
1554 }
1555 
1556 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1557 		  kuid_t uid)
1558 {
1559 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1560 	struct dst_entry *dst;
1561 	struct flowi6 fl6;
1562 
1563 	memset(&fl6, 0, sizeof(fl6));
1564 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1565 	fl6.flowi6_oif = oif;
1566 	fl6.flowi6_mark = mark;
1567 	fl6.daddr = iph->daddr;
1568 	fl6.saddr = iph->saddr;
1569 	fl6.flowlabel = ip6_flowinfo(iph);
1570 	fl6.flowi6_uid = uid;
1571 
1572 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1573 	rt6_do_redirect(dst, NULL, skb);
1574 	dst_release(dst);
1575 }
1576 EXPORT_SYMBOL_GPL(ip6_redirect);
1577 
1578 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1579 			    u32 mark)
1580 {
1581 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1582 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1583 	struct dst_entry *dst;
1584 	struct flowi6 fl6;
1585 
1586 	memset(&fl6, 0, sizeof(fl6));
1587 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1588 	fl6.flowi6_oif = oif;
1589 	fl6.flowi6_mark = mark;
1590 	fl6.daddr = msg->dest;
1591 	fl6.saddr = iph->daddr;
1592 	fl6.flowi6_uid = sock_net_uid(net, NULL);
1593 
1594 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1595 	rt6_do_redirect(dst, NULL, skb);
1596 	dst_release(dst);
1597 }
1598 
1599 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1600 {
1601 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1602 		     sk->sk_uid);
1603 }
1604 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1605 
1606 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1607 {
1608 	struct net_device *dev = dst->dev;
1609 	unsigned int mtu = dst_mtu(dst);
1610 	struct net *net = dev_net(dev);
1611 
1612 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1613 
1614 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1615 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1616 
1617 	/*
1618 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1619 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1620 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1621 	 * rely only on pmtu discovery"
1622 	 */
1623 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1624 		mtu = IPV6_MAXPLEN;
1625 	return mtu;
1626 }
1627 
1628 static unsigned int ip6_mtu(const struct dst_entry *dst)
1629 {
1630 	const struct rt6_info *rt = (const struct rt6_info *)dst;
1631 	unsigned int mtu = rt->rt6i_pmtu;
1632 	struct inet6_dev *idev;
1633 
1634 	if (mtu)
1635 		goto out;
1636 
1637 	mtu = dst_metric_raw(dst, RTAX_MTU);
1638 	if (mtu)
1639 		goto out;
1640 
1641 	mtu = IPV6_MIN_MTU;
1642 
1643 	rcu_read_lock();
1644 	idev = __in6_dev_get(dst->dev);
1645 	if (idev)
1646 		mtu = idev->cnf.mtu6;
1647 	rcu_read_unlock();
1648 
1649 out:
1650 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1651 
1652 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
1653 }
1654 
1655 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1656 				  struct flowi6 *fl6)
1657 {
1658 	struct dst_entry *dst;
1659 	struct rt6_info *rt;
1660 	struct inet6_dev *idev = in6_dev_get(dev);
1661 	struct net *net = dev_net(dev);
1662 
1663 	if (unlikely(!idev))
1664 		return ERR_PTR(-ENODEV);
1665 
1666 	rt = ip6_dst_alloc(net, dev, 0);
1667 	if (unlikely(!rt)) {
1668 		in6_dev_put(idev);
1669 		dst = ERR_PTR(-ENOMEM);
1670 		goto out;
1671 	}
1672 
1673 	rt->dst.flags |= DST_HOST;
1674 	rt->dst.output  = ip6_output;
1675 	rt->rt6i_gateway  = fl6->daddr;
1676 	rt->rt6i_dst.addr = fl6->daddr;
1677 	rt->rt6i_dst.plen = 128;
1678 	rt->rt6i_idev     = idev;
1679 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1680 
1681 	/* Add this dst into uncached_list so that rt6_ifdown() can
1682 	 * do proper release of the net_device
1683 	 */
1684 	rt6_uncached_list_add(rt);
1685 
1686 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1687 
1688 out:
1689 	return dst;
1690 }
1691 
1692 static int ip6_dst_gc(struct dst_ops *ops)
1693 {
1694 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1695 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1696 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1697 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1698 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1699 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1700 	int entries;
1701 
1702 	entries = dst_entries_get_fast(ops);
1703 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1704 	    entries <= rt_max_size)
1705 		goto out;
1706 
1707 	net->ipv6.ip6_rt_gc_expire++;
1708 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1709 	entries = dst_entries_get_slow(ops);
1710 	if (entries < ops->gc_thresh)
1711 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1712 out:
1713 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1714 	return entries > rt_max_size;
1715 }
1716 
1717 static int ip6_convert_metrics(struct mx6_config *mxc,
1718 			       const struct fib6_config *cfg)
1719 {
1720 	bool ecn_ca = false;
1721 	struct nlattr *nla;
1722 	int remaining;
1723 	u32 *mp;
1724 
1725 	if (!cfg->fc_mx)
1726 		return 0;
1727 
1728 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1729 	if (unlikely(!mp))
1730 		return -ENOMEM;
1731 
1732 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1733 		int type = nla_type(nla);
1734 		u32 val;
1735 
1736 		if (!type)
1737 			continue;
1738 		if (unlikely(type > RTAX_MAX))
1739 			goto err;
1740 
1741 		if (type == RTAX_CC_ALGO) {
1742 			char tmp[TCP_CA_NAME_MAX];
1743 
1744 			nla_strlcpy(tmp, nla, sizeof(tmp));
1745 			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1746 			if (val == TCP_CA_UNSPEC)
1747 				goto err;
1748 		} else {
1749 			val = nla_get_u32(nla);
1750 		}
1751 		if (type == RTAX_HOPLIMIT && val > 255)
1752 			val = 255;
1753 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1754 			goto err;
1755 
1756 		mp[type - 1] = val;
1757 		__set_bit(type - 1, mxc->mx_valid);
1758 	}
1759 
1760 	if (ecn_ca) {
1761 		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1762 		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1763 	}
1764 
1765 	mxc->mx = mp;
1766 	return 0;
1767  err:
1768 	kfree(mp);
1769 	return -EINVAL;
1770 }
1771 
1772 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1773 					    struct fib6_config *cfg,
1774 					    const struct in6_addr *gw_addr)
1775 {
1776 	struct flowi6 fl6 = {
1777 		.flowi6_oif = cfg->fc_ifindex,
1778 		.daddr = *gw_addr,
1779 		.saddr = cfg->fc_prefsrc,
1780 	};
1781 	struct fib6_table *table;
1782 	struct rt6_info *rt;
1783 	int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
1784 
1785 	table = fib6_get_table(net, cfg->fc_table);
1786 	if (!table)
1787 		return NULL;
1788 
1789 	if (!ipv6_addr_any(&cfg->fc_prefsrc))
1790 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1791 
1792 	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1793 
1794 	/* if table lookup failed, fall back to full lookup */
1795 	if (rt == net->ipv6.ip6_null_entry) {
1796 		ip6_rt_put(rt);
1797 		rt = NULL;
1798 	}
1799 
1800 	return rt;
1801 }
1802 
1803 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
1804 					      struct netlink_ext_ack *extack)
1805 {
1806 	struct net *net = cfg->fc_nlinfo.nl_net;
1807 	struct rt6_info *rt = NULL;
1808 	struct net_device *dev = NULL;
1809 	struct inet6_dev *idev = NULL;
1810 	struct fib6_table *table;
1811 	int addr_type;
1812 	int err = -EINVAL;
1813 
1814 	/* RTF_PCPU is an internal flag; can not be set by userspace */
1815 	if (cfg->fc_flags & RTF_PCPU) {
1816 		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
1817 		goto out;
1818 	}
1819 
1820 	if (cfg->fc_dst_len > 128) {
1821 		NL_SET_ERR_MSG(extack, "Invalid prefix length");
1822 		goto out;
1823 	}
1824 	if (cfg->fc_src_len > 128) {
1825 		NL_SET_ERR_MSG(extack, "Invalid source address length");
1826 		goto out;
1827 	}
1828 #ifndef CONFIG_IPV6_SUBTREES
1829 	if (cfg->fc_src_len) {
1830 		NL_SET_ERR_MSG(extack,
1831 			       "Specifying source address requires IPV6_SUBTREES to be enabled");
1832 		goto out;
1833 	}
1834 #endif
1835 	if (cfg->fc_ifindex) {
1836 		err = -ENODEV;
1837 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1838 		if (!dev)
1839 			goto out;
1840 		idev = in6_dev_get(dev);
1841 		if (!idev)
1842 			goto out;
1843 	}
1844 
1845 	if (cfg->fc_metric == 0)
1846 		cfg->fc_metric = IP6_RT_PRIO_USER;
1847 
1848 	err = -ENOBUFS;
1849 	if (cfg->fc_nlinfo.nlh &&
1850 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1851 		table = fib6_get_table(net, cfg->fc_table);
1852 		if (!table) {
1853 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1854 			table = fib6_new_table(net, cfg->fc_table);
1855 		}
1856 	} else {
1857 		table = fib6_new_table(net, cfg->fc_table);
1858 	}
1859 
1860 	if (!table)
1861 		goto out;
1862 
1863 	rt = ip6_dst_alloc(net, NULL,
1864 			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1865 
1866 	if (!rt) {
1867 		err = -ENOMEM;
1868 		goto out;
1869 	}
1870 
1871 	if (cfg->fc_flags & RTF_EXPIRES)
1872 		rt6_set_expires(rt, jiffies +
1873 				clock_t_to_jiffies(cfg->fc_expires));
1874 	else
1875 		rt6_clean_expires(rt);
1876 
1877 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1878 		cfg->fc_protocol = RTPROT_BOOT;
1879 	rt->rt6i_protocol = cfg->fc_protocol;
1880 
1881 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1882 
1883 	if (addr_type & IPV6_ADDR_MULTICAST)
1884 		rt->dst.input = ip6_mc_input;
1885 	else if (cfg->fc_flags & RTF_LOCAL)
1886 		rt->dst.input = ip6_input;
1887 	else
1888 		rt->dst.input = ip6_forward;
1889 
1890 	rt->dst.output = ip6_output;
1891 
1892 	if (cfg->fc_encap) {
1893 		struct lwtunnel_state *lwtstate;
1894 
1895 		err = lwtunnel_build_state(cfg->fc_encap_type,
1896 					   cfg->fc_encap, AF_INET6, cfg,
1897 					   &lwtstate, extack);
1898 		if (err)
1899 			goto out;
1900 		rt->dst.lwtstate = lwtstate_get(lwtstate);
1901 		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1902 			rt->dst.lwtstate->orig_output = rt->dst.output;
1903 			rt->dst.output = lwtunnel_output;
1904 		}
1905 		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1906 			rt->dst.lwtstate->orig_input = rt->dst.input;
1907 			rt->dst.input = lwtunnel_input;
1908 		}
1909 	}
1910 
1911 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1912 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1913 	if (rt->rt6i_dst.plen == 128)
1914 		rt->dst.flags |= DST_HOST;
1915 
1916 #ifdef CONFIG_IPV6_SUBTREES
1917 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1918 	rt->rt6i_src.plen = cfg->fc_src_len;
1919 #endif
1920 
1921 	rt->rt6i_metric = cfg->fc_metric;
1922 
1923 	/* We cannot add true routes via loopback here,
1924 	   they would result in kernel looping; promote them to reject routes
1925 	 */
1926 	if ((cfg->fc_flags & RTF_REJECT) ||
1927 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1928 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1929 	     !(cfg->fc_flags & RTF_LOCAL))) {
1930 		/* hold loopback dev/idev if we haven't done so. */
1931 		if (dev != net->loopback_dev) {
1932 			if (dev) {
1933 				dev_put(dev);
1934 				in6_dev_put(idev);
1935 			}
1936 			dev = net->loopback_dev;
1937 			dev_hold(dev);
1938 			idev = in6_dev_get(dev);
1939 			if (!idev) {
1940 				err = -ENODEV;
1941 				goto out;
1942 			}
1943 		}
1944 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1945 		switch (cfg->fc_type) {
1946 		case RTN_BLACKHOLE:
1947 			rt->dst.error = -EINVAL;
1948 			rt->dst.output = dst_discard_out;
1949 			rt->dst.input = dst_discard;
1950 			break;
1951 		case RTN_PROHIBIT:
1952 			rt->dst.error = -EACCES;
1953 			rt->dst.output = ip6_pkt_prohibit_out;
1954 			rt->dst.input = ip6_pkt_prohibit;
1955 			break;
1956 		case RTN_THROW:
1957 		case RTN_UNREACHABLE:
1958 		default:
1959 			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1960 					: (cfg->fc_type == RTN_UNREACHABLE)
1961 					? -EHOSTUNREACH : -ENETUNREACH;
1962 			rt->dst.output = ip6_pkt_discard_out;
1963 			rt->dst.input = ip6_pkt_discard;
1964 			break;
1965 		}
1966 		goto install_route;
1967 	}
1968 
1969 	if (cfg->fc_flags & RTF_GATEWAY) {
1970 		const struct in6_addr *gw_addr;
1971 		int gwa_type;
1972 
1973 		gw_addr = &cfg->fc_gateway;
1974 		gwa_type = ipv6_addr_type(gw_addr);
1975 
1976 		/* if gw_addr is local we will fail to detect this in case
1977 		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1978 		 * will return already-added prefix route via interface that
1979 		 * prefix route was assigned to, which might be non-loopback.
1980 		 */
1981 		err = -EINVAL;
1982 		if (ipv6_chk_addr_and_flags(net, gw_addr,
1983 					    gwa_type & IPV6_ADDR_LINKLOCAL ?
1984 					    dev : NULL, 0, 0)) {
1985 			NL_SET_ERR_MSG(extack, "Invalid gateway address");
1986 			goto out;
1987 		}
1988 		rt->rt6i_gateway = *gw_addr;
1989 
1990 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1991 			struct rt6_info *grt = NULL;
1992 
1993 			/* IPv6 strictly inhibits using not link-local
1994 			   addresses as nexthop address.
1995 			   Otherwise, router will not able to send redirects.
1996 			   It is very good, but in some (rare!) circumstances
1997 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1998 			   some exceptions. --ANK
1999 			   We allow IPv4-mapped nexthops to support RFC4798-type
2000 			   addressing
2001 			 */
2002 			if (!(gwa_type & (IPV6_ADDR_UNICAST |
2003 					  IPV6_ADDR_MAPPED))) {
2004 				NL_SET_ERR_MSG(extack,
2005 					       "Invalid gateway address");
2006 				goto out;
2007 			}
2008 
2009 			if (cfg->fc_table) {
2010 				grt = ip6_nh_lookup_table(net, cfg, gw_addr);
2011 
2012 				if (grt) {
2013 					if (grt->rt6i_flags & RTF_GATEWAY ||
2014 					    (dev && dev != grt->dst.dev)) {
2015 						ip6_rt_put(grt);
2016 						grt = NULL;
2017 					}
2018 				}
2019 			}
2020 
2021 			if (!grt)
2022 				grt = rt6_lookup(net, gw_addr, NULL,
2023 						 cfg->fc_ifindex, 1);
2024 
2025 			err = -EHOSTUNREACH;
2026 			if (!grt)
2027 				goto out;
2028 			if (dev) {
2029 				if (dev != grt->dst.dev) {
2030 					ip6_rt_put(grt);
2031 					goto out;
2032 				}
2033 			} else {
2034 				dev = grt->dst.dev;
2035 				idev = grt->rt6i_idev;
2036 				dev_hold(dev);
2037 				in6_dev_hold(grt->rt6i_idev);
2038 			}
2039 			if (!(grt->rt6i_flags & RTF_GATEWAY))
2040 				err = 0;
2041 			ip6_rt_put(grt);
2042 
2043 			if (err)
2044 				goto out;
2045 		}
2046 		err = -EINVAL;
2047 		if (!dev) {
2048 			NL_SET_ERR_MSG(extack, "Egress device not specified");
2049 			goto out;
2050 		} else if (dev->flags & IFF_LOOPBACK) {
2051 			NL_SET_ERR_MSG(extack,
2052 				       "Egress device can not be loopback device for this route");
2053 			goto out;
2054 		}
2055 	}
2056 
2057 	err = -ENODEV;
2058 	if (!dev)
2059 		goto out;
2060 
2061 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2062 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
2063 			NL_SET_ERR_MSG(extack, "Invalid source address");
2064 			err = -EINVAL;
2065 			goto out;
2066 		}
2067 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
2068 		rt->rt6i_prefsrc.plen = 128;
2069 	} else
2070 		rt->rt6i_prefsrc.plen = 0;
2071 
2072 	rt->rt6i_flags = cfg->fc_flags;
2073 
2074 install_route:
2075 	rt->dst.dev = dev;
2076 	rt->rt6i_idev = idev;
2077 	rt->rt6i_table = table;
2078 
2079 	cfg->fc_nlinfo.nl_net = dev_net(dev);
2080 
2081 	return rt;
2082 out:
2083 	if (dev)
2084 		dev_put(dev);
2085 	if (idev)
2086 		in6_dev_put(idev);
2087 	if (rt)
2088 		dst_release_immediate(&rt->dst);
2089 
2090 	return ERR_PTR(err);
2091 }
2092 
2093 int ip6_route_add(struct fib6_config *cfg,
2094 		  struct netlink_ext_ack *extack)
2095 {
2096 	struct mx6_config mxc = { .mx = NULL, };
2097 	struct rt6_info *rt;
2098 	int err;
2099 
2100 	rt = ip6_route_info_create(cfg, extack);
2101 	if (IS_ERR(rt)) {
2102 		err = PTR_ERR(rt);
2103 		rt = NULL;
2104 		goto out;
2105 	}
2106 
2107 	err = ip6_convert_metrics(&mxc, cfg);
2108 	if (err)
2109 		goto out;
2110 
2111 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
2112 
2113 	kfree(mxc.mx);
2114 
2115 	return err;
2116 out:
2117 	if (rt)
2118 		dst_release_immediate(&rt->dst);
2119 
2120 	return err;
2121 }
2122 
2123 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2124 {
2125 	int err;
2126 	struct fib6_table *table;
2127 	struct net *net = dev_net(rt->dst.dev);
2128 
2129 	if (rt == net->ipv6.ip6_null_entry) {
2130 		err = -ENOENT;
2131 		goto out;
2132 	}
2133 
2134 	table = rt->rt6i_table;
2135 	write_lock_bh(&table->tb6_lock);
2136 	err = fib6_del(rt, info);
2137 	write_unlock_bh(&table->tb6_lock);
2138 
2139 out:
2140 	ip6_rt_put(rt);
2141 	return err;
2142 }
2143 
2144 int ip6_del_rt(struct rt6_info *rt)
2145 {
2146 	struct nl_info info = {
2147 		.nl_net = dev_net(rt->dst.dev),
2148 	};
2149 	return __ip6_del_rt(rt, &info);
2150 }
2151 
2152 static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2153 {
2154 	struct nl_info *info = &cfg->fc_nlinfo;
2155 	struct net *net = info->nl_net;
2156 	struct sk_buff *skb = NULL;
2157 	struct fib6_table *table;
2158 	int err = -ENOENT;
2159 
2160 	if (rt == net->ipv6.ip6_null_entry)
2161 		goto out_put;
2162 	table = rt->rt6i_table;
2163 	write_lock_bh(&table->tb6_lock);
2164 
2165 	if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2166 		struct rt6_info *sibling, *next_sibling;
2167 
2168 		/* prefer to send a single notification with all hops */
2169 		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
2170 		if (skb) {
2171 			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2172 
2173 			if (rt6_fill_node(net, skb, rt,
2174 					  NULL, NULL, 0, RTM_DELROUTE,
2175 					  info->portid, seq, 0) < 0) {
2176 				kfree_skb(skb);
2177 				skb = NULL;
2178 			} else
2179 				info->skip_notify = 1;
2180 		}
2181 
2182 		list_for_each_entry_safe(sibling, next_sibling,
2183 					 &rt->rt6i_siblings,
2184 					 rt6i_siblings) {
2185 			err = fib6_del(sibling, info);
2186 			if (err)
2187 				goto out_unlock;
2188 		}
2189 	}
2190 
2191 	err = fib6_del(rt, info);
2192 out_unlock:
2193 	write_unlock_bh(&table->tb6_lock);
2194 out_put:
2195 	ip6_rt_put(rt);
2196 
2197 	if (skb) {
2198 		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2199 			    info->nlh, gfp_any());
2200 	}
2201 	return err;
2202 }
2203 
2204 static int ip6_route_del(struct fib6_config *cfg,
2205 			 struct netlink_ext_ack *extack)
2206 {
2207 	struct fib6_table *table;
2208 	struct fib6_node *fn;
2209 	struct rt6_info *rt;
2210 	int err = -ESRCH;
2211 
2212 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2213 	if (!table) {
2214 		NL_SET_ERR_MSG(extack, "FIB table does not exist");
2215 		return err;
2216 	}
2217 
2218 	read_lock_bh(&table->tb6_lock);
2219 
2220 	fn = fib6_locate(&table->tb6_root,
2221 			 &cfg->fc_dst, cfg->fc_dst_len,
2222 			 &cfg->fc_src, cfg->fc_src_len);
2223 
2224 	if (fn) {
2225 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2226 			if ((rt->rt6i_flags & RTF_CACHE) &&
2227 			    !(cfg->fc_flags & RTF_CACHE))
2228 				continue;
2229 			if (cfg->fc_ifindex &&
2230 			    (!rt->dst.dev ||
2231 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
2232 				continue;
2233 			if (cfg->fc_flags & RTF_GATEWAY &&
2234 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2235 				continue;
2236 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2237 				continue;
2238 			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2239 				continue;
2240 			dst_hold(&rt->dst);
2241 			read_unlock_bh(&table->tb6_lock);
2242 
2243 			/* if gateway was specified only delete the one hop */
2244 			if (cfg->fc_flags & RTF_GATEWAY)
2245 				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2246 
2247 			return __ip6_del_rt_siblings(rt, cfg);
2248 		}
2249 	}
2250 	read_unlock_bh(&table->tb6_lock);
2251 
2252 	return err;
2253 }
2254 
/*
 * Process an ICMPv6 Redirect carried in @skb for the route @dst.
 *
 * The message is validated (length, non-multicast destination, target
 * either equal to the destination or link-local unicast, receiving device
 * not forwarding and accepting redirects, parseable ND options, route not
 * RTF_REJECT).  If accepted, the neighbour entry for the target is updated
 * and a new RTF_CACHE route for the destination via that target is
 * inserted; an old RTF_CACHE route is deleted afterwards.
 *
 * @sk is not used by this function.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	/* bytes of ND options that follow the fixed redirect header */
	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* target == destination means the destination itself is on-link;
	 * any other target has to be a link-local unicast address
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	/* ignore redirects when the receiving interface forwards packets
	 * or has accept_redirects switched off
	 */
	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	/* optional target link-layer address option */
	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt->rt6i_flags & RTF_REJECT) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);

	/* the reference obtained here is dropped at the "out" label */
	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	/* the router flags are only passed when the target is a router,
	 * i.e. when the destination is not on-link
	 */
	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER)),
		     NDISC_REDIRECT, &ndopts);

	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	/* an on-link destination gets a cache route without RTF_GATEWAY */
	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_protocol = RTPROT_REDIRECT;
	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out_release;

	/* tell netevent listeners about the old -> new route switch */
	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* the superseded route was itself a cache entry: remove it.
	 * NOTE(review): the extra reference taken by dst_clone() is
	 * presumably consumed by ip6_del_rt() - confirm against its
	 * definition.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out_release:
	/* Release the reference taken in
	 * ip6_rt_cache_alloc()
	 */
	dst_release(&nrt->dst);

out:
	neigh_release(neigh);
}
2377 
2378 /*
2379  *	Misc support functions
2380  */
2381 
2382 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2383 {
2384 	BUG_ON(from->dst.from);
2385 
2386 	rt->rt6i_flags &= ~RTF_EXPIRES;
2387 	dst_hold(&from->dst);
2388 	rt->dst.from = &from->dst;
2389 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2390 }
2391 
2392 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2393 {
2394 	rt->dst.input = ort->dst.input;
2395 	rt->dst.output = ort->dst.output;
2396 	rt->rt6i_dst = ort->rt6i_dst;
2397 	rt->dst.error = ort->dst.error;
2398 	rt->rt6i_idev = ort->rt6i_idev;
2399 	if (rt->rt6i_idev)
2400 		in6_dev_hold(rt->rt6i_idev);
2401 	rt->dst.lastuse = jiffies;
2402 	rt->rt6i_gateway = ort->rt6i_gateway;
2403 	rt->rt6i_flags = ort->rt6i_flags;
2404 	rt6_set_from(rt, ort);
2405 	rt->rt6i_metric = ort->rt6i_metric;
2406 #ifdef CONFIG_IPV6_SUBTREES
2407 	rt->rt6i_src = ort->rt6i_src;
2408 #endif
2409 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2410 	rt->rt6i_table = ort->rt6i_table;
2411 	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2412 }
2413 
2414 #ifdef CONFIG_IPV6_ROUTE_INFO
2415 static struct rt6_info *rt6_get_route_info(struct net *net,
2416 					   const struct in6_addr *prefix, int prefixlen,
2417 					   const struct in6_addr *gwaddr,
2418 					   struct net_device *dev)
2419 {
2420 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
2421 	int ifindex = dev->ifindex;
2422 	struct fib6_node *fn;
2423 	struct rt6_info *rt = NULL;
2424 	struct fib6_table *table;
2425 
2426 	table = fib6_get_table(net, tb_id);
2427 	if (!table)
2428 		return NULL;
2429 
2430 	read_lock_bh(&table->tb6_lock);
2431 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2432 	if (!fn)
2433 		goto out;
2434 
2435 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2436 		if (rt->dst.dev->ifindex != ifindex)
2437 			continue;
2438 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2439 			continue;
2440 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2441 			continue;
2442 		dst_hold(&rt->dst);
2443 		break;
2444 	}
2445 out:
2446 	read_unlock_bh(&table->tb6_lock);
2447 	return rt;
2448 }
2449 
2450 static struct rt6_info *rt6_add_route_info(struct net *net,
2451 					   const struct in6_addr *prefix, int prefixlen,
2452 					   const struct in6_addr *gwaddr,
2453 					   struct net_device *dev,
2454 					   unsigned int pref)
2455 {
2456 	struct fib6_config cfg = {
2457 		.fc_metric	= IP6_RT_PRIO_USER,
2458 		.fc_ifindex	= dev->ifindex,
2459 		.fc_dst_len	= prefixlen,
2460 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2461 				  RTF_UP | RTF_PREF(pref),
2462 		.fc_protocol = RTPROT_RA,
2463 		.fc_nlinfo.portid = 0,
2464 		.fc_nlinfo.nlh = NULL,
2465 		.fc_nlinfo.nl_net = net,
2466 	};
2467 
2468 	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
2469 	cfg.fc_dst = *prefix;
2470 	cfg.fc_gateway = *gwaddr;
2471 
2472 	/* We should treat it as a default route if prefix length is 0. */
2473 	if (!prefixlen)
2474 		cfg.fc_flags |= RTF_DEFAULT;
2475 
2476 	ip6_route_add(&cfg, NULL);
2477 
2478 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
2479 }
2480 #endif
2481 
2482 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2483 {
2484 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
2485 	struct rt6_info *rt;
2486 	struct fib6_table *table;
2487 
2488 	table = fib6_get_table(dev_net(dev), tb_id);
2489 	if (!table)
2490 		return NULL;
2491 
2492 	read_lock_bh(&table->tb6_lock);
2493 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2494 		if (dev == rt->dst.dev &&
2495 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2496 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2497 			break;
2498 	}
2499 	if (rt)
2500 		dst_hold(&rt->dst);
2501 	read_unlock_bh(&table->tb6_lock);
2502 	return rt;
2503 }
2504 
2505 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2506 				     struct net_device *dev,
2507 				     unsigned int pref)
2508 {
2509 	struct fib6_config cfg = {
2510 		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2511 		.fc_metric	= IP6_RT_PRIO_USER,
2512 		.fc_ifindex	= dev->ifindex,
2513 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2514 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2515 		.fc_protocol = RTPROT_RA,
2516 		.fc_nlinfo.portid = 0,
2517 		.fc_nlinfo.nlh = NULL,
2518 		.fc_nlinfo.nl_net = dev_net(dev),
2519 	};
2520 
2521 	cfg.fc_gateway = *gwaddr;
2522 
2523 	if (!ip6_route_add(&cfg, NULL)) {
2524 		struct fib6_table *table;
2525 
2526 		table = fib6_get_table(dev_net(dev), cfg.fc_table);
2527 		if (table)
2528 			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
2529 	}
2530 
2531 	return rt6_get_dflt_router(gwaddr, dev);
2532 }
2533 
/*
 * Remove the default / autoconfigured routes on the root node of @table,
 * except those whose inet6_dev has accept_ra set to 2, and clear the
 * table's RT6_TABLE_HAS_DFLT_ROUTER flag afterwards.
 *
 * NOTE(review): the flag test is a plain mask test, so it is true when
 * either RTF_DEFAULT or RTF_ADDRCONF is set, not only when both are.
 */
static void __rt6_purge_dflt_routers(struct fib6_table *table)
{
	struct rt6_info *rt;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
			/* pin the route, leave the read section to delete
			 * it, then rescan from the start because the list
			 * was walked without the lock in between
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);

	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
}
2553 
2554 void rt6_purge_dflt_routers(struct net *net)
2555 {
2556 	struct fib6_table *table;
2557 	struct hlist_head *head;
2558 	unsigned int h;
2559 
2560 	rcu_read_lock();
2561 
2562 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
2563 		head = &net->ipv6.fib_table_hash[h];
2564 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
2565 			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
2566 				__rt6_purge_dflt_routers(table);
2567 		}
2568 	}
2569 
2570 	rcu_read_unlock();
2571 }
2572 
2573 static void rtmsg_to_fib6_config(struct net *net,
2574 				 struct in6_rtmsg *rtmsg,
2575 				 struct fib6_config *cfg)
2576 {
2577 	memset(cfg, 0, sizeof(*cfg));
2578 
2579 	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2580 			 : RT6_TABLE_MAIN;
2581 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2582 	cfg->fc_metric = rtmsg->rtmsg_metric;
2583 	cfg->fc_expires = rtmsg->rtmsg_info;
2584 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2585 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2586 	cfg->fc_flags = rtmsg->rtmsg_flags;
2587 
2588 	cfg->fc_nlinfo.nl_net = net;
2589 
2590 	cfg->fc_dst = rtmsg->rtmsg_dst;
2591 	cfg->fc_src = rtmsg->rtmsg_src;
2592 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2593 }
2594 
2595 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2596 {
2597 	struct fib6_config cfg;
2598 	struct in6_rtmsg rtmsg;
2599 	int err;
2600 
2601 	switch (cmd) {
2602 	case SIOCADDRT:		/* Add a route */
2603 	case SIOCDELRT:		/* Delete a route */
2604 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2605 			return -EPERM;
2606 		err = copy_from_user(&rtmsg, arg,
2607 				     sizeof(struct in6_rtmsg));
2608 		if (err)
2609 			return -EFAULT;
2610 
2611 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2612 
2613 		rtnl_lock();
2614 		switch (cmd) {
2615 		case SIOCADDRT:
2616 			err = ip6_route_add(&cfg, NULL);
2617 			break;
2618 		case SIOCDELRT:
2619 			err = ip6_route_del(&cfg, NULL);
2620 			break;
2621 		default:
2622 			err = -EINVAL;
2623 		}
2624 		rtnl_unlock();
2625 
2626 		return err;
2627 	}
2628 
2629 	return -EINVAL;
2630 }
2631 
2632 /*
2633  *	Drop the packet on the floor
2634  */
2635 
2636 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2637 {
2638 	int type;
2639 	struct dst_entry *dst = skb_dst(skb);
2640 	switch (ipstats_mib_noroutes) {
2641 	case IPSTATS_MIB_INNOROUTES:
2642 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2643 		if (type == IPV6_ADDR_ANY) {
2644 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2645 				      IPSTATS_MIB_INADDRERRORS);
2646 			break;
2647 		}
2648 		/* FALLTHROUGH */
2649 	case IPSTATS_MIB_OUTNOROUTES:
2650 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2651 			      ipstats_mib_noroutes);
2652 		break;
2653 	}
2654 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2655 	kfree_skb(skb);
2656 	return 0;
2657 }
2658 
2659 static int ip6_pkt_discard(struct sk_buff *skb)
2660 {
2661 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2662 }
2663 
2664 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2665 {
2666 	skb->dev = skb_dst(skb)->dev;
2667 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2668 }
2669 
2670 static int ip6_pkt_prohibit(struct sk_buff *skb)
2671 {
2672 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2673 }
2674 
2675 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2676 {
2677 	skb->dev = skb_dst(skb)->dev;
2678 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2679 }
2680 
2681 /*
2682  *	Allocate a dst for local (unicast / anycast) address.
2683  */
2684 
2685 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2686 				    const struct in6_addr *addr,
2687 				    bool anycast)
2688 {
2689 	u32 tb_id;
2690 	struct net *net = dev_net(idev->dev);
2691 	struct net_device *dev = net->loopback_dev;
2692 	struct rt6_info *rt;
2693 
2694 	/* use L3 Master device as loopback for host routes if device
2695 	 * is enslaved and address is not link local or multicast
2696 	 */
2697 	if (!rt6_need_strict(addr))
2698 		dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
2699 
2700 	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
2701 	if (!rt)
2702 		return ERR_PTR(-ENOMEM);
2703 
2704 	in6_dev_hold(idev);
2705 
2706 	rt->dst.flags |= DST_HOST;
2707 	rt->dst.input = ip6_input;
2708 	rt->dst.output = ip6_output;
2709 	rt->rt6i_idev = idev;
2710 
2711 	rt->rt6i_protocol = RTPROT_KERNEL;
2712 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2713 	if (anycast)
2714 		rt->rt6i_flags |= RTF_ANYCAST;
2715 	else
2716 		rt->rt6i_flags |= RTF_LOCAL;
2717 
2718 	rt->rt6i_gateway  = *addr;
2719 	rt->rt6i_dst.addr = *addr;
2720 	rt->rt6i_dst.plen = 128;
2721 	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2722 	rt->rt6i_table = fib6_get_table(net, tb_id);
2723 
2724 	return rt;
2725 }
2726 
/* Remove a deleted IP address from the prefsrc entries of routes. */

/* Argument bundle passed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* only routes on this device; NULL matches all */
	struct net *net;	/* namespace whose ip6_null_entry is skipped */
	struct in6_addr *addr;	/* preferred source address being removed */
};
2733 
2734 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2735 {
2736 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2737 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2738 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2739 
2740 	if (((void *)rt->dst.dev == dev || !dev) &&
2741 	    rt != net->ipv6.ip6_null_entry &&
2742 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2743 		/* remove prefsrc entry */
2744 		rt->rt6i_prefsrc.plen = 0;
2745 	}
2746 	return 0;
2747 }
2748 
2749 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2750 {
2751 	struct net *net = dev_net(ifp->idev->dev);
2752 	struct arg_dev_net_ip adni = {
2753 		.dev = ifp->idev->dev,
2754 		.net = net,
2755 		.addr = &ifp->addr,
2756 	};
2757 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2758 }
2759 
/* Flag sets tested (all bits required) by fib6_clean_tohost() */
#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)	/* autoconfigured default route via a gateway */
#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)	/* cached route via a gateway */
2762 
2763 /* Remove routers and update dst entries when gateway turn into host. */
2764 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2765 {
2766 	struct in6_addr *gateway = (struct in6_addr *)arg;
2767 
2768 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2769 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2770 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2771 		return -1;
2772 	}
2773 	return 0;
2774 }
2775 
2776 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2777 {
2778 	fib6_clean_all(net, fib6_clean_tohost, gateway);
2779 }
2780 
/* Argument bundle passed to fib6_ifdown() through fib6_clean_all(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches every route */
	struct net *net;	/* namespace whose ip6_null_entry is skipped */
};
2785 
2786 /* called with write lock held for table with rt */
2787 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2788 {
2789 	const struct arg_dev_net *adn = arg;
2790 	const struct net_device *dev = adn->dev;
2791 
2792 	if ((rt->dst.dev == dev || !dev) &&
2793 	    rt != adn->net->ipv6.ip6_null_entry &&
2794 	    (rt->rt6i_nsiblings == 0 ||
2795 	     (dev && netdev_unregistering(dev)) ||
2796 	     !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
2797 		return -1;
2798 
2799 	return 0;
2800 }
2801 
2802 void rt6_ifdown(struct net *net, struct net_device *dev)
2803 {
2804 	struct arg_dev_net adn = {
2805 		.dev = dev,
2806 		.net = net,
2807 	};
2808 
2809 	fib6_clean_all(net, fib6_ifdown, &adn);
2810 	if (dev)
2811 		rt6_uncached_list_flush_dev(net, dev);
2812 }
2813 
/* Argument bundle passed to rt6_mtu_change_route() through fib6_clean_all(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2818 
/*
 * fib6_clean_all() callback: propagate a device MTU change to @rt.
 *
 * @p_arg is a struct rt6_mtu_change_arg.  Only routes on that device with
 * a non-zero, unlocked RTAX_MTU metric are touched.  For RTF_CACHE routes
 * a non-zero rt6i_pmtu is only ever lowered to the new MTU; for other
 * routes the RTAX_MTU metric is set to the new MTU when the route MTU is
 * not below it, or when it is below it and equals the device's mtu6.
 * Always returns 0.
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For an administrative MTU increase there is no way to discover
	   an IPv6 PMTU increase, so the PMTU increase has to be applied here.
	   Since RFC 1981 doesn't cover administrative MTU increases,
	   updating the PMTU on increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If the new MTU is less than the route PMTU, the new MTU becomes the
	   lowest MTU in the path, so update the route PMTU to reflect the
	   decrease; if the new MTU is greater than the route PMTU, and the
	   old MTU was the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case, if another node on the path
	   also has the lowest MTU, a TOO BIG MESSAGE will lead to
	   PMTU discovery.
	 */
	if (rt->dst.dev == arg->dev &&
	    dst_metric_raw(&rt->dst, RTAX_MTU) &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* For RTF_CACHE with rt6i_pmtu == 0
			 * (i.e. a redirected route),
			 * the metrics of its rt->dst.from has already
			 * been updated.
			 */
			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
				rt->rt6i_pmtu = arg->mtu;
		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
			   (dst_mtu(&rt->dst) < arg->mtu &&
			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		}
	}
	return 0;
}
2867 
2868 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2869 {
2870 	struct rt6_mtu_change_arg arg = {
2871 		.dev = dev,
2872 		.mtu = mtu,
2873 	};
2874 
2875 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2876 }
2877 
2878 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2879 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2880 	[RTA_OIF]               = { .type = NLA_U32 },
2881 	[RTA_IIF]		= { .type = NLA_U32 },
2882 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2883 	[RTA_METRICS]           = { .type = NLA_NESTED },
2884 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2885 	[RTA_PREF]              = { .type = NLA_U8 },
2886 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
2887 	[RTA_ENCAP]		= { .type = NLA_NESTED },
2888 	[RTA_EXPIRES]		= { .type = NLA_U32 },
2889 	[RTA_UID]		= { .type = NLA_U32 },
2890 	[RTA_MARK]		= { .type = NLA_U32 },
2891 };
2892 
2893 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2894 			      struct fib6_config *cfg,
2895 			      struct netlink_ext_ack *extack)
2896 {
2897 	struct rtmsg *rtm;
2898 	struct nlattr *tb[RTA_MAX+1];
2899 	unsigned int pref;
2900 	int err;
2901 
2902 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
2903 			  NULL);
2904 	if (err < 0)
2905 		goto errout;
2906 
2907 	err = -EINVAL;
2908 	rtm = nlmsg_data(nlh);
2909 	memset(cfg, 0, sizeof(*cfg));
2910 
2911 	cfg->fc_table = rtm->rtm_table;
2912 	cfg->fc_dst_len = rtm->rtm_dst_len;
2913 	cfg->fc_src_len = rtm->rtm_src_len;
2914 	cfg->fc_flags = RTF_UP;
2915 	cfg->fc_protocol = rtm->rtm_protocol;
2916 	cfg->fc_type = rtm->rtm_type;
2917 
2918 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2919 	    rtm->rtm_type == RTN_BLACKHOLE ||
2920 	    rtm->rtm_type == RTN_PROHIBIT ||
2921 	    rtm->rtm_type == RTN_THROW)
2922 		cfg->fc_flags |= RTF_REJECT;
2923 
2924 	if (rtm->rtm_type == RTN_LOCAL)
2925 		cfg->fc_flags |= RTF_LOCAL;
2926 
2927 	if (rtm->rtm_flags & RTM_F_CLONED)
2928 		cfg->fc_flags |= RTF_CACHE;
2929 
2930 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2931 	cfg->fc_nlinfo.nlh = nlh;
2932 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2933 
2934 	if (tb[RTA_GATEWAY]) {
2935 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2936 		cfg->fc_flags |= RTF_GATEWAY;
2937 	}
2938 
2939 	if (tb[RTA_DST]) {
2940 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2941 
2942 		if (nla_len(tb[RTA_DST]) < plen)
2943 			goto errout;
2944 
2945 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2946 	}
2947 
2948 	if (tb[RTA_SRC]) {
2949 		int plen = (rtm->rtm_src_len + 7) >> 3;
2950 
2951 		if (nla_len(tb[RTA_SRC]) < plen)
2952 			goto errout;
2953 
2954 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2955 	}
2956 
2957 	if (tb[RTA_PREFSRC])
2958 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2959 
2960 	if (tb[RTA_OIF])
2961 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2962 
2963 	if (tb[RTA_PRIORITY])
2964 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2965 
2966 	if (tb[RTA_METRICS]) {
2967 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2968 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2969 	}
2970 
2971 	if (tb[RTA_TABLE])
2972 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2973 
2974 	if (tb[RTA_MULTIPATH]) {
2975 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2976 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2977 
2978 		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
2979 						     cfg->fc_mp_len, extack);
2980 		if (err < 0)
2981 			goto errout;
2982 	}
2983 
2984 	if (tb[RTA_PREF]) {
2985 		pref = nla_get_u8(tb[RTA_PREF]);
2986 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
2987 		    pref != ICMPV6_ROUTER_PREF_HIGH)
2988 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
2989 		cfg->fc_flags |= RTF_PREF(pref);
2990 	}
2991 
2992 	if (tb[RTA_ENCAP])
2993 		cfg->fc_encap = tb[RTA_ENCAP];
2994 
2995 	if (tb[RTA_ENCAP_TYPE]) {
2996 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2997 
2998 		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
2999 		if (err < 0)
3000 			goto errout;
3001 	}
3002 
3003 	if (tb[RTA_EXPIRES]) {
3004 		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
3005 
3006 		if (addrconf_finite_timeout(timeout)) {
3007 			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
3008 			cfg->fc_flags |= RTF_EXPIRES;
3009 		}
3010 	}
3011 
3012 	err = 0;
3013 errout:
3014 	return err;
3015 }
3016 
/* One parsed nexthop of a multipath route, queued on a local list. */
struct rt6_nh {
	struct rt6_info *rt6_info;	/* route built for this nexthop; reset to NULL once inserted */
	struct fib6_config r_cfg;	/* per-nexthop copy of the request config */
	struct mx6_config mxc;		/* metrics converted from r_cfg */
	struct list_head next;		/* list linkage */
};
3023 
3024 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3025 {
3026 	struct rt6_nh *nh;
3027 
3028 	list_for_each_entry(nh, rt6_nh_list, next) {
3029 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
3030 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
3031 		        nh->r_cfg.fc_ifindex);
3032 	}
3033 }
3034 
3035 static int ip6_route_info_append(struct list_head *rt6_nh_list,
3036 				 struct rt6_info *rt, struct fib6_config *r_cfg)
3037 {
3038 	struct rt6_nh *nh;
3039 	int err = -EEXIST;
3040 
3041 	list_for_each_entry(nh, rt6_nh_list, next) {
3042 		/* check if rt6_info already exists */
3043 		if (rt6_duplicate_nexthop(nh->rt6_info, rt))
3044 			return err;
3045 	}
3046 
3047 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
3048 	if (!nh)
3049 		return -ENOMEM;
3050 	nh->rt6_info = rt;
3051 	err = ip6_convert_metrics(&nh->mxc, r_cfg);
3052 	if (err) {
3053 		kfree(nh);
3054 		return err;
3055 	}
3056 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3057 	list_add_tail(&nh->next, rt6_nh_list);
3058 
3059 	return 0;
3060 }
3061 
3062 static void ip6_route_mpath_notify(struct rt6_info *rt,
3063 				   struct rt6_info *rt_last,
3064 				   struct nl_info *info,
3065 				   __u16 nlflags)
3066 {
3067 	/* if this is an APPEND route, then rt points to the first route
3068 	 * inserted and rt_last points to last route inserted. Userspace
3069 	 * wants a consistent dump of the route which starts at the first
3070 	 * nexthop. Since sibling routes are always added at the end of
3071 	 * the list, find the first sibling of the last route appended
3072 	 */
3073 	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
3074 		rt = list_first_entry(&rt_last->rt6i_siblings,
3075 				      struct rt6_info,
3076 				      rt6i_siblings);
3077 	}
3078 
3079 	if (rt)
3080 		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
3081 }
3082 
3083 static int ip6_route_multipath_add(struct fib6_config *cfg,
3084 				   struct netlink_ext_ack *extack)
3085 {
3086 	struct rt6_info *rt_notif = NULL, *rt_last = NULL;
3087 	struct nl_info *info = &cfg->fc_nlinfo;
3088 	struct fib6_config r_cfg;
3089 	struct rtnexthop *rtnh;
3090 	struct rt6_info *rt;
3091 	struct rt6_nh *err_nh;
3092 	struct rt6_nh *nh, *nh_safe;
3093 	__u16 nlflags;
3094 	int remaining;
3095 	int attrlen;
3096 	int err = 1;
3097 	int nhn = 0;
3098 	int replace = (cfg->fc_nlinfo.nlh &&
3099 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3100 	LIST_HEAD(rt6_nh_list);
3101 
3102 	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
3103 	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
3104 		nlflags |= NLM_F_APPEND;
3105 
3106 	remaining = cfg->fc_mp_len;
3107 	rtnh = (struct rtnexthop *)cfg->fc_mp;
3108 
3109 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
3110 	 * rt6_info structs per nexthop
3111 	 */
3112 	while (rtnh_ok(rtnh, remaining)) {
3113 		memcpy(&r_cfg, cfg, sizeof(*cfg));
3114 		if (rtnh->rtnh_ifindex)
3115 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3116 
3117 		attrlen = rtnh_attrlen(rtnh);
3118 		if (attrlen > 0) {
3119 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3120 
3121 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3122 			if (nla) {
3123 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
3124 				r_cfg.fc_flags |= RTF_GATEWAY;
3125 			}
3126 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3127 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3128 			if (nla)
3129 				r_cfg.fc_encap_type = nla_get_u16(nla);
3130 		}
3131 
3132 		rt = ip6_route_info_create(&r_cfg, extack);
3133 		if (IS_ERR(rt)) {
3134 			err = PTR_ERR(rt);
3135 			rt = NULL;
3136 			goto cleanup;
3137 		}
3138 
3139 		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
3140 		if (err) {
3141 			dst_release_immediate(&rt->dst);
3142 			goto cleanup;
3143 		}
3144 
3145 		rtnh = rtnh_next(rtnh, &remaining);
3146 	}
3147 
3148 	/* for add and replace send one notification with all nexthops.
3149 	 * Skip the notification in fib6_add_rt2node and send one with
3150 	 * the full route when done
3151 	 */
3152 	info->skip_notify = 1;
3153 
3154 	err_nh = NULL;
3155 	list_for_each_entry(nh, &rt6_nh_list, next) {
3156 		rt_last = nh->rt6_info;
3157 		err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
3158 		/* save reference to first route for notification */
3159 		if (!rt_notif && !err)
3160 			rt_notif = nh->rt6_info;
3161 
3162 		/* nh->rt6_info is used or freed at this point, reset to NULL*/
3163 		nh->rt6_info = NULL;
3164 		if (err) {
3165 			if (replace && nhn)
3166 				ip6_print_replace_route_err(&rt6_nh_list);
3167 			err_nh = nh;
3168 			goto add_errout;
3169 		}
3170 
3171 		/* Because each route is added like a single route we remove
3172 		 * these flags after the first nexthop: if there is a collision,
3173 		 * we have already failed to add the first nexthop:
3174 		 * fib6_add_rt2node() has rejected it; when replacing, old
3175 		 * nexthops have been replaced by first new, the rest should
3176 		 * be added to it.
3177 		 */
3178 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3179 						     NLM_F_REPLACE);
3180 		nhn++;
3181 	}
3182 
3183 	/* success ... tell user about new route */
3184 	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3185 	goto cleanup;
3186 
3187 add_errout:
3188 	/* send notification for routes that were added so that
3189 	 * the delete notifications sent by ip6_route_del are
3190 	 * coherent
3191 	 */
3192 	if (rt_notif)
3193 		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3194 
3195 	/* Delete routes that were already added */
3196 	list_for_each_entry(nh, &rt6_nh_list, next) {
3197 		if (err_nh == nh)
3198 			break;
3199 		ip6_route_del(&nh->r_cfg, extack);
3200 	}
3201 
3202 cleanup:
3203 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
3204 		if (nh->rt6_info)
3205 			dst_release_immediate(&nh->rt6_info->dst);
3206 		kfree(nh->mxc.mx);
3207 		list_del(&nh->next);
3208 		kfree(nh);
3209 	}
3210 
3211 	return err;
3212 }
3213 
3214 static int ip6_route_multipath_del(struct fib6_config *cfg,
3215 				   struct netlink_ext_ack *extack)
3216 {
3217 	struct fib6_config r_cfg;
3218 	struct rtnexthop *rtnh;
3219 	int remaining;
3220 	int attrlen;
3221 	int err = 1, last_err = 0;
3222 
3223 	remaining = cfg->fc_mp_len;
3224 	rtnh = (struct rtnexthop *)cfg->fc_mp;
3225 
3226 	/* Parse a Multipath Entry */
3227 	while (rtnh_ok(rtnh, remaining)) {
3228 		memcpy(&r_cfg, cfg, sizeof(*cfg));
3229 		if (rtnh->rtnh_ifindex)
3230 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3231 
3232 		attrlen = rtnh_attrlen(rtnh);
3233 		if (attrlen > 0) {
3234 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3235 
3236 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3237 			if (nla) {
3238 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3239 				r_cfg.fc_flags |= RTF_GATEWAY;
3240 			}
3241 		}
3242 		err = ip6_route_del(&r_cfg, extack);
3243 		if (err)
3244 			last_err = err;
3245 
3246 		rtnh = rtnh_next(rtnh, &remaining);
3247 	}
3248 
3249 	return last_err;
3250 }
3251 
3252 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3253 			      struct netlink_ext_ack *extack)
3254 {
3255 	struct fib6_config cfg;
3256 	int err;
3257 
3258 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
3259 	if (err < 0)
3260 		return err;
3261 
3262 	if (cfg.fc_mp)
3263 		return ip6_route_multipath_del(&cfg, extack);
3264 	else {
3265 		cfg.fc_delete_all_nh = 1;
3266 		return ip6_route_del(&cfg, extack);
3267 	}
3268 }
3269 
3270 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3271 			      struct netlink_ext_ack *extack)
3272 {
3273 	struct fib6_config cfg;
3274 	int err;
3275 
3276 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
3277 	if (err < 0)
3278 		return err;
3279 
3280 	if (cfg.fc_mp)
3281 		return ip6_route_multipath_add(&cfg, extack);
3282 	else
3283 		return ip6_route_add(&cfg, extack);
3284 }
3285 
3286 static size_t rt6_nlmsg_size(struct rt6_info *rt)
3287 {
3288 	int nexthop_len = 0;
3289 
3290 	if (rt->rt6i_nsiblings) {
3291 		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
3292 			    + NLA_ALIGN(sizeof(struct rtnexthop))
3293 			    + nla_total_size(16) /* RTA_GATEWAY */
3294 			    + lwtunnel_get_encap_size(rt->dst.lwtstate);
3295 
3296 		nexthop_len *= rt->rt6i_nsiblings;
3297 	}
3298 
3299 	return NLMSG_ALIGN(sizeof(struct rtmsg))
3300 	       + nla_total_size(16) /* RTA_SRC */
3301 	       + nla_total_size(16) /* RTA_DST */
3302 	       + nla_total_size(16) /* RTA_GATEWAY */
3303 	       + nla_total_size(16) /* RTA_PREFSRC */
3304 	       + nla_total_size(4) /* RTA_TABLE */
3305 	       + nla_total_size(4) /* RTA_IIF */
3306 	       + nla_total_size(4) /* RTA_OIF */
3307 	       + nla_total_size(4) /* RTA_PRIORITY */
3308 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3309 	       + nla_total_size(sizeof(struct rta_cacheinfo))
3310 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3311 	       + nla_total_size(1) /* RTA_PREF */
3312 	       + lwtunnel_get_encap_size(rt->dst.lwtstate)
3313 	       + nexthop_len;
3314 }
3315 
3316 static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
3317 			    unsigned int *flags, bool skip_oif)
3318 {
3319 	if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
3320 		*flags |= RTNH_F_LINKDOWN;
3321 		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3322 			*flags |= RTNH_F_DEAD;
3323 	}
3324 
3325 	if (rt->rt6i_flags & RTF_GATEWAY) {
3326 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3327 			goto nla_put_failure;
3328 	}
3329 
3330 	/* not needed for multipath encoding b/c it has a rtnexthop struct */
3331 	if (!skip_oif && rt->dst.dev &&
3332 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3333 		goto nla_put_failure;
3334 
3335 	if (rt->dst.lwtstate &&
3336 	    lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3337 		goto nla_put_failure;
3338 
3339 	return 0;
3340 
3341 nla_put_failure:
3342 	return -EMSGSIZE;
3343 }
3344 
3345 /* add multipath next hop */
3346 static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
3347 {
3348 	struct rtnexthop *rtnh;
3349 	unsigned int flags = 0;
3350 
3351 	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
3352 	if (!rtnh)
3353 		goto nla_put_failure;
3354 
3355 	rtnh->rtnh_hops = 0;
3356 	rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
3357 
3358 	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
3359 		goto nla_put_failure;
3360 
3361 	rtnh->rtnh_flags = flags;
3362 
3363 	/* length of rtnetlink header + attributes */
3364 	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
3365 
3366 	return 0;
3367 
3368 nla_put_failure:
3369 	return -EMSGSIZE;
3370 }
3371 
/*
 * rt6_fill_node - encode route @rt as one netlink route message in @skb
 * @net:    network namespace, passed on to ip6mr_get_route() and
 *          ip6_route_get_saddr()
 * @skb:    message buffer being filled
 * @rt:     route to describe
 * @dst:    if non-NULL, reported as RTA_DST with a prefix length of 128
 *          instead of the route's own destination prefix
 * @src:    same as @dst for RTA_SRC (only with CONFIG_IPV6_SUBTREES)
 * @iif:    if non-zero, reported as RTA_IIF (multicast destinations are
 *          handed to ip6mr_get_route() with CONFIG_IPV6_MROUTE)
 * @type:   netlink message type
 * @portid: netlink port id placed in the header
 * @seq:    netlink sequence number placed in the header
 * @flags:  netlink header flags
 *
 * Returns 0 on success.  On any failure to add the header or an
 * attribute the partially built message is cancelled (when the header
 * was added) and -EMSGSIZE is returned.
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags)
{
	u32 metrics[RTAX_MAX];
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	/* the table id goes both in the header field and in RTA_TABLE */
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* derive the route type: reject routes are mapped by dst.error */
	if (rt->rt6i_flags & RTF_REJECT) {
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->rt6i_flags & RTF_ANYCAST)
		rtm->rtm_type = RTN_ANYCAST;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* a caller-supplied address overrides the route's own prefix */
	if (dst) {
		if (nla_put_in6_addr(skb, RTA_DST, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, portid);

			/* NOTE(review): this returns without reaching
			 * nlmsg_end() below; presumably ip6mr_get_route()
			 * has taken over the reply - confirm.
			 */
			if (err == 0)
				return 0;
			if (err < 0)
				goto nla_put_failure;
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		/* no input interface: report the source address selected
		 * for the caller-supplied destination
		 */
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* NOTE(review): when the branch above already emitted RTA_PREFSRC
	 * this adds a second RTA_PREFSRC attribute to the same message.
	 */
	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* work on a copy so a per-route pmtu can replace the MTU metric */
	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt6i_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	/* For multipath routes, walk the siblings list and add
	 * each as a nexthop within RTA_MULTIPATH.
	 */
	if (rt->rt6i_nsiblings) {
		struct rt6_info *sibling, *next_sibling;
		struct nlattr *mp;

		mp = nla_nest_start(skb, RTA_MULTIPATH);
		if (!mp)
			goto nla_put_failure;

		/* @rt itself is the first nexthop, followed by its siblings */
		if (rt6_add_nexthop(skb, rt) < 0)
			goto nla_put_failure;

		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->rt6i_siblings, rt6i_siblings) {
			if (rt6_add_nexthop(skb, sibling) < 0)
				goto nla_put_failure;
		}

		nla_nest_end(skb, mp);
	} else {
		/* single nexthop: its state bits go into rtm_flags */
		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
			goto nla_put_failure;
	}

	/* remaining lifetime in jiffies, or 0 when RTF_EXPIRES is not set */
	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
		goto nla_put_failure;


	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
3525 
3526 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3527 {
3528 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3529 	struct net *net = arg->net;
3530 
3531 	if (rt == net->ipv6.ip6_null_entry)
3532 		return 0;
3533 
3534 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3535 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3536 
3537 		/* user wants prefix routes only */
3538 		if (rtm->rtm_flags & RTM_F_PREFIX &&
3539 		    !(rt->rt6i_flags & RTF_PREFIX_RT)) {
3540 			/* success since this is not a prefix route */
3541 			return 1;
3542 		}
3543 	}
3544 
3545 	return rt6_fill_node(net,
3546 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3547 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3548 		     NLM_F_MULTI);
3549 }
3550 
3551 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3552 			      struct netlink_ext_ack *extack)
3553 {
3554 	struct net *net = sock_net(in_skb->sk);
3555 	struct nlattr *tb[RTA_MAX+1];
3556 	int err, iif = 0, oif = 0;
3557 	struct dst_entry *dst;
3558 	struct rt6_info *rt;
3559 	struct sk_buff *skb;
3560 	struct rtmsg *rtm;
3561 	struct flowi6 fl6;
3562 	bool fibmatch;
3563 
3564 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
3565 			  extack);
3566 	if (err < 0)
3567 		goto errout;
3568 
3569 	err = -EINVAL;
3570 	memset(&fl6, 0, sizeof(fl6));
3571 	rtm = nlmsg_data(nlh);
3572 	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
3573 	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
3574 
3575 	if (tb[RTA_SRC]) {
3576 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3577 			goto errout;
3578 
3579 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3580 	}
3581 
3582 	if (tb[RTA_DST]) {
3583 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3584 			goto errout;
3585 
3586 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3587 	}
3588 
3589 	if (tb[RTA_IIF])
3590 		iif = nla_get_u32(tb[RTA_IIF]);
3591 
3592 	if (tb[RTA_OIF])
3593 		oif = nla_get_u32(tb[RTA_OIF]);
3594 
3595 	if (tb[RTA_MARK])
3596 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3597 
3598 	if (tb[RTA_UID])
3599 		fl6.flowi6_uid = make_kuid(current_user_ns(),
3600 					   nla_get_u32(tb[RTA_UID]));
3601 	else
3602 		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
3603 
3604 	if (iif) {
3605 		struct net_device *dev;
3606 		int flags = 0;
3607 
3608 		dev = __dev_get_by_index(net, iif);
3609 		if (!dev) {
3610 			err = -ENODEV;
3611 			goto errout;
3612 		}
3613 
3614 		fl6.flowi6_iif = iif;
3615 
3616 		if (!ipv6_addr_any(&fl6.saddr))
3617 			flags |= RT6_LOOKUP_F_HAS_SADDR;
3618 
3619 		if (!fibmatch)
3620 			dst = ip6_route_input_lookup(net, dev, &fl6, flags);
3621 	} else {
3622 		fl6.flowi6_oif = oif;
3623 
3624 		if (!fibmatch)
3625 			dst = ip6_route_output(net, NULL, &fl6);
3626 	}
3627 
3628 	if (fibmatch)
3629 		dst = ip6_route_lookup(net, &fl6, 0);
3630 
3631 	rt = container_of(dst, struct rt6_info, dst);
3632 	if (rt->dst.error) {
3633 		err = rt->dst.error;
3634 		ip6_rt_put(rt);
3635 		goto errout;
3636 	}
3637 
3638 	if (rt == net->ipv6.ip6_null_entry) {
3639 		err = rt->dst.error;
3640 		ip6_rt_put(rt);
3641 		goto errout;
3642 	}
3643 
3644 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3645 	if (!skb) {
3646 		ip6_rt_put(rt);
3647 		err = -ENOBUFS;
3648 		goto errout;
3649 	}
3650 
3651 	skb_dst_set(skb, &rt->dst);
3652 	if (fibmatch)
3653 		err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
3654 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3655 				    nlh->nlmsg_seq, 0);
3656 	else
3657 		err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3658 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3659 				    nlh->nlmsg_seq, 0);
3660 	if (err < 0) {
3661 		kfree_skb(skb);
3662 		goto errout;
3663 	}
3664 
3665 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3666 errout:
3667 	return err;
3668 }
3669 
3670 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3671 		     unsigned int nlm_flags)
3672 {
3673 	struct sk_buff *skb;
3674 	struct net *net = info->nl_net;
3675 	u32 seq;
3676 	int err;
3677 
3678 	err = -ENOBUFS;
3679 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3680 
3681 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3682 	if (!skb)
3683 		goto errout;
3684 
3685 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3686 				event, info->portid, seq, nlm_flags);
3687 	if (err < 0) {
3688 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3689 		WARN_ON(err == -EMSGSIZE);
3690 		kfree_skb(skb);
3691 		goto errout;
3692 	}
3693 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3694 		    info->nlh, gfp_any());
3695 	return;
3696 errout:
3697 	if (err < 0)
3698 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3699 }
3700 
3701 static int ip6_route_dev_notify(struct notifier_block *this,
3702 				unsigned long event, void *ptr)
3703 {
3704 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3705 	struct net *net = dev_net(dev);
3706 
3707 	if (!(dev->flags & IFF_LOOPBACK))
3708 		return NOTIFY_OK;
3709 
3710 	if (event == NETDEV_REGISTER) {
3711 		net->ipv6.ip6_null_entry->dst.dev = dev;
3712 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3713 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3714 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3715 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3716 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3717 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3718 #endif
3719 	 } else if (event == NETDEV_UNREGISTER &&
3720 		    dev->reg_state != NETREG_UNREGISTERED) {
3721 		/* NETDEV_UNREGISTER could be fired for multiple times by
3722 		 * netdev_wait_allrefs(). Make sure we only call this once.
3723 		 */
3724 		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
3725 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3726 		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
3727 		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
3728 #endif
3729 	}
3730 
3731 	return NOTIFY_OK;
3732 }
3733 
3734 /*
3735  *	/proc
3736  */
3737 
3738 #ifdef CONFIG_PROC_FS
3739 
3740 static const struct file_operations ipv6_route_proc_fops = {
3741 	.owner		= THIS_MODULE,
3742 	.open		= ipv6_route_open,
3743 	.read		= seq_read,
3744 	.llseek		= seq_lseek,
3745 	.release	= seq_release_net,
3746 };
3747 
3748 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3749 {
3750 	struct net *net = (struct net *)seq->private;
3751 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3752 		   net->ipv6.rt6_stats->fib_nodes,
3753 		   net->ipv6.rt6_stats->fib_route_nodes,
3754 		   net->ipv6.rt6_stats->fib_rt_alloc,
3755 		   net->ipv6.rt6_stats->fib_rt_entries,
3756 		   net->ipv6.rt6_stats->fib_rt_cache,
3757 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3758 		   net->ipv6.rt6_stats->fib_discarded_routes);
3759 
3760 	return 0;
3761 }
3762 
3763 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3764 {
3765 	return single_open_net(inode, file, rt6_stats_seq_show);
3766 }
3767 
3768 static const struct file_operations rt6_stats_seq_fops = {
3769 	.owner	 = THIS_MODULE,
3770 	.open	 = rt6_stats_seq_open,
3771 	.read	 = seq_read,
3772 	.llseek	 = seq_lseek,
3773 	.release = single_release_net,
3774 };
3775 #endif	/* CONFIG_PROC_FS */
3776 
3777 #ifdef CONFIG_SYSCTL
3778 
3779 static
3780 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3781 			      void __user *buffer, size_t *lenp, loff_t *ppos)
3782 {
3783 	struct net *net;
3784 	int delay;
3785 	if (!write)
3786 		return -EINVAL;
3787 
3788 	net = (struct net *)ctl->extra1;
3789 	delay = net->ipv6.sysctl.flush_delay;
3790 	proc_dointvec(ctl, write, buffer, lenp, ppos);
3791 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3792 	return 0;
3793 }
3794 
/*
 * Template for the per-namespace IPv6 route sysctl table.
 *
 * ipv6_route_sysctl_init() duplicates this array and re-points the
 * .data members at the namespace's own fields BY INDEX, so the order of
 * the entries below must stay in sync with that function.
 */
struct ctl_table ipv6_route_table_template[] = {
	{	/* [0] write-only; triggers a garbage collection run */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{	/* [1] */
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{	/* [2] */
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{	/* [3] same variable as [9], exposed via the jiffies handler */
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{	/* [4] */
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{	/* [5] */
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{	/* [6] */
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{	/* [7] */
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{	/* [8] */
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{	/* [9] same variable as [3], exposed via the ms_jiffies handler */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }	/* terminating empty entry */
};
3868 
3869 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3870 {
3871 	struct ctl_table *table;
3872 
3873 	table = kmemdup(ipv6_route_table_template,
3874 			sizeof(ipv6_route_table_template),
3875 			GFP_KERNEL);
3876 
3877 	if (table) {
3878 		table[0].data = &net->ipv6.sysctl.flush_delay;
3879 		table[0].extra1 = net;
3880 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3881 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3882 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3883 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3884 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3885 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3886 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3887 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3888 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3889 
3890 		/* Don't export sysctls to unprivileged users */
3891 		if (net->user_ns != &init_user_ns)
3892 			table[0].procname = NULL;
3893 	}
3894 
3895 	return table;
3896 }
3897 #endif
3898 
3899 static int __net_init ip6_route_net_init(struct net *net)
3900 {
3901 	int ret = -ENOMEM;
3902 
3903 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3904 	       sizeof(net->ipv6.ip6_dst_ops));
3905 
3906 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3907 		goto out_ip6_dst_ops;
3908 
3909 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3910 					   sizeof(*net->ipv6.ip6_null_entry),
3911 					   GFP_KERNEL);
3912 	if (!net->ipv6.ip6_null_entry)
3913 		goto out_ip6_dst_entries;
3914 	net->ipv6.ip6_null_entry->dst.path =
3915 		(struct dst_entry *)net->ipv6.ip6_null_entry;
3916 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3917 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3918 			 ip6_template_metrics, true);
3919 
3920 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3921 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3922 					       sizeof(*net->ipv6.ip6_prohibit_entry),
3923 					       GFP_KERNEL);
3924 	if (!net->ipv6.ip6_prohibit_entry)
3925 		goto out_ip6_null_entry;
3926 	net->ipv6.ip6_prohibit_entry->dst.path =
3927 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3928 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3929 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3930 			 ip6_template_metrics, true);
3931 
3932 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3933 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3934 					       GFP_KERNEL);
3935 	if (!net->ipv6.ip6_blk_hole_entry)
3936 		goto out_ip6_prohibit_entry;
3937 	net->ipv6.ip6_blk_hole_entry->dst.path =
3938 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3939 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3940 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3941 			 ip6_template_metrics, true);
3942 #endif
3943 
3944 	net->ipv6.sysctl.flush_delay = 0;
3945 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3946 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3947 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3948 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3949 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3950 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3951 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3952 
3953 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3954 
3955 	ret = 0;
3956 out:
3957 	return ret;
3958 
3959 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3960 out_ip6_prohibit_entry:
3961 	kfree(net->ipv6.ip6_prohibit_entry);
3962 out_ip6_null_entry:
3963 	kfree(net->ipv6.ip6_null_entry);
3964 #endif
3965 out_ip6_dst_entries:
3966 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3967 out_ip6_dst_ops:
3968 	goto out;
3969 }
3970 
3971 static void __net_exit ip6_route_net_exit(struct net *net)
3972 {
3973 	kfree(net->ipv6.ip6_null_entry);
3974 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3975 	kfree(net->ipv6.ip6_prohibit_entry);
3976 	kfree(net->ipv6.ip6_blk_hole_entry);
3977 #endif
3978 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3979 }
3980 
3981 static int __net_init ip6_route_net_init_late(struct net *net)
3982 {
3983 #ifdef CONFIG_PROC_FS
3984 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3985 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3986 #endif
3987 	return 0;
3988 }
3989 
3990 static void __net_exit ip6_route_net_exit_late(struct net *net)
3991 {
3992 #ifdef CONFIG_PROC_FS
3993 	remove_proc_entry("ipv6_route", net->proc_net);
3994 	remove_proc_entry("rt6_stats", net->proc_net);
3995 #endif
3996 }
3997 
3998 static struct pernet_operations ip6_route_net_ops = {
3999 	.init = ip6_route_net_init,
4000 	.exit = ip6_route_net_exit,
4001 };
4002 
4003 static int __net_init ipv6_inetpeer_init(struct net *net)
4004 {
4005 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4006 
4007 	if (!bp)
4008 		return -ENOMEM;
4009 	inet_peer_base_init(bp);
4010 	net->ipv6.peers = bp;
4011 	return 0;
4012 }
4013 
4014 static void __net_exit ipv6_inetpeer_exit(struct net *net)
4015 {
4016 	struct inet_peer_base *bp = net->ipv6.peers;
4017 
4018 	net->ipv6.peers = NULL;
4019 	inetpeer_invalidate_tree(bp);
4020 	kfree(bp);
4021 }
4022 
4023 static struct pernet_operations ipv6_inetpeer_ops = {
4024 	.init	=	ipv6_inetpeer_init,
4025 	.exit	=	ipv6_inetpeer_exit,
4026 };
4027 
4028 static struct pernet_operations ip6_route_net_late_ops = {
4029 	.init = ip6_route_net_init_late,
4030 	.exit = ip6_route_net_exit_late,
4031 };
4032 
4033 static struct notifier_block ip6_route_dev_notifier = {
4034 	.notifier_call = ip6_route_dev_notify,
4035 	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
4036 };
4037 
4038 void __init ip6_route_init_special_entries(void)
4039 {
4040 	/* Registering of the loopback is done before this portion of code,
4041 	 * the loopback reference in rt6_info will not be taken, do it
4042 	 * manually for init_net */
4043 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
4044 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4045   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4046 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
4047 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4048 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
4049 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4050   #endif
4051 }
4052 
4053 int __init ip6_route_init(void)
4054 {
4055 	int ret;
4056 	int cpu;
4057 
4058 	ret = -ENOMEM;
4059 	ip6_dst_ops_template.kmem_cachep =
4060 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
4061 				  SLAB_HWCACHE_ALIGN, NULL);
4062 	if (!ip6_dst_ops_template.kmem_cachep)
4063 		goto out;
4064 
4065 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
4066 	if (ret)
4067 		goto out_kmem_cache;
4068 
4069 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
4070 	if (ret)
4071 		goto out_dst_entries;
4072 
4073 	ret = register_pernet_subsys(&ip6_route_net_ops);
4074 	if (ret)
4075 		goto out_register_inetpeer;
4076 
4077 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
4078 
4079 	ret = fib6_init();
4080 	if (ret)
4081 		goto out_register_subsys;
4082 
4083 	ret = xfrm6_init();
4084 	if (ret)
4085 		goto out_fib6_init;
4086 
4087 	ret = fib6_rules_init();
4088 	if (ret)
4089 		goto xfrm6_init;
4090 
4091 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
4092 	if (ret)
4093 		goto fib6_rules_init;
4094 
4095 	ret = -ENOBUFS;
4096 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
4097 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
4098 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
4099 		goto out_register_late_subsys;
4100 
4101 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
4102 	if (ret)
4103 		goto out_register_late_subsys;
4104 
4105 	for_each_possible_cpu(cpu) {
4106 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
4107 
4108 		INIT_LIST_HEAD(&ul->head);
4109 		spin_lock_init(&ul->lock);
4110 	}
4111 
4112 out:
4113 	return ret;
4114 
4115 out_register_late_subsys:
4116 	unregister_pernet_subsys(&ip6_route_net_late_ops);
4117 fib6_rules_init:
4118 	fib6_rules_cleanup();
4119 xfrm6_init:
4120 	xfrm6_fini();
4121 out_fib6_init:
4122 	fib6_gc_cleanup();
4123 out_register_subsys:
4124 	unregister_pernet_subsys(&ip6_route_net_ops);
4125 out_register_inetpeer:
4126 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
4127 out_dst_entries:
4128 	dst_entries_destroy(&ip6_dst_blackhole_ops);
4129 out_kmem_cache:
4130 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
4131 	goto out;
4132 }
4133 
/*
 * ip6_route_cleanup - undo ip6_route_init()
 *
 * Releases what ip6_route_init() set up: the netdevice notifier, the
 * per-namespace subsystems, FIB rules, xfrm6, the FIB garbage
 * collector, the blackhole dst counter and the rt6_info slab cache.
 *
 * NOTE(review): ipv6_inetpeer_ops is unregistered before
 * ip6_route_net_ops here, whereas the error path of ip6_route_init()
 * unregisters them in the opposite order - confirm this is intended.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	/* the slab cache goes last */
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
4146