xref: /openbmc/linux/net/ipv6/route.c (revision d4bea421f7322400d804c2284739e42e61f78349)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <linux/jhash.h>
48 #include <net/net_namespace.h>
49 #include <net/snmp.h>
50 #include <net/ipv6.h>
51 #include <net/ip6_fib.h>
52 #include <net/ip6_route.h>
53 #include <net/ndisc.h>
54 #include <net/addrconf.h>
55 #include <net/tcp.h>
56 #include <linux/rtnetlink.h>
57 #include <net/dst.h>
58 #include <net/dst_metadata.h>
59 #include <net/xfrm.h>
60 #include <net/netevent.h>
61 #include <net/netlink.h>
62 #include <net/nexthop.h>
63 #include <net/lwtunnel.h>
64 #include <net/ip_tunnels.h>
65 #include <net/l3mdev.h>
66 #include <trace/events/fib6.h>
67 
68 #include <linux/uaccess.h>
69 
70 #ifdef CONFIG_SYSCTL
71 #include <linux/sysctl.h>
72 #endif
73 
74 enum rt6_nud_state {
75 	RT6_NUD_FAIL_HARD = -3,
76 	RT6_NUD_FAIL_PROBE = -2,
77 	RT6_NUD_FAIL_DO_RR = -1,
78 	RT6_NUD_SUCCEED = 1
79 };
80 
81 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
82 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
83 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
84 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85 static void		ip6_dst_destroy(struct dst_entry *);
86 static void		ip6_dst_ifdown(struct dst_entry *,
87 				       struct net_device *dev, int how);
88 static int		 ip6_dst_gc(struct dst_ops *ops);
89 
90 static int		ip6_pkt_discard(struct sk_buff *skb);
91 static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
92 static int		ip6_pkt_prohibit(struct sk_buff *skb);
93 static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
94 static void		ip6_link_failure(struct sk_buff *skb);
95 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 					   struct sk_buff *skb, u32 mtu);
97 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 					struct sk_buff *skb);
99 static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
100 static size_t rt6_nlmsg_size(struct fib6_info *rt);
101 static int rt6_fill_node(struct net *net, struct sk_buff *skb,
102 			 struct fib6_info *rt, struct dst_entry *dst,
103 			 struct in6_addr *dest, struct in6_addr *src,
104 			 int iif, int type, u32 portid, u32 seq,
105 			 unsigned int flags);
106 static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
107 					   struct in6_addr *daddr,
108 					   struct in6_addr *saddr);
109 
110 #ifdef CONFIG_IPV6_ROUTE_INFO
111 static struct fib6_info *rt6_add_route_info(struct net *net,
112 					   const struct in6_addr *prefix, int prefixlen,
113 					   const struct in6_addr *gwaddr,
114 					   struct net_device *dev,
115 					   unsigned int pref);
116 static struct fib6_info *rt6_get_route_info(struct net *net,
117 					   const struct in6_addr *prefix, int prefixlen,
118 					   const struct in6_addr *gwaddr,
119 					   struct net_device *dev);
120 #endif
121 
122 struct uncached_list {
123 	spinlock_t		lock;
124 	struct list_head	head;
125 };
126 
127 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
128 
129 void rt6_uncached_list_add(struct rt6_info *rt)
130 {
131 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
132 
133 	rt->rt6i_uncached_list = ul;
134 
135 	spin_lock_bh(&ul->lock);
136 	list_add_tail(&rt->rt6i_uncached, &ul->head);
137 	spin_unlock_bh(&ul->lock);
138 }
139 
140 void rt6_uncached_list_del(struct rt6_info *rt)
141 {
142 	if (!list_empty(&rt->rt6i_uncached)) {
143 		struct uncached_list *ul = rt->rt6i_uncached_list;
144 		struct net *net = dev_net(rt->dst.dev);
145 
146 		spin_lock_bh(&ul->lock);
147 		list_del(&rt->rt6i_uncached);
148 		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
149 		spin_unlock_bh(&ul->lock);
150 	}
151 }
152 
153 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
154 {
155 	struct net_device *loopback_dev = net->loopback_dev;
156 	int cpu;
157 
158 	if (dev == loopback_dev)
159 		return;
160 
161 	for_each_possible_cpu(cpu) {
162 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
163 		struct rt6_info *rt;
164 
165 		spin_lock_bh(&ul->lock);
166 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
167 			struct inet6_dev *rt_idev = rt->rt6i_idev;
168 			struct net_device *rt_dev = rt->dst.dev;
169 
170 			if (rt_idev->dev == dev) {
171 				rt->rt6i_idev = in6_dev_get(loopback_dev);
172 				in6_dev_put(rt_idev);
173 			}
174 
175 			if (rt_dev == dev) {
176 				rt->dst.dev = loopback_dev;
177 				dev_hold(rt->dst.dev);
178 				dev_put(rt_dev);
179 			}
180 		}
181 		spin_unlock_bh(&ul->lock);
182 	}
183 }
184 
185 static inline const void *choose_neigh_daddr(const struct in6_addr *p,
186 					     struct sk_buff *skb,
187 					     const void *daddr)
188 {
189 	if (!ipv6_addr_any(p))
190 		return (const void *) p;
191 	else if (skb)
192 		return &ipv6_hdr(skb)->daddr;
193 	return daddr;
194 }
195 
196 struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
197 				   struct net_device *dev,
198 				   struct sk_buff *skb,
199 				   const void *daddr)
200 {
201 	struct neighbour *n;
202 
203 	daddr = choose_neigh_daddr(gw, skb, daddr);
204 	n = __ipv6_neigh_lookup(dev, daddr);
205 	if (n)
206 		return n;
207 	return neigh_create(&nd_tbl, daddr, dev);
208 }
209 
210 static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
211 					      struct sk_buff *skb,
212 					      const void *daddr)
213 {
214 	const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
215 
216 	return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
217 }
218 
219 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
220 {
221 	struct net_device *dev = dst->dev;
222 	struct rt6_info *rt = (struct rt6_info *)dst;
223 
224 	daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
225 	if (!daddr)
226 		return;
227 	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
228 		return;
229 	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
230 		return;
231 	__ipv6_confirm_neigh(dev, daddr);
232 }
233 
234 static struct dst_ops ip6_dst_ops_template = {
235 	.family			=	AF_INET6,
236 	.gc			=	ip6_dst_gc,
237 	.gc_thresh		=	1024,
238 	.check			=	ip6_dst_check,
239 	.default_advmss		=	ip6_default_advmss,
240 	.mtu			=	ip6_mtu,
241 	.cow_metrics		=	dst_cow_metrics_generic,
242 	.destroy		=	ip6_dst_destroy,
243 	.ifdown			=	ip6_dst_ifdown,
244 	.negative_advice	=	ip6_negative_advice,
245 	.link_failure		=	ip6_link_failure,
246 	.update_pmtu		=	ip6_rt_update_pmtu,
247 	.redirect		=	rt6_do_redirect,
248 	.local_out		=	__ip6_local_out,
249 	.neigh_lookup		=	ip6_dst_neigh_lookup,
250 	.confirm_neigh		=	ip6_confirm_neigh,
251 };
252 
253 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
254 {
255 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
256 
257 	return mtu ? : dst->dev->mtu;
258 }
259 
260 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
261 					 struct sk_buff *skb, u32 mtu)
262 {
263 }
264 
265 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
266 				      struct sk_buff *skb)
267 {
268 }
269 
270 static struct dst_ops ip6_dst_blackhole_ops = {
271 	.family			=	AF_INET6,
272 	.destroy		=	ip6_dst_destroy,
273 	.check			=	ip6_dst_check,
274 	.mtu			=	ip6_blackhole_mtu,
275 	.default_advmss		=	ip6_default_advmss,
276 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
277 	.redirect		=	ip6_rt_blackhole_redirect,
278 	.cow_metrics		=	dst_cow_metrics_generic,
279 	.neigh_lookup		=	ip6_dst_neigh_lookup,
280 };
281 
282 static const u32 ip6_template_metrics[RTAX_MAX] = {
283 	[RTAX_HOPLIMIT - 1] = 0,
284 };
285 
286 static const struct fib6_info fib6_null_entry_template = {
287 	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
288 	.fib6_protocol  = RTPROT_KERNEL,
289 	.fib6_metric	= ~(u32)0,
290 	.fib6_ref	= ATOMIC_INIT(1),
291 	.fib6_type	= RTN_UNREACHABLE,
292 	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
293 };
294 
295 static const struct rt6_info ip6_null_entry_template = {
296 	.dst = {
297 		.__refcnt	= ATOMIC_INIT(1),
298 		.__use		= 1,
299 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
300 		.error		= -ENETUNREACH,
301 		.input		= ip6_pkt_discard,
302 		.output		= ip6_pkt_discard_out,
303 	},
304 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
305 };
306 
307 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
308 
309 static const struct rt6_info ip6_prohibit_entry_template = {
310 	.dst = {
311 		.__refcnt	= ATOMIC_INIT(1),
312 		.__use		= 1,
313 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
314 		.error		= -EACCES,
315 		.input		= ip6_pkt_prohibit,
316 		.output		= ip6_pkt_prohibit_out,
317 	},
318 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
319 };
320 
321 static const struct rt6_info ip6_blk_hole_entry_template = {
322 	.dst = {
323 		.__refcnt	= ATOMIC_INIT(1),
324 		.__use		= 1,
325 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
326 		.error		= -EINVAL,
327 		.input		= dst_discard,
328 		.output		= dst_discard_out,
329 	},
330 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
331 };
332 
333 #endif
334 
335 static void rt6_info_init(struct rt6_info *rt)
336 {
337 	struct dst_entry *dst = &rt->dst;
338 
339 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
340 	INIT_LIST_HEAD(&rt->rt6i_uncached);
341 }
342 
343 /* allocate dst with ip6_dst_ops */
344 struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
345 			       int flags)
346 {
347 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
348 					1, DST_OBSOLETE_FORCE_CHK, flags);
349 
350 	if (rt) {
351 		rt6_info_init(rt);
352 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
353 	}
354 
355 	return rt;
356 }
357 EXPORT_SYMBOL(ip6_dst_alloc);
358 
359 static void ip6_dst_destroy(struct dst_entry *dst)
360 {
361 	struct rt6_info *rt = (struct rt6_info *)dst;
362 	struct fib6_info *from;
363 	struct inet6_dev *idev;
364 
365 	dst_destroy_metrics_generic(dst);
366 	rt6_uncached_list_del(rt);
367 
368 	idev = rt->rt6i_idev;
369 	if (idev) {
370 		rt->rt6i_idev = NULL;
371 		in6_dev_put(idev);
372 	}
373 
374 	rcu_read_lock();
375 	from = rcu_dereference(rt->from);
376 	rcu_assign_pointer(rt->from, NULL);
377 	fib6_info_release(from);
378 	rcu_read_unlock();
379 }
380 
381 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
382 			   int how)
383 {
384 	struct rt6_info *rt = (struct rt6_info *)dst;
385 	struct inet6_dev *idev = rt->rt6i_idev;
386 	struct net_device *loopback_dev =
387 		dev_net(dev)->loopback_dev;
388 
389 	if (idev && idev->dev != loopback_dev) {
390 		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
391 		if (loopback_idev) {
392 			rt->rt6i_idev = loopback_idev;
393 			in6_dev_put(idev);
394 		}
395 	}
396 }
397 
398 static bool __rt6_check_expired(const struct rt6_info *rt)
399 {
400 	if (rt->rt6i_flags & RTF_EXPIRES)
401 		return time_after(jiffies, rt->dst.expires);
402 	else
403 		return false;
404 }
405 
406 static bool rt6_check_expired(const struct rt6_info *rt)
407 {
408 	struct fib6_info *from;
409 
410 	from = rcu_dereference(rt->from);
411 
412 	if (rt->rt6i_flags & RTF_EXPIRES) {
413 		if (time_after(jiffies, rt->dst.expires))
414 			return true;
415 	} else if (from) {
416 		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
417 			fib6_check_expired(from);
418 	}
419 	return false;
420 }
421 
422 struct fib6_info *fib6_multipath_select(const struct net *net,
423 					struct fib6_info *match,
424 					struct flowi6 *fl6, int oif,
425 					const struct sk_buff *skb,
426 					int strict)
427 {
428 	struct fib6_info *sibling, *next_sibling;
429 
430 	/* We might have already computed the hash for ICMPv6 errors. In such
431 	 * case it will always be non-zero. Otherwise now is the time to do it.
432 	 */
433 	if (!fl6->mp_hash)
434 		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
435 
436 	if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
437 		return match;
438 
439 	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
440 				 fib6_siblings) {
441 		int nh_upper_bound;
442 
443 		nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
444 		if (fl6->mp_hash > nh_upper_bound)
445 			continue;
446 		if (rt6_score_route(sibling, oif, strict) < 0)
447 			break;
448 		match = sibling;
449 		break;
450 	}
451 
452 	return match;
453 }
454 
455 /*
456  *	Route lookup. rcu_read_lock() should be held.
457  */
458 
459 static inline struct fib6_info *rt6_device_match(struct net *net,
460 						 struct fib6_info *rt,
461 						    const struct in6_addr *saddr,
462 						    int oif,
463 						    int flags)
464 {
465 	struct fib6_info *sprt;
466 
467 	if (!oif && ipv6_addr_any(saddr) &&
468 	    !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
469 		return rt;
470 
471 	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
472 		const struct net_device *dev = sprt->fib6_nh.nh_dev;
473 
474 		if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
475 			continue;
476 
477 		if (oif) {
478 			if (dev->ifindex == oif)
479 				return sprt;
480 		} else {
481 			if (ipv6_chk_addr(net, saddr, dev,
482 					  flags & RT6_LOOKUP_F_IFACE))
483 				return sprt;
484 		}
485 	}
486 
487 	if (oif && flags & RT6_LOOKUP_F_IFACE)
488 		return net->ipv6.fib6_null_entry;
489 
490 	return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
491 }
492 
493 #ifdef CONFIG_IPV6_ROUTER_PREF
494 struct __rt6_probe_work {
495 	struct work_struct work;
496 	struct in6_addr target;
497 	struct net_device *dev;
498 };
499 
500 static void rt6_probe_deferred(struct work_struct *w)
501 {
502 	struct in6_addr mcaddr;
503 	struct __rt6_probe_work *work =
504 		container_of(w, struct __rt6_probe_work, work);
505 
506 	addrconf_addr_solict_mult(&work->target, &mcaddr);
507 	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
508 	dev_put(work->dev);
509 	kfree(work);
510 }
511 
512 static void rt6_probe(struct fib6_info *rt)
513 {
514 	struct __rt6_probe_work *work;
515 	const struct in6_addr *nh_gw;
516 	struct neighbour *neigh;
517 	struct net_device *dev;
518 
519 	/*
520 	 * Okay, this does not seem to be appropriate
521 	 * for now, however, we need to check if it
522 	 * is really so; aka Router Reachability Probing.
523 	 *
524 	 * Router Reachability Probe MUST be rate-limited
525 	 * to no more than one per minute.
526 	 */
527 	if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
528 		return;
529 
530 	nh_gw = &rt->fib6_nh.nh_gw;
531 	dev = rt->fib6_nh.nh_dev;
532 	rcu_read_lock_bh();
533 	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
534 	if (neigh) {
535 		struct inet6_dev *idev;
536 
537 		if (neigh->nud_state & NUD_VALID)
538 			goto out;
539 
540 		idev = __in6_dev_get(dev);
541 		work = NULL;
542 		write_lock(&neigh->lock);
543 		if (!(neigh->nud_state & NUD_VALID) &&
544 		    time_after(jiffies,
545 			       neigh->updated + idev->cnf.rtr_probe_interval)) {
546 			work = kmalloc(sizeof(*work), GFP_ATOMIC);
547 			if (work)
548 				__neigh_set_probe_once(neigh);
549 		}
550 		write_unlock(&neigh->lock);
551 	} else {
552 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
553 	}
554 
555 	if (work) {
556 		INIT_WORK(&work->work, rt6_probe_deferred);
557 		work->target = *nh_gw;
558 		dev_hold(dev);
559 		work->dev = dev;
560 		schedule_work(&work->work);
561 	}
562 
563 out:
564 	rcu_read_unlock_bh();
565 }
566 #else
567 static inline void rt6_probe(struct fib6_info *rt)
568 {
569 }
570 #endif
571 
572 /*
573  * Default Router Selection (RFC 2461 6.3.6)
574  */
575 static inline int rt6_check_dev(struct fib6_info *rt, int oif)
576 {
577 	const struct net_device *dev = rt->fib6_nh.nh_dev;
578 
579 	if (!oif || dev->ifindex == oif)
580 		return 2;
581 	return 0;
582 }
583 
584 static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
585 {
586 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
587 	struct neighbour *neigh;
588 
589 	if (rt->fib6_flags & RTF_NONEXTHOP ||
590 	    !(rt->fib6_flags & RTF_GATEWAY))
591 		return RT6_NUD_SUCCEED;
592 
593 	rcu_read_lock_bh();
594 	neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
595 					  &rt->fib6_nh.nh_gw);
596 	if (neigh) {
597 		read_lock(&neigh->lock);
598 		if (neigh->nud_state & NUD_VALID)
599 			ret = RT6_NUD_SUCCEED;
600 #ifdef CONFIG_IPV6_ROUTER_PREF
601 		else if (!(neigh->nud_state & NUD_FAILED))
602 			ret = RT6_NUD_SUCCEED;
603 		else
604 			ret = RT6_NUD_FAIL_PROBE;
605 #endif
606 		read_unlock(&neigh->lock);
607 	} else {
608 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
609 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
610 	}
611 	rcu_read_unlock_bh();
612 
613 	return ret;
614 }
615 
616 static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
617 {
618 	int m;
619 
620 	m = rt6_check_dev(rt, oif);
621 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
622 		return RT6_NUD_FAIL_HARD;
623 #ifdef CONFIG_IPV6_ROUTER_PREF
624 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
625 #endif
626 	if (strict & RT6_LOOKUP_F_REACHABLE) {
627 		int n = rt6_check_neigh(rt);
628 		if (n < 0)
629 			return n;
630 	}
631 	return m;
632 }
633 
634 /* called with rc_read_lock held */
635 static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
636 {
637 	const struct net_device *dev = fib6_info_nh_dev(f6i);
638 	bool rc = false;
639 
640 	if (dev) {
641 		const struct inet6_dev *idev = __in6_dev_get(dev);
642 
643 		rc = !!idev->cnf.ignore_routes_with_linkdown;
644 	}
645 
646 	return rc;
647 }
648 
649 static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
650 				   int *mpri, struct fib6_info *match,
651 				   bool *do_rr)
652 {
653 	int m;
654 	bool match_do_rr = false;
655 
656 	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
657 		goto out;
658 
659 	if (fib6_ignore_linkdown(rt) &&
660 	    rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
661 	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
662 		goto out;
663 
664 	if (fib6_check_expired(rt))
665 		goto out;
666 
667 	m = rt6_score_route(rt, oif, strict);
668 	if (m == RT6_NUD_FAIL_DO_RR) {
669 		match_do_rr = true;
670 		m = 0; /* lowest valid score */
671 	} else if (m == RT6_NUD_FAIL_HARD) {
672 		goto out;
673 	}
674 
675 	if (strict & RT6_LOOKUP_F_REACHABLE)
676 		rt6_probe(rt);
677 
678 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
679 	if (m > *mpri) {
680 		*do_rr = match_do_rr;
681 		*mpri = m;
682 		match = rt;
683 	}
684 out:
685 	return match;
686 }
687 
688 static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
689 				     struct fib6_info *leaf,
690 				     struct fib6_info *rr_head,
691 				     u32 metric, int oif, int strict,
692 				     bool *do_rr)
693 {
694 	struct fib6_info *rt, *match, *cont;
695 	int mpri = -1;
696 
697 	match = NULL;
698 	cont = NULL;
699 	for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
700 		if (rt->fib6_metric != metric) {
701 			cont = rt;
702 			break;
703 		}
704 
705 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
706 	}
707 
708 	for (rt = leaf; rt && rt != rr_head;
709 	     rt = rcu_dereference(rt->fib6_next)) {
710 		if (rt->fib6_metric != metric) {
711 			cont = rt;
712 			break;
713 		}
714 
715 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
716 	}
717 
718 	if (match || !cont)
719 		return match;
720 
721 	for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
722 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
723 
724 	return match;
725 }
726 
727 static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
728 				   int oif, int strict)
729 {
730 	struct fib6_info *leaf = rcu_dereference(fn->leaf);
731 	struct fib6_info *match, *rt0;
732 	bool do_rr = false;
733 	int key_plen;
734 
735 	if (!leaf || leaf == net->ipv6.fib6_null_entry)
736 		return net->ipv6.fib6_null_entry;
737 
738 	rt0 = rcu_dereference(fn->rr_ptr);
739 	if (!rt0)
740 		rt0 = leaf;
741 
742 	/* Double check to make sure fn is not an intermediate node
743 	 * and fn->leaf does not points to its child's leaf
744 	 * (This might happen if all routes under fn are deleted from
745 	 * the tree and fib6_repair_tree() is called on the node.)
746 	 */
747 	key_plen = rt0->fib6_dst.plen;
748 #ifdef CONFIG_IPV6_SUBTREES
749 	if (rt0->fib6_src.plen)
750 		key_plen = rt0->fib6_src.plen;
751 #endif
752 	if (fn->fn_bit != key_plen)
753 		return net->ipv6.fib6_null_entry;
754 
755 	match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
756 			     &do_rr);
757 
758 	if (do_rr) {
759 		struct fib6_info *next = rcu_dereference(rt0->fib6_next);
760 
761 		/* no entries matched; do round-robin */
762 		if (!next || next->fib6_metric != rt0->fib6_metric)
763 			next = leaf;
764 
765 		if (next != rt0) {
766 			spin_lock_bh(&leaf->fib6_table->tb6_lock);
767 			/* make sure next is not being deleted from the tree */
768 			if (next->fib6_node)
769 				rcu_assign_pointer(fn->rr_ptr, next);
770 			spin_unlock_bh(&leaf->fib6_table->tb6_lock);
771 		}
772 	}
773 
774 	return match ? match : net->ipv6.fib6_null_entry;
775 }
776 
777 static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
778 {
779 	return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
780 }
781 
782 #ifdef CONFIG_IPV6_ROUTE_INFO
783 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
784 		  const struct in6_addr *gwaddr)
785 {
786 	struct net *net = dev_net(dev);
787 	struct route_info *rinfo = (struct route_info *) opt;
788 	struct in6_addr prefix_buf, *prefix;
789 	unsigned int pref;
790 	unsigned long lifetime;
791 	struct fib6_info *rt;
792 
793 	if (len < sizeof(struct route_info)) {
794 		return -EINVAL;
795 	}
796 
797 	/* Sanity check for prefix_len and length */
798 	if (rinfo->length > 3) {
799 		return -EINVAL;
800 	} else if (rinfo->prefix_len > 128) {
801 		return -EINVAL;
802 	} else if (rinfo->prefix_len > 64) {
803 		if (rinfo->length < 2) {
804 			return -EINVAL;
805 		}
806 	} else if (rinfo->prefix_len > 0) {
807 		if (rinfo->length < 1) {
808 			return -EINVAL;
809 		}
810 	}
811 
812 	pref = rinfo->route_pref;
813 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
814 		return -EINVAL;
815 
816 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
817 
818 	if (rinfo->length == 3)
819 		prefix = (struct in6_addr *)rinfo->prefix;
820 	else {
821 		/* this function is safe */
822 		ipv6_addr_prefix(&prefix_buf,
823 				 (struct in6_addr *)rinfo->prefix,
824 				 rinfo->prefix_len);
825 		prefix = &prefix_buf;
826 	}
827 
828 	if (rinfo->prefix_len == 0)
829 		rt = rt6_get_dflt_router(net, gwaddr, dev);
830 	else
831 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
832 					gwaddr, dev);
833 
834 	if (rt && !lifetime) {
835 		ip6_del_rt(net, rt);
836 		rt = NULL;
837 	}
838 
839 	if (!rt && lifetime)
840 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
841 					dev, pref);
842 	else if (rt)
843 		rt->fib6_flags = RTF_ROUTEINFO |
844 				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
845 
846 	if (rt) {
847 		if (!addrconf_finite_timeout(lifetime))
848 			fib6_clean_expires(rt);
849 		else
850 			fib6_set_expires(rt, jiffies + HZ * lifetime);
851 
852 		fib6_info_release(rt);
853 	}
854 	return 0;
855 }
856 #endif
857 
858 /*
859  *	Misc support functions
860  */
861 
862 /* called with rcu_lock held */
863 static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
864 {
865 	struct net_device *dev = rt->fib6_nh.nh_dev;
866 
867 	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
868 		/* for copies of local routes, dst->dev needs to be the
869 		 * device if it is a master device, the master device if
870 		 * device is enslaved, and the loopback as the default
871 		 */
872 		if (netif_is_l3_slave(dev) &&
873 		    !rt6_need_strict(&rt->fib6_dst.addr))
874 			dev = l3mdev_master_dev_rcu(dev);
875 		else if (!netif_is_l3_master(dev))
876 			dev = dev_net(dev)->loopback_dev;
877 		/* last case is netif_is_l3_master(dev) is true in which
878 		 * case we want dev returned to be dev
879 		 */
880 	}
881 
882 	return dev;
883 }
884 
885 static const int fib6_prop[RTN_MAX + 1] = {
886 	[RTN_UNSPEC]	= 0,
887 	[RTN_UNICAST]	= 0,
888 	[RTN_LOCAL]	= 0,
889 	[RTN_BROADCAST]	= 0,
890 	[RTN_ANYCAST]	= 0,
891 	[RTN_MULTICAST]	= 0,
892 	[RTN_BLACKHOLE]	= -EINVAL,
893 	[RTN_UNREACHABLE] = -EHOSTUNREACH,
894 	[RTN_PROHIBIT]	= -EACCES,
895 	[RTN_THROW]	= -EAGAIN,
896 	[RTN_NAT]	= -EINVAL,
897 	[RTN_XRESOLVE]	= -EINVAL,
898 };
899 
900 static int ip6_rt_type_to_error(u8 fib6_type)
901 {
902 	return fib6_prop[fib6_type];
903 }
904 
905 static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
906 {
907 	unsigned short flags = 0;
908 
909 	if (rt->dst_nocount)
910 		flags |= DST_NOCOUNT;
911 	if (rt->dst_nopolicy)
912 		flags |= DST_NOPOLICY;
913 	if (rt->dst_host)
914 		flags |= DST_HOST;
915 
916 	return flags;
917 }
918 
919 static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
920 {
921 	rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
922 
923 	switch (ort->fib6_type) {
924 	case RTN_BLACKHOLE:
925 		rt->dst.output = dst_discard_out;
926 		rt->dst.input = dst_discard;
927 		break;
928 	case RTN_PROHIBIT:
929 		rt->dst.output = ip6_pkt_prohibit_out;
930 		rt->dst.input = ip6_pkt_prohibit;
931 		break;
932 	case RTN_THROW:
933 	case RTN_UNREACHABLE:
934 	default:
935 		rt->dst.output = ip6_pkt_discard_out;
936 		rt->dst.input = ip6_pkt_discard;
937 		break;
938 	}
939 }
940 
941 static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
942 {
943 	rt->dst.flags |= fib6_info_dst_flags(ort);
944 
945 	if (ort->fib6_flags & RTF_REJECT) {
946 		ip6_rt_init_dst_reject(rt, ort);
947 		return;
948 	}
949 
950 	rt->dst.error = 0;
951 	rt->dst.output = ip6_output;
952 
953 	if (ort->fib6_type == RTN_LOCAL) {
954 		rt->dst.input = ip6_input;
955 	} else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
956 		rt->dst.input = ip6_mc_input;
957 	} else {
958 		rt->dst.input = ip6_forward;
959 	}
960 
961 	if (ort->fib6_nh.nh_lwtstate) {
962 		rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
963 		lwtunnel_set_redirect(&rt->dst);
964 	}
965 
966 	rt->dst.lastuse = jiffies;
967 }
968 
969 static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
970 {
971 	rt->rt6i_flags &= ~RTF_EXPIRES;
972 	fib6_info_hold(from);
973 	rcu_assign_pointer(rt->from, from);
974 	dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
975 	if (from->fib6_metrics != &dst_default_metrics) {
976 		rt->dst._metrics |= DST_METRICS_REFCOUNTED;
977 		refcount_inc(&from->fib6_metrics->refcnt);
978 	}
979 }
980 
981 static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
982 {
983 	struct net_device *dev = fib6_info_nh_dev(ort);
984 
985 	ip6_rt_init_dst(rt, ort);
986 
987 	rt->rt6i_dst = ort->fib6_dst;
988 	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
989 	rt->rt6i_gateway = ort->fib6_nh.nh_gw;
990 	rt->rt6i_flags = ort->fib6_flags;
991 	rt6_set_from(rt, ort);
992 #ifdef CONFIG_IPV6_SUBTREES
993 	rt->rt6i_src = ort->fib6_src;
994 #endif
995 	rt->rt6i_prefsrc = ort->fib6_prefsrc;
996 	rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
997 }
998 
999 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1000 					struct in6_addr *saddr)
1001 {
1002 	struct fib6_node *pn, *sn;
1003 	while (1) {
1004 		if (fn->fn_flags & RTN_TL_ROOT)
1005 			return NULL;
1006 		pn = rcu_dereference(fn->parent);
1007 		sn = FIB6_SUBTREE(pn);
1008 		if (sn && sn != fn)
1009 			fn = fib6_node_lookup(sn, NULL, saddr);
1010 		else
1011 			fn = pn;
1012 		if (fn->fn_flags & RTN_RTINFO)
1013 			return fn;
1014 	}
1015 }
1016 
1017 static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1018 			  bool null_fallback)
1019 {
1020 	struct rt6_info *rt = *prt;
1021 
1022 	if (dst_hold_safe(&rt->dst))
1023 		return true;
1024 	if (null_fallback) {
1025 		rt = net->ipv6.ip6_null_entry;
1026 		dst_hold(&rt->dst);
1027 	} else {
1028 		rt = NULL;
1029 	}
1030 	*prt = rt;
1031 	return false;
1032 }
1033 
1034 /* called with rcu_lock held */
1035 static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
1036 {
1037 	unsigned short flags = fib6_info_dst_flags(rt);
1038 	struct net_device *dev = rt->fib6_nh.nh_dev;
1039 	struct rt6_info *nrt;
1040 
1041 	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1042 	if (nrt)
1043 		ip6_rt_copy_init(nrt, rt);
1044 
1045 	return nrt;
1046 }
1047 
1048 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1049 					     struct fib6_table *table,
1050 					     struct flowi6 *fl6,
1051 					     const struct sk_buff *skb,
1052 					     int flags)
1053 {
1054 	struct fib6_info *f6i;
1055 	struct fib6_node *fn;
1056 	struct rt6_info *rt;
1057 
1058 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1059 		flags &= ~RT6_LOOKUP_F_IFACE;
1060 
1061 	rcu_read_lock();
1062 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1063 restart:
1064 	f6i = rcu_dereference(fn->leaf);
1065 	if (!f6i) {
1066 		f6i = net->ipv6.fib6_null_entry;
1067 	} else {
1068 		f6i = rt6_device_match(net, f6i, &fl6->saddr,
1069 				      fl6->flowi6_oif, flags);
1070 		if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
1071 			f6i = fib6_multipath_select(net, f6i, fl6,
1072 						    fl6->flowi6_oif, skb,
1073 						    flags);
1074 	}
1075 	if (f6i == net->ipv6.fib6_null_entry) {
1076 		fn = fib6_backtrack(fn, &fl6->saddr);
1077 		if (fn)
1078 			goto restart;
1079 	}
1080 
1081 	trace_fib6_table_lookup(net, f6i, table, fl6);
1082 
1083 	/* Search through exception table */
1084 	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1085 	if (rt) {
1086 		if (ip6_hold_safe(net, &rt, true))
1087 			dst_use_noref(&rt->dst, jiffies);
1088 	} else if (f6i == net->ipv6.fib6_null_entry) {
1089 		rt = net->ipv6.ip6_null_entry;
1090 		dst_hold(&rt->dst);
1091 	} else {
1092 		rt = ip6_create_rt_rcu(f6i);
1093 		if (!rt) {
1094 			rt = net->ipv6.ip6_null_entry;
1095 			dst_hold(&rt->dst);
1096 		}
1097 	}
1098 
1099 	rcu_read_unlock();
1100 
1101 	return rt;
1102 }
1103 
1104 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
1105 				   const struct sk_buff *skb, int flags)
1106 {
1107 	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
1108 }
1109 EXPORT_SYMBOL_GPL(ip6_route_lookup);
1110 
1111 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1112 			    const struct in6_addr *saddr, int oif,
1113 			    const struct sk_buff *skb, int strict)
1114 {
1115 	struct flowi6 fl6 = {
1116 		.flowi6_oif = oif,
1117 		.daddr = *daddr,
1118 	};
1119 	struct dst_entry *dst;
1120 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1121 
1122 	if (saddr) {
1123 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1124 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1125 	}
1126 
1127 	dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1128 	if (dst->error == 0)
1129 		return (struct rt6_info *) dst;
1130 
1131 	dst_release(dst);
1132 
1133 	return NULL;
1134 }
1135 EXPORT_SYMBOL(rt6_lookup);
1136 
1137 /* ip6_ins_rt is called with FREE table->tb6_lock.
1138  * It takes new route entry, the addition fails by any reason the
1139  * route is released.
1140  * Caller must hold dst before calling it.
1141  */
1142 
1143 static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1144 			struct netlink_ext_ack *extack)
1145 {
1146 	int err;
1147 	struct fib6_table *table;
1148 
1149 	table = rt->fib6_table;
1150 	spin_lock_bh(&table->tb6_lock);
1151 	err = fib6_add(&table->tb6_root, rt, info, extack);
1152 	spin_unlock_bh(&table->tb6_lock);
1153 
1154 	return err;
1155 }
1156 
1157 int ip6_ins_rt(struct net *net, struct fib6_info *rt)
1158 {
1159 	struct nl_info info = {	.nl_net = net, };
1160 
1161 	return __ip6_ins_rt(rt, &info, NULL);
1162 }
1163 
1164 static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
1165 					   const struct in6_addr *daddr,
1166 					   const struct in6_addr *saddr)
1167 {
1168 	struct net_device *dev;
1169 	struct rt6_info *rt;
1170 
1171 	/*
1172 	 *	Clone the route.
1173 	 */
1174 
1175 	dev = ip6_rt_get_dev_rcu(ort);
1176 	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
1177 	if (!rt)
1178 		return NULL;
1179 
1180 	ip6_rt_copy_init(rt, ort);
1181 	rt->rt6i_flags |= RTF_CACHE;
1182 	rt->dst.flags |= DST_HOST;
1183 	rt->rt6i_dst.addr = *daddr;
1184 	rt->rt6i_dst.plen = 128;
1185 
1186 	if (!rt6_is_gw_or_nonexthop(ort)) {
1187 		if (ort->fib6_dst.plen != 128 &&
1188 		    ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
1189 			rt->rt6i_flags |= RTF_ANYCAST;
1190 #ifdef CONFIG_IPV6_SUBTREES
1191 		if (rt->rt6i_src.plen && saddr) {
1192 			rt->rt6i_src.addr = *saddr;
1193 			rt->rt6i_src.plen = 128;
1194 		}
1195 #endif
1196 	}
1197 
1198 	return rt;
1199 }
1200 
1201 static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
1202 {
1203 	unsigned short flags = fib6_info_dst_flags(rt);
1204 	struct net_device *dev;
1205 	struct rt6_info *pcpu_rt;
1206 
1207 	rcu_read_lock();
1208 	dev = ip6_rt_get_dev_rcu(rt);
1209 	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
1210 	rcu_read_unlock();
1211 	if (!pcpu_rt)
1212 		return NULL;
1213 	ip6_rt_copy_init(pcpu_rt, rt);
1214 	pcpu_rt->rt6i_flags |= RTF_PCPU;
1215 	return pcpu_rt;
1216 }
1217 
1218 /* It should be called with rcu_read_lock() acquired */
1219 static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
1220 {
1221 	struct rt6_info *pcpu_rt, **p;
1222 
1223 	p = this_cpu_ptr(rt->rt6i_pcpu);
1224 	pcpu_rt = *p;
1225 
1226 	if (pcpu_rt)
1227 		ip6_hold_safe(NULL, &pcpu_rt, false);
1228 
1229 	return pcpu_rt;
1230 }
1231 
1232 static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1233 					    struct fib6_info *rt)
1234 {
1235 	struct rt6_info *pcpu_rt, *prev, **p;
1236 
1237 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1238 	if (!pcpu_rt) {
1239 		dst_hold(&net->ipv6.ip6_null_entry->dst);
1240 		return net->ipv6.ip6_null_entry;
1241 	}
1242 
1243 	dst_hold(&pcpu_rt->dst);
1244 	p = this_cpu_ptr(rt->rt6i_pcpu);
1245 	prev = cmpxchg(p, NULL, pcpu_rt);
1246 	BUG_ON(prev);
1247 
1248 	return pcpu_rt;
1249 }
1250 
1251 /* exception hash table implementation
1252  */
1253 static DEFINE_SPINLOCK(rt6_exception_lock);
1254 
1255 /* Remove rt6_ex from hash table and free the memory
1256  * Caller must hold rt6_exception_lock
1257  */
1258 static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1259 				 struct rt6_exception *rt6_ex)
1260 {
1261 	struct net *net;
1262 
1263 	if (!bucket || !rt6_ex)
1264 		return;
1265 
1266 	net = dev_net(rt6_ex->rt6i->dst.dev);
1267 	hlist_del_rcu(&rt6_ex->hlist);
1268 	dst_release(&rt6_ex->rt6i->dst);
1269 	kfree_rcu(rt6_ex, rcu);
1270 	WARN_ON_ONCE(!bucket->depth);
1271 	bucket->depth--;
1272 	net->ipv6.rt6_stats->fib_rt_cache--;
1273 }
1274 
1275 /* Remove oldest rt6_ex in bucket and free the memory
1276  * Caller must hold rt6_exception_lock
1277  */
1278 static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1279 {
1280 	struct rt6_exception *rt6_ex, *oldest = NULL;
1281 
1282 	if (!bucket)
1283 		return;
1284 
1285 	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1286 		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1287 			oldest = rt6_ex;
1288 	}
1289 	rt6_remove_exception(bucket, oldest);
1290 }
1291 
1292 static u32 rt6_exception_hash(const struct in6_addr *dst,
1293 			      const struct in6_addr *src)
1294 {
1295 	static u32 seed __read_mostly;
1296 	u32 val;
1297 
1298 	net_get_random_once(&seed, sizeof(seed));
1299 	val = jhash(dst, sizeof(*dst), seed);
1300 
1301 #ifdef CONFIG_IPV6_SUBTREES
1302 	if (src)
1303 		val = jhash(src, sizeof(*src), val);
1304 #endif
1305 	return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1306 }
1307 
1308 /* Helper function to find the cached rt in the hash table
1309  * and update bucket pointer to point to the bucket for this
1310  * (daddr, saddr) pair
1311  * Caller must hold rt6_exception_lock
1312  */
1313 static struct rt6_exception *
1314 __rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1315 			      const struct in6_addr *daddr,
1316 			      const struct in6_addr *saddr)
1317 {
1318 	struct rt6_exception *rt6_ex;
1319 	u32 hval;
1320 
1321 	if (!(*bucket) || !daddr)
1322 		return NULL;
1323 
1324 	hval = rt6_exception_hash(daddr, saddr);
1325 	*bucket += hval;
1326 
1327 	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1328 		struct rt6_info *rt6 = rt6_ex->rt6i;
1329 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1330 
1331 #ifdef CONFIG_IPV6_SUBTREES
1332 		if (matched && saddr)
1333 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1334 #endif
1335 		if (matched)
1336 			return rt6_ex;
1337 	}
1338 	return NULL;
1339 }
1340 
1341 /* Helper function to find the cached rt in the hash table
1342  * and update bucket pointer to point to the bucket for this
1343  * (daddr, saddr) pair
1344  * Caller must hold rcu_read_lock()
1345  */
1346 static struct rt6_exception *
1347 __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1348 			 const struct in6_addr *daddr,
1349 			 const struct in6_addr *saddr)
1350 {
1351 	struct rt6_exception *rt6_ex;
1352 	u32 hval;
1353 
1354 	WARN_ON_ONCE(!rcu_read_lock_held());
1355 
1356 	if (!(*bucket) || !daddr)
1357 		return NULL;
1358 
1359 	hval = rt6_exception_hash(daddr, saddr);
1360 	*bucket += hval;
1361 
1362 	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1363 		struct rt6_info *rt6 = rt6_ex->rt6i;
1364 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1365 
1366 #ifdef CONFIG_IPV6_SUBTREES
1367 		if (matched && saddr)
1368 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1369 #endif
1370 		if (matched)
1371 			return rt6_ex;
1372 	}
1373 	return NULL;
1374 }
1375 
1376 static unsigned int fib6_mtu(const struct fib6_info *rt)
1377 {
1378 	unsigned int mtu;
1379 
1380 	if (rt->fib6_pmtu) {
1381 		mtu = rt->fib6_pmtu;
1382 	} else {
1383 		struct net_device *dev = fib6_info_nh_dev(rt);
1384 		struct inet6_dev *idev;
1385 
1386 		rcu_read_lock();
1387 		idev = __in6_dev_get(dev);
1388 		mtu = idev->cnf.mtu6;
1389 		rcu_read_unlock();
1390 	}
1391 
1392 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1393 
1394 	return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1395 }
1396 
1397 static int rt6_insert_exception(struct rt6_info *nrt,
1398 				struct fib6_info *ort)
1399 {
1400 	struct net *net = dev_net(nrt->dst.dev);
1401 	struct rt6_exception_bucket *bucket;
1402 	struct in6_addr *src_key = NULL;
1403 	struct rt6_exception *rt6_ex;
1404 	int err = 0;
1405 
1406 	spin_lock_bh(&rt6_exception_lock);
1407 
1408 	if (ort->exception_bucket_flushed) {
1409 		err = -EINVAL;
1410 		goto out;
1411 	}
1412 
1413 	bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1414 					lockdep_is_held(&rt6_exception_lock));
1415 	if (!bucket) {
1416 		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1417 				 GFP_ATOMIC);
1418 		if (!bucket) {
1419 			err = -ENOMEM;
1420 			goto out;
1421 		}
1422 		rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1423 	}
1424 
1425 #ifdef CONFIG_IPV6_SUBTREES
1426 	/* rt6i_src.plen != 0 indicates ort is in subtree
1427 	 * and exception table is indexed by a hash of
1428 	 * both rt6i_dst and rt6i_src.
1429 	 * Otherwise, the exception table is indexed by
1430 	 * a hash of only rt6i_dst.
1431 	 */
1432 	if (ort->fib6_src.plen)
1433 		src_key = &nrt->rt6i_src.addr;
1434 #endif
1435 
1436 	/* Update rt6i_prefsrc as it could be changed
1437 	 * in rt6_remove_prefsrc()
1438 	 */
1439 	nrt->rt6i_prefsrc = ort->fib6_prefsrc;
1440 	/* rt6_mtu_change() might lower mtu on ort.
1441 	 * Only insert this exception route if its mtu
1442 	 * is less than ort's mtu value.
1443 	 */
1444 	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
1445 		err = -EINVAL;
1446 		goto out;
1447 	}
1448 
1449 	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1450 					       src_key);
1451 	if (rt6_ex)
1452 		rt6_remove_exception(bucket, rt6_ex);
1453 
1454 	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1455 	if (!rt6_ex) {
1456 		err = -ENOMEM;
1457 		goto out;
1458 	}
1459 	rt6_ex->rt6i = nrt;
1460 	rt6_ex->stamp = jiffies;
1461 	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1462 	bucket->depth++;
1463 	net->ipv6.rt6_stats->fib_rt_cache++;
1464 
1465 	if (bucket->depth > FIB6_MAX_DEPTH)
1466 		rt6_exception_remove_oldest(bucket);
1467 
1468 out:
1469 	spin_unlock_bh(&rt6_exception_lock);
1470 
1471 	/* Update fn->fn_sernum to invalidate all cached dst */
1472 	if (!err) {
1473 		spin_lock_bh(&ort->fib6_table->tb6_lock);
1474 		fib6_update_sernum(net, ort);
1475 		spin_unlock_bh(&ort->fib6_table->tb6_lock);
1476 		fib6_force_start_gc(net);
1477 	}
1478 
1479 	return err;
1480 }
1481 
1482 void rt6_flush_exceptions(struct fib6_info *rt)
1483 {
1484 	struct rt6_exception_bucket *bucket;
1485 	struct rt6_exception *rt6_ex;
1486 	struct hlist_node *tmp;
1487 	int i;
1488 
1489 	spin_lock_bh(&rt6_exception_lock);
1490 	/* Prevent rt6_insert_exception() to recreate the bucket list */
1491 	rt->exception_bucket_flushed = 1;
1492 
1493 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1494 				    lockdep_is_held(&rt6_exception_lock));
1495 	if (!bucket)
1496 		goto out;
1497 
1498 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1499 		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1500 			rt6_remove_exception(bucket, rt6_ex);
1501 		WARN_ON_ONCE(bucket->depth);
1502 		bucket++;
1503 	}
1504 
1505 out:
1506 	spin_unlock_bh(&rt6_exception_lock);
1507 }
1508 
1509 /* Find cached rt in the hash table inside passed in rt
1510  * Caller has to hold rcu_read_lock()
1511  */
1512 static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
1513 					   struct in6_addr *daddr,
1514 					   struct in6_addr *saddr)
1515 {
1516 	struct rt6_exception_bucket *bucket;
1517 	struct in6_addr *src_key = NULL;
1518 	struct rt6_exception *rt6_ex;
1519 	struct rt6_info *res = NULL;
1520 
1521 	bucket = rcu_dereference(rt->rt6i_exception_bucket);
1522 
1523 #ifdef CONFIG_IPV6_SUBTREES
1524 	/* rt6i_src.plen != 0 indicates rt is in subtree
1525 	 * and exception table is indexed by a hash of
1526 	 * both rt6i_dst and rt6i_src.
1527 	 * Otherwise, the exception table is indexed by
1528 	 * a hash of only rt6i_dst.
1529 	 */
1530 	if (rt->fib6_src.plen)
1531 		src_key = saddr;
1532 #endif
1533 	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1534 
1535 	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1536 		res = rt6_ex->rt6i;
1537 
1538 	return res;
1539 }
1540 
1541 /* Remove the passed in cached rt from the hash table that contains it */
1542 static int rt6_remove_exception_rt(struct rt6_info *rt)
1543 {
1544 	struct rt6_exception_bucket *bucket;
1545 	struct in6_addr *src_key = NULL;
1546 	struct rt6_exception *rt6_ex;
1547 	struct fib6_info *from;
1548 	int err;
1549 
1550 	from = rcu_dereference(rt->from);
1551 	if (!from ||
1552 	    !(rt->rt6i_flags & RTF_CACHE))
1553 		return -EINVAL;
1554 
1555 	if (!rcu_access_pointer(from->rt6i_exception_bucket))
1556 		return -ENOENT;
1557 
1558 	spin_lock_bh(&rt6_exception_lock);
1559 	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1560 				    lockdep_is_held(&rt6_exception_lock));
1561 #ifdef CONFIG_IPV6_SUBTREES
1562 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1563 	 * and exception table is indexed by a hash of
1564 	 * both rt6i_dst and rt6i_src.
1565 	 * Otherwise, the exception table is indexed by
1566 	 * a hash of only rt6i_dst.
1567 	 */
1568 	if (from->fib6_src.plen)
1569 		src_key = &rt->rt6i_src.addr;
1570 #endif
1571 	rt6_ex = __rt6_find_exception_spinlock(&bucket,
1572 					       &rt->rt6i_dst.addr,
1573 					       src_key);
1574 	if (rt6_ex) {
1575 		rt6_remove_exception(bucket, rt6_ex);
1576 		err = 0;
1577 	} else {
1578 		err = -ENOENT;
1579 	}
1580 
1581 	spin_unlock_bh(&rt6_exception_lock);
1582 	return err;
1583 }
1584 
1585 /* Find rt6_ex which contains the passed in rt cache and
1586  * refresh its stamp
1587  */
1588 static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1589 {
1590 	struct rt6_exception_bucket *bucket;
1591 	struct fib6_info *from = rt->from;
1592 	struct in6_addr *src_key = NULL;
1593 	struct rt6_exception *rt6_ex;
1594 
1595 	if (!from ||
1596 	    !(rt->rt6i_flags & RTF_CACHE))
1597 		return;
1598 
1599 	rcu_read_lock();
1600 	bucket = rcu_dereference(from->rt6i_exception_bucket);
1601 
1602 #ifdef CONFIG_IPV6_SUBTREES
1603 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1604 	 * and exception table is indexed by a hash of
1605 	 * both rt6i_dst and rt6i_src.
1606 	 * Otherwise, the exception table is indexed by
1607 	 * a hash of only rt6i_dst.
1608 	 */
1609 	if (from->fib6_src.plen)
1610 		src_key = &rt->rt6i_src.addr;
1611 #endif
1612 	rt6_ex = __rt6_find_exception_rcu(&bucket,
1613 					  &rt->rt6i_dst.addr,
1614 					  src_key);
1615 	if (rt6_ex)
1616 		rt6_ex->stamp = jiffies;
1617 
1618 	rcu_read_unlock();
1619 }
1620 
1621 static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)
1622 {
1623 	struct rt6_exception_bucket *bucket;
1624 	struct rt6_exception *rt6_ex;
1625 	int i;
1626 
1627 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1628 					lockdep_is_held(&rt6_exception_lock));
1629 
1630 	if (bucket) {
1631 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1632 			hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1633 				rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1634 			}
1635 			bucket++;
1636 		}
1637 	}
1638 }
1639 
1640 static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1641 					 struct rt6_info *rt, int mtu)
1642 {
1643 	/* If the new MTU is lower than the route PMTU, this new MTU will be the
1644 	 * lowest MTU in the path: always allow updating the route PMTU to
1645 	 * reflect PMTU decreases.
1646 	 *
1647 	 * If the new MTU is higher, and the route PMTU is equal to the local
1648 	 * MTU, this means the old MTU is the lowest in the path, so allow
1649 	 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1650 	 * handle this.
1651 	 */
1652 
1653 	if (dst_mtu(&rt->dst) >= mtu)
1654 		return true;
1655 
1656 	if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1657 		return true;
1658 
1659 	return false;
1660 }
1661 
1662 static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
1663 				       struct fib6_info *rt, int mtu)
1664 {
1665 	struct rt6_exception_bucket *bucket;
1666 	struct rt6_exception *rt6_ex;
1667 	int i;
1668 
1669 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1670 					lockdep_is_held(&rt6_exception_lock));
1671 
1672 	if (!bucket)
1673 		return;
1674 
1675 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1676 		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1677 			struct rt6_info *entry = rt6_ex->rt6i;
1678 
1679 			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
1680 			 * route), the metrics of its rt->from have already
1681 			 * been updated.
1682 			 */
1683 			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
1684 			    rt6_mtu_change_route_allowed(idev, entry, mtu))
1685 				dst_metric_set(&entry->dst, RTAX_MTU, mtu);
1686 		}
1687 		bucket++;
1688 	}
1689 }
1690 
1691 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
1692 
1693 static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
1694 					struct in6_addr *gateway)
1695 {
1696 	struct rt6_exception_bucket *bucket;
1697 	struct rt6_exception *rt6_ex;
1698 	struct hlist_node *tmp;
1699 	int i;
1700 
1701 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1702 		return;
1703 
1704 	spin_lock_bh(&rt6_exception_lock);
1705 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1706 				     lockdep_is_held(&rt6_exception_lock));
1707 
1708 	if (bucket) {
1709 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1710 			hlist_for_each_entry_safe(rt6_ex, tmp,
1711 						  &bucket->chain, hlist) {
1712 				struct rt6_info *entry = rt6_ex->rt6i;
1713 
1714 				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1715 				    RTF_CACHE_GATEWAY &&
1716 				    ipv6_addr_equal(gateway,
1717 						    &entry->rt6i_gateway)) {
1718 					rt6_remove_exception(bucket, rt6_ex);
1719 				}
1720 			}
1721 			bucket++;
1722 		}
1723 	}
1724 
1725 	spin_unlock_bh(&rt6_exception_lock);
1726 }
1727 
1728 static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1729 				      struct rt6_exception *rt6_ex,
1730 				      struct fib6_gc_args *gc_args,
1731 				      unsigned long now)
1732 {
1733 	struct rt6_info *rt = rt6_ex->rt6i;
1734 
1735 	/* we are pruning and obsoleting aged-out and non gateway exceptions
1736 	 * even if others have still references to them, so that on next
1737 	 * dst_check() such references can be dropped.
1738 	 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1739 	 * expired, independently from their aging, as per RFC 8201 section 4
1740 	 */
1741 	if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1742 		if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1743 			RT6_TRACE("aging clone %p\n", rt);
1744 			rt6_remove_exception(bucket, rt6_ex);
1745 			return;
1746 		}
1747 	} else if (time_after(jiffies, rt->dst.expires)) {
1748 		RT6_TRACE("purging expired route %p\n", rt);
1749 		rt6_remove_exception(bucket, rt6_ex);
1750 		return;
1751 	}
1752 
1753 	if (rt->rt6i_flags & RTF_GATEWAY) {
1754 		struct neighbour *neigh;
1755 		__u8 neigh_flags = 0;
1756 
1757 		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1758 		if (neigh)
1759 			neigh_flags = neigh->flags;
1760 
1761 		if (!(neigh_flags & NTF_ROUTER)) {
1762 			RT6_TRACE("purging route %p via non-router but gateway\n",
1763 				  rt);
1764 			rt6_remove_exception(bucket, rt6_ex);
1765 			return;
1766 		}
1767 	}
1768 
1769 	gc_args->more++;
1770 }
1771 
1772 void rt6_age_exceptions(struct fib6_info *rt,
1773 			struct fib6_gc_args *gc_args,
1774 			unsigned long now)
1775 {
1776 	struct rt6_exception_bucket *bucket;
1777 	struct rt6_exception *rt6_ex;
1778 	struct hlist_node *tmp;
1779 	int i;
1780 
1781 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1782 		return;
1783 
1784 	rcu_read_lock_bh();
1785 	spin_lock(&rt6_exception_lock);
1786 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1787 				    lockdep_is_held(&rt6_exception_lock));
1788 
1789 	if (bucket) {
1790 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1791 			hlist_for_each_entry_safe(rt6_ex, tmp,
1792 						  &bucket->chain, hlist) {
1793 				rt6_age_examine_exception(bucket, rt6_ex,
1794 							  gc_args, now);
1795 			}
1796 			bucket++;
1797 		}
1798 	}
1799 	spin_unlock(&rt6_exception_lock);
1800 	rcu_read_unlock_bh();
1801 }
1802 
1803 /* must be called with rcu lock held */
1804 struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
1805 				    int oif, struct flowi6 *fl6, int strict)
1806 {
1807 	struct fib6_node *fn, *saved_fn;
1808 	struct fib6_info *f6i;
1809 
1810 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1811 	saved_fn = fn;
1812 
1813 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1814 		oif = 0;
1815 
1816 redo_rt6_select:
1817 	f6i = rt6_select(net, fn, oif, strict);
1818 	if (f6i == net->ipv6.fib6_null_entry) {
1819 		fn = fib6_backtrack(fn, &fl6->saddr);
1820 		if (fn)
1821 			goto redo_rt6_select;
1822 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1823 			/* also consider unreachable route */
1824 			strict &= ~RT6_LOOKUP_F_REACHABLE;
1825 			fn = saved_fn;
1826 			goto redo_rt6_select;
1827 		}
1828 	}
1829 
1830 	trace_fib6_table_lookup(net, f6i, table, fl6);
1831 
1832 	return f6i;
1833 }
1834 
1835 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1836 			       int oif, struct flowi6 *fl6,
1837 			       const struct sk_buff *skb, int flags)
1838 {
1839 	struct fib6_info *f6i;
1840 	struct rt6_info *rt;
1841 	int strict = 0;
1842 
1843 	strict |= flags & RT6_LOOKUP_F_IFACE;
1844 	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1845 	if (net->ipv6.devconf_all->forwarding == 0)
1846 		strict |= RT6_LOOKUP_F_REACHABLE;
1847 
1848 	rcu_read_lock();
1849 
1850 	f6i = fib6_table_lookup(net, table, oif, fl6, strict);
1851 	if (f6i->fib6_nsiblings)
1852 		f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
1853 
1854 	if (f6i == net->ipv6.fib6_null_entry) {
1855 		rt = net->ipv6.ip6_null_entry;
1856 		rcu_read_unlock();
1857 		dst_hold(&rt->dst);
1858 		return rt;
1859 	}
1860 
1861 	/*Search through exception table */
1862 	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1863 	if (rt) {
1864 		if (ip6_hold_safe(net, &rt, true))
1865 			dst_use_noref(&rt->dst, jiffies);
1866 
1867 		rcu_read_unlock();
1868 		return rt;
1869 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1870 			    !(f6i->fib6_flags & RTF_GATEWAY))) {
1871 		/* Create a RTF_CACHE clone which will not be
1872 		 * owned by the fib6 tree.  It is for the special case where
1873 		 * the daddr in the skb during the neighbor look-up is different
1874 		 * from the fl6->daddr used to look-up route here.
1875 		 */
1876 		struct rt6_info *uncached_rt;
1877 
1878 		uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
1879 
1880 		rcu_read_unlock();
1881 
1882 		if (uncached_rt) {
1883 			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1884 			 * No need for another dst_hold()
1885 			 */
1886 			rt6_uncached_list_add(uncached_rt);
1887 			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1888 		} else {
1889 			uncached_rt = net->ipv6.ip6_null_entry;
1890 			dst_hold(&uncached_rt->dst);
1891 		}
1892 
1893 		return uncached_rt;
1894 	} else {
1895 		/* Get a percpu copy */
1896 
1897 		struct rt6_info *pcpu_rt;
1898 
1899 		local_bh_disable();
1900 		pcpu_rt = rt6_get_pcpu_route(f6i);
1901 
1902 		if (!pcpu_rt)
1903 			pcpu_rt = rt6_make_pcpu_route(net, f6i);
1904 
1905 		local_bh_enable();
1906 		rcu_read_unlock();
1907 
1908 		return pcpu_rt;
1909 	}
1910 }
1911 EXPORT_SYMBOL_GPL(ip6_pol_route);
1912 
1913 static struct rt6_info *ip6_pol_route_input(struct net *net,
1914 					    struct fib6_table *table,
1915 					    struct flowi6 *fl6,
1916 					    const struct sk_buff *skb,
1917 					    int flags)
1918 {
1919 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
1920 }
1921 
1922 struct dst_entry *ip6_route_input_lookup(struct net *net,
1923 					 struct net_device *dev,
1924 					 struct flowi6 *fl6,
1925 					 const struct sk_buff *skb,
1926 					 int flags)
1927 {
1928 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1929 		flags |= RT6_LOOKUP_F_IFACE;
1930 
1931 	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
1932 }
1933 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1934 
1935 static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1936 				  struct flow_keys *keys,
1937 				  struct flow_keys *flkeys)
1938 {
1939 	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1940 	const struct ipv6hdr *key_iph = outer_iph;
1941 	struct flow_keys *_flkeys = flkeys;
1942 	const struct ipv6hdr *inner_iph;
1943 	const struct icmp6hdr *icmph;
1944 	struct ipv6hdr _inner_iph;
1945 	struct icmp6hdr _icmph;
1946 
1947 	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1948 		goto out;
1949 
1950 	icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1951 				   sizeof(_icmph), &_icmph);
1952 	if (!icmph)
1953 		goto out;
1954 
1955 	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1956 	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1957 	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1958 	    icmph->icmp6_type != ICMPV6_PARAMPROB)
1959 		goto out;
1960 
1961 	inner_iph = skb_header_pointer(skb,
1962 				       skb_transport_offset(skb) + sizeof(*icmph),
1963 				       sizeof(_inner_iph), &_inner_iph);
1964 	if (!inner_iph)
1965 		goto out;
1966 
1967 	key_iph = inner_iph;
1968 	_flkeys = NULL;
1969 out:
1970 	if (_flkeys) {
1971 		keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1972 		keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1973 		keys->tags.flow_label = _flkeys->tags.flow_label;
1974 		keys->basic.ip_proto = _flkeys->basic.ip_proto;
1975 	} else {
1976 		keys->addrs.v6addrs.src = key_iph->saddr;
1977 		keys->addrs.v6addrs.dst = key_iph->daddr;
1978 		keys->tags.flow_label = ip6_flowinfo(key_iph);
1979 		keys->basic.ip_proto = key_iph->nexthdr;
1980 	}
1981 }
1982 
1983 /* if skb is set it will be used and fl6 can be NULL */
1984 u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1985 		       const struct sk_buff *skb, struct flow_keys *flkeys)
1986 {
1987 	struct flow_keys hash_keys;
1988 	u32 mhash;
1989 
1990 	switch (ip6_multipath_hash_policy(net)) {
1991 	case 0:
1992 		memset(&hash_keys, 0, sizeof(hash_keys));
1993 		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1994 		if (skb) {
1995 			ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
1996 		} else {
1997 			hash_keys.addrs.v6addrs.src = fl6->saddr;
1998 			hash_keys.addrs.v6addrs.dst = fl6->daddr;
1999 			hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
2000 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2001 		}
2002 		break;
2003 	case 1:
2004 		if (skb) {
2005 			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2006 			struct flow_keys keys;
2007 
2008 			/* short-circuit if we already have L4 hash present */
2009 			if (skb->l4_hash)
2010 				return skb_get_hash_raw(skb) >> 1;
2011 
2012 			memset(&hash_keys, 0, sizeof(hash_keys));
2013 
2014                         if (!flkeys) {
2015 				skb_flow_dissect_flow_keys(skb, &keys, flag);
2016 				flkeys = &keys;
2017 			}
2018 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2019 			hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2020 			hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2021 			hash_keys.ports.src = flkeys->ports.src;
2022 			hash_keys.ports.dst = flkeys->ports.dst;
2023 			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2024 		} else {
2025 			memset(&hash_keys, 0, sizeof(hash_keys));
2026 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2027 			hash_keys.addrs.v6addrs.src = fl6->saddr;
2028 			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2029 			hash_keys.ports.src = fl6->fl6_sport;
2030 			hash_keys.ports.dst = fl6->fl6_dport;
2031 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2032 		}
2033 		break;
2034 	}
2035 	mhash = flow_hash_from_keys(&hash_keys);
2036 
2037 	return mhash >> 1;
2038 }
2039 
2040 void ip6_route_input(struct sk_buff *skb)
2041 {
2042 	const struct ipv6hdr *iph = ipv6_hdr(skb);
2043 	struct net *net = dev_net(skb->dev);
2044 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2045 	struct ip_tunnel_info *tun_info;
2046 	struct flowi6 fl6 = {
2047 		.flowi6_iif = skb->dev->ifindex,
2048 		.daddr = iph->daddr,
2049 		.saddr = iph->saddr,
2050 		.flowlabel = ip6_flowinfo(iph),
2051 		.flowi6_mark = skb->mark,
2052 		.flowi6_proto = iph->nexthdr,
2053 	};
2054 	struct flow_keys *flkeys = NULL, _flkeys;
2055 
2056 	tun_info = skb_tunnel_info(skb);
2057 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2058 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
2059 
2060 	if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2061 		flkeys = &_flkeys;
2062 
2063 	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2064 		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
2065 	skb_dst_drop(skb);
2066 	skb_dst_set(skb,
2067 		    ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
2068 }
2069 
2070 static struct rt6_info *ip6_pol_route_output(struct net *net,
2071 					     struct fib6_table *table,
2072 					     struct flowi6 *fl6,
2073 					     const struct sk_buff *skb,
2074 					     int flags)
2075 {
2076 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2077 }
2078 
2079 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2080 					 struct flowi6 *fl6, int flags)
2081 {
2082 	bool any_src;
2083 
2084 	if (rt6_need_strict(&fl6->daddr)) {
2085 		struct dst_entry *dst;
2086 
2087 		dst = l3mdev_link_scope_lookup(net, fl6);
2088 		if (dst)
2089 			return dst;
2090 	}
2091 
2092 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
2093 
2094 	any_src = ipv6_addr_any(&fl6->saddr);
2095 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2096 	    (fl6->flowi6_oif && any_src))
2097 		flags |= RT6_LOOKUP_F_IFACE;
2098 
2099 	if (!any_src)
2100 		flags |= RT6_LOOKUP_F_HAS_SADDR;
2101 	else if (sk)
2102 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
2103 
2104 	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
2105 }
2106 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
2107 
2108 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2109 {
2110 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
2111 	struct net_device *loopback_dev = net->loopback_dev;
2112 	struct dst_entry *new = NULL;
2113 
2114 	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
2115 		       DST_OBSOLETE_DEAD, 0);
2116 	if (rt) {
2117 		rt6_info_init(rt);
2118 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
2119 
2120 		new = &rt->dst;
2121 		new->__use = 1;
2122 		new->input = dst_discard;
2123 		new->output = dst_discard_out;
2124 
2125 		dst_copy_metrics(new, &ort->dst);
2126 
2127 		rt->rt6i_idev = in6_dev_get(loopback_dev);
2128 		rt->rt6i_gateway = ort->rt6i_gateway;
2129 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
2130 
2131 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2132 #ifdef CONFIG_IPV6_SUBTREES
2133 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2134 #endif
2135 	}
2136 
2137 	dst_release(dst_orig);
2138 	return new ? new : ERR_PTR(-ENOMEM);
2139 }
2140 
2141 /*
2142  *	Destination cache support functions
2143  */
2144 
2145 static bool fib6_check(struct fib6_info *f6i, u32 cookie)
2146 {
2147 	u32 rt_cookie = 0;
2148 
2149 	if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
2150 		return false;
2151 
2152 	if (fib6_check_expired(f6i))
2153 		return false;
2154 
2155 	return true;
2156 }
2157 
2158 static struct dst_entry *rt6_check(struct rt6_info *rt,
2159 				   struct fib6_info *from,
2160 				   u32 cookie)
2161 {
2162 	u32 rt_cookie = 0;
2163 
2164 	if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
2165 	    rt_cookie != cookie)
2166 		return NULL;
2167 
2168 	if (rt6_check_expired(rt))
2169 		return NULL;
2170 
2171 	return &rt->dst;
2172 }
2173 
2174 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2175 					    struct fib6_info *from,
2176 					    u32 cookie)
2177 {
2178 	if (!__rt6_check_expired(rt) &&
2179 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
2180 	    fib6_check(from, cookie))
2181 		return &rt->dst;
2182 	else
2183 		return NULL;
2184 }
2185 
2186 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2187 {
2188 	struct dst_entry *dst_ret;
2189 	struct fib6_info *from;
2190 	struct rt6_info *rt;
2191 
2192 	rt = container_of(dst, struct rt6_info, dst);
2193 
2194 	rcu_read_lock();
2195 
2196 	/* All IPV6 dsts are created with ->obsolete set to the value
2197 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2198 	 * into this function always.
2199 	 */
2200 
2201 	from = rcu_dereference(rt->from);
2202 
2203 	if (from && (rt->rt6i_flags & RTF_PCPU ||
2204 	    unlikely(!list_empty(&rt->rt6i_uncached))))
2205 		dst_ret = rt6_dst_from_check(rt, from, cookie);
2206 	else
2207 		dst_ret = rt6_check(rt, from, cookie);
2208 
2209 	rcu_read_unlock();
2210 
2211 	return dst_ret;
2212 }
2213 
2214 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2215 {
2216 	struct rt6_info *rt = (struct rt6_info *) dst;
2217 
2218 	if (rt) {
2219 		if (rt->rt6i_flags & RTF_CACHE) {
2220 			rcu_read_lock();
2221 			if (rt6_check_expired(rt)) {
2222 				rt6_remove_exception_rt(rt);
2223 				dst = NULL;
2224 			}
2225 			rcu_read_unlock();
2226 		} else {
2227 			dst_release(dst);
2228 			dst = NULL;
2229 		}
2230 	}
2231 	return dst;
2232 }
2233 
2234 static void ip6_link_failure(struct sk_buff *skb)
2235 {
2236 	struct rt6_info *rt;
2237 
2238 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
2239 
2240 	rt = (struct rt6_info *) skb_dst(skb);
2241 	if (rt) {
2242 		rcu_read_lock();
2243 		if (rt->rt6i_flags & RTF_CACHE) {
2244 			if (dst_hold_safe(&rt->dst))
2245 				rt6_remove_exception_rt(rt);
2246 		} else {
2247 			struct fib6_info *from;
2248 			struct fib6_node *fn;
2249 
2250 			from = rcu_dereference(rt->from);
2251 			if (from) {
2252 				fn = rcu_dereference(from->fib6_node);
2253 				if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2254 					fn->fn_sernum = -1;
2255 			}
2256 		}
2257 		rcu_read_unlock();
2258 	}
2259 }
2260 
2261 static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2262 {
2263 	if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2264 		struct fib6_info *from;
2265 
2266 		rcu_read_lock();
2267 		from = rcu_dereference(rt0->from);
2268 		if (from)
2269 			rt0->dst.expires = from->expires;
2270 		rcu_read_unlock();
2271 	}
2272 
2273 	dst_set_expires(&rt0->dst, timeout);
2274 	rt0->rt6i_flags |= RTF_EXPIRES;
2275 }
2276 
2277 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2278 {
2279 	struct net *net = dev_net(rt->dst.dev);
2280 
2281 	dst_metric_set(&rt->dst, RTAX_MTU, mtu);
2282 	rt->rt6i_flags |= RTF_MODIFIED;
2283 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2284 }
2285 
2286 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2287 {
2288 	bool from_set;
2289 
2290 	rcu_read_lock();
2291 	from_set = !!rcu_dereference(rt->from);
2292 	rcu_read_unlock();
2293 
2294 	return !(rt->rt6i_flags & RTF_CACHE) &&
2295 		(rt->rt6i_flags & RTF_PCPU || from_set);
2296 }
2297 
2298 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2299 				 const struct ipv6hdr *iph, u32 mtu)
2300 {
2301 	const struct in6_addr *daddr, *saddr;
2302 	struct rt6_info *rt6 = (struct rt6_info *)dst;
2303 
2304 	if (rt6->rt6i_flags & RTF_LOCAL)
2305 		return;
2306 
2307 	if (dst_metric_locked(dst, RTAX_MTU))
2308 		return;
2309 
2310 	if (iph) {
2311 		daddr = &iph->daddr;
2312 		saddr = &iph->saddr;
2313 	} else if (sk) {
2314 		daddr = &sk->sk_v6_daddr;
2315 		saddr = &inet6_sk(sk)->saddr;
2316 	} else {
2317 		daddr = NULL;
2318 		saddr = NULL;
2319 	}
2320 	dst_confirm_neigh(dst, daddr);
2321 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2322 	if (mtu >= dst_mtu(dst))
2323 		return;
2324 
2325 	if (!rt6_cache_allowed_for_pmtu(rt6)) {
2326 		rt6_do_update_pmtu(rt6, mtu);
2327 		/* update rt6_ex->stamp for cache */
2328 		if (rt6->rt6i_flags & RTF_CACHE)
2329 			rt6_update_exception_stamp_rt(rt6);
2330 	} else if (daddr) {
2331 		struct fib6_info *from;
2332 		struct rt6_info *nrt6;
2333 
2334 		rcu_read_lock();
2335 		from = rcu_dereference(rt6->from);
2336 		nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
2337 		if (nrt6) {
2338 			rt6_do_update_pmtu(nrt6, mtu);
2339 			if (rt6_insert_exception(nrt6, from))
2340 				dst_release_immediate(&nrt6->dst);
2341 		}
2342 		rcu_read_unlock();
2343 	}
2344 }
2345 
2346 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2347 			       struct sk_buff *skb, u32 mtu)
2348 {
2349 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2350 }
2351 
2352 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2353 		     int oif, u32 mark, kuid_t uid)
2354 {
2355 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2356 	struct dst_entry *dst;
2357 	struct flowi6 fl6;
2358 
2359 	memset(&fl6, 0, sizeof(fl6));
2360 	fl6.flowi6_oif = oif;
2361 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
2362 	fl6.daddr = iph->daddr;
2363 	fl6.saddr = iph->saddr;
2364 	fl6.flowlabel = ip6_flowinfo(iph);
2365 	fl6.flowi6_uid = uid;
2366 
2367 	dst = ip6_route_output(net, NULL, &fl6);
2368 	if (!dst->error)
2369 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
2370 	dst_release(dst);
2371 }
2372 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2373 
2374 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2375 {
2376 	struct dst_entry *dst;
2377 
2378 	ip6_update_pmtu(skb, sock_net(sk), mtu,
2379 			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
2380 
2381 	dst = __sk_dst_get(sk);
2382 	if (!dst || !dst->obsolete ||
2383 	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2384 		return;
2385 
2386 	bh_lock_sock(sk);
2387 	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2388 		ip6_datagram_dst_update(sk, false);
2389 	bh_unlock_sock(sk);
2390 }
2391 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2392 
2393 void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2394 			   const struct flowi6 *fl6)
2395 {
2396 #ifdef CONFIG_IPV6_SUBTREES
2397 	struct ipv6_pinfo *np = inet6_sk(sk);
2398 #endif
2399 
2400 	ip6_dst_store(sk, dst,
2401 		      ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2402 		      &sk->sk_v6_daddr : NULL,
2403 #ifdef CONFIG_IPV6_SUBTREES
2404 		      ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2405 		      &np->saddr :
2406 #endif
2407 		      NULL);
2408 }
2409 
2410 /* Handle redirects */
2411 struct ip6rd_flowi {
2412 	struct flowi6 fl6;
2413 	struct in6_addr gateway;
2414 };
2415 
2416 static struct rt6_info *__ip6_route_redirect(struct net *net,
2417 					     struct fib6_table *table,
2418 					     struct flowi6 *fl6,
2419 					     const struct sk_buff *skb,
2420 					     int flags)
2421 {
2422 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2423 	struct rt6_info *ret = NULL, *rt_cache;
2424 	struct fib6_info *rt;
2425 	struct fib6_node *fn;
2426 
2427 	/* Get the "current" route for this destination and
2428 	 * check if the redirect has come from appropriate router.
2429 	 *
2430 	 * RFC 4861 specifies that redirects should only be
2431 	 * accepted if they come from the nexthop to the target.
2432 	 * Due to the way the routes are chosen, this notion
2433 	 * is a bit fuzzy and one might need to check all possible
2434 	 * routes.
2435 	 */
2436 
2437 	rcu_read_lock();
2438 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2439 restart:
2440 	for_each_fib6_node_rt_rcu(fn) {
2441 		if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
2442 			continue;
2443 		if (fib6_check_expired(rt))
2444 			continue;
2445 		if (rt->fib6_flags & RTF_REJECT)
2446 			break;
2447 		if (!(rt->fib6_flags & RTF_GATEWAY))
2448 			continue;
2449 		if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
2450 			continue;
2451 		/* rt_cache's gateway might be different from its 'parent'
2452 		 * in the case of an ip redirect.
2453 		 * So we keep searching in the exception table if the gateway
2454 		 * is different.
2455 		 */
2456 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2457 			rt_cache = rt6_find_cached_rt(rt,
2458 						      &fl6->daddr,
2459 						      &fl6->saddr);
2460 			if (rt_cache &&
2461 			    ipv6_addr_equal(&rdfl->gateway,
2462 					    &rt_cache->rt6i_gateway)) {
2463 				ret = rt_cache;
2464 				break;
2465 			}
2466 			continue;
2467 		}
2468 		break;
2469 	}
2470 
2471 	if (!rt)
2472 		rt = net->ipv6.fib6_null_entry;
2473 	else if (rt->fib6_flags & RTF_REJECT) {
2474 		ret = net->ipv6.ip6_null_entry;
2475 		goto out;
2476 	}
2477 
2478 	if (rt == net->ipv6.fib6_null_entry) {
2479 		fn = fib6_backtrack(fn, &fl6->saddr);
2480 		if (fn)
2481 			goto restart;
2482 	}
2483 
2484 out:
2485 	if (ret)
2486 		dst_hold(&ret->dst);
2487 	else
2488 		ret = ip6_create_rt_rcu(rt);
2489 
2490 	rcu_read_unlock();
2491 
2492 	trace_fib6_table_lookup(net, rt, table, fl6);
2493 	return ret;
2494 };
2495 
2496 static struct dst_entry *ip6_route_redirect(struct net *net,
2497 					    const struct flowi6 *fl6,
2498 					    const struct sk_buff *skb,
2499 					    const struct in6_addr *gateway)
2500 {
2501 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2502 	struct ip6rd_flowi rdfl;
2503 
2504 	rdfl.fl6 = *fl6;
2505 	rdfl.gateway = *gateway;
2506 
2507 	return fib6_rule_lookup(net, &rdfl.fl6, skb,
2508 				flags, __ip6_route_redirect);
2509 }
2510 
2511 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2512 		  kuid_t uid)
2513 {
2514 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2515 	struct dst_entry *dst;
2516 	struct flowi6 fl6;
2517 
2518 	memset(&fl6, 0, sizeof(fl6));
2519 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
2520 	fl6.flowi6_oif = oif;
2521 	fl6.flowi6_mark = mark;
2522 	fl6.daddr = iph->daddr;
2523 	fl6.saddr = iph->saddr;
2524 	fl6.flowlabel = ip6_flowinfo(iph);
2525 	fl6.flowi6_uid = uid;
2526 
2527 	dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
2528 	rt6_do_redirect(dst, NULL, skb);
2529 	dst_release(dst);
2530 }
2531 EXPORT_SYMBOL_GPL(ip6_redirect);
2532 
2533 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2534 			    u32 mark)
2535 {
2536 	const struct ipv6hdr *iph = ipv6_hdr(skb);
2537 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2538 	struct dst_entry *dst;
2539 	struct flowi6 fl6;
2540 
2541 	memset(&fl6, 0, sizeof(fl6));
2542 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
2543 	fl6.flowi6_oif = oif;
2544 	fl6.flowi6_mark = mark;
2545 	fl6.daddr = msg->dest;
2546 	fl6.saddr = iph->daddr;
2547 	fl6.flowi6_uid = sock_net_uid(net, NULL);
2548 
2549 	dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
2550 	rt6_do_redirect(dst, NULL, skb);
2551 	dst_release(dst);
2552 }
2553 
2554 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2555 {
2556 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2557 		     sk->sk_uid);
2558 }
2559 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2560 
2561 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
2562 {
2563 	struct net_device *dev = dst->dev;
2564 	unsigned int mtu = dst_mtu(dst);
2565 	struct net *net = dev_net(dev);
2566 
2567 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2568 
2569 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2570 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
2571 
2572 	/*
2573 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2574 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2575 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
2576 	 * rely only on pmtu discovery"
2577 	 */
2578 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2579 		mtu = IPV6_MAXPLEN;
2580 	return mtu;
2581 }
2582 
2583 static unsigned int ip6_mtu(const struct dst_entry *dst)
2584 {
2585 	struct inet6_dev *idev;
2586 	unsigned int mtu;
2587 
2588 	mtu = dst_metric_raw(dst, RTAX_MTU);
2589 	if (mtu)
2590 		goto out;
2591 
2592 	mtu = IPV6_MIN_MTU;
2593 
2594 	rcu_read_lock();
2595 	idev = __in6_dev_get(dst->dev);
2596 	if (idev)
2597 		mtu = idev->cnf.mtu6;
2598 	rcu_read_unlock();
2599 
2600 out:
2601 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2602 
2603 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
2604 }
2605 
2606 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
2607 				  struct flowi6 *fl6)
2608 {
2609 	struct dst_entry *dst;
2610 	struct rt6_info *rt;
2611 	struct inet6_dev *idev = in6_dev_get(dev);
2612 	struct net *net = dev_net(dev);
2613 
2614 	if (unlikely(!idev))
2615 		return ERR_PTR(-ENODEV);
2616 
2617 	rt = ip6_dst_alloc(net, dev, 0);
2618 	if (unlikely(!rt)) {
2619 		in6_dev_put(idev);
2620 		dst = ERR_PTR(-ENOMEM);
2621 		goto out;
2622 	}
2623 
2624 	rt->dst.flags |= DST_HOST;
2625 	rt->dst.input = ip6_input;
2626 	rt->dst.output  = ip6_output;
2627 	rt->rt6i_gateway  = fl6->daddr;
2628 	rt->rt6i_dst.addr = fl6->daddr;
2629 	rt->rt6i_dst.plen = 128;
2630 	rt->rt6i_idev     = idev;
2631 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
2632 
2633 	/* Add this dst into uncached_list so that rt6_disable_ip() can
2634 	 * do proper release of the net_device
2635 	 */
2636 	rt6_uncached_list_add(rt);
2637 	atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
2638 
2639 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2640 
2641 out:
2642 	return dst;
2643 }
2644 
2645 static int ip6_dst_gc(struct dst_ops *ops)
2646 {
2647 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
2648 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2649 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2650 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2651 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2652 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
2653 	int entries;
2654 
2655 	entries = dst_entries_get_fast(ops);
2656 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
2657 	    entries <= rt_max_size)
2658 		goto out;
2659 
2660 	net->ipv6.ip6_rt_gc_expire++;
2661 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
2662 	entries = dst_entries_get_slow(ops);
2663 	if (entries < ops->gc_thresh)
2664 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
2665 out:
2666 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
2667 	return entries > rt_max_size;
2668 }
2669 
2670 static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
2671 			       struct fib6_config *cfg)
2672 {
2673 	struct dst_metrics *p;
2674 
2675 	if (!cfg->fc_mx)
2676 		return 0;
2677 
2678 	p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL);
2679 	if (unlikely(!p))
2680 		return -ENOMEM;
2681 
2682 	refcount_set(&p->refcnt, 1);
2683 	rt->fib6_metrics = p;
2684 
2685 	return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics);
2686 }
2687 
2688 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2689 					    struct fib6_config *cfg,
2690 					    const struct in6_addr *gw_addr,
2691 					    u32 tbid, int flags)
2692 {
2693 	struct flowi6 fl6 = {
2694 		.flowi6_oif = cfg->fc_ifindex,
2695 		.daddr = *gw_addr,
2696 		.saddr = cfg->fc_prefsrc,
2697 	};
2698 	struct fib6_table *table;
2699 	struct rt6_info *rt;
2700 
2701 	table = fib6_get_table(net, tbid);
2702 	if (!table)
2703 		return NULL;
2704 
2705 	if (!ipv6_addr_any(&cfg->fc_prefsrc))
2706 		flags |= RT6_LOOKUP_F_HAS_SADDR;
2707 
2708 	flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
2709 	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
2710 
2711 	/* if table lookup failed, fall back to full lookup */
2712 	if (rt == net->ipv6.ip6_null_entry) {
2713 		ip6_rt_put(rt);
2714 		rt = NULL;
2715 	}
2716 
2717 	return rt;
2718 }
2719 
2720 static int ip6_route_check_nh_onlink(struct net *net,
2721 				     struct fib6_config *cfg,
2722 				     const struct net_device *dev,
2723 				     struct netlink_ext_ack *extack)
2724 {
2725 	u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
2726 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
2727 	u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2728 	struct rt6_info *grt;
2729 	int err;
2730 
2731 	err = 0;
2732 	grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2733 	if (grt) {
2734 		if (!grt->dst.error &&
2735 		    (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
2736 			NL_SET_ERR_MSG(extack,
2737 				       "Nexthop has invalid gateway or device mismatch");
2738 			err = -EINVAL;
2739 		}
2740 
2741 		ip6_rt_put(grt);
2742 	}
2743 
2744 	return err;
2745 }
2746 
2747 static int ip6_route_check_nh(struct net *net,
2748 			      struct fib6_config *cfg,
2749 			      struct net_device **_dev,
2750 			      struct inet6_dev **idev)
2751 {
2752 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
2753 	struct net_device *dev = _dev ? *_dev : NULL;
2754 	struct rt6_info *grt = NULL;
2755 	int err = -EHOSTUNREACH;
2756 
2757 	if (cfg->fc_table) {
2758 		int flags = RT6_LOOKUP_F_IFACE;
2759 
2760 		grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2761 					  cfg->fc_table, flags);
2762 		if (grt) {
2763 			if (grt->rt6i_flags & RTF_GATEWAY ||
2764 			    (dev && dev != grt->dst.dev)) {
2765 				ip6_rt_put(grt);
2766 				grt = NULL;
2767 			}
2768 		}
2769 	}
2770 
2771 	if (!grt)
2772 		grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
2773 
2774 	if (!grt)
2775 		goto out;
2776 
2777 	if (dev) {
2778 		if (dev != grt->dst.dev) {
2779 			ip6_rt_put(grt);
2780 			goto out;
2781 		}
2782 	} else {
2783 		*_dev = dev = grt->dst.dev;
2784 		*idev = grt->rt6i_idev;
2785 		dev_hold(dev);
2786 		in6_dev_hold(grt->rt6i_idev);
2787 	}
2788 
2789 	if (!(grt->rt6i_flags & RTF_GATEWAY))
2790 		err = 0;
2791 
2792 	ip6_rt_put(grt);
2793 
2794 out:
2795 	return err;
2796 }
2797 
2798 static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2799 			   struct net_device **_dev, struct inet6_dev **idev,
2800 			   struct netlink_ext_ack *extack)
2801 {
2802 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
2803 	int gwa_type = ipv6_addr_type(gw_addr);
2804 	bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
2805 	const struct net_device *dev = *_dev;
2806 	bool need_addr_check = !dev;
2807 	int err = -EINVAL;
2808 
2809 	/* if gw_addr is local we will fail to detect this in case
2810 	 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2811 	 * will return already-added prefix route via interface that
2812 	 * prefix route was assigned to, which might be non-loopback.
2813 	 */
2814 	if (dev &&
2815 	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2816 		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2817 		goto out;
2818 	}
2819 
2820 	if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2821 		/* IPv6 strictly inhibits using not link-local
2822 		 * addresses as nexthop address.
2823 		 * Otherwise, router will not able to send redirects.
2824 		 * It is very good, but in some (rare!) circumstances
2825 		 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2826 		 * some exceptions. --ANK
2827 		 * We allow IPv4-mapped nexthops to support RFC4798-type
2828 		 * addressing
2829 		 */
2830 		if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2831 			NL_SET_ERR_MSG(extack, "Invalid gateway address");
2832 			goto out;
2833 		}
2834 
2835 		if (cfg->fc_flags & RTNH_F_ONLINK)
2836 			err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2837 		else
2838 			err = ip6_route_check_nh(net, cfg, _dev, idev);
2839 
2840 		if (err)
2841 			goto out;
2842 	}
2843 
2844 	/* reload in case device was changed */
2845 	dev = *_dev;
2846 
2847 	err = -EINVAL;
2848 	if (!dev) {
2849 		NL_SET_ERR_MSG(extack, "Egress device not specified");
2850 		goto out;
2851 	} else if (dev->flags & IFF_LOOPBACK) {
2852 		NL_SET_ERR_MSG(extack,
2853 			       "Egress device can not be loopback device for this route");
2854 		goto out;
2855 	}
2856 
2857 	/* if we did not check gw_addr above, do so now that the
2858 	 * egress device has been resolved.
2859 	 */
2860 	if (need_addr_check &&
2861 	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2862 		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2863 		goto out;
2864 	}
2865 
2866 	err = 0;
2867 out:
2868 	return err;
2869 }
2870 
2871 static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
2872 					      gfp_t gfp_flags,
2873 					      struct netlink_ext_ack *extack)
2874 {
2875 	struct net *net = cfg->fc_nlinfo.nl_net;
2876 	struct fib6_info *rt = NULL;
2877 	struct net_device *dev = NULL;
2878 	struct inet6_dev *idev = NULL;
2879 	struct fib6_table *table;
2880 	int addr_type;
2881 	int err = -EINVAL;
2882 
2883 	/* RTF_PCPU is an internal flag; can not be set by userspace */
2884 	if (cfg->fc_flags & RTF_PCPU) {
2885 		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
2886 		goto out;
2887 	}
2888 
2889 	/* RTF_CACHE is an internal flag; can not be set by userspace */
2890 	if (cfg->fc_flags & RTF_CACHE) {
2891 		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2892 		goto out;
2893 	}
2894 
2895 	if (cfg->fc_type > RTN_MAX) {
2896 		NL_SET_ERR_MSG(extack, "Invalid route type");
2897 		goto out;
2898 	}
2899 
2900 	if (cfg->fc_dst_len > 128) {
2901 		NL_SET_ERR_MSG(extack, "Invalid prefix length");
2902 		goto out;
2903 	}
2904 	if (cfg->fc_src_len > 128) {
2905 		NL_SET_ERR_MSG(extack, "Invalid source address length");
2906 		goto out;
2907 	}
2908 #ifndef CONFIG_IPV6_SUBTREES
2909 	if (cfg->fc_src_len) {
2910 		NL_SET_ERR_MSG(extack,
2911 			       "Specifying source address requires IPV6_SUBTREES to be enabled");
2912 		goto out;
2913 	}
2914 #endif
2915 	if (cfg->fc_ifindex) {
2916 		err = -ENODEV;
2917 		dev = dev_get_by_index(net, cfg->fc_ifindex);
2918 		if (!dev)
2919 			goto out;
2920 		idev = in6_dev_get(dev);
2921 		if (!idev)
2922 			goto out;
2923 	}
2924 
2925 	if (cfg->fc_metric == 0)
2926 		cfg->fc_metric = IP6_RT_PRIO_USER;
2927 
2928 	if (cfg->fc_flags & RTNH_F_ONLINK) {
2929 		if (!dev) {
2930 			NL_SET_ERR_MSG(extack,
2931 				       "Nexthop device required for onlink");
2932 			err = -ENODEV;
2933 			goto out;
2934 		}
2935 
2936 		if (!(dev->flags & IFF_UP)) {
2937 			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2938 			err = -ENETDOWN;
2939 			goto out;
2940 		}
2941 	}
2942 
2943 	err = -ENOBUFS;
2944 	if (cfg->fc_nlinfo.nlh &&
2945 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
2946 		table = fib6_get_table(net, cfg->fc_table);
2947 		if (!table) {
2948 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
2949 			table = fib6_new_table(net, cfg->fc_table);
2950 		}
2951 	} else {
2952 		table = fib6_new_table(net, cfg->fc_table);
2953 	}
2954 
2955 	if (!table)
2956 		goto out;
2957 
2958 	err = -ENOMEM;
2959 	rt = fib6_info_alloc(gfp_flags);
2960 	if (!rt)
2961 		goto out;
2962 
2963 	if (cfg->fc_flags & RTF_ADDRCONF)
2964 		rt->dst_nocount = true;
2965 
2966 	err = ip6_convert_metrics(net, rt, cfg);
2967 	if (err < 0)
2968 		goto out;
2969 
2970 	if (cfg->fc_flags & RTF_EXPIRES)
2971 		fib6_set_expires(rt, jiffies +
2972 				clock_t_to_jiffies(cfg->fc_expires));
2973 	else
2974 		fib6_clean_expires(rt);
2975 
2976 	if (cfg->fc_protocol == RTPROT_UNSPEC)
2977 		cfg->fc_protocol = RTPROT_BOOT;
2978 	rt->fib6_protocol = cfg->fc_protocol;
2979 
2980 	addr_type = ipv6_addr_type(&cfg->fc_dst);
2981 
2982 	if (cfg->fc_encap) {
2983 		struct lwtunnel_state *lwtstate;
2984 
2985 		err = lwtunnel_build_state(cfg->fc_encap_type,
2986 					   cfg->fc_encap, AF_INET6, cfg,
2987 					   &lwtstate, extack);
2988 		if (err)
2989 			goto out;
2990 		rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
2991 	}
2992 
2993 	ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2994 	rt->fib6_dst.plen = cfg->fc_dst_len;
2995 	if (rt->fib6_dst.plen == 128)
2996 		rt->dst_host = true;
2997 
2998 #ifdef CONFIG_IPV6_SUBTREES
2999 	ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3000 	rt->fib6_src.plen = cfg->fc_src_len;
3001 #endif
3002 
3003 	rt->fib6_metric = cfg->fc_metric;
3004 	rt->fib6_nh.nh_weight = 1;
3005 
3006 	rt->fib6_type = cfg->fc_type;
3007 
3008 	/* We cannot add true routes via loopback here,
3009 	   they would result in kernel looping; promote them to reject routes
3010 	 */
3011 	if ((cfg->fc_flags & RTF_REJECT) ||
3012 	    (dev && (dev->flags & IFF_LOOPBACK) &&
3013 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
3014 	     !(cfg->fc_flags & RTF_LOCAL))) {
3015 		/* hold loopback dev/idev if we haven't done so. */
3016 		if (dev != net->loopback_dev) {
3017 			if (dev) {
3018 				dev_put(dev);
3019 				in6_dev_put(idev);
3020 			}
3021 			dev = net->loopback_dev;
3022 			dev_hold(dev);
3023 			idev = in6_dev_get(dev);
3024 			if (!idev) {
3025 				err = -ENODEV;
3026 				goto out;
3027 			}
3028 		}
3029 		rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
3030 		goto install_route;
3031 	}
3032 
3033 	if (cfg->fc_flags & RTF_GATEWAY) {
3034 		err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3035 		if (err)
3036 			goto out;
3037 
3038 		rt->fib6_nh.nh_gw = cfg->fc_gateway;
3039 	}
3040 
3041 	err = -ENODEV;
3042 	if (!dev)
3043 		goto out;
3044 
3045 	if (idev->cnf.disable_ipv6) {
3046 		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3047 		err = -EACCES;
3048 		goto out;
3049 	}
3050 
3051 	if (!(dev->flags & IFF_UP)) {
3052 		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3053 		err = -ENETDOWN;
3054 		goto out;
3055 	}
3056 
3057 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3058 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
3059 			NL_SET_ERR_MSG(extack, "Invalid source address");
3060 			err = -EINVAL;
3061 			goto out;
3062 		}
3063 		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3064 		rt->fib6_prefsrc.plen = 128;
3065 	} else
3066 		rt->fib6_prefsrc.plen = 0;
3067 
3068 	rt->fib6_flags = cfg->fc_flags;
3069 
3070 install_route:
3071 	if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3072 	    !netif_carrier_ok(dev))
3073 		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3074 	rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
3075 	rt->fib6_nh.nh_dev = dev;
3076 	rt->fib6_table = table;
3077 
3078 	cfg->fc_nlinfo.nl_net = dev_net(dev);
3079 
3080 	if (idev)
3081 		in6_dev_put(idev);
3082 
3083 	return rt;
3084 out:
3085 	if (dev)
3086 		dev_put(dev);
3087 	if (idev)
3088 		in6_dev_put(idev);
3089 
3090 	fib6_info_release(rt);
3091 	return ERR_PTR(err);
3092 }
3093 
3094 int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3095 		  struct netlink_ext_ack *extack)
3096 {
3097 	struct fib6_info *rt;
3098 	int err;
3099 
3100 	rt = ip6_route_info_create(cfg, gfp_flags, extack);
3101 	if (IS_ERR(rt))
3102 		return PTR_ERR(rt);
3103 
3104 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
3105 	fib6_info_release(rt);
3106 
3107 	return err;
3108 }
3109 
3110 static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
3111 {
3112 	struct net *net = info->nl_net;
3113 	struct fib6_table *table;
3114 	int err;
3115 
3116 	if (rt == net->ipv6.fib6_null_entry) {
3117 		err = -ENOENT;
3118 		goto out;
3119 	}
3120 
3121 	table = rt->fib6_table;
3122 	spin_lock_bh(&table->tb6_lock);
3123 	err = fib6_del(rt, info);
3124 	spin_unlock_bh(&table->tb6_lock);
3125 
3126 out:
3127 	fib6_info_release(rt);
3128 	return err;
3129 }
3130 
3131 int ip6_del_rt(struct net *net, struct fib6_info *rt)
3132 {
3133 	struct nl_info info = { .nl_net = net };
3134 
3135 	return __ip6_del_rt(rt, &info);
3136 }
3137 
3138 static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
3139 {
3140 	struct nl_info *info = &cfg->fc_nlinfo;
3141 	struct net *net = info->nl_net;
3142 	struct sk_buff *skb = NULL;
3143 	struct fib6_table *table;
3144 	int err = -ENOENT;
3145 
3146 	if (rt == net->ipv6.fib6_null_entry)
3147 		goto out_put;
3148 	table = rt->fib6_table;
3149 	spin_lock_bh(&table->tb6_lock);
3150 
3151 	if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
3152 		struct fib6_info *sibling, *next_sibling;
3153 
3154 		/* prefer to send a single notification with all hops */
3155 		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3156 		if (skb) {
3157 			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3158 
3159 			if (rt6_fill_node(net, skb, rt, NULL,
3160 					  NULL, NULL, 0, RTM_DELROUTE,
3161 					  info->portid, seq, 0) < 0) {
3162 				kfree_skb(skb);
3163 				skb = NULL;
3164 			} else
3165 				info->skip_notify = 1;
3166 		}
3167 
3168 		list_for_each_entry_safe(sibling, next_sibling,
3169 					 &rt->fib6_siblings,
3170 					 fib6_siblings) {
3171 			err = fib6_del(sibling, info);
3172 			if (err)
3173 				goto out_unlock;
3174 		}
3175 	}
3176 
3177 	err = fib6_del(rt, info);
3178 out_unlock:
3179 	spin_unlock_bh(&table->tb6_lock);
3180 out_put:
3181 	fib6_info_release(rt);
3182 
3183 	if (skb) {
3184 		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3185 			    info->nlh, gfp_any());
3186 	}
3187 	return err;
3188 }
3189 
3190 static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3191 {
3192 	int rc = -ESRCH;
3193 
3194 	if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3195 		goto out;
3196 
3197 	if (cfg->fc_flags & RTF_GATEWAY &&
3198 	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3199 		goto out;
3200 	if (dst_hold_safe(&rt->dst))
3201 		rc = rt6_remove_exception_rt(rt);
3202 out:
3203 	return rc;
3204 }
3205 
3206 static int ip6_route_del(struct fib6_config *cfg,
3207 			 struct netlink_ext_ack *extack)
3208 {
3209 	struct rt6_info *rt_cache;
3210 	struct fib6_table *table;
3211 	struct fib6_info *rt;
3212 	struct fib6_node *fn;
3213 	int err = -ESRCH;
3214 
3215 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
3216 	if (!table) {
3217 		NL_SET_ERR_MSG(extack, "FIB table does not exist");
3218 		return err;
3219 	}
3220 
3221 	rcu_read_lock();
3222 
3223 	fn = fib6_locate(&table->tb6_root,
3224 			 &cfg->fc_dst, cfg->fc_dst_len,
3225 			 &cfg->fc_src, cfg->fc_src_len,
3226 			 !(cfg->fc_flags & RTF_CACHE));
3227 
3228 	if (fn) {
3229 		for_each_fib6_node_rt_rcu(fn) {
3230 			if (cfg->fc_flags & RTF_CACHE) {
3231 				int rc;
3232 
3233 				rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3234 							      &cfg->fc_src);
3235 				if (rt_cache) {
3236 					rc = ip6_del_cached_rt(rt_cache, cfg);
3237 					if (rc != -ESRCH)
3238 						return rc;
3239 				}
3240 				continue;
3241 			}
3242 			if (cfg->fc_ifindex &&
3243 			    (!rt->fib6_nh.nh_dev ||
3244 			     rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
3245 				continue;
3246 			if (cfg->fc_flags & RTF_GATEWAY &&
3247 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
3248 				continue;
3249 			if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
3250 				continue;
3251 			if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
3252 				continue;
3253 			fib6_info_hold(rt);
3254 			rcu_read_unlock();
3255 
3256 			/* if gateway was specified only delete the one hop */
3257 			if (cfg->fc_flags & RTF_GATEWAY)
3258 				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3259 
3260 			return __ip6_del_rt_siblings(rt, cfg);
3261 		}
3262 	}
3263 	rcu_read_unlock();
3264 
3265 	return err;
3266 }
3267 
3268 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
3269 {
3270 	struct netevent_redirect netevent;
3271 	struct rt6_info *rt, *nrt = NULL;
3272 	struct ndisc_options ndopts;
3273 	struct inet6_dev *in6_dev;
3274 	struct neighbour *neigh;
3275 	struct fib6_info *from;
3276 	struct rd_msg *msg;
3277 	int optlen, on_link;
3278 	u8 *lladdr;
3279 
3280 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
3281 	optlen -= sizeof(*msg);
3282 
3283 	if (optlen < 0) {
3284 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
3285 		return;
3286 	}
3287 
3288 	msg = (struct rd_msg *)icmp6_hdr(skb);
3289 
3290 	if (ipv6_addr_is_multicast(&msg->dest)) {
3291 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
3292 		return;
3293 	}
3294 
3295 	on_link = 0;
3296 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
3297 		on_link = 1;
3298 	} else if (ipv6_addr_type(&msg->target) !=
3299 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
3300 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
3301 		return;
3302 	}
3303 
3304 	in6_dev = __in6_dev_get(skb->dev);
3305 	if (!in6_dev)
3306 		return;
3307 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3308 		return;
3309 
3310 	/* RFC2461 8.1:
3311 	 *	The IP source address of the Redirect MUST be the same as the current
3312 	 *	first-hop router for the specified ICMP Destination Address.
3313 	 */
3314 
3315 	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
3316 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3317 		return;
3318 	}
3319 
3320 	lladdr = NULL;
3321 	if (ndopts.nd_opts_tgt_lladdr) {
3322 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3323 					     skb->dev);
3324 		if (!lladdr) {
3325 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3326 			return;
3327 		}
3328 	}
3329 
3330 	rt = (struct rt6_info *) dst;
3331 	if (rt->rt6i_flags & RTF_REJECT) {
3332 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
3333 		return;
3334 	}
3335 
3336 	/* Redirect received -> path was valid.
3337 	 * Look, redirects are sent only in response to data packets,
3338 	 * so that this nexthop apparently is reachable. --ANK
3339 	 */
3340 	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
3341 
3342 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
3343 	if (!neigh)
3344 		return;
3345 
3346 	/*
3347 	 *	We have finally decided to accept it.
3348 	 */
3349 
3350 	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
3351 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
3352 		     NEIGH_UPDATE_F_OVERRIDE|
3353 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
3354 				     NEIGH_UPDATE_F_ISROUTER)),
3355 		     NDISC_REDIRECT, &ndopts);
3356 
3357 	rcu_read_lock();
3358 	from = rcu_dereference(rt->from);
3359 	fib6_info_hold(from);
3360 	rcu_read_unlock();
3361 
3362 	nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
3363 	if (!nrt)
3364 		goto out;
3365 
3366 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3367 	if (on_link)
3368 		nrt->rt6i_flags &= ~RTF_GATEWAY;
3369 
3370 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
3371 
3372 	/* No need to remove rt from the exception table if rt is
3373 	 * a cached route because rt6_insert_exception() will
3374 	 * takes care of it
3375 	 */
3376 	if (rt6_insert_exception(nrt, from)) {
3377 		dst_release_immediate(&nrt->dst);
3378 		goto out;
3379 	}
3380 
3381 	netevent.old = &rt->dst;
3382 	netevent.new = &nrt->dst;
3383 	netevent.daddr = &msg->dest;
3384 	netevent.neigh = neigh;
3385 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3386 
3387 out:
3388 	fib6_info_release(from);
3389 	neigh_release(neigh);
3390 }
3391 
3392 #ifdef CONFIG_IPV6_ROUTE_INFO
3393 static struct fib6_info *rt6_get_route_info(struct net *net,
3394 					   const struct in6_addr *prefix, int prefixlen,
3395 					   const struct in6_addr *gwaddr,
3396 					   struct net_device *dev)
3397 {
3398 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3399 	int ifindex = dev->ifindex;
3400 	struct fib6_node *fn;
3401 	struct fib6_info *rt = NULL;
3402 	struct fib6_table *table;
3403 
3404 	table = fib6_get_table(net, tb_id);
3405 	if (!table)
3406 		return NULL;
3407 
3408 	rcu_read_lock();
3409 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
3410 	if (!fn)
3411 		goto out;
3412 
3413 	for_each_fib6_node_rt_rcu(fn) {
3414 		if (rt->fib6_nh.nh_dev->ifindex != ifindex)
3415 			continue;
3416 		if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3417 			continue;
3418 		if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
3419 			continue;
3420 		fib6_info_hold(rt);
3421 		break;
3422 	}
3423 out:
3424 	rcu_read_unlock();
3425 	return rt;
3426 }
3427 
3428 static struct fib6_info *rt6_add_route_info(struct net *net,
3429 					   const struct in6_addr *prefix, int prefixlen,
3430 					   const struct in6_addr *gwaddr,
3431 					   struct net_device *dev,
3432 					   unsigned int pref)
3433 {
3434 	struct fib6_config cfg = {
3435 		.fc_metric	= IP6_RT_PRIO_USER,
3436 		.fc_ifindex	= dev->ifindex,
3437 		.fc_dst_len	= prefixlen,
3438 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3439 				  RTF_UP | RTF_PREF(pref),
3440 		.fc_protocol = RTPROT_RA,
3441 		.fc_type = RTN_UNICAST,
3442 		.fc_nlinfo.portid = 0,
3443 		.fc_nlinfo.nlh = NULL,
3444 		.fc_nlinfo.nl_net = net,
3445 	};
3446 
3447 	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
3448 	cfg.fc_dst = *prefix;
3449 	cfg.fc_gateway = *gwaddr;
3450 
3451 	/* We should treat it as a default route if prefix length is 0. */
3452 	if (!prefixlen)
3453 		cfg.fc_flags |= RTF_DEFAULT;
3454 
3455 	ip6_route_add(&cfg, GFP_ATOMIC, NULL);
3456 
3457 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
3458 }
3459 #endif
3460 
3461 struct fib6_info *rt6_get_dflt_router(struct net *net,
3462 				     const struct in6_addr *addr,
3463 				     struct net_device *dev)
3464 {
3465 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
3466 	struct fib6_info *rt;
3467 	struct fib6_table *table;
3468 
3469 	table = fib6_get_table(net, tb_id);
3470 	if (!table)
3471 		return NULL;
3472 
3473 	rcu_read_lock();
3474 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
3475 		if (dev == rt->fib6_nh.nh_dev &&
3476 		    ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
3477 		    ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
3478 			break;
3479 	}
3480 	if (rt)
3481 		fib6_info_hold(rt);
3482 	rcu_read_unlock();
3483 	return rt;
3484 }
3485 
3486 struct fib6_info *rt6_add_dflt_router(struct net *net,
3487 				     const struct in6_addr *gwaddr,
3488 				     struct net_device *dev,
3489 				     unsigned int pref)
3490 {
3491 	struct fib6_config cfg = {
3492 		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
3493 		.fc_metric	= IP6_RT_PRIO_USER,
3494 		.fc_ifindex	= dev->ifindex,
3495 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3496 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
3497 		.fc_protocol = RTPROT_RA,
3498 		.fc_type = RTN_UNICAST,
3499 		.fc_nlinfo.portid = 0,
3500 		.fc_nlinfo.nlh = NULL,
3501 		.fc_nlinfo.nl_net = net,
3502 	};
3503 
3504 	cfg.fc_gateway = *gwaddr;
3505 
3506 	if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
3507 		struct fib6_table *table;
3508 
3509 		table = fib6_get_table(dev_net(dev), cfg.fc_table);
3510 		if (table)
3511 			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3512 	}
3513 
3514 	return rt6_get_dflt_router(net, gwaddr, dev);
3515 }
3516 
3517 static void __rt6_purge_dflt_routers(struct net *net,
3518 				     struct fib6_table *table)
3519 {
3520 	struct fib6_info *rt;
3521 
3522 restart:
3523 	rcu_read_lock();
3524 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
3525 		struct net_device *dev = fib6_info_nh_dev(rt);
3526 		struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3527 
3528 		if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3529 		    (!idev || idev->cnf.accept_ra != 2)) {
3530 			fib6_info_hold(rt);
3531 			rcu_read_unlock();
3532 			ip6_del_rt(net, rt);
3533 			goto restart;
3534 		}
3535 	}
3536 	rcu_read_unlock();
3537 
3538 	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3539 }
3540 
3541 void rt6_purge_dflt_routers(struct net *net)
3542 {
3543 	struct fib6_table *table;
3544 	struct hlist_head *head;
3545 	unsigned int h;
3546 
3547 	rcu_read_lock();
3548 
3549 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3550 		head = &net->ipv6.fib_table_hash[h];
3551 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3552 			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3553 				__rt6_purge_dflt_routers(net, table);
3554 		}
3555 	}
3556 
3557 	rcu_read_unlock();
3558 }
3559 
3560 static void rtmsg_to_fib6_config(struct net *net,
3561 				 struct in6_rtmsg *rtmsg,
3562 				 struct fib6_config *cfg)
3563 {
3564 	memset(cfg, 0, sizeof(*cfg));
3565 
3566 	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3567 			 : RT6_TABLE_MAIN;
3568 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3569 	cfg->fc_metric = rtmsg->rtmsg_metric;
3570 	cfg->fc_expires = rtmsg->rtmsg_info;
3571 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3572 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
3573 	cfg->fc_flags = rtmsg->rtmsg_flags;
3574 	cfg->fc_type = rtmsg->rtmsg_type;
3575 
3576 	cfg->fc_nlinfo.nl_net = net;
3577 
3578 	cfg->fc_dst = rtmsg->rtmsg_dst;
3579 	cfg->fc_src = rtmsg->rtmsg_src;
3580 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
3581 }
3582 
3583 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3584 {
3585 	struct fib6_config cfg;
3586 	struct in6_rtmsg rtmsg;
3587 	int err;
3588 
3589 	switch (cmd) {
3590 	case SIOCADDRT:		/* Add a route */
3591 	case SIOCDELRT:		/* Delete a route */
3592 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
3593 			return -EPERM;
3594 		err = copy_from_user(&rtmsg, arg,
3595 				     sizeof(struct in6_rtmsg));
3596 		if (err)
3597 			return -EFAULT;
3598 
3599 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
3600 
3601 		rtnl_lock();
3602 		switch (cmd) {
3603 		case SIOCADDRT:
3604 			err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
3605 			break;
3606 		case SIOCDELRT:
3607 			err = ip6_route_del(&cfg, NULL);
3608 			break;
3609 		default:
3610 			err = -EINVAL;
3611 		}
3612 		rtnl_unlock();
3613 
3614 		return err;
3615 	}
3616 
3617 	return -EINVAL;
3618 }
3619 
3620 /*
3621  *	Drop the packet on the floor
3622  */
3623 
3624 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
3625 {
3626 	int type;
3627 	struct dst_entry *dst = skb_dst(skb);
3628 	switch (ipstats_mib_noroutes) {
3629 	case IPSTATS_MIB_INNOROUTES:
3630 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
3631 		if (type == IPV6_ADDR_ANY) {
3632 			IP6_INC_STATS(dev_net(dst->dev),
3633 				      __in6_dev_get_safely(skb->dev),
3634 				      IPSTATS_MIB_INADDRERRORS);
3635 			break;
3636 		}
3637 		/* FALLTHROUGH */
3638 	case IPSTATS_MIB_OUTNOROUTES:
3639 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3640 			      ipstats_mib_noroutes);
3641 		break;
3642 	}
3643 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
3644 	kfree_skb(skb);
3645 	return 0;
3646 }
3647 
3648 static int ip6_pkt_discard(struct sk_buff *skb)
3649 {
3650 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
3651 }
3652 
3653 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3654 {
3655 	skb->dev = skb_dst(skb)->dev;
3656 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
3657 }
3658 
3659 static int ip6_pkt_prohibit(struct sk_buff *skb)
3660 {
3661 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
3662 }
3663 
3664 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3665 {
3666 	skb->dev = skb_dst(skb)->dev;
3667 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
3668 }
3669 
3670 /*
3671  *	Allocate a dst for local (unicast / anycast) address.
3672  */
3673 
3674 struct fib6_info *addrconf_f6i_alloc(struct net *net,
3675 				     struct inet6_dev *idev,
3676 				     const struct in6_addr *addr,
3677 				     bool anycast, gfp_t gfp_flags)
3678 {
3679 	u32 tb_id;
3680 	struct net_device *dev = idev->dev;
3681 	struct fib6_info *f6i;
3682 
3683 	f6i = fib6_info_alloc(gfp_flags);
3684 	if (!f6i)
3685 		return ERR_PTR(-ENOMEM);
3686 
3687 	f6i->dst_nocount = true;
3688 	f6i->dst_host = true;
3689 	f6i->fib6_protocol = RTPROT_KERNEL;
3690 	f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
3691 	if (anycast) {
3692 		f6i->fib6_type = RTN_ANYCAST;
3693 		f6i->fib6_flags |= RTF_ANYCAST;
3694 	} else {
3695 		f6i->fib6_type = RTN_LOCAL;
3696 		f6i->fib6_flags |= RTF_LOCAL;
3697 	}
3698 
3699 	f6i->fib6_nh.nh_gw = *addr;
3700 	dev_hold(dev);
3701 	f6i->fib6_nh.nh_dev = dev;
3702 	f6i->fib6_dst.addr = *addr;
3703 	f6i->fib6_dst.plen = 128;
3704 	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3705 	f6i->fib6_table = fib6_get_table(net, tb_id);
3706 
3707 	return f6i;
3708 }
3709 
3710 /* remove deleted ip from prefsrc entries */
3711 struct arg_dev_net_ip {
3712 	struct net_device *dev;
3713 	struct net *net;
3714 	struct in6_addr *addr;
3715 };
3716 
3717 static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
3718 {
3719 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3720 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3721 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3722 
3723 	if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
3724 	    rt != net->ipv6.fib6_null_entry &&
3725 	    ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
3726 		spin_lock_bh(&rt6_exception_lock);
3727 		/* remove prefsrc entry */
3728 		rt->fib6_prefsrc.plen = 0;
3729 		/* need to update cache as well */
3730 		rt6_exceptions_remove_prefsrc(rt);
3731 		spin_unlock_bh(&rt6_exception_lock);
3732 	}
3733 	return 0;
3734 }
3735 
3736 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3737 {
3738 	struct net *net = dev_net(ifp->idev->dev);
3739 	struct arg_dev_net_ip adni = {
3740 		.dev = ifp->idev->dev,
3741 		.net = net,
3742 		.addr = &ifp->addr,
3743 	};
3744 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
3745 }
3746 
3747 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
3748 
3749 /* Remove routers and update dst entries when gateway turn into host. */
3750 static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
3751 {
3752 	struct in6_addr *gateway = (struct in6_addr *)arg;
3753 
3754 	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3755 	    ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
3756 		return -1;
3757 	}
3758 
3759 	/* Further clean up cached routes in exception table.
3760 	 * This is needed because cached route may have a different
3761 	 * gateway than its 'parent' in the case of an ip redirect.
3762 	 */
3763 	rt6_exceptions_clean_tohost(rt, gateway);
3764 
3765 	return 0;
3766 }
3767 
3768 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3769 {
3770 	fib6_clean_all(net, fib6_clean_tohost, gateway);
3771 }
3772 
3773 struct arg_netdev_event {
3774 	const struct net_device *dev;
3775 	union {
3776 		unsigned int nh_flags;
3777 		unsigned long event;
3778 	};
3779 };
3780 
3781 static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
3782 {
3783 	struct fib6_info *iter;
3784 	struct fib6_node *fn;
3785 
3786 	fn = rcu_dereference_protected(rt->fib6_node,
3787 			lockdep_is_held(&rt->fib6_table->tb6_lock));
3788 	iter = rcu_dereference_protected(fn->leaf,
3789 			lockdep_is_held(&rt->fib6_table->tb6_lock));
3790 	while (iter) {
3791 		if (iter->fib6_metric == rt->fib6_metric &&
3792 		    rt6_qualify_for_ecmp(iter))
3793 			return iter;
3794 		iter = rcu_dereference_protected(iter->fib6_next,
3795 				lockdep_is_held(&rt->fib6_table->tb6_lock));
3796 	}
3797 
3798 	return NULL;
3799 }
3800 
3801 static bool rt6_is_dead(const struct fib6_info *rt)
3802 {
3803 	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3804 	    (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
3805 	     fib6_ignore_linkdown(rt)))
3806 		return true;
3807 
3808 	return false;
3809 }
3810 
3811 static int rt6_multipath_total_weight(const struct fib6_info *rt)
3812 {
3813 	struct fib6_info *iter;
3814 	int total = 0;
3815 
3816 	if (!rt6_is_dead(rt))
3817 		total += rt->fib6_nh.nh_weight;
3818 
3819 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
3820 		if (!rt6_is_dead(iter))
3821 			total += iter->fib6_nh.nh_weight;
3822 	}
3823 
3824 	return total;
3825 }
3826 
3827 static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
3828 {
3829 	int upper_bound = -1;
3830 
3831 	if (!rt6_is_dead(rt)) {
3832 		*weight += rt->fib6_nh.nh_weight;
3833 		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3834 						    total) - 1;
3835 	}
3836 	atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
3837 }
3838 
3839 static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
3840 {
3841 	struct fib6_info *iter;
3842 	int weight = 0;
3843 
3844 	rt6_upper_bound_set(rt, &weight, total);
3845 
3846 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3847 		rt6_upper_bound_set(iter, &weight, total);
3848 }
3849 
3850 void rt6_multipath_rebalance(struct fib6_info *rt)
3851 {
3852 	struct fib6_info *first;
3853 	int total;
3854 
3855 	/* In case the entire multipath route was marked for flushing,
3856 	 * then there is no need to rebalance upon the removal of every
3857 	 * sibling route.
3858 	 */
3859 	if (!rt->fib6_nsiblings || rt->should_flush)
3860 		return;
3861 
3862 	/* During lookup routes are evaluated in order, so we need to
3863 	 * make sure upper bounds are assigned from the first sibling
3864 	 * onwards.
3865 	 */
3866 	first = rt6_multipath_first_sibling(rt);
3867 	if (WARN_ON_ONCE(!first))
3868 		return;
3869 
3870 	total = rt6_multipath_total_weight(first);
3871 	rt6_multipath_upper_bound_set(first, total);
3872 }
3873 
3874 static int fib6_ifup(struct fib6_info *rt, void *p_arg)
3875 {
3876 	const struct arg_netdev_event *arg = p_arg;
3877 	struct net *net = dev_net(arg->dev);
3878 
3879 	if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
3880 		rt->fib6_nh.nh_flags &= ~arg->nh_flags;
3881 		fib6_update_sernum_upto_root(net, rt);
3882 		rt6_multipath_rebalance(rt);
3883 	}
3884 
3885 	return 0;
3886 }
3887 
3888 void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3889 {
3890 	struct arg_netdev_event arg = {
3891 		.dev = dev,
3892 		{
3893 			.nh_flags = nh_flags,
3894 		},
3895 	};
3896 
3897 	if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3898 		arg.nh_flags |= RTNH_F_LINKDOWN;
3899 
3900 	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3901 }
3902 
3903 static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
3904 				   const struct net_device *dev)
3905 {
3906 	struct fib6_info *iter;
3907 
3908 	if (rt->fib6_nh.nh_dev == dev)
3909 		return true;
3910 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3911 		if (iter->fib6_nh.nh_dev == dev)
3912 			return true;
3913 
3914 	return false;
3915 }
3916 
3917 static void rt6_multipath_flush(struct fib6_info *rt)
3918 {
3919 	struct fib6_info *iter;
3920 
3921 	rt->should_flush = 1;
3922 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3923 		iter->should_flush = 1;
3924 }
3925 
3926 static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
3927 					     const struct net_device *down_dev)
3928 {
3929 	struct fib6_info *iter;
3930 	unsigned int dead = 0;
3931 
3932 	if (rt->fib6_nh.nh_dev == down_dev ||
3933 	    rt->fib6_nh.nh_flags & RTNH_F_DEAD)
3934 		dead++;
3935 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3936 		if (iter->fib6_nh.nh_dev == down_dev ||
3937 		    iter->fib6_nh.nh_flags & RTNH_F_DEAD)
3938 			dead++;
3939 
3940 	return dead;
3941 }
3942 
3943 static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
3944 				       const struct net_device *dev,
3945 				       unsigned int nh_flags)
3946 {
3947 	struct fib6_info *iter;
3948 
3949 	if (rt->fib6_nh.nh_dev == dev)
3950 		rt->fib6_nh.nh_flags |= nh_flags;
3951 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3952 		if (iter->fib6_nh.nh_dev == dev)
3953 			iter->fib6_nh.nh_flags |= nh_flags;
3954 }
3955 
3956 /* called with write lock held for table with rt */
3957 static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
3958 {
3959 	const struct arg_netdev_event *arg = p_arg;
3960 	const struct net_device *dev = arg->dev;
3961 	struct net *net = dev_net(dev);
3962 
3963 	if (rt == net->ipv6.fib6_null_entry)
3964 		return 0;
3965 
3966 	switch (arg->event) {
3967 	case NETDEV_UNREGISTER:
3968 		return rt->fib6_nh.nh_dev == dev ? -1 : 0;
3969 	case NETDEV_DOWN:
3970 		if (rt->should_flush)
3971 			return -1;
3972 		if (!rt->fib6_nsiblings)
3973 			return rt->fib6_nh.nh_dev == dev ? -1 : 0;
3974 		if (rt6_multipath_uses_dev(rt, dev)) {
3975 			unsigned int count;
3976 
3977 			count = rt6_multipath_dead_count(rt, dev);
3978 			if (rt->fib6_nsiblings + 1 == count) {
3979 				rt6_multipath_flush(rt);
3980 				return -1;
3981 			}
3982 			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
3983 						   RTNH_F_LINKDOWN);
3984 			fib6_update_sernum(net, rt);
3985 			rt6_multipath_rebalance(rt);
3986 		}
3987 		return -2;
3988 	case NETDEV_CHANGE:
3989 		if (rt->fib6_nh.nh_dev != dev ||
3990 		    rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
3991 			break;
3992 		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3993 		rt6_multipath_rebalance(rt);
3994 		break;
3995 	}
3996 
3997 	return 0;
3998 }
3999 
4000 void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
4001 {
4002 	struct arg_netdev_event arg = {
4003 		.dev = dev,
4004 		{
4005 			.event = event,
4006 		},
4007 	};
4008 
4009 	fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
4010 }
4011 
4012 void rt6_disable_ip(struct net_device *dev, unsigned long event)
4013 {
4014 	rt6_sync_down_dev(dev, event);
4015 	rt6_uncached_list_flush_dev(dev_net(dev), dev);
4016 	neigh_ifdown(&nd_tbl, dev);
4017 }
4018 
4019 struct rt6_mtu_change_arg {
4020 	struct net_device *dev;
4021 	unsigned int mtu;
4022 };
4023 
4024 static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
4025 {
4026 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4027 	struct inet6_dev *idev;
4028 
4029 	/* In IPv6 pmtu discovery is not optional,
4030 	   so that RTAX_MTU lock cannot disable it.
4031 	   We still use this lock to block changes
4032 	   caused by addrconf/ndisc.
4033 	*/
4034 
4035 	idev = __in6_dev_get(arg->dev);
4036 	if (!idev)
4037 		return 0;
4038 
4039 	/* For administrative MTU increase, there is no way to discover
4040 	   IPv6 PMTU increase, so PMTU increase should be updated here.
4041 	   Since RFC 1981 doesn't include administrative MTU increase
4042 	   update PMTU increase is a MUST. (i.e. jumbo frame)
4043 	 */
4044 	if (rt->fib6_nh.nh_dev == arg->dev &&
4045 	    !fib6_metric_locked(rt, RTAX_MTU)) {
4046 		u32 mtu = rt->fib6_pmtu;
4047 
4048 		if (mtu >= arg->mtu ||
4049 		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4050 			fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4051 
4052 		spin_lock_bh(&rt6_exception_lock);
4053 		rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
4054 		spin_unlock_bh(&rt6_exception_lock);
4055 	}
4056 	return 0;
4057 }
4058 
4059 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
4060 {
4061 	struct rt6_mtu_change_arg arg = {
4062 		.dev = dev,
4063 		.mtu = mtu,
4064 	};
4065 
4066 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
4067 }
4068 
4069 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
4070 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
4071 	[RTA_PREFSRC]		= { .len = sizeof(struct in6_addr) },
4072 	[RTA_OIF]               = { .type = NLA_U32 },
4073 	[RTA_IIF]		= { .type = NLA_U32 },
4074 	[RTA_PRIORITY]          = { .type = NLA_U32 },
4075 	[RTA_METRICS]           = { .type = NLA_NESTED },
4076 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
4077 	[RTA_PREF]              = { .type = NLA_U8 },
4078 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
4079 	[RTA_ENCAP]		= { .type = NLA_NESTED },
4080 	[RTA_EXPIRES]		= { .type = NLA_U32 },
4081 	[RTA_UID]		= { .type = NLA_U32 },
4082 	[RTA_MARK]		= { .type = NLA_U32 },
4083 	[RTA_TABLE]		= { .type = NLA_U32 },
4084 };
4085 
4086 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
4087 			      struct fib6_config *cfg,
4088 			      struct netlink_ext_ack *extack)
4089 {
4090 	struct rtmsg *rtm;
4091 	struct nlattr *tb[RTA_MAX+1];
4092 	unsigned int pref;
4093 	int err;
4094 
4095 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4096 			  NULL);
4097 	if (err < 0)
4098 		goto errout;
4099 
4100 	err = -EINVAL;
4101 	rtm = nlmsg_data(nlh);
4102 	memset(cfg, 0, sizeof(*cfg));
4103 
4104 	cfg->fc_table = rtm->rtm_table;
4105 	cfg->fc_dst_len = rtm->rtm_dst_len;
4106 	cfg->fc_src_len = rtm->rtm_src_len;
4107 	cfg->fc_flags = RTF_UP;
4108 	cfg->fc_protocol = rtm->rtm_protocol;
4109 	cfg->fc_type = rtm->rtm_type;
4110 
4111 	if (rtm->rtm_type == RTN_UNREACHABLE ||
4112 	    rtm->rtm_type == RTN_BLACKHOLE ||
4113 	    rtm->rtm_type == RTN_PROHIBIT ||
4114 	    rtm->rtm_type == RTN_THROW)
4115 		cfg->fc_flags |= RTF_REJECT;
4116 
4117 	if (rtm->rtm_type == RTN_LOCAL)
4118 		cfg->fc_flags |= RTF_LOCAL;
4119 
4120 	if (rtm->rtm_flags & RTM_F_CLONED)
4121 		cfg->fc_flags |= RTF_CACHE;
4122 
4123 	cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4124 
4125 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
4126 	cfg->fc_nlinfo.nlh = nlh;
4127 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
4128 
4129 	if (tb[RTA_GATEWAY]) {
4130 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
4131 		cfg->fc_flags |= RTF_GATEWAY;
4132 	}
4133 
4134 	if (tb[RTA_DST]) {
4135 		int plen = (rtm->rtm_dst_len + 7) >> 3;
4136 
4137 		if (nla_len(tb[RTA_DST]) < plen)
4138 			goto errout;
4139 
4140 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
4141 	}
4142 
4143 	if (tb[RTA_SRC]) {
4144 		int plen = (rtm->rtm_src_len + 7) >> 3;
4145 
4146 		if (nla_len(tb[RTA_SRC]) < plen)
4147 			goto errout;
4148 
4149 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
4150 	}
4151 
4152 	if (tb[RTA_PREFSRC])
4153 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
4154 
4155 	if (tb[RTA_OIF])
4156 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4157 
4158 	if (tb[RTA_PRIORITY])
4159 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4160 
4161 	if (tb[RTA_METRICS]) {
4162 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4163 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
4164 	}
4165 
4166 	if (tb[RTA_TABLE])
4167 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4168 
4169 	if (tb[RTA_MULTIPATH]) {
4170 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4171 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
4172 
4173 		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
4174 						     cfg->fc_mp_len, extack);
4175 		if (err < 0)
4176 			goto errout;
4177 	}
4178 
4179 	if (tb[RTA_PREF]) {
4180 		pref = nla_get_u8(tb[RTA_PREF]);
4181 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
4182 		    pref != ICMPV6_ROUTER_PREF_HIGH)
4183 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
4184 		cfg->fc_flags |= RTF_PREF(pref);
4185 	}
4186 
4187 	if (tb[RTA_ENCAP])
4188 		cfg->fc_encap = tb[RTA_ENCAP];
4189 
4190 	if (tb[RTA_ENCAP_TYPE]) {
4191 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4192 
4193 		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
4194 		if (err < 0)
4195 			goto errout;
4196 	}
4197 
4198 	if (tb[RTA_EXPIRES]) {
4199 		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4200 
4201 		if (addrconf_finite_timeout(timeout)) {
4202 			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4203 			cfg->fc_flags |= RTF_EXPIRES;
4204 		}
4205 	}
4206 
4207 	err = 0;
4208 errout:
4209 	return err;
4210 }
4211 
4212 struct rt6_nh {
4213 	struct fib6_info *fib6_info;
4214 	struct fib6_config r_cfg;
4215 	struct list_head next;
4216 };
4217 
4218 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4219 {
4220 	struct rt6_nh *nh;
4221 
4222 	list_for_each_entry(nh, rt6_nh_list, next) {
4223 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
4224 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4225 		        nh->r_cfg.fc_ifindex);
4226 	}
4227 }
4228 
4229 static int ip6_route_info_append(struct net *net,
4230 				 struct list_head *rt6_nh_list,
4231 				 struct fib6_info *rt,
4232 				 struct fib6_config *r_cfg)
4233 {
4234 	struct rt6_nh *nh;
4235 	int err = -EEXIST;
4236 
4237 	list_for_each_entry(nh, rt6_nh_list, next) {
4238 		/* check if fib6_info already exists */
4239 		if (rt6_duplicate_nexthop(nh->fib6_info, rt))
4240 			return err;
4241 	}
4242 
4243 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4244 	if (!nh)
4245 		return -ENOMEM;
4246 	nh->fib6_info = rt;
4247 	err = ip6_convert_metrics(net, rt, r_cfg);
4248 	if (err) {
4249 		kfree(nh);
4250 		return err;
4251 	}
4252 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4253 	list_add_tail(&nh->next, rt6_nh_list);
4254 
4255 	return 0;
4256 }
4257 
4258 static void ip6_route_mpath_notify(struct fib6_info *rt,
4259 				   struct fib6_info *rt_last,
4260 				   struct nl_info *info,
4261 				   __u16 nlflags)
4262 {
4263 	/* if this is an APPEND route, then rt points to the first route
4264 	 * inserted and rt_last points to last route inserted. Userspace
4265 	 * wants a consistent dump of the route which starts at the first
4266 	 * nexthop. Since sibling routes are always added at the end of
4267 	 * the list, find the first sibling of the last route appended
4268 	 */
4269 	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4270 		rt = list_first_entry(&rt_last->fib6_siblings,
4271 				      struct fib6_info,
4272 				      fib6_siblings);
4273 	}
4274 
4275 	if (rt)
4276 		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4277 }
4278 
4279 static int ip6_route_multipath_add(struct fib6_config *cfg,
4280 				   struct netlink_ext_ack *extack)
4281 {
4282 	struct fib6_info *rt_notif = NULL, *rt_last = NULL;
4283 	struct nl_info *info = &cfg->fc_nlinfo;
4284 	struct fib6_config r_cfg;
4285 	struct rtnexthop *rtnh;
4286 	struct fib6_info *rt;
4287 	struct rt6_nh *err_nh;
4288 	struct rt6_nh *nh, *nh_safe;
4289 	__u16 nlflags;
4290 	int remaining;
4291 	int attrlen;
4292 	int err = 1;
4293 	int nhn = 0;
4294 	int replace = (cfg->fc_nlinfo.nlh &&
4295 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4296 	LIST_HEAD(rt6_nh_list);
4297 
4298 	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4299 	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4300 		nlflags |= NLM_F_APPEND;
4301 
4302 	remaining = cfg->fc_mp_len;
4303 	rtnh = (struct rtnexthop *)cfg->fc_mp;
4304 
4305 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
4306 	 * fib6_info structs per nexthop
4307 	 */
4308 	while (rtnh_ok(rtnh, remaining)) {
4309 		memcpy(&r_cfg, cfg, sizeof(*cfg));
4310 		if (rtnh->rtnh_ifindex)
4311 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4312 
4313 		attrlen = rtnh_attrlen(rtnh);
4314 		if (attrlen > 0) {
4315 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4316 
4317 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4318 			if (nla) {
4319 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
4320 				r_cfg.fc_flags |= RTF_GATEWAY;
4321 			}
4322 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4323 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4324 			if (nla)
4325 				r_cfg.fc_encap_type = nla_get_u16(nla);
4326 		}
4327 
4328 		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
4329 		rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
4330 		if (IS_ERR(rt)) {
4331 			err = PTR_ERR(rt);
4332 			rt = NULL;
4333 			goto cleanup;
4334 		}
4335 
4336 		rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
4337 
4338 		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4339 					    rt, &r_cfg);
4340 		if (err) {
4341 			fib6_info_release(rt);
4342 			goto cleanup;
4343 		}
4344 
4345 		rtnh = rtnh_next(rtnh, &remaining);
4346 	}
4347 
4348 	/* for add and replace send one notification with all nexthops.
4349 	 * Skip the notification in fib6_add_rt2node and send one with
4350 	 * the full route when done
4351 	 */
4352 	info->skip_notify = 1;
4353 
4354 	err_nh = NULL;
4355 	list_for_each_entry(nh, &rt6_nh_list, next) {
4356 		rt_last = nh->fib6_info;
4357 		err = __ip6_ins_rt(nh->fib6_info, info, extack);
4358 		fib6_info_release(nh->fib6_info);
4359 
4360 		/* save reference to first route for notification */
4361 		if (!rt_notif && !err)
4362 			rt_notif = nh->fib6_info;
4363 
4364 		/* nh->fib6_info is used or freed at this point, reset to NULL*/
4365 		nh->fib6_info = NULL;
4366 		if (err) {
4367 			if (replace && nhn)
4368 				ip6_print_replace_route_err(&rt6_nh_list);
4369 			err_nh = nh;
4370 			goto add_errout;
4371 		}
4372 
4373 		/* Because each route is added like a single route we remove
4374 		 * these flags after the first nexthop: if there is a collision,
4375 		 * we have already failed to add the first nexthop:
4376 		 * fib6_add_rt2node() has rejected it; when replacing, old
4377 		 * nexthops have been replaced by first new, the rest should
4378 		 * be added to it.
4379 		 */
4380 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4381 						     NLM_F_REPLACE);
4382 		nhn++;
4383 	}
4384 
4385 	/* success ... tell user about new route */
4386 	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4387 	goto cleanup;
4388 
4389 add_errout:
4390 	/* send notification for routes that were added so that
4391 	 * the delete notifications sent by ip6_route_del are
4392 	 * coherent
4393 	 */
4394 	if (rt_notif)
4395 		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4396 
4397 	/* Delete routes that were already added */
4398 	list_for_each_entry(nh, &rt6_nh_list, next) {
4399 		if (err_nh == nh)
4400 			break;
4401 		ip6_route_del(&nh->r_cfg, extack);
4402 	}
4403 
4404 cleanup:
4405 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
4406 		if (nh->fib6_info)
4407 			fib6_info_release(nh->fib6_info);
4408 		list_del(&nh->next);
4409 		kfree(nh);
4410 	}
4411 
4412 	return err;
4413 }
4414 
4415 static int ip6_route_multipath_del(struct fib6_config *cfg,
4416 				   struct netlink_ext_ack *extack)
4417 {
4418 	struct fib6_config r_cfg;
4419 	struct rtnexthop *rtnh;
4420 	int remaining;
4421 	int attrlen;
4422 	int err = 1, last_err = 0;
4423 
4424 	remaining = cfg->fc_mp_len;
4425 	rtnh = (struct rtnexthop *)cfg->fc_mp;
4426 
4427 	/* Parse a Multipath Entry */
4428 	while (rtnh_ok(rtnh, remaining)) {
4429 		memcpy(&r_cfg, cfg, sizeof(*cfg));
4430 		if (rtnh->rtnh_ifindex)
4431 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4432 
4433 		attrlen = rtnh_attrlen(rtnh);
4434 		if (attrlen > 0) {
4435 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4436 
4437 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4438 			if (nla) {
4439 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4440 				r_cfg.fc_flags |= RTF_GATEWAY;
4441 			}
4442 		}
4443 		err = ip6_route_del(&r_cfg, extack);
4444 		if (err)
4445 			last_err = err;
4446 
4447 		rtnh = rtnh_next(rtnh, &remaining);
4448 	}
4449 
4450 	return last_err;
4451 }
4452 
4453 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4454 			      struct netlink_ext_ack *extack)
4455 {
4456 	struct fib6_config cfg;
4457 	int err;
4458 
4459 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4460 	if (err < 0)
4461 		return err;
4462 
4463 	if (cfg.fc_mp)
4464 		return ip6_route_multipath_del(&cfg, extack);
4465 	else {
4466 		cfg.fc_delete_all_nh = 1;
4467 		return ip6_route_del(&cfg, extack);
4468 	}
4469 }
4470 
4471 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4472 			      struct netlink_ext_ack *extack)
4473 {
4474 	struct fib6_config cfg;
4475 	int err;
4476 
4477 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4478 	if (err < 0)
4479 		return err;
4480 
4481 	if (cfg.fc_mp)
4482 		return ip6_route_multipath_add(&cfg, extack);
4483 	else
4484 		return ip6_route_add(&cfg, GFP_KERNEL, extack);
4485 }
4486 
4487 static size_t rt6_nlmsg_size(struct fib6_info *rt)
4488 {
4489 	int nexthop_len = 0;
4490 
4491 	if (rt->fib6_nsiblings) {
4492 		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
4493 			    + NLA_ALIGN(sizeof(struct rtnexthop))
4494 			    + nla_total_size(16) /* RTA_GATEWAY */
4495 			    + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
4496 
4497 		nexthop_len *= rt->fib6_nsiblings;
4498 	}
4499 
4500 	return NLMSG_ALIGN(sizeof(struct rtmsg))
4501 	       + nla_total_size(16) /* RTA_SRC */
4502 	       + nla_total_size(16) /* RTA_DST */
4503 	       + nla_total_size(16) /* RTA_GATEWAY */
4504 	       + nla_total_size(16) /* RTA_PREFSRC */
4505 	       + nla_total_size(4) /* RTA_TABLE */
4506 	       + nla_total_size(4) /* RTA_IIF */
4507 	       + nla_total_size(4) /* RTA_OIF */
4508 	       + nla_total_size(4) /* RTA_PRIORITY */
4509 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
4510 	       + nla_total_size(sizeof(struct rta_cacheinfo))
4511 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
4512 	       + nla_total_size(1) /* RTA_PREF */
4513 	       + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
4514 	       + nexthop_len;
4515 }
4516 
4517 static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
4518 			    unsigned int *flags, bool skip_oif)
4519 {
4520 	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
4521 		*flags |= RTNH_F_DEAD;
4522 
4523 	if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
4524 		*flags |= RTNH_F_LINKDOWN;
4525 
4526 		rcu_read_lock();
4527 		if (fib6_ignore_linkdown(rt))
4528 			*flags |= RTNH_F_DEAD;
4529 		rcu_read_unlock();
4530 	}
4531 
4532 	if (rt->fib6_flags & RTF_GATEWAY) {
4533 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
4534 			goto nla_put_failure;
4535 	}
4536 
4537 	*flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4538 	if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
4539 		*flags |= RTNH_F_OFFLOAD;
4540 
4541 	/* not needed for multipath encoding b/c it has a rtnexthop struct */
4542 	if (!skip_oif && rt->fib6_nh.nh_dev &&
4543 	    nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
4544 		goto nla_put_failure;
4545 
4546 	if (rt->fib6_nh.nh_lwtstate &&
4547 	    lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
4548 		goto nla_put_failure;
4549 
4550 	return 0;
4551 
4552 nla_put_failure:
4553 	return -EMSGSIZE;
4554 }
4555 
4556 /* add multipath next hop */
4557 static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
4558 {
4559 	const struct net_device *dev = rt->fib6_nh.nh_dev;
4560 	struct rtnexthop *rtnh;
4561 	unsigned int flags = 0;
4562 
4563 	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4564 	if (!rtnh)
4565 		goto nla_put_failure;
4566 
4567 	rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4568 	rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
4569 
4570 	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
4571 		goto nla_put_failure;
4572 
4573 	rtnh->rtnh_flags = flags;
4574 
4575 	/* length of rtnetlink header + attributes */
4576 	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4577 
4578 	return 0;
4579 
4580 nla_put_failure:
4581 	return -EMSGSIZE;
4582 }
4583 
4584 static int rt6_fill_node(struct net *net, struct sk_buff *skb,
4585 			 struct fib6_info *rt, struct dst_entry *dst,
4586 			 struct in6_addr *dest, struct in6_addr *src,
4587 			 int iif, int type, u32 portid, u32 seq,
4588 			 unsigned int flags)
4589 {
4590 	struct rtmsg *rtm;
4591 	struct nlmsghdr *nlh;
4592 	long expires = 0;
4593 	u32 *pmetrics;
4594 	u32 table;
4595 
4596 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
4597 	if (!nlh)
4598 		return -EMSGSIZE;
4599 
4600 	rtm = nlmsg_data(nlh);
4601 	rtm->rtm_family = AF_INET6;
4602 	rtm->rtm_dst_len = rt->fib6_dst.plen;
4603 	rtm->rtm_src_len = rt->fib6_src.plen;
4604 	rtm->rtm_tos = 0;
4605 	if (rt->fib6_table)
4606 		table = rt->fib6_table->tb6_id;
4607 	else
4608 		table = RT6_TABLE_UNSPEC;
4609 	rtm->rtm_table = table;
4610 	if (nla_put_u32(skb, RTA_TABLE, table))
4611 		goto nla_put_failure;
4612 
4613 	rtm->rtm_type = rt->fib6_type;
4614 	rtm->rtm_flags = 0;
4615 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4616 	rtm->rtm_protocol = rt->fib6_protocol;
4617 
4618 	if (rt->fib6_flags & RTF_CACHE)
4619 		rtm->rtm_flags |= RTM_F_CLONED;
4620 
4621 	if (dest) {
4622 		if (nla_put_in6_addr(skb, RTA_DST, dest))
4623 			goto nla_put_failure;
4624 		rtm->rtm_dst_len = 128;
4625 	} else if (rtm->rtm_dst_len)
4626 		if (nla_put_in6_addr(skb, RTA_DST, &rt->fib6_dst.addr))
4627 			goto nla_put_failure;
4628 #ifdef CONFIG_IPV6_SUBTREES
4629 	if (src) {
4630 		if (nla_put_in6_addr(skb, RTA_SRC, src))
4631 			goto nla_put_failure;
4632 		rtm->rtm_src_len = 128;
4633 	} else if (rtm->rtm_src_len &&
4634 		   nla_put_in6_addr(skb, RTA_SRC, &rt->fib6_src.addr))
4635 		goto nla_put_failure;
4636 #endif
4637 	if (iif) {
4638 #ifdef CONFIG_IPV6_MROUTE
4639 		if (ipv6_addr_is_multicast(&rt->fib6_dst.addr)) {
4640 			int err = ip6mr_get_route(net, skb, rtm, portid);
4641 
4642 			if (err == 0)
4643 				return 0;
4644 			if (err < 0)
4645 				goto nla_put_failure;
4646 		} else
4647 #endif
4648 			if (nla_put_u32(skb, RTA_IIF, iif))
4649 				goto nla_put_failure;
4650 	} else if (dest) {
4651 		struct in6_addr saddr_buf;
4652 		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
4653 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4654 			goto nla_put_failure;
4655 	}
4656 
4657 	if (rt->fib6_prefsrc.plen) {
4658 		struct in6_addr saddr_buf;
4659 		saddr_buf = rt->fib6_prefsrc.addr;
4660 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4661 			goto nla_put_failure;
4662 	}
4663 
4664 	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4665 	if (rtnetlink_put_metrics(skb, pmetrics) < 0)
4666 		goto nla_put_failure;
4667 
4668 	if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
4669 		goto nla_put_failure;
4670 
4671 	/* For multipath routes, walk the siblings list and add
4672 	 * each as a nexthop within RTA_MULTIPATH.
4673 	 */
4674 	if (rt->fib6_nsiblings) {
4675 		struct fib6_info *sibling, *next_sibling;
4676 		struct nlattr *mp;
4677 
4678 		mp = nla_nest_start(skb, RTA_MULTIPATH);
4679 		if (!mp)
4680 			goto nla_put_failure;
4681 
4682 		if (rt6_add_nexthop(skb, rt) < 0)
4683 			goto nla_put_failure;
4684 
4685 		list_for_each_entry_safe(sibling, next_sibling,
4686 					 &rt->fib6_siblings, fib6_siblings) {
4687 			if (rt6_add_nexthop(skb, sibling) < 0)
4688 				goto nla_put_failure;
4689 		}
4690 
4691 		nla_nest_end(skb, mp);
4692 	} else {
4693 		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
4694 			goto nla_put_failure;
4695 	}
4696 
4697 	if (rt->fib6_flags & RTF_EXPIRES) {
4698 		expires = dst ? dst->expires : rt->expires;
4699 		expires -= jiffies;
4700 	}
4701 
4702 	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
4703 		goto nla_put_failure;
4704 
4705 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->fib6_flags)))
4706 		goto nla_put_failure;
4707 
4708 
4709 	nlmsg_end(skb, nlh);
4710 	return 0;
4711 
4712 nla_put_failure:
4713 	nlmsg_cancel(skb, nlh);
4714 	return -EMSGSIZE;
4715 }
4716 
4717 int rt6_dump_route(struct fib6_info *rt, void *p_arg)
4718 {
4719 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
4720 	struct net *net = arg->net;
4721 
4722 	if (rt == net->ipv6.fib6_null_entry)
4723 		return 0;
4724 
4725 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4726 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
4727 
4728 		/* user wants prefix routes only */
4729 		if (rtm->rtm_flags & RTM_F_PREFIX &&
4730 		    !(rt->fib6_flags & RTF_PREFIX_RT)) {
4731 			/* success since this is not a prefix route */
4732 			return 1;
4733 		}
4734 	}
4735 
4736 	return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4737 			     RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4738 			     arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
4739 }
4740 
4741 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4742 			      struct netlink_ext_ack *extack)
4743 {
4744 	struct net *net = sock_net(in_skb->sk);
4745 	struct nlattr *tb[RTA_MAX+1];
4746 	int err, iif = 0, oif = 0;
4747 	struct fib6_info *from;
4748 	struct dst_entry *dst;
4749 	struct rt6_info *rt;
4750 	struct sk_buff *skb;
4751 	struct rtmsg *rtm;
4752 	struct flowi6 fl6;
4753 	bool fibmatch;
4754 
4755 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4756 			  extack);
4757 	if (err < 0)
4758 		goto errout;
4759 
4760 	err = -EINVAL;
4761 	memset(&fl6, 0, sizeof(fl6));
4762 	rtm = nlmsg_data(nlh);
4763 	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
4764 	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
4765 
4766 	if (tb[RTA_SRC]) {
4767 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4768 			goto errout;
4769 
4770 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
4771 	}
4772 
4773 	if (tb[RTA_DST]) {
4774 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4775 			goto errout;
4776 
4777 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
4778 	}
4779 
4780 	if (tb[RTA_IIF])
4781 		iif = nla_get_u32(tb[RTA_IIF]);
4782 
4783 	if (tb[RTA_OIF])
4784 		oif = nla_get_u32(tb[RTA_OIF]);
4785 
4786 	if (tb[RTA_MARK])
4787 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4788 
4789 	if (tb[RTA_UID])
4790 		fl6.flowi6_uid = make_kuid(current_user_ns(),
4791 					   nla_get_u32(tb[RTA_UID]));
4792 	else
4793 		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4794 
4795 	if (iif) {
4796 		struct net_device *dev;
4797 		int flags = 0;
4798 
4799 		rcu_read_lock();
4800 
4801 		dev = dev_get_by_index_rcu(net, iif);
4802 		if (!dev) {
4803 			rcu_read_unlock();
4804 			err = -ENODEV;
4805 			goto errout;
4806 		}
4807 
4808 		fl6.flowi6_iif = iif;
4809 
4810 		if (!ipv6_addr_any(&fl6.saddr))
4811 			flags |= RT6_LOOKUP_F_HAS_SADDR;
4812 
4813 		dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
4814 
4815 		rcu_read_unlock();
4816 	} else {
4817 		fl6.flowi6_oif = oif;
4818 
4819 		dst = ip6_route_output(net, NULL, &fl6);
4820 	}
4821 
4822 
4823 	rt = container_of(dst, struct rt6_info, dst);
4824 	if (rt->dst.error) {
4825 		err = rt->dst.error;
4826 		ip6_rt_put(rt);
4827 		goto errout;
4828 	}
4829 
4830 	if (rt == net->ipv6.ip6_null_entry) {
4831 		err = rt->dst.error;
4832 		ip6_rt_put(rt);
4833 		goto errout;
4834 	}
4835 
4836 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
4837 	if (!skb) {
4838 		ip6_rt_put(rt);
4839 		err = -ENOBUFS;
4840 		goto errout;
4841 	}
4842 
4843 	skb_dst_set(skb, &rt->dst);
4844 
4845 	rcu_read_lock();
4846 	from = rcu_dereference(rt->from);
4847 
4848 	if (fibmatch)
4849 		err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
4850 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4851 				    nlh->nlmsg_seq, 0);
4852 	else
4853 		err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
4854 				    &fl6.saddr, iif, RTM_NEWROUTE,
4855 				    NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4856 				    0);
4857 	rcu_read_unlock();
4858 
4859 	if (err < 0) {
4860 		kfree_skb(skb);
4861 		goto errout;
4862 	}
4863 
4864 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
4865 errout:
4866 	return err;
4867 }
4868 
4869 void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
4870 		     unsigned int nlm_flags)
4871 {
4872 	struct sk_buff *skb;
4873 	struct net *net = info->nl_net;
4874 	u32 seq;
4875 	int err;
4876 
4877 	err = -ENOBUFS;
4878 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
4879 
4880 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
4881 	if (!skb)
4882 		goto errout;
4883 
4884 	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4885 			    event, info->portid, seq, nlm_flags);
4886 	if (err < 0) {
4887 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4888 		WARN_ON(err == -EMSGSIZE);
4889 		kfree_skb(skb);
4890 		goto errout;
4891 	}
4892 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
4893 		    info->nlh, gfp_any());
4894 	return;
4895 errout:
4896 	if (err < 0)
4897 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
4898 }
4899 
4900 static int ip6_route_dev_notify(struct notifier_block *this,
4901 				unsigned long event, void *ptr)
4902 {
4903 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
4904 	struct net *net = dev_net(dev);
4905 
4906 	if (!(dev->flags & IFF_LOOPBACK))
4907 		return NOTIFY_OK;
4908 
4909 	if (event == NETDEV_REGISTER) {
4910 		net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
4911 		net->ipv6.ip6_null_entry->dst.dev = dev;
4912 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4913 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4914 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
4915 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
4916 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
4917 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
4918 #endif
4919 	 } else if (event == NETDEV_UNREGISTER &&
4920 		    dev->reg_state != NETREG_UNREGISTERED) {
4921 		/* NETDEV_UNREGISTER could be fired for multiple times by
4922 		 * netdev_wait_allrefs(). Make sure we only call this once.
4923 		 */
4924 		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
4925 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4926 		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4927 		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
4928 #endif
4929 	}
4930 
4931 	return NOTIFY_OK;
4932 }
4933 
4934 /*
4935  *	/proc
4936  */
4937 
4938 #ifdef CONFIG_PROC_FS
4939 
4940 static const struct file_operations ipv6_route_proc_fops = {
4941 	.open		= ipv6_route_open,
4942 	.read		= seq_read,
4943 	.llseek		= seq_lseek,
4944 	.release	= seq_release_net,
4945 };
4946 
4947 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4948 {
4949 	struct net *net = (struct net *)seq->private;
4950 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
4951 		   net->ipv6.rt6_stats->fib_nodes,
4952 		   net->ipv6.rt6_stats->fib_route_nodes,
4953 		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
4954 		   net->ipv6.rt6_stats->fib_rt_entries,
4955 		   net->ipv6.rt6_stats->fib_rt_cache,
4956 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
4957 		   net->ipv6.rt6_stats->fib_discarded_routes);
4958 
4959 	return 0;
4960 }
4961 
4962 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4963 {
4964 	return single_open_net(inode, file, rt6_stats_seq_show);
4965 }
4966 
4967 static const struct file_operations rt6_stats_seq_fops = {
4968 	.open	 = rt6_stats_seq_open,
4969 	.read	 = seq_read,
4970 	.llseek	 = seq_lseek,
4971 	.release = single_release_net,
4972 };
4973 #endif	/* CONFIG_PROC_FS */
4974 
4975 #ifdef CONFIG_SYSCTL
4976 
4977 static
4978 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
4979 			      void __user *buffer, size_t *lenp, loff_t *ppos)
4980 {
4981 	struct net *net;
4982 	int delay;
4983 	if (!write)
4984 		return -EINVAL;
4985 
4986 	net = (struct net *)ctl->extra1;
4987 	delay = net->ipv6.sysctl.flush_delay;
4988 	proc_dointvec(ctl, write, buffer, lenp, ppos);
4989 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
4990 	return 0;
4991 }
4992 
4993 struct ctl_table ipv6_route_table_template[] = {
4994 	{
4995 		.procname	=	"flush",
4996 		.data		=	&init_net.ipv6.sysctl.flush_delay,
4997 		.maxlen		=	sizeof(int),
4998 		.mode		=	0200,
4999 		.proc_handler	=	ipv6_sysctl_rtcache_flush
5000 	},
5001 	{
5002 		.procname	=	"gc_thresh",
5003 		.data		=	&ip6_dst_ops_template.gc_thresh,
5004 		.maxlen		=	sizeof(int),
5005 		.mode		=	0644,
5006 		.proc_handler	=	proc_dointvec,
5007 	},
5008 	{
5009 		.procname	=	"max_size",
5010 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
5011 		.maxlen		=	sizeof(int),
5012 		.mode		=	0644,
5013 		.proc_handler	=	proc_dointvec,
5014 	},
5015 	{
5016 		.procname	=	"gc_min_interval",
5017 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
5018 		.maxlen		=	sizeof(int),
5019 		.mode		=	0644,
5020 		.proc_handler	=	proc_dointvec_jiffies,
5021 	},
5022 	{
5023 		.procname	=	"gc_timeout",
5024 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
5025 		.maxlen		=	sizeof(int),
5026 		.mode		=	0644,
5027 		.proc_handler	=	proc_dointvec_jiffies,
5028 	},
5029 	{
5030 		.procname	=	"gc_interval",
5031 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
5032 		.maxlen		=	sizeof(int),
5033 		.mode		=	0644,
5034 		.proc_handler	=	proc_dointvec_jiffies,
5035 	},
5036 	{
5037 		.procname	=	"gc_elasticity",
5038 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
5039 		.maxlen		=	sizeof(int),
5040 		.mode		=	0644,
5041 		.proc_handler	=	proc_dointvec,
5042 	},
5043 	{
5044 		.procname	=	"mtu_expires",
5045 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
5046 		.maxlen		=	sizeof(int),
5047 		.mode		=	0644,
5048 		.proc_handler	=	proc_dointvec_jiffies,
5049 	},
5050 	{
5051 		.procname	=	"min_adv_mss",
5052 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
5053 		.maxlen		=	sizeof(int),
5054 		.mode		=	0644,
5055 		.proc_handler	=	proc_dointvec,
5056 	},
5057 	{
5058 		.procname	=	"gc_min_interval_ms",
5059 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
5060 		.maxlen		=	sizeof(int),
5061 		.mode		=	0644,
5062 		.proc_handler	=	proc_dointvec_ms_jiffies,
5063 	},
5064 	{ }
5065 };
5066 
5067 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
5068 {
5069 	struct ctl_table *table;
5070 
5071 	table = kmemdup(ipv6_route_table_template,
5072 			sizeof(ipv6_route_table_template),
5073 			GFP_KERNEL);
5074 
5075 	if (table) {
5076 		table[0].data = &net->ipv6.sysctl.flush_delay;
5077 		table[0].extra1 = net;
5078 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5079 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5080 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5081 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5082 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5083 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5084 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5085 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
5086 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5087 
5088 		/* Don't export sysctls to unprivileged users */
5089 		if (net->user_ns != &init_user_ns)
5090 			table[0].procname = NULL;
5091 	}
5092 
5093 	return table;
5094 }
5095 #endif
5096 
5097 static int __net_init ip6_route_net_init(struct net *net)
5098 {
5099 	int ret = -ENOMEM;
5100 
5101 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5102 	       sizeof(net->ipv6.ip6_dst_ops));
5103 
5104 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5105 		goto out_ip6_dst_ops;
5106 
5107 	net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5108 					    sizeof(*net->ipv6.fib6_null_entry),
5109 					    GFP_KERNEL);
5110 	if (!net->ipv6.fib6_null_entry)
5111 		goto out_ip6_dst_entries;
5112 
5113 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5114 					   sizeof(*net->ipv6.ip6_null_entry),
5115 					   GFP_KERNEL);
5116 	if (!net->ipv6.ip6_null_entry)
5117 		goto out_fib6_null_entry;
5118 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5119 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5120 			 ip6_template_metrics, true);
5121 
5122 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5123 	net->ipv6.fib6_has_custom_rules = false;
5124 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5125 					       sizeof(*net->ipv6.ip6_prohibit_entry),
5126 					       GFP_KERNEL);
5127 	if (!net->ipv6.ip6_prohibit_entry)
5128 		goto out_ip6_null_entry;
5129 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5130 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5131 			 ip6_template_metrics, true);
5132 
5133 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5134 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
5135 					       GFP_KERNEL);
5136 	if (!net->ipv6.ip6_blk_hole_entry)
5137 		goto out_ip6_prohibit_entry;
5138 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5139 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5140 			 ip6_template_metrics, true);
5141 #endif
5142 
5143 	net->ipv6.sysctl.flush_delay = 0;
5144 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
5145 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5146 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5147 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5148 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5149 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5150 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5151 
5152 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
5153 
5154 	ret = 0;
5155 out:
5156 	return ret;
5157 
5158 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5159 out_ip6_prohibit_entry:
5160 	kfree(net->ipv6.ip6_prohibit_entry);
5161 out_ip6_null_entry:
5162 	kfree(net->ipv6.ip6_null_entry);
5163 #endif
5164 out_fib6_null_entry:
5165 	kfree(net->ipv6.fib6_null_entry);
5166 out_ip6_dst_entries:
5167 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5168 out_ip6_dst_ops:
5169 	goto out;
5170 }
5171 
5172 static void __net_exit ip6_route_net_exit(struct net *net)
5173 {
5174 	kfree(net->ipv6.fib6_null_entry);
5175 	kfree(net->ipv6.ip6_null_entry);
5176 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5177 	kfree(net->ipv6.ip6_prohibit_entry);
5178 	kfree(net->ipv6.ip6_blk_hole_entry);
5179 #endif
5180 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5181 }
5182 
5183 static int __net_init ip6_route_net_init_late(struct net *net)
5184 {
5185 #ifdef CONFIG_PROC_FS
5186 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
5187 	proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
5188 #endif
5189 	return 0;
5190 }
5191 
5192 static void __net_exit ip6_route_net_exit_late(struct net *net)
5193 {
5194 #ifdef CONFIG_PROC_FS
5195 	remove_proc_entry("ipv6_route", net->proc_net);
5196 	remove_proc_entry("rt6_stats", net->proc_net);
5197 #endif
5198 }
5199 
5200 static struct pernet_operations ip6_route_net_ops = {
5201 	.init = ip6_route_net_init,
5202 	.exit = ip6_route_net_exit,
5203 };
5204 
5205 static int __net_init ipv6_inetpeer_init(struct net *net)
5206 {
5207 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5208 
5209 	if (!bp)
5210 		return -ENOMEM;
5211 	inet_peer_base_init(bp);
5212 	net->ipv6.peers = bp;
5213 	return 0;
5214 }
5215 
5216 static void __net_exit ipv6_inetpeer_exit(struct net *net)
5217 {
5218 	struct inet_peer_base *bp = net->ipv6.peers;
5219 
5220 	net->ipv6.peers = NULL;
5221 	inetpeer_invalidate_tree(bp);
5222 	kfree(bp);
5223 }
5224 
5225 static struct pernet_operations ipv6_inetpeer_ops = {
5226 	.init	=	ipv6_inetpeer_init,
5227 	.exit	=	ipv6_inetpeer_exit,
5228 };
5229 
5230 static struct pernet_operations ip6_route_net_late_ops = {
5231 	.init = ip6_route_net_init_late,
5232 	.exit = ip6_route_net_exit_late,
5233 };
5234 
5235 static struct notifier_block ip6_route_dev_notifier = {
5236 	.notifier_call = ip6_route_dev_notify,
5237 	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
5238 };
5239 
5240 void __init ip6_route_init_special_entries(void)
5241 {
5242 	/* Registering of the loopback is done before this portion of code,
5243 	 * the loopback reference in rt6_info will not be taken, do it
5244 	 * manually for init_net */
5245 	init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
5246 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5247 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5248   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5249 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5250 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5251 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5252 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5253   #endif
5254 }
5255 
5256 int __init ip6_route_init(void)
5257 {
5258 	int ret;
5259 	int cpu;
5260 
5261 	ret = -ENOMEM;
5262 	ip6_dst_ops_template.kmem_cachep =
5263 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
5264 				  SLAB_HWCACHE_ALIGN, NULL);
5265 	if (!ip6_dst_ops_template.kmem_cachep)
5266 		goto out;
5267 
5268 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
5269 	if (ret)
5270 		goto out_kmem_cache;
5271 
5272 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5273 	if (ret)
5274 		goto out_dst_entries;
5275 
5276 	ret = register_pernet_subsys(&ip6_route_net_ops);
5277 	if (ret)
5278 		goto out_register_inetpeer;
5279 
5280 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5281 
5282 	ret = fib6_init();
5283 	if (ret)
5284 		goto out_register_subsys;
5285 
5286 	ret = xfrm6_init();
5287 	if (ret)
5288 		goto out_fib6_init;
5289 
5290 	ret = fib6_rules_init();
5291 	if (ret)
5292 		goto xfrm6_init;
5293 
5294 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
5295 	if (ret)
5296 		goto fib6_rules_init;
5297 
5298 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5299 				   inet6_rtm_newroute, NULL, 0);
5300 	if (ret < 0)
5301 		goto out_register_late_subsys;
5302 
5303 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5304 				   inet6_rtm_delroute, NULL, 0);
5305 	if (ret < 0)
5306 		goto out_register_late_subsys;
5307 
5308 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5309 				   inet6_rtm_getroute, NULL,
5310 				   RTNL_FLAG_DOIT_UNLOCKED);
5311 	if (ret < 0)
5312 		goto out_register_late_subsys;
5313 
5314 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
5315 	if (ret)
5316 		goto out_register_late_subsys;
5317 
5318 	for_each_possible_cpu(cpu) {
5319 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5320 
5321 		INIT_LIST_HEAD(&ul->head);
5322 		spin_lock_init(&ul->lock);
5323 	}
5324 
5325 out:
5326 	return ret;
5327 
5328 out_register_late_subsys:
5329 	rtnl_unregister_all(PF_INET6);
5330 	unregister_pernet_subsys(&ip6_route_net_late_ops);
5331 fib6_rules_init:
5332 	fib6_rules_cleanup();
5333 xfrm6_init:
5334 	xfrm6_fini();
5335 out_fib6_init:
5336 	fib6_gc_cleanup();
5337 out_register_subsys:
5338 	unregister_pernet_subsys(&ip6_route_net_ops);
5339 out_register_inetpeer:
5340 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
5341 out_dst_entries:
5342 	dst_entries_destroy(&ip6_dst_blackhole_ops);
5343 out_kmem_cache:
5344 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
5345 	goto out;
5346 }
5347 
5348 void ip6_route_cleanup(void)
5349 {
5350 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
5351 	unregister_pernet_subsys(&ip6_route_net_late_ops);
5352 	fib6_rules_cleanup();
5353 	xfrm6_fini();
5354 	fib6_gc_cleanup();
5355 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
5356 	unregister_pernet_subsys(&ip6_route_net_ops);
5357 	dst_entries_destroy(&ip6_dst_blackhole_ops);
5358 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
5359 }
5360