xref: /openbmc/linux/net/ipv6/route.c (revision bf070bb0)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <linux/jhash.h>
48 #include <net/net_namespace.h>
49 #include <net/snmp.h>
50 #include <net/ipv6.h>
51 #include <net/ip6_fib.h>
52 #include <net/ip6_route.h>
53 #include <net/ndisc.h>
54 #include <net/addrconf.h>
55 #include <net/tcp.h>
56 #include <linux/rtnetlink.h>
57 #include <net/dst.h>
58 #include <net/dst_metadata.h>
59 #include <net/xfrm.h>
60 #include <net/netevent.h>
61 #include <net/netlink.h>
62 #include <net/nexthop.h>
63 #include <net/lwtunnel.h>
64 #include <net/ip_tunnels.h>
65 #include <net/l3mdev.h>
66 #include <trace/events/fib6.h>
67 
68 #include <linux/uaccess.h>
69 
70 #ifdef CONFIG_SYSCTL
71 #include <linux/sysctl.h>
72 #endif
73 
/* Result of the next-hop neighbour check performed while scoring a route.
 * Every failure code is negative; the single success code is positive, so
 * callers in this file test "< 0" to detect a failure.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1,
};
80 
81 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
82 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
83 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
84 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void		ip6_dst_destroy(struct dst_entry *);
87 static void		ip6_dst_ifdown(struct dst_entry *,
88 				       struct net_device *dev, int how);
89 static int		 ip6_dst_gc(struct dst_ops *ops);
90 
91 static int		ip6_pkt_discard(struct sk_buff *skb);
92 static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
93 static int		ip6_pkt_prohibit(struct sk_buff *skb);
94 static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
95 static void		ip6_link_failure(struct sk_buff *skb);
96 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
97 					   struct sk_buff *skb, u32 mtu);
98 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
99 					struct sk_buff *skb);
100 static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
101 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
102 static size_t rt6_nlmsg_size(struct rt6_info *rt);
103 static int rt6_fill_node(struct net *net,
104 			 struct sk_buff *skb, struct rt6_info *rt,
105 			 struct in6_addr *dst, struct in6_addr *src,
106 			 int iif, int type, u32 portid, u32 seq,
107 			 unsigned int flags);
108 static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
109 					   struct in6_addr *daddr,
110 					   struct in6_addr *saddr);
111 
112 #ifdef CONFIG_IPV6_ROUTE_INFO
113 static struct rt6_info *rt6_add_route_info(struct net *net,
114 					   const struct in6_addr *prefix, int prefixlen,
115 					   const struct in6_addr *gwaddr,
116 					   struct net_device *dev,
117 					   unsigned int pref);
118 static struct rt6_info *rt6_get_route_info(struct net *net,
119 					   const struct in6_addr *prefix, int prefixlen,
120 					   const struct in6_addr *gwaddr,
121 					   struct net_device *dev);
122 #endif
123 
124 struct uncached_list {
125 	spinlock_t		lock;
126 	struct list_head	head;
127 };
128 
129 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
130 
131 static void rt6_uncached_list_add(struct rt6_info *rt)
132 {
133 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
134 
135 	rt->rt6i_uncached_list = ul;
136 
137 	spin_lock_bh(&ul->lock);
138 	list_add_tail(&rt->rt6i_uncached, &ul->head);
139 	spin_unlock_bh(&ul->lock);
140 }
141 
142 static void rt6_uncached_list_del(struct rt6_info *rt)
143 {
144 	if (!list_empty(&rt->rt6i_uncached)) {
145 		struct uncached_list *ul = rt->rt6i_uncached_list;
146 		struct net *net = dev_net(rt->dst.dev);
147 
148 		spin_lock_bh(&ul->lock);
149 		list_del(&rt->rt6i_uncached);
150 		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
151 		spin_unlock_bh(&ul->lock);
152 	}
153 }
154 
155 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
156 {
157 	struct net_device *loopback_dev = net->loopback_dev;
158 	int cpu;
159 
160 	if (dev == loopback_dev)
161 		return;
162 
163 	for_each_possible_cpu(cpu) {
164 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
165 		struct rt6_info *rt;
166 
167 		spin_lock_bh(&ul->lock);
168 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
169 			struct inet6_dev *rt_idev = rt->rt6i_idev;
170 			struct net_device *rt_dev = rt->dst.dev;
171 
172 			if (rt_idev->dev == dev) {
173 				rt->rt6i_idev = in6_dev_get(loopback_dev);
174 				in6_dev_put(rt_idev);
175 			}
176 
177 			if (rt_dev == dev) {
178 				rt->dst.dev = loopback_dev;
179 				dev_hold(rt->dst.dev);
180 				dev_put(rt_dev);
181 			}
182 		}
183 		spin_unlock_bh(&ul->lock);
184 	}
185 }
186 
187 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
188 {
189 	return dst_metrics_write_ptr(rt->dst.from);
190 }
191 
192 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
193 {
194 	struct rt6_info *rt = (struct rt6_info *)dst;
195 
196 	if (rt->rt6i_flags & RTF_PCPU)
197 		return rt6_pcpu_cow_metrics(rt);
198 	else if (rt->rt6i_flags & RTF_CACHE)
199 		return NULL;
200 	else
201 		return dst_cow_metrics_generic(dst, old);
202 }
203 
204 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
205 					     struct sk_buff *skb,
206 					     const void *daddr)
207 {
208 	struct in6_addr *p = &rt->rt6i_gateway;
209 
210 	if (!ipv6_addr_any(p))
211 		return (const void *) p;
212 	else if (skb)
213 		return &ipv6_hdr(skb)->daddr;
214 	return daddr;
215 }
216 
217 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
218 					  struct sk_buff *skb,
219 					  const void *daddr)
220 {
221 	struct rt6_info *rt = (struct rt6_info *) dst;
222 	struct neighbour *n;
223 
224 	daddr = choose_neigh_daddr(rt, skb, daddr);
225 	n = __ipv6_neigh_lookup(dst->dev, daddr);
226 	if (n)
227 		return n;
228 	return neigh_create(&nd_tbl, daddr, dst->dev);
229 }
230 
231 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
232 {
233 	struct net_device *dev = dst->dev;
234 	struct rt6_info *rt = (struct rt6_info *)dst;
235 
236 	daddr = choose_neigh_daddr(rt, NULL, daddr);
237 	if (!daddr)
238 		return;
239 	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
240 		return;
241 	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
242 		return;
243 	__ipv6_confirm_neigh(dev, daddr);
244 }
245 
246 static struct dst_ops ip6_dst_ops_template = {
247 	.family			=	AF_INET6,
248 	.gc			=	ip6_dst_gc,
249 	.gc_thresh		=	1024,
250 	.check			=	ip6_dst_check,
251 	.default_advmss		=	ip6_default_advmss,
252 	.mtu			=	ip6_mtu,
253 	.cow_metrics		=	ipv6_cow_metrics,
254 	.destroy		=	ip6_dst_destroy,
255 	.ifdown			=	ip6_dst_ifdown,
256 	.negative_advice	=	ip6_negative_advice,
257 	.link_failure		=	ip6_link_failure,
258 	.update_pmtu		=	ip6_rt_update_pmtu,
259 	.redirect		=	rt6_do_redirect,
260 	.local_out		=	__ip6_local_out,
261 	.neigh_lookup		=	ip6_neigh_lookup,
262 	.confirm_neigh		=	ip6_confirm_neigh,
263 };
264 
265 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
266 {
267 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
268 
269 	return mtu ? : dst->dev->mtu;
270 }
271 
272 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
273 					 struct sk_buff *skb, u32 mtu)
274 {
275 }
276 
/* dst_ops->redirect handler for blackhole dsts: deliberately a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
281 
282 static struct dst_ops ip6_dst_blackhole_ops = {
283 	.family			=	AF_INET6,
284 	.destroy		=	ip6_dst_destroy,
285 	.check			=	ip6_dst_check,
286 	.mtu			=	ip6_blackhole_mtu,
287 	.default_advmss		=	ip6_default_advmss,
288 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
289 	.redirect		=	ip6_rt_blackhole_redirect,
290 	.cow_metrics		=	dst_cow_metrics_generic,
291 	.neigh_lookup		=	ip6_neigh_lookup,
292 };
293 
294 static const u32 ip6_template_metrics[RTAX_MAX] = {
295 	[RTAX_HOPLIMIT - 1] = 0,
296 };
297 
298 static const struct rt6_info ip6_null_entry_template = {
299 	.dst = {
300 		.__refcnt	= ATOMIC_INIT(1),
301 		.__use		= 1,
302 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
303 		.error		= -ENETUNREACH,
304 		.input		= ip6_pkt_discard,
305 		.output		= ip6_pkt_discard_out,
306 	},
307 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
308 	.rt6i_protocol  = RTPROT_KERNEL,
309 	.rt6i_metric	= ~(u32) 0,
310 	.rt6i_ref	= ATOMIC_INIT(1),
311 };
312 
313 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
314 
315 static const struct rt6_info ip6_prohibit_entry_template = {
316 	.dst = {
317 		.__refcnt	= ATOMIC_INIT(1),
318 		.__use		= 1,
319 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
320 		.error		= -EACCES,
321 		.input		= ip6_pkt_prohibit,
322 		.output		= ip6_pkt_prohibit_out,
323 	},
324 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
325 	.rt6i_protocol  = RTPROT_KERNEL,
326 	.rt6i_metric	= ~(u32) 0,
327 	.rt6i_ref	= ATOMIC_INIT(1),
328 };
329 
330 static const struct rt6_info ip6_blk_hole_entry_template = {
331 	.dst = {
332 		.__refcnt	= ATOMIC_INIT(1),
333 		.__use		= 1,
334 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
335 		.error		= -EINVAL,
336 		.input		= dst_discard,
337 		.output		= dst_discard_out,
338 	},
339 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
340 	.rt6i_protocol  = RTPROT_KERNEL,
341 	.rt6i_metric	= ~(u32) 0,
342 	.rt6i_ref	= ATOMIC_INIT(1),
343 };
344 
345 #endif
346 
347 static void rt6_info_init(struct rt6_info *rt)
348 {
349 	struct dst_entry *dst = &rt->dst;
350 
351 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
352 	INIT_LIST_HEAD(&rt->rt6i_siblings);
353 	INIT_LIST_HEAD(&rt->rt6i_uncached);
354 }
355 
356 /* allocate dst with ip6_dst_ops */
357 static struct rt6_info *__ip6_dst_alloc(struct net *net,
358 					struct net_device *dev,
359 					int flags)
360 {
361 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
362 					1, DST_OBSOLETE_FORCE_CHK, flags);
363 
364 	if (rt) {
365 		rt6_info_init(rt);
366 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
367 	}
368 
369 	return rt;
370 }
371 
372 struct rt6_info *ip6_dst_alloc(struct net *net,
373 			       struct net_device *dev,
374 			       int flags)
375 {
376 	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
377 
378 	if (rt) {
379 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
380 		if (!rt->rt6i_pcpu) {
381 			dst_release_immediate(&rt->dst);
382 			return NULL;
383 		}
384 	}
385 
386 	return rt;
387 }
388 EXPORT_SYMBOL(ip6_dst_alloc);
389 
390 static void ip6_dst_destroy(struct dst_entry *dst)
391 {
392 	struct rt6_info *rt = (struct rt6_info *)dst;
393 	struct rt6_exception_bucket *bucket;
394 	struct dst_entry *from = dst->from;
395 	struct inet6_dev *idev;
396 
397 	dst_destroy_metrics_generic(dst);
398 	free_percpu(rt->rt6i_pcpu);
399 	rt6_uncached_list_del(rt);
400 
401 	idev = rt->rt6i_idev;
402 	if (idev) {
403 		rt->rt6i_idev = NULL;
404 		in6_dev_put(idev);
405 	}
406 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
407 	if (bucket) {
408 		rt->rt6i_exception_bucket = NULL;
409 		kfree(bucket);
410 	}
411 
412 	dst->from = NULL;
413 	dst_release(from);
414 }
415 
416 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
417 			   int how)
418 {
419 	struct rt6_info *rt = (struct rt6_info *)dst;
420 	struct inet6_dev *idev = rt->rt6i_idev;
421 	struct net_device *loopback_dev =
422 		dev_net(dev)->loopback_dev;
423 
424 	if (idev && idev->dev != loopback_dev) {
425 		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
426 		if (loopback_idev) {
427 			rt->rt6i_idev = loopback_idev;
428 			in6_dev_put(idev);
429 		}
430 	}
431 }
432 
433 static bool __rt6_check_expired(const struct rt6_info *rt)
434 {
435 	if (rt->rt6i_flags & RTF_EXPIRES)
436 		return time_after(jiffies, rt->dst.expires);
437 	else
438 		return false;
439 }
440 
441 static bool rt6_check_expired(const struct rt6_info *rt)
442 {
443 	if (rt->rt6i_flags & RTF_EXPIRES) {
444 		if (time_after(jiffies, rt->dst.expires))
445 			return true;
446 	} else if (rt->dst.from) {
447 		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
448 		       rt6_check_expired((struct rt6_info *)rt->dst.from);
449 	}
450 	return false;
451 }
452 
453 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
454 					     struct flowi6 *fl6, int oif,
455 					     int strict)
456 {
457 	struct rt6_info *sibling, *next_sibling;
458 	int route_choosen;
459 
460 	/* We might have already computed the hash for ICMPv6 errors. In such
461 	 * case it will always be non-zero. Otherwise now is the time to do it.
462 	 */
463 	if (!fl6->mp_hash)
464 		fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
465 
466 	route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
467 	/* Don't change the route, if route_choosen == 0
468 	 * (siblings does not include ourself)
469 	 */
470 	if (route_choosen)
471 		list_for_each_entry_safe(sibling, next_sibling,
472 				&match->rt6i_siblings, rt6i_siblings) {
473 			route_choosen--;
474 			if (route_choosen == 0) {
475 				if (rt6_score_route(sibling, oif, strict) < 0)
476 					break;
477 				match = sibling;
478 				break;
479 			}
480 		}
481 	return match;
482 }
483 
484 /*
485  *	Route lookup. rcu_read_lock() should be held.
486  */
487 
488 static inline struct rt6_info *rt6_device_match(struct net *net,
489 						    struct rt6_info *rt,
490 						    const struct in6_addr *saddr,
491 						    int oif,
492 						    int flags)
493 {
494 	struct rt6_info *local = NULL;
495 	struct rt6_info *sprt;
496 
497 	if (!oif && ipv6_addr_any(saddr))
498 		goto out;
499 
500 	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->dst.rt6_next)) {
501 		struct net_device *dev = sprt->dst.dev;
502 
503 		if (oif) {
504 			if (dev->ifindex == oif)
505 				return sprt;
506 			if (dev->flags & IFF_LOOPBACK) {
507 				if (!sprt->rt6i_idev ||
508 				    sprt->rt6i_idev->dev->ifindex != oif) {
509 					if (flags & RT6_LOOKUP_F_IFACE)
510 						continue;
511 					if (local &&
512 					    local->rt6i_idev->dev->ifindex == oif)
513 						continue;
514 				}
515 				local = sprt;
516 			}
517 		} else {
518 			if (ipv6_chk_addr(net, saddr, dev,
519 					  flags & RT6_LOOKUP_F_IFACE))
520 				return sprt;
521 		}
522 	}
523 
524 	if (oif) {
525 		if (local)
526 			return local;
527 
528 		if (flags & RT6_LOOKUP_F_IFACE)
529 			return net->ipv6.ip6_null_entry;
530 	}
531 out:
532 	return rt;
533 }
534 
535 #ifdef CONFIG_IPV6_ROUTER_PREF
536 struct __rt6_probe_work {
537 	struct work_struct work;
538 	struct in6_addr target;
539 	struct net_device *dev;
540 };
541 
542 static void rt6_probe_deferred(struct work_struct *w)
543 {
544 	struct in6_addr mcaddr;
545 	struct __rt6_probe_work *work =
546 		container_of(w, struct __rt6_probe_work, work);
547 
548 	addrconf_addr_solict_mult(&work->target, &mcaddr);
549 	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
550 	dev_put(work->dev);
551 	kfree(work);
552 }
553 
554 static void rt6_probe(struct rt6_info *rt)
555 {
556 	struct __rt6_probe_work *work;
557 	struct neighbour *neigh;
558 	/*
559 	 * Okay, this does not seem to be appropriate
560 	 * for now, however, we need to check if it
561 	 * is really so; aka Router Reachability Probing.
562 	 *
563 	 * Router Reachability Probe MUST be rate-limited
564 	 * to no more than one per minute.
565 	 */
566 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
567 		return;
568 	rcu_read_lock_bh();
569 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
570 	if (neigh) {
571 		if (neigh->nud_state & NUD_VALID)
572 			goto out;
573 
574 		work = NULL;
575 		write_lock(&neigh->lock);
576 		if (!(neigh->nud_state & NUD_VALID) &&
577 		    time_after(jiffies,
578 			       neigh->updated +
579 			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
580 			work = kmalloc(sizeof(*work), GFP_ATOMIC);
581 			if (work)
582 				__neigh_set_probe_once(neigh);
583 		}
584 		write_unlock(&neigh->lock);
585 	} else {
586 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
587 	}
588 
589 	if (work) {
590 		INIT_WORK(&work->work, rt6_probe_deferred);
591 		work->target = rt->rt6i_gateway;
592 		dev_hold(rt->dst.dev);
593 		work->dev = rt->dst.dev;
594 		schedule_work(&work->work);
595 	}
596 
597 out:
598 	rcu_read_unlock_bh();
599 }
600 #else
/* Without CONFIG_IPV6_ROUTER_PREF, probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* nothing to do */
}
604 #endif
605 
606 /*
607  * Default Router Selection (RFC 2461 6.3.6)
608  */
609 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
610 {
611 	struct net_device *dev = rt->dst.dev;
612 	if (!oif || dev->ifindex == oif)
613 		return 2;
614 	if ((dev->flags & IFF_LOOPBACK) &&
615 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
616 		return 1;
617 	return 0;
618 }
619 
620 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
621 {
622 	struct neighbour *neigh;
623 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
624 
625 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
626 	    !(rt->rt6i_flags & RTF_GATEWAY))
627 		return RT6_NUD_SUCCEED;
628 
629 	rcu_read_lock_bh();
630 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
631 	if (neigh) {
632 		read_lock(&neigh->lock);
633 		if (neigh->nud_state & NUD_VALID)
634 			ret = RT6_NUD_SUCCEED;
635 #ifdef CONFIG_IPV6_ROUTER_PREF
636 		else if (!(neigh->nud_state & NUD_FAILED))
637 			ret = RT6_NUD_SUCCEED;
638 		else
639 			ret = RT6_NUD_FAIL_PROBE;
640 #endif
641 		read_unlock(&neigh->lock);
642 	} else {
643 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
644 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
645 	}
646 	rcu_read_unlock_bh();
647 
648 	return ret;
649 }
650 
651 static int rt6_score_route(struct rt6_info *rt, int oif,
652 			   int strict)
653 {
654 	int m;
655 
656 	m = rt6_check_dev(rt, oif);
657 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
658 		return RT6_NUD_FAIL_HARD;
659 #ifdef CONFIG_IPV6_ROUTER_PREF
660 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
661 #endif
662 	if (strict & RT6_LOOKUP_F_REACHABLE) {
663 		int n = rt6_check_neigh(rt);
664 		if (n < 0)
665 			return n;
666 	}
667 	return m;
668 }
669 
670 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
671 				   int *mpri, struct rt6_info *match,
672 				   bool *do_rr)
673 {
674 	int m;
675 	bool match_do_rr = false;
676 	struct inet6_dev *idev = rt->rt6i_idev;
677 	struct net_device *dev = rt->dst.dev;
678 
679 	if (dev && !netif_carrier_ok(dev) &&
680 	    idev->cnf.ignore_routes_with_linkdown &&
681 	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
682 		goto out;
683 
684 	if (rt6_check_expired(rt))
685 		goto out;
686 
687 	m = rt6_score_route(rt, oif, strict);
688 	if (m == RT6_NUD_FAIL_DO_RR) {
689 		match_do_rr = true;
690 		m = 0; /* lowest valid score */
691 	} else if (m == RT6_NUD_FAIL_HARD) {
692 		goto out;
693 	}
694 
695 	if (strict & RT6_LOOKUP_F_REACHABLE)
696 		rt6_probe(rt);
697 
698 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
699 	if (m > *mpri) {
700 		*do_rr = match_do_rr;
701 		*mpri = m;
702 		match = rt;
703 	}
704 out:
705 	return match;
706 }
707 
708 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
709 				     struct rt6_info *leaf,
710 				     struct rt6_info *rr_head,
711 				     u32 metric, int oif, int strict,
712 				     bool *do_rr)
713 {
714 	struct rt6_info *rt, *match, *cont;
715 	int mpri = -1;
716 
717 	match = NULL;
718 	cont = NULL;
719 	for (rt = rr_head; rt; rt = rcu_dereference(rt->dst.rt6_next)) {
720 		if (rt->rt6i_metric != metric) {
721 			cont = rt;
722 			break;
723 		}
724 
725 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
726 	}
727 
728 	for (rt = leaf; rt && rt != rr_head;
729 	     rt = rcu_dereference(rt->dst.rt6_next)) {
730 		if (rt->rt6i_metric != metric) {
731 			cont = rt;
732 			break;
733 		}
734 
735 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
736 	}
737 
738 	if (match || !cont)
739 		return match;
740 
741 	for (rt = cont; rt; rt = rcu_dereference(rt->dst.rt6_next))
742 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
743 
744 	return match;
745 }
746 
747 static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
748 				   int oif, int strict)
749 {
750 	struct rt6_info *leaf = rcu_dereference(fn->leaf);
751 	struct rt6_info *match, *rt0;
752 	bool do_rr = false;
753 	int key_plen;
754 
755 	if (!leaf || leaf == net->ipv6.ip6_null_entry)
756 		return net->ipv6.ip6_null_entry;
757 
758 	rt0 = rcu_dereference(fn->rr_ptr);
759 	if (!rt0)
760 		rt0 = leaf;
761 
762 	/* Double check to make sure fn is not an intermediate node
763 	 * and fn->leaf does not points to its child's leaf
764 	 * (This might happen if all routes under fn are deleted from
765 	 * the tree and fib6_repair_tree() is called on the node.)
766 	 */
767 	key_plen = rt0->rt6i_dst.plen;
768 #ifdef CONFIG_IPV6_SUBTREES
769 	if (rt0->rt6i_src.plen)
770 		key_plen = rt0->rt6i_src.plen;
771 #endif
772 	if (fn->fn_bit != key_plen)
773 		return net->ipv6.ip6_null_entry;
774 
775 	match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
776 			     &do_rr);
777 
778 	if (do_rr) {
779 		struct rt6_info *next = rcu_dereference(rt0->dst.rt6_next);
780 
781 		/* no entries matched; do round-robin */
782 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
783 			next = leaf;
784 
785 		if (next != rt0) {
786 			spin_lock_bh(&leaf->rt6i_table->tb6_lock);
787 			/* make sure next is not being deleted from the tree */
788 			if (next->rt6i_node)
789 				rcu_assign_pointer(fn->rr_ptr, next);
790 			spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
791 		}
792 	}
793 
794 	return match ? match : net->ipv6.ip6_null_entry;
795 }
796 
797 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
798 {
799 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
800 }
801 
802 #ifdef CONFIG_IPV6_ROUTE_INFO
803 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
804 		  const struct in6_addr *gwaddr)
805 {
806 	struct net *net = dev_net(dev);
807 	struct route_info *rinfo = (struct route_info *) opt;
808 	struct in6_addr prefix_buf, *prefix;
809 	unsigned int pref;
810 	unsigned long lifetime;
811 	struct rt6_info *rt;
812 
813 	if (len < sizeof(struct route_info)) {
814 		return -EINVAL;
815 	}
816 
817 	/* Sanity check for prefix_len and length */
818 	if (rinfo->length > 3) {
819 		return -EINVAL;
820 	} else if (rinfo->prefix_len > 128) {
821 		return -EINVAL;
822 	} else if (rinfo->prefix_len > 64) {
823 		if (rinfo->length < 2) {
824 			return -EINVAL;
825 		}
826 	} else if (rinfo->prefix_len > 0) {
827 		if (rinfo->length < 1) {
828 			return -EINVAL;
829 		}
830 	}
831 
832 	pref = rinfo->route_pref;
833 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
834 		return -EINVAL;
835 
836 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
837 
838 	if (rinfo->length == 3)
839 		prefix = (struct in6_addr *)rinfo->prefix;
840 	else {
841 		/* this function is safe */
842 		ipv6_addr_prefix(&prefix_buf,
843 				 (struct in6_addr *)rinfo->prefix,
844 				 rinfo->prefix_len);
845 		prefix = &prefix_buf;
846 	}
847 
848 	if (rinfo->prefix_len == 0)
849 		rt = rt6_get_dflt_router(gwaddr, dev);
850 	else
851 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
852 					gwaddr, dev);
853 
854 	if (rt && !lifetime) {
855 		ip6_del_rt(rt);
856 		rt = NULL;
857 	}
858 
859 	if (!rt && lifetime)
860 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
861 					dev, pref);
862 	else if (rt)
863 		rt->rt6i_flags = RTF_ROUTEINFO |
864 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
865 
866 	if (rt) {
867 		if (!addrconf_finite_timeout(lifetime))
868 			rt6_clean_expires(rt);
869 		else
870 			rt6_set_expires(rt, jiffies + HZ * lifetime);
871 
872 		ip6_rt_put(rt);
873 	}
874 	return 0;
875 }
876 #endif
877 
878 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
879 					struct in6_addr *saddr)
880 {
881 	struct fib6_node *pn, *sn;
882 	while (1) {
883 		if (fn->fn_flags & RTN_TL_ROOT)
884 			return NULL;
885 		pn = rcu_dereference(fn->parent);
886 		sn = FIB6_SUBTREE(pn);
887 		if (sn && sn != fn)
888 			fn = fib6_lookup(sn, NULL, saddr);
889 		else
890 			fn = pn;
891 		if (fn->fn_flags & RTN_RTINFO)
892 			return fn;
893 	}
894 }
895 
896 static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
897 			  bool null_fallback)
898 {
899 	struct rt6_info *rt = *prt;
900 
901 	if (dst_hold_safe(&rt->dst))
902 		return true;
903 	if (null_fallback) {
904 		rt = net->ipv6.ip6_null_entry;
905 		dst_hold(&rt->dst);
906 	} else {
907 		rt = NULL;
908 	}
909 	*prt = rt;
910 	return false;
911 }
912 
913 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
914 					     struct fib6_table *table,
915 					     struct flowi6 *fl6, int flags)
916 {
917 	struct rt6_info *rt, *rt_cache;
918 	struct fib6_node *fn;
919 
920 	rcu_read_lock();
921 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
922 restart:
923 	rt = rcu_dereference(fn->leaf);
924 	if (!rt) {
925 		rt = net->ipv6.ip6_null_entry;
926 	} else {
927 		rt = rt6_device_match(net, rt, &fl6->saddr,
928 				      fl6->flowi6_oif, flags);
929 		if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
930 			rt = rt6_multipath_select(rt, fl6,
931 						  fl6->flowi6_oif, flags);
932 	}
933 	if (rt == net->ipv6.ip6_null_entry) {
934 		fn = fib6_backtrack(fn, &fl6->saddr);
935 		if (fn)
936 			goto restart;
937 	}
938 	/* Search through exception table */
939 	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
940 	if (rt_cache)
941 		rt = rt_cache;
942 
943 	if (ip6_hold_safe(net, &rt, true))
944 		dst_use_noref(&rt->dst, jiffies);
945 
946 	rcu_read_unlock();
947 
948 	trace_fib6_table_lookup(net, rt, table, fl6);
949 
950 	return rt;
951 
952 }
953 
954 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
955 				    int flags)
956 {
957 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
958 }
959 EXPORT_SYMBOL_GPL(ip6_route_lookup);
960 
961 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
962 			    const struct in6_addr *saddr, int oif, int strict)
963 {
964 	struct flowi6 fl6 = {
965 		.flowi6_oif = oif,
966 		.daddr = *daddr,
967 	};
968 	struct dst_entry *dst;
969 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
970 
971 	if (saddr) {
972 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
973 		flags |= RT6_LOOKUP_F_HAS_SADDR;
974 	}
975 
976 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
977 	if (dst->error == 0)
978 		return (struct rt6_info *) dst;
979 
980 	dst_release(dst);
981 
982 	return NULL;
983 }
984 EXPORT_SYMBOL(rt6_lookup);
985 
986 /* ip6_ins_rt is called with FREE table->tb6_lock.
987  * It takes new route entry, the addition fails by any reason the
988  * route is released.
989  * Caller must hold dst before calling it.
990  */
991 
992 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
993 			struct mx6_config *mxc,
994 			struct netlink_ext_ack *extack)
995 {
996 	int err;
997 	struct fib6_table *table;
998 
999 	table = rt->rt6i_table;
1000 	spin_lock_bh(&table->tb6_lock);
1001 	err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
1002 	spin_unlock_bh(&table->tb6_lock);
1003 
1004 	return err;
1005 }
1006 
1007 int ip6_ins_rt(struct rt6_info *rt)
1008 {
1009 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
1010 	struct mx6_config mxc = { .mx = NULL, };
1011 
1012 	/* Hold dst to account for the reference from the fib6 tree */
1013 	dst_hold(&rt->dst);
1014 	return __ip6_ins_rt(rt, &info, &mxc, NULL);
1015 }
1016 
1017 /* called with rcu_lock held */
1018 static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
1019 {
1020 	struct net_device *dev = rt->dst.dev;
1021 
1022 	if (rt->rt6i_flags & RTF_LOCAL) {
1023 		/* for copies of local routes, dst->dev needs to be the
1024 		 * device if it is a master device, the master device if
1025 		 * device is enslaved, and the loopback as the default
1026 		 */
1027 		if (netif_is_l3_slave(dev) &&
1028 		    !rt6_need_strict(&rt->rt6i_dst.addr))
1029 			dev = l3mdev_master_dev_rcu(dev);
1030 		else if (!netif_is_l3_master(dev))
1031 			dev = dev_net(dev)->loopback_dev;
1032 		/* last case is netif_is_l3_master(dev) is true in which
1033 		 * case we want dev returned to be dev
1034 		 */
1035 	}
1036 
1037 	return dev;
1038 }
1039 
1040 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
1041 					   const struct in6_addr *daddr,
1042 					   const struct in6_addr *saddr)
1043 {
1044 	struct net_device *dev;
1045 	struct rt6_info *rt;
1046 
1047 	/*
1048 	 *	Clone the route.
1049 	 */
1050 
1051 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
1052 		ort = (struct rt6_info *)ort->dst.from;
1053 
1054 	rcu_read_lock();
1055 	dev = ip6_rt_get_dev_rcu(ort);
1056 	rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1057 	rcu_read_unlock();
1058 	if (!rt)
1059 		return NULL;
1060 
1061 	ip6_rt_copy_init(rt, ort);
1062 	rt->rt6i_flags |= RTF_CACHE;
1063 	rt->rt6i_metric = 0;
1064 	rt->dst.flags |= DST_HOST;
1065 	rt->rt6i_dst.addr = *daddr;
1066 	rt->rt6i_dst.plen = 128;
1067 
1068 	if (!rt6_is_gw_or_nonexthop(ort)) {
1069 		if (ort->rt6i_dst.plen != 128 &&
1070 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1071 			rt->rt6i_flags |= RTF_ANYCAST;
1072 #ifdef CONFIG_IPV6_SUBTREES
1073 		if (rt->rt6i_src.plen && saddr) {
1074 			rt->rt6i_src.addr = *saddr;
1075 			rt->rt6i_src.plen = 128;
1076 		}
1077 #endif
1078 	}
1079 
1080 	return rt;
1081 }
1082 
1083 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1084 {
1085 	struct net_device *dev;
1086 	struct rt6_info *pcpu_rt;
1087 
1088 	rcu_read_lock();
1089 	dev = ip6_rt_get_dev_rcu(rt);
1090 	pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
1091 	rcu_read_unlock();
1092 	if (!pcpu_rt)
1093 		return NULL;
1094 	ip6_rt_copy_init(pcpu_rt, rt);
1095 	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1096 	pcpu_rt->rt6i_flags |= RTF_PCPU;
1097 	return pcpu_rt;
1098 }
1099 
1100 /* It should be called with rcu_read_lock() acquired */
1101 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1102 {
1103 	struct rt6_info *pcpu_rt, **p;
1104 
1105 	p = this_cpu_ptr(rt->rt6i_pcpu);
1106 	pcpu_rt = *p;
1107 
1108 	if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
1109 		rt6_dst_from_metrics_check(pcpu_rt);
1110 
1111 	return pcpu_rt;
1112 }
1113 
1114 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1115 {
1116 	struct rt6_info *pcpu_rt, *prev, **p;
1117 
1118 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1119 	if (!pcpu_rt) {
1120 		struct net *net = dev_net(rt->dst.dev);
1121 
1122 		dst_hold(&net->ipv6.ip6_null_entry->dst);
1123 		return net->ipv6.ip6_null_entry;
1124 	}
1125 
1126 	dst_hold(&pcpu_rt->dst);
1127 	p = this_cpu_ptr(rt->rt6i_pcpu);
1128 	prev = cmpxchg(p, NULL, pcpu_rt);
1129 	BUG_ON(prev);
1130 
1131 	rt6_dst_from_metrics_check(pcpu_rt);
1132 	return pcpu_rt;
1133 }
1134 
1135 /* Exception hash table implementation.
 *
 * rt6_exception_lock is one file-wide spinlock (not per route or per
 * table).  The functions in this file that add or remove exception
 * entries either take it with spin_lock_bh() or require their caller to
 * hold it.
 */
1137 static DEFINE_SPINLOCK(rt6_exception_lock);
1138 
1139 /* Remove rt6_ex from hash table and free the memory
1140  * Caller must hold rt6_exception_lock
1141  */
1142 static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1143 				 struct rt6_exception *rt6_ex)
1144 {
1145 	struct net *net;
1146 
1147 	if (!bucket || !rt6_ex)
1148 		return;
1149 
1150 	net = dev_net(rt6_ex->rt6i->dst.dev);
1151 	rt6_ex->rt6i->rt6i_node = NULL;
1152 	hlist_del_rcu(&rt6_ex->hlist);
1153 	rt6_release(rt6_ex->rt6i);
1154 	kfree_rcu(rt6_ex, rcu);
1155 	WARN_ON_ONCE(!bucket->depth);
1156 	bucket->depth--;
1157 	net->ipv6.rt6_stats->fib_rt_cache--;
1158 }
1159 
1160 /* Remove oldest rt6_ex in bucket and free the memory
1161  * Caller must hold rt6_exception_lock
1162  */
1163 static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1164 {
1165 	struct rt6_exception *rt6_ex, *oldest = NULL;
1166 
1167 	if (!bucket)
1168 		return;
1169 
1170 	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1171 		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1172 			oldest = rt6_ex;
1173 	}
1174 	rt6_remove_exception(bucket, oldest);
1175 }
1176 
1177 static u32 rt6_exception_hash(const struct in6_addr *dst,
1178 			      const struct in6_addr *src)
1179 {
1180 	static u32 seed __read_mostly;
1181 	u32 val;
1182 
1183 	net_get_random_once(&seed, sizeof(seed));
1184 	val = jhash(dst, sizeof(*dst), seed);
1185 
1186 #ifdef CONFIG_IPV6_SUBTREES
1187 	if (src)
1188 		val = jhash(src, sizeof(*src), val);
1189 #endif
1190 	return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1191 }
1192 
1193 /* Helper function to find the cached rt in the hash table
1194  * and update bucket pointer to point to the bucket for this
1195  * (daddr, saddr) pair
1196  * Caller must hold rt6_exception_lock
1197  */
1198 static struct rt6_exception *
1199 __rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1200 			      const struct in6_addr *daddr,
1201 			      const struct in6_addr *saddr)
1202 {
1203 	struct rt6_exception *rt6_ex;
1204 	u32 hval;
1205 
1206 	if (!(*bucket) || !daddr)
1207 		return NULL;
1208 
1209 	hval = rt6_exception_hash(daddr, saddr);
1210 	*bucket += hval;
1211 
1212 	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1213 		struct rt6_info *rt6 = rt6_ex->rt6i;
1214 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1215 
1216 #ifdef CONFIG_IPV6_SUBTREES
1217 		if (matched && saddr)
1218 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1219 #endif
1220 		if (matched)
1221 			return rt6_ex;
1222 	}
1223 	return NULL;
1224 }
1225 
1226 /* Helper function to find the cached rt in the hash table
1227  * and update bucket pointer to point to the bucket for this
1228  * (daddr, saddr) pair
1229  * Caller must hold rcu_read_lock()
1230  */
1231 static struct rt6_exception *
1232 __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1233 			 const struct in6_addr *daddr,
1234 			 const struct in6_addr *saddr)
1235 {
1236 	struct rt6_exception *rt6_ex;
1237 	u32 hval;
1238 
1239 	WARN_ON_ONCE(!rcu_read_lock_held());
1240 
1241 	if (!(*bucket) || !daddr)
1242 		return NULL;
1243 
1244 	hval = rt6_exception_hash(daddr, saddr);
1245 	*bucket += hval;
1246 
1247 	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1248 		struct rt6_info *rt6 = rt6_ex->rt6i;
1249 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1250 
1251 #ifdef CONFIG_IPV6_SUBTREES
1252 		if (matched && saddr)
1253 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1254 #endif
1255 		if (matched)
1256 			return rt6_ex;
1257 	}
1258 	return NULL;
1259 }
1260 
/* Insert the cached route @nrt into the exception table of @ort.
 *
 * If @ort is itself an RTF_CACHE or RTF_PCPU route, the route it was
 * derived from (ort->dst.from) is used as the table owner instead.
 * The whole table update runs under rt6_exception_lock.
 *
 * An existing entry with the same key is removed before the new one is
 * added; if the bucket then exceeds FIB6_MAX_DEPTH its oldest entry is
 * evicted.  On success a reference is taken on nrt->rt6i_ref, and after
 * the lock is dropped fib6_update_sernum() and fib6_force_start_gc() are
 * called.
 *
 * Returns 0 on success, -EINVAL if the owner's table has been flushed or
 * nrt->rt6i_pmtu is not below the owner's current mtu, and -ENOMEM if the
 * bucket array or the entry cannot be allocated.
 */
1261 static int rt6_insert_exception(struct rt6_info *nrt,
1262 				struct rt6_info *ort)
1263 {
1264 	struct net *net = dev_net(ort->dst.dev);
1265 	struct rt6_exception_bucket *bucket;
1266 	struct in6_addr *src_key = NULL;
1267 	struct rt6_exception *rt6_ex;
1268 	int err = 0;
1269 
1270 	/* ort can't be a cache or pcpu route */
1271 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
1272 		ort = (struct rt6_info *)ort->dst.from;
1273 	WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
1274 
1275 	spin_lock_bh(&rt6_exception_lock);
1276 
	/* Set by rt6_flush_exceptions(); never recreate a flushed table */
1277 	if (ort->exception_bucket_flushed) {
1278 		err = -EINVAL;
1279 		goto out;
1280 	}
1281 
1282 	bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1283 					lockdep_is_held(&rt6_exception_lock));
	/* First exception for this route: allocate the bucket array lazily */
1284 	if (!bucket) {
1285 		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1286 				 GFP_ATOMIC);
1287 		if (!bucket) {
1288 			err = -ENOMEM;
1289 			goto out;
1290 		}
1291 		rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1292 	}
1293 
1294 #ifdef CONFIG_IPV6_SUBTREES
1295 	/* rt6i_src.plen != 0 indicates ort is in subtree
1296 	 * and exception table is indexed by a hash of
1297 	 * both rt6i_dst and rt6i_src.
1298 	 * Otherwise, the exception table is indexed by
1299 	 * a hash of only rt6i_dst.
1300 	 */
1301 	if (ort->rt6i_src.plen)
1302 		src_key = &nrt->rt6i_src.addr;
1303 #endif
1304 
1305 	/* Update rt6i_prefsrc as it could be changed
1306 	 * in rt6_remove_prefsrc()
1307 	 */
1308 	nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
1309 	/* rt6_mtu_change() might lower mtu on ort.
1310 	 * Only insert this exception route if its mtu
1311 	 * is less than ort's mtu value.
1312 	 */
1313 	if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
1314 		err = -EINVAL;
1315 		goto out;
1316 	}
1317 
	/* The lookup also advances 'bucket' to the hashed bucket, which is
	 * the one used for removal and insertion below.
	 */
1318 	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1319 					       src_key);
1320 	if (rt6_ex)
1321 		rt6_remove_exception(bucket, rt6_ex);
1322 
	/* NOTE: if this allocation fails, an entry removed just above is
	 * not restored.
	 */
1323 	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1324 	if (!rt6_ex) {
1325 		err = -ENOMEM;
1326 		goto out;
1327 	}
1328 	rt6_ex->rt6i = nrt;
1329 	rt6_ex->stamp = jiffies;
1330 	atomic_inc(&nrt->rt6i_ref);
1331 	nrt->rt6i_node = ort->rt6i_node;
1332 	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1333 	bucket->depth++;
1334 	net->ipv6.rt6_stats->fib_rt_cache++;
1335 
1336 	if (bucket->depth > FIB6_MAX_DEPTH)
1337 		rt6_exception_remove_oldest(bucket);
1338 
1339 out:
1340 	spin_unlock_bh(&rt6_exception_lock);
1341 
1342 	/* Update fn->fn_sernum to invalidate all cached dst */
1343 	if (!err) {
1344 		fib6_update_sernum(ort);
1345 		fib6_force_start_gc(net);
1346 	}
1347 
1348 	return err;
1349 }
1350 
1351 void rt6_flush_exceptions(struct rt6_info *rt)
1352 {
1353 	struct rt6_exception_bucket *bucket;
1354 	struct rt6_exception *rt6_ex;
1355 	struct hlist_node *tmp;
1356 	int i;
1357 
1358 	spin_lock_bh(&rt6_exception_lock);
1359 	/* Prevent rt6_insert_exception() to recreate the bucket list */
1360 	rt->exception_bucket_flushed = 1;
1361 
1362 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1363 				    lockdep_is_held(&rt6_exception_lock));
1364 	if (!bucket)
1365 		goto out;
1366 
1367 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1368 		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1369 			rt6_remove_exception(bucket, rt6_ex);
1370 		WARN_ON_ONCE(bucket->depth);
1371 		bucket++;
1372 	}
1373 
1374 out:
1375 	spin_unlock_bh(&rt6_exception_lock);
1376 }
1377 
1378 /* Find cached rt in the hash table inside passed in rt
1379  * Caller has to hold rcu_read_lock()
1380  */
1381 static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1382 					   struct in6_addr *daddr,
1383 					   struct in6_addr *saddr)
1384 {
1385 	struct rt6_exception_bucket *bucket;
1386 	struct in6_addr *src_key = NULL;
1387 	struct rt6_exception *rt6_ex;
1388 	struct rt6_info *res = NULL;
1389 
1390 	bucket = rcu_dereference(rt->rt6i_exception_bucket);
1391 
1392 #ifdef CONFIG_IPV6_SUBTREES
1393 	/* rt6i_src.plen != 0 indicates rt is in subtree
1394 	 * and exception table is indexed by a hash of
1395 	 * both rt6i_dst and rt6i_src.
1396 	 * Otherwise, the exception table is indexed by
1397 	 * a hash of only rt6i_dst.
1398 	 */
1399 	if (rt->rt6i_src.plen)
1400 		src_key = saddr;
1401 #endif
1402 	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1403 
1404 	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1405 		res = rt6_ex->rt6i;
1406 
1407 	return res;
1408 }
1409 
1410 /* Remove the passed in cached rt from the hash table that contains it */
1411 int rt6_remove_exception_rt(struct rt6_info *rt)
1412 {
1413 	struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1414 	struct rt6_exception_bucket *bucket;
1415 	struct in6_addr *src_key = NULL;
1416 	struct rt6_exception *rt6_ex;
1417 	int err;
1418 
1419 	if (!from ||
1420 	    !(rt->rt6i_flags & RTF_CACHE))
1421 		return -EINVAL;
1422 
1423 	if (!rcu_access_pointer(from->rt6i_exception_bucket))
1424 		return -ENOENT;
1425 
1426 	spin_lock_bh(&rt6_exception_lock);
1427 	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1428 				    lockdep_is_held(&rt6_exception_lock));
1429 #ifdef CONFIG_IPV6_SUBTREES
1430 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1431 	 * and exception table is indexed by a hash of
1432 	 * both rt6i_dst and rt6i_src.
1433 	 * Otherwise, the exception table is indexed by
1434 	 * a hash of only rt6i_dst.
1435 	 */
1436 	if (from->rt6i_src.plen)
1437 		src_key = &rt->rt6i_src.addr;
1438 #endif
1439 	rt6_ex = __rt6_find_exception_spinlock(&bucket,
1440 					       &rt->rt6i_dst.addr,
1441 					       src_key);
1442 	if (rt6_ex) {
1443 		rt6_remove_exception(bucket, rt6_ex);
1444 		err = 0;
1445 	} else {
1446 		err = -ENOENT;
1447 	}
1448 
1449 	spin_unlock_bh(&rt6_exception_lock);
1450 	return err;
1451 }
1452 
1453 /* Find rt6_ex which contains the passed in rt cache and
1454  * refresh its stamp
1455  */
1456 static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1457 {
1458 	struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1459 	struct rt6_exception_bucket *bucket;
1460 	struct in6_addr *src_key = NULL;
1461 	struct rt6_exception *rt6_ex;
1462 
1463 	if (!from ||
1464 	    !(rt->rt6i_flags & RTF_CACHE))
1465 		return;
1466 
1467 	rcu_read_lock();
1468 	bucket = rcu_dereference(from->rt6i_exception_bucket);
1469 
1470 #ifdef CONFIG_IPV6_SUBTREES
1471 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1472 	 * and exception table is indexed by a hash of
1473 	 * both rt6i_dst and rt6i_src.
1474 	 * Otherwise, the exception table is indexed by
1475 	 * a hash of only rt6i_dst.
1476 	 */
1477 	if (from->rt6i_src.plen)
1478 		src_key = &rt->rt6i_src.addr;
1479 #endif
1480 	rt6_ex = __rt6_find_exception_rcu(&bucket,
1481 					  &rt->rt6i_dst.addr,
1482 					  src_key);
1483 	if (rt6_ex)
1484 		rt6_ex->stamp = jiffies;
1485 
1486 	rcu_read_unlock();
1487 }
1488 
1489 static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1490 {
1491 	struct rt6_exception_bucket *bucket;
1492 	struct rt6_exception *rt6_ex;
1493 	int i;
1494 
1495 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1496 					lockdep_is_held(&rt6_exception_lock));
1497 
1498 	if (bucket) {
1499 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1500 			hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1501 				rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1502 			}
1503 			bucket++;
1504 		}
1505 	}
1506 }
1507 
1508 static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
1509 {
1510 	struct rt6_exception_bucket *bucket;
1511 	struct rt6_exception *rt6_ex;
1512 	int i;
1513 
1514 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1515 					lockdep_is_held(&rt6_exception_lock));
1516 
1517 	if (bucket) {
1518 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1519 			hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1520 				struct rt6_info *entry = rt6_ex->rt6i;
1521 				/* For RTF_CACHE with rt6i_pmtu == 0
1522 				 * (i.e. a redirected route),
1523 				 * the metrics of its rt->dst.from has already
1524 				 * been updated.
1525 				 */
1526 				if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
1527 					entry->rt6i_pmtu = mtu;
1528 			}
1529 			bucket++;
1530 		}
1531 	}
1532 }
1533 
1534 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
1535 
1536 static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
1537 					struct in6_addr *gateway)
1538 {
1539 	struct rt6_exception_bucket *bucket;
1540 	struct rt6_exception *rt6_ex;
1541 	struct hlist_node *tmp;
1542 	int i;
1543 
1544 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1545 		return;
1546 
1547 	spin_lock_bh(&rt6_exception_lock);
1548 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1549 				     lockdep_is_held(&rt6_exception_lock));
1550 
1551 	if (bucket) {
1552 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1553 			hlist_for_each_entry_safe(rt6_ex, tmp,
1554 						  &bucket->chain, hlist) {
1555 				struct rt6_info *entry = rt6_ex->rt6i;
1556 
1557 				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1558 				    RTF_CACHE_GATEWAY &&
1559 				    ipv6_addr_equal(gateway,
1560 						    &entry->rt6i_gateway)) {
1561 					rt6_remove_exception(bucket, rt6_ex);
1562 				}
1563 			}
1564 			bucket++;
1565 		}
1566 	}
1567 
1568 	spin_unlock_bh(&rt6_exception_lock);
1569 }
1570 
1571 static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1572 				      struct rt6_exception *rt6_ex,
1573 				      struct fib6_gc_args *gc_args,
1574 				      unsigned long now)
1575 {
1576 	struct rt6_info *rt = rt6_ex->rt6i;
1577 
1578 	/* we are pruning and obsoleting aged-out and non gateway exceptions
1579 	 * even if others have still references to them, so that on next
1580 	 * dst_check() such references can be dropped.
1581 	 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1582 	 * expired, independently from their aging, as per RFC 8201 section 4
1583 	 */
1584 	if (!(rt->rt6i_flags & RTF_EXPIRES) &&
1585 	    time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1586 		RT6_TRACE("aging clone %p\n", rt);
1587 		rt6_remove_exception(bucket, rt6_ex);
1588 		return;
1589 	} else if (rt->rt6i_flags & RTF_GATEWAY) {
1590 		struct neighbour *neigh;
1591 		__u8 neigh_flags = 0;
1592 
1593 		neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
1594 		if (neigh) {
1595 			neigh_flags = neigh->flags;
1596 			neigh_release(neigh);
1597 		}
1598 		if (!(neigh_flags & NTF_ROUTER)) {
1599 			RT6_TRACE("purging route %p via non-router but gateway\n",
1600 				  rt);
1601 			rt6_remove_exception(bucket, rt6_ex);
1602 			return;
1603 		}
1604 	} else if (__rt6_check_expired(rt)) {
1605 		RT6_TRACE("purging expired route %p\n", rt);
1606 		rt6_remove_exception(bucket, rt6_ex);
1607 		return;
1608 	}
1609 	gc_args->more++;
1610 }
1611 
1612 void rt6_age_exceptions(struct rt6_info *rt,
1613 			struct fib6_gc_args *gc_args,
1614 			unsigned long now)
1615 {
1616 	struct rt6_exception_bucket *bucket;
1617 	struct rt6_exception *rt6_ex;
1618 	struct hlist_node *tmp;
1619 	int i;
1620 
1621 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1622 		return;
1623 
1624 	spin_lock_bh(&rt6_exception_lock);
1625 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1626 				    lockdep_is_held(&rt6_exception_lock));
1627 
1628 	if (bucket) {
1629 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1630 			hlist_for_each_entry_safe(rt6_ex, tmp,
1631 						  &bucket->chain, hlist) {
1632 				rt6_age_examine_exception(bucket, rt6_ex,
1633 							  gc_args, now);
1634 			}
1635 			bucket++;
1636 		}
1637 	}
1638 	spin_unlock_bh(&rt6_exception_lock);
1639 }
1640 
/* Policy route lookup: find the route for @fl6 in @table.
 *
 * @oif is the interface index to match (forced to 0 when @fl6 carries
 * FLOWI_FLAG_SKIP_NH_OIF).  RT6_LOOKUP_F_IFACE and
 * RT6_LOOKUP_F_IGNORE_LINKSTATE are taken from @flags; in addition
 * RT6_LOOKUP_F_REACHABLE is requested when forwarding is disabled in
 * devconf_all.
 *
 * The tree walk runs under rcu_read_lock().  After route selection the
 * chosen route's exception table is consulted and a cached route found
 * there takes precedence.  The function then returns one of:
 *   - ip6_null_entry (with a dst reference) when nothing matched;
 *   - the RTF_CACHE route found in the exception table;
 *   - a freshly allocated uncached clone, for FLOWI_FLAG_KNOWN_NH flows
 *     over a non-gateway route;
 *   - this CPU's per-cpu clone of the route, created on demand.
 * Every path calls trace_fib6_table_lookup() on the returned route.
 */
1641 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1642 			       int oif, struct flowi6 *fl6, int flags)
1643 {
1644 	struct fib6_node *fn, *saved_fn;
1645 	struct rt6_info *rt, *rt_cache;
1646 	int strict = 0;
1647 
1648 	strict |= flags & RT6_LOOKUP_F_IFACE;
1649 	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1650 	if (net->ipv6.devconf_all->forwarding == 0)
1651 		strict |= RT6_LOOKUP_F_REACHABLE;
1652 
1653 	rcu_read_lock();
1654 
1655 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	/* Remembered so the search can restart here without REACHABLE */
1656 	saved_fn = fn;
1657 
1658 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1659 		oif = 0;
1660 
1661 redo_rt6_select:
1662 	rt = rt6_select(net, fn, oif, strict);
1663 	if (rt->rt6i_nsiblings)
1664 		rt = rt6_multipath_select(rt, fl6, oif, strict);
	/* Nothing usable at this node: backtrack, and once backtracking is
	 * exhausted retry once from the original node without the
	 * reachability requirement.
	 */
1665 	if (rt == net->ipv6.ip6_null_entry) {
1666 		fn = fib6_backtrack(fn, &fl6->saddr);
1667 		if (fn)
1668 			goto redo_rt6_select;
1669 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1670 			/* also consider unreachable route */
1671 			strict &= ~RT6_LOOKUP_F_REACHABLE;
1672 			fn = saved_fn;
1673 			goto redo_rt6_select;
1674 		}
1675 	}
1676 
1677 	/*Search through exception table */
1678 	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
1679 	if (rt_cache)
1680 		rt = rt_cache;
1681 
1682 	if (rt == net->ipv6.ip6_null_entry) {
1683 		rcu_read_unlock();
1684 		dst_hold(&rt->dst);
1685 		trace_fib6_table_lookup(net, rt, table, fl6);
1686 		return rt;
1687 	} else if (rt->rt6i_flags & RTF_CACHE) {
		/* ip6_hold_safe() is given &rt and may replace it; lastuse
		 * and metrics are only touched when it reports success.
		 */
1688 		if (ip6_hold_safe(net, &rt, true)) {
1689 			dst_use_noref(&rt->dst, jiffies);
1690 			rt6_dst_from_metrics_check(rt);
1691 		}
1692 		rcu_read_unlock();
1693 		trace_fib6_table_lookup(net, rt, table, fl6);
1694 		return rt;
1695 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1696 			    !(rt->rt6i_flags & RTF_GATEWAY))) {
1697 		/* Create a RTF_CACHE clone which will not be
1698 		 * owned by the fib6 tree.  It is for the special case where
1699 		 * the daddr in the skb during the neighbor look-up is different
1700 		 * from the fl6->daddr used to look-up route here.
1701 		 */
1702 
1703 		struct rt6_info *uncached_rt;
1704 
1705 		if (ip6_hold_safe(net, &rt, true)) {
1706 			dst_use_noref(&rt->dst, jiffies);
1707 		} else {
			/* Hold failed: return whatever ip6_hold_safe() left
			 * in rt instead of cloning.
			 */
1708 			rcu_read_unlock();
1709 			uncached_rt = rt;
1710 			goto uncached_rt_out;
1711 		}
1712 		rcu_read_unlock();
1713 
1714 		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		/* Drop the reference taken by ip6_hold_safe() above */
1715 		dst_release(&rt->dst);
1716 
1717 		if (uncached_rt) {
1718 			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1719 			 * No need for another dst_hold()
1720 			 */
1721 			rt6_uncached_list_add(uncached_rt);
1722 			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1723 		} else {
1724 			uncached_rt = net->ipv6.ip6_null_entry;
1725 			dst_hold(&uncached_rt->dst);
1726 		}
1727 
1728 uncached_rt_out:
1729 		trace_fib6_table_lookup(net, uncached_rt, table, fl6);
1730 		return uncached_rt;
1731 
1732 	} else {
1733 		/* Get a percpu copy */
1734 
1735 		struct rt6_info *pcpu_rt;
1736 
1737 		dst_use_noref(&rt->dst, jiffies);
		/* Bottom halves stay disabled across the per-cpu slot lookup
		 * and the possible creation of the clone.
		 */
1738 		local_bh_disable();
1739 		pcpu_rt = rt6_get_pcpu_route(rt);
1740 
1741 		if (!pcpu_rt) {
1742 			/* atomic_inc_not_zero() is needed when using rcu */
1743 			if (atomic_inc_not_zero(&rt->rt6i_ref)) {
1744 				/* No dst_hold() on rt is needed because grabbing
1745 				 * rt->rt6i_ref makes sure rt can't be released.
1746 				 */
1747 				pcpu_rt = rt6_make_pcpu_route(rt);
1748 				rt6_release(rt);
1749 			} else {
1750 				/* rt is already removed from tree */
1751 				pcpu_rt = net->ipv6.ip6_null_entry;
1752 				dst_hold(&pcpu_rt->dst);
1753 			}
1754 		}
1755 		local_bh_enable();
1756 		rcu_read_unlock();
1757 		trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
1758 		return pcpu_rt;
1759 	}
1760 }
1761 EXPORT_SYMBOL_GPL(ip6_pol_route);
1762 
1763 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1764 					    struct flowi6 *fl6, int flags)
1765 {
1766 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1767 }
1768 
1769 struct dst_entry *ip6_route_input_lookup(struct net *net,
1770 					 struct net_device *dev,
1771 					 struct flowi6 *fl6, int flags)
1772 {
1773 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1774 		flags |= RT6_LOOKUP_F_IFACE;
1775 
1776 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1777 }
1778 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1779 
1780 static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1781 				  struct flow_keys *keys)
1782 {
1783 	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1784 	const struct ipv6hdr *key_iph = outer_iph;
1785 	const struct ipv6hdr *inner_iph;
1786 	const struct icmp6hdr *icmph;
1787 	struct ipv6hdr _inner_iph;
1788 
1789 	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1790 		goto out;
1791 
1792 	icmph = icmp6_hdr(skb);
1793 	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1794 	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1795 	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1796 	    icmph->icmp6_type != ICMPV6_PARAMPROB)
1797 		goto out;
1798 
1799 	inner_iph = skb_header_pointer(skb,
1800 				       skb_transport_offset(skb) + sizeof(*icmph),
1801 				       sizeof(_inner_iph), &_inner_iph);
1802 	if (!inner_iph)
1803 		goto out;
1804 
1805 	key_iph = inner_iph;
1806 out:
1807 	memset(keys, 0, sizeof(*keys));
1808 	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1809 	keys->addrs.v6addrs.src = key_iph->saddr;
1810 	keys->addrs.v6addrs.dst = key_iph->daddr;
1811 	keys->tags.flow_label = ip6_flowinfo(key_iph);
1812 	keys->basic.ip_proto = key_iph->nexthdr;
1813 }
1814 
1815 /* if skb is set it will be used and fl6 can be NULL */
1816 u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
1817 {
1818 	struct flow_keys hash_keys;
1819 
1820 	if (skb) {
1821 		ip6_multipath_l3_keys(skb, &hash_keys);
1822 		return flow_hash_from_keys(&hash_keys);
1823 	}
1824 
1825 	return get_hash_from_flowi6(fl6);
1826 }
1827 
1828 void ip6_route_input(struct sk_buff *skb)
1829 {
1830 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1831 	struct net *net = dev_net(skb->dev);
1832 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1833 	struct ip_tunnel_info *tun_info;
1834 	struct flowi6 fl6 = {
1835 		.flowi6_iif = skb->dev->ifindex,
1836 		.daddr = iph->daddr,
1837 		.saddr = iph->saddr,
1838 		.flowlabel = ip6_flowinfo(iph),
1839 		.flowi6_mark = skb->mark,
1840 		.flowi6_proto = iph->nexthdr,
1841 	};
1842 
1843 	tun_info = skb_tunnel_info(skb);
1844 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1845 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1846 	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
1847 		fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
1848 	skb_dst_drop(skb);
1849 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1850 }
1851 
1852 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1853 					     struct flowi6 *fl6, int flags)
1854 {
1855 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1856 }
1857 
1858 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1859 					 struct flowi6 *fl6, int flags)
1860 {
1861 	bool any_src;
1862 
1863 	if (rt6_need_strict(&fl6->daddr)) {
1864 		struct dst_entry *dst;
1865 
1866 		dst = l3mdev_link_scope_lookup(net, fl6);
1867 		if (dst)
1868 			return dst;
1869 	}
1870 
1871 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1872 
1873 	any_src = ipv6_addr_any(&fl6->saddr);
1874 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1875 	    (fl6->flowi6_oif && any_src))
1876 		flags |= RT6_LOOKUP_F_IFACE;
1877 
1878 	if (!any_src)
1879 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1880 	else if (sk)
1881 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1882 
1883 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1884 }
1885 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1886 
1887 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1888 {
1889 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1890 	struct net_device *loopback_dev = net->loopback_dev;
1891 	struct dst_entry *new = NULL;
1892 
1893 	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
1894 		       DST_OBSOLETE_DEAD, 0);
1895 	if (rt) {
1896 		rt6_info_init(rt);
1897 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
1898 
1899 		new = &rt->dst;
1900 		new->__use = 1;
1901 		new->input = dst_discard;
1902 		new->output = dst_discard_out;
1903 
1904 		dst_copy_metrics(new, &ort->dst);
1905 
1906 		rt->rt6i_idev = in6_dev_get(loopback_dev);
1907 		rt->rt6i_gateway = ort->rt6i_gateway;
1908 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1909 		rt->rt6i_metric = 0;
1910 
1911 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1912 #ifdef CONFIG_IPV6_SUBTREES
1913 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1914 #endif
1915 	}
1916 
1917 	dst_release(dst_orig);
1918 	return new ? new : ERR_PTR(-ENOMEM);
1919 }
1920 
1921 /*
1922  *	Destination cache support functions
1923  */
1924 
1925 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1926 {
1927 	if (rt->dst.from &&
1928 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1929 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1930 }
1931 
1932 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1933 {
1934 	u32 rt_cookie = 0;
1935 
1936 	if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
1937 		return NULL;
1938 
1939 	if (rt6_check_expired(rt))
1940 		return NULL;
1941 
1942 	return &rt->dst;
1943 }
1944 
1945 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1946 {
1947 	if (!__rt6_check_expired(rt) &&
1948 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1949 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1950 		return &rt->dst;
1951 	else
1952 		return NULL;
1953 }
1954 
1955 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1956 {
1957 	struct rt6_info *rt;
1958 
1959 	rt = (struct rt6_info *) dst;
1960 
1961 	/* All IPV6 dsts are created with ->obsolete set to the value
1962 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1963 	 * into this function always.
1964 	 */
1965 
1966 	rt6_dst_from_metrics_check(rt);
1967 
1968 	if (rt->rt6i_flags & RTF_PCPU ||
1969 	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
1970 		return rt6_dst_from_check(rt, cookie);
1971 	else
1972 		return rt6_check(rt, cookie);
1973 }
1974 
1975 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1976 {
1977 	struct rt6_info *rt = (struct rt6_info *) dst;
1978 
1979 	if (rt) {
1980 		if (rt->rt6i_flags & RTF_CACHE) {
1981 			if (rt6_check_expired(rt)) {
1982 				ip6_del_rt(rt);
1983 				dst = NULL;
1984 			}
1985 		} else {
1986 			dst_release(dst);
1987 			dst = NULL;
1988 		}
1989 	}
1990 	return dst;
1991 }
1992 
1993 static void ip6_link_failure(struct sk_buff *skb)
1994 {
1995 	struct rt6_info *rt;
1996 
1997 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1998 
1999 	rt = (struct rt6_info *) skb_dst(skb);
2000 	if (rt) {
2001 		if (rt->rt6i_flags & RTF_CACHE) {
2002 			if (dst_hold_safe(&rt->dst))
2003 				ip6_del_rt(rt);
2004 		} else {
2005 			struct fib6_node *fn;
2006 
2007 			rcu_read_lock();
2008 			fn = rcu_dereference(rt->rt6i_node);
2009 			if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2010 				fn->fn_sernum = -1;
2011 			rcu_read_unlock();
2012 		}
2013 	}
2014 }
2015 
2016 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2017 {
2018 	struct net *net = dev_net(rt->dst.dev);
2019 
2020 	rt->rt6i_flags |= RTF_MODIFIED;
2021 	rt->rt6i_pmtu = mtu;
2022 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2023 }
2024 
2025 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2026 {
2027 	return !(rt->rt6i_flags & RTF_CACHE) &&
2028 		(rt->rt6i_flags & RTF_PCPU ||
2029 		 rcu_access_pointer(rt->rt6i_node));
2030 }
2031 
2032 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2033 				 const struct ipv6hdr *iph, u32 mtu)
2034 {
2035 	const struct in6_addr *daddr, *saddr;
2036 	struct rt6_info *rt6 = (struct rt6_info *)dst;
2037 
2038 	if (rt6->rt6i_flags & RTF_LOCAL)
2039 		return;
2040 
2041 	if (dst_metric_locked(dst, RTAX_MTU))
2042 		return;
2043 
2044 	if (iph) {
2045 		daddr = &iph->daddr;
2046 		saddr = &iph->saddr;
2047 	} else if (sk) {
2048 		daddr = &sk->sk_v6_daddr;
2049 		saddr = &inet6_sk(sk)->saddr;
2050 	} else {
2051 		daddr = NULL;
2052 		saddr = NULL;
2053 	}
2054 	dst_confirm_neigh(dst, daddr);
2055 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2056 	if (mtu >= dst_mtu(dst))
2057 		return;
2058 
2059 	if (!rt6_cache_allowed_for_pmtu(rt6)) {
2060 		rt6_do_update_pmtu(rt6, mtu);
2061 		/* update rt6_ex->stamp for cache */
2062 		if (rt6->rt6i_flags & RTF_CACHE)
2063 			rt6_update_exception_stamp_rt(rt6);
2064 	} else if (daddr) {
2065 		struct rt6_info *nrt6;
2066 
2067 		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
2068 		if (nrt6) {
2069 			rt6_do_update_pmtu(nrt6, mtu);
2070 			if (rt6_insert_exception(nrt6, rt6))
2071 				dst_release_immediate(&nrt6->dst);
2072 		}
2073 	}
2074 }
2075 
2076 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2077 			       struct sk_buff *skb, u32 mtu)
2078 {
2079 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2080 }
2081 
2082 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2083 		     int oif, u32 mark, kuid_t uid)
2084 {
2085 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2086 	struct dst_entry *dst;
2087 	struct flowi6 fl6;
2088 
2089 	memset(&fl6, 0, sizeof(fl6));
2090 	fl6.flowi6_oif = oif;
2091 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
2092 	fl6.daddr = iph->daddr;
2093 	fl6.saddr = iph->saddr;
2094 	fl6.flowlabel = ip6_flowinfo(iph);
2095 	fl6.flowi6_uid = uid;
2096 
2097 	dst = ip6_route_output(net, NULL, &fl6);
2098 	if (!dst->error)
2099 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
2100 	dst_release(dst);
2101 }
2102 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2103 
2104 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2105 {
2106 	struct dst_entry *dst;
2107 
2108 	ip6_update_pmtu(skb, sock_net(sk), mtu,
2109 			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
2110 
2111 	dst = __sk_dst_get(sk);
2112 	if (!dst || !dst->obsolete ||
2113 	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2114 		return;
2115 
2116 	bh_lock_sock(sk);
2117 	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2118 		ip6_datagram_dst_update(sk, false);
2119 	bh_unlock_sock(sk);
2120 }
2121 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2122 
2123 /* Handle redirects */
/* Flow key extended with the address of the router that sent the
 * redirect.  @fl6 must stay the first member: __ip6_route_redirect()
 * receives a pointer to it and casts that pointer back to
 * struct ip6rd_flowi to reach @gateway.
 */
2124 struct ip6rd_flowi {
2125 	struct flowi6 fl6;
2126 	struct in6_addr gateway;
2127 };
2128 
2129 static struct rt6_info *__ip6_route_redirect(struct net *net,
2130 					     struct fib6_table *table,
2131 					     struct flowi6 *fl6,
2132 					     int flags)
2133 {
2134 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2135 	struct rt6_info *rt, *rt_cache;
2136 	struct fib6_node *fn;
2137 
2138 	/* Get the "current" route for this destination and
2139 	 * check if the redirect has come from appropriate router.
2140 	 *
2141 	 * RFC 4861 specifies that redirects should only be
2142 	 * accepted if they come from the nexthop to the target.
2143 	 * Due to the way the routes are chosen, this notion
2144 	 * is a bit fuzzy and one might need to check all possible
2145 	 * routes.
2146 	 */
2147 
2148 	rcu_read_lock();
2149 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2150 restart:
2151 	for_each_fib6_node_rt_rcu(fn) {
2152 		if (rt6_check_expired(rt))
2153 			continue;
2154 		if (rt->dst.error)
2155 			break;
2156 		if (!(rt->rt6i_flags & RTF_GATEWAY))
2157 			continue;
2158 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
2159 			continue;
2160 		/* rt_cache's gateway might be different from its 'parent'
2161 		 * in the case of an ip redirect.
2162 		 * So we keep searching in the exception table if the gateway
2163 		 * is different.
2164 		 */
2165 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
2166 			rt_cache = rt6_find_cached_rt(rt,
2167 						      &fl6->daddr,
2168 						      &fl6->saddr);
2169 			if (rt_cache &&
2170 			    ipv6_addr_equal(&rdfl->gateway,
2171 					    &rt_cache->rt6i_gateway)) {
2172 				rt = rt_cache;
2173 				break;
2174 			}
2175 			continue;
2176 		}
2177 		break;
2178 	}
2179 
2180 	if (!rt)
2181 		rt = net->ipv6.ip6_null_entry;
2182 	else if (rt->dst.error) {
2183 		rt = net->ipv6.ip6_null_entry;
2184 		goto out;
2185 	}
2186 
2187 	if (rt == net->ipv6.ip6_null_entry) {
2188 		fn = fib6_backtrack(fn, &fl6->saddr);
2189 		if (fn)
2190 			goto restart;
2191 	}
2192 
2193 out:
2194 	ip6_hold_safe(net, &rt, true);
2195 
2196 	rcu_read_unlock();
2197 
2198 	trace_fib6_table_lookup(net, rt, table, fl6);
2199 	return rt;
2200 };
2201 
2202 static struct dst_entry *ip6_route_redirect(struct net *net,
2203 					const struct flowi6 *fl6,
2204 					const struct in6_addr *gateway)
2205 {
2206 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2207 	struct ip6rd_flowi rdfl;
2208 
2209 	rdfl.fl6 = *fl6;
2210 	rdfl.gateway = *gateway;
2211 
2212 	return fib6_rule_lookup(net, &rdfl.fl6,
2213 				flags, __ip6_route_redirect);
2214 }
2215 
2216 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2217 		  kuid_t uid)
2218 {
2219 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2220 	struct dst_entry *dst;
2221 	struct flowi6 fl6;
2222 
2223 	memset(&fl6, 0, sizeof(fl6));
2224 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
2225 	fl6.flowi6_oif = oif;
2226 	fl6.flowi6_mark = mark;
2227 	fl6.daddr = iph->daddr;
2228 	fl6.saddr = iph->saddr;
2229 	fl6.flowlabel = ip6_flowinfo(iph);
2230 	fl6.flowi6_uid = uid;
2231 
2232 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
2233 	rt6_do_redirect(dst, NULL, skb);
2234 	dst_release(dst);
2235 }
2236 EXPORT_SYMBOL_GPL(ip6_redirect);
2237 
2238 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2239 			    u32 mark)
2240 {
2241 	const struct ipv6hdr *iph = ipv6_hdr(skb);
2242 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2243 	struct dst_entry *dst;
2244 	struct flowi6 fl6;
2245 
2246 	memset(&fl6, 0, sizeof(fl6));
2247 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
2248 	fl6.flowi6_oif = oif;
2249 	fl6.flowi6_mark = mark;
2250 	fl6.daddr = msg->dest;
2251 	fl6.saddr = iph->daddr;
2252 	fl6.flowi6_uid = sock_net_uid(net, NULL);
2253 
2254 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
2255 	rt6_do_redirect(dst, NULL, skb);
2256 	dst_release(dst);
2257 }
2258 
2259 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2260 {
2261 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2262 		     sk->sk_uid);
2263 }
2264 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2265 
2266 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
2267 {
2268 	struct net_device *dev = dst->dev;
2269 	unsigned int mtu = dst_mtu(dst);
2270 	struct net *net = dev_net(dev);
2271 
2272 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2273 
2274 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2275 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
2276 
2277 	/*
2278 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2279 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2280 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
2281 	 * rely only on pmtu discovery"
2282 	 */
2283 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2284 		mtu = IPV6_MAXPLEN;
2285 	return mtu;
2286 }
2287 
2288 static unsigned int ip6_mtu(const struct dst_entry *dst)
2289 {
2290 	const struct rt6_info *rt = (const struct rt6_info *)dst;
2291 	unsigned int mtu = rt->rt6i_pmtu;
2292 	struct inet6_dev *idev;
2293 
2294 	if (mtu)
2295 		goto out;
2296 
2297 	mtu = dst_metric_raw(dst, RTAX_MTU);
2298 	if (mtu)
2299 		goto out;
2300 
2301 	mtu = IPV6_MIN_MTU;
2302 
2303 	rcu_read_lock();
2304 	idev = __in6_dev_get(dst->dev);
2305 	if (idev)
2306 		mtu = idev->cnf.mtu6;
2307 	rcu_read_unlock();
2308 
2309 out:
2310 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2311 
2312 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
2313 }
2314 
2315 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
2316 				  struct flowi6 *fl6)
2317 {
2318 	struct dst_entry *dst;
2319 	struct rt6_info *rt;
2320 	struct inet6_dev *idev = in6_dev_get(dev);
2321 	struct net *net = dev_net(dev);
2322 
2323 	if (unlikely(!idev))
2324 		return ERR_PTR(-ENODEV);
2325 
2326 	rt = ip6_dst_alloc(net, dev, 0);
2327 	if (unlikely(!rt)) {
2328 		in6_dev_put(idev);
2329 		dst = ERR_PTR(-ENOMEM);
2330 		goto out;
2331 	}
2332 
2333 	rt->dst.flags |= DST_HOST;
2334 	rt->dst.output  = ip6_output;
2335 	rt->rt6i_gateway  = fl6->daddr;
2336 	rt->rt6i_dst.addr = fl6->daddr;
2337 	rt->rt6i_dst.plen = 128;
2338 	rt->rt6i_idev     = idev;
2339 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
2340 
2341 	/* Add this dst into uncached_list so that rt6_ifdown() can
2342 	 * do proper release of the net_device
2343 	 */
2344 	rt6_uncached_list_add(rt);
2345 	atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
2346 
2347 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2348 
2349 out:
2350 	return dst;
2351 }
2352 
2353 static int ip6_dst_gc(struct dst_ops *ops)
2354 {
2355 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
2356 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2357 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2358 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2359 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2360 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
2361 	int entries;
2362 
2363 	entries = dst_entries_get_fast(ops);
2364 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
2365 	    entries <= rt_max_size)
2366 		goto out;
2367 
2368 	net->ipv6.ip6_rt_gc_expire++;
2369 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
2370 	entries = dst_entries_get_slow(ops);
2371 	if (entries < ops->gc_thresh)
2372 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
2373 out:
2374 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
2375 	return entries > rt_max_size;
2376 }
2377 
2378 static int ip6_convert_metrics(struct mx6_config *mxc,
2379 			       const struct fib6_config *cfg)
2380 {
2381 	struct net *net = cfg->fc_nlinfo.nl_net;
2382 	bool ecn_ca = false;
2383 	struct nlattr *nla;
2384 	int remaining;
2385 	u32 *mp;
2386 
2387 	if (!cfg->fc_mx)
2388 		return 0;
2389 
2390 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
2391 	if (unlikely(!mp))
2392 		return -ENOMEM;
2393 
2394 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
2395 		int type = nla_type(nla);
2396 		u32 val;
2397 
2398 		if (!type)
2399 			continue;
2400 		if (unlikely(type > RTAX_MAX))
2401 			goto err;
2402 
2403 		if (type == RTAX_CC_ALGO) {
2404 			char tmp[TCP_CA_NAME_MAX];
2405 
2406 			nla_strlcpy(tmp, nla, sizeof(tmp));
2407 			val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
2408 			if (val == TCP_CA_UNSPEC)
2409 				goto err;
2410 		} else {
2411 			val = nla_get_u32(nla);
2412 		}
2413 		if (type == RTAX_HOPLIMIT && val > 255)
2414 			val = 255;
2415 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
2416 			goto err;
2417 
2418 		mp[type - 1] = val;
2419 		__set_bit(type - 1, mxc->mx_valid);
2420 	}
2421 
2422 	if (ecn_ca) {
2423 		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
2424 		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
2425 	}
2426 
2427 	mxc->mx = mp;
2428 	return 0;
2429  err:
2430 	kfree(mp);
2431 	return -EINVAL;
2432 }
2433 
2434 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2435 					    struct fib6_config *cfg,
2436 					    const struct in6_addr *gw_addr)
2437 {
2438 	struct flowi6 fl6 = {
2439 		.flowi6_oif = cfg->fc_ifindex,
2440 		.daddr = *gw_addr,
2441 		.saddr = cfg->fc_prefsrc,
2442 	};
2443 	struct fib6_table *table;
2444 	struct rt6_info *rt;
2445 	int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
2446 
2447 	table = fib6_get_table(net, cfg->fc_table);
2448 	if (!table)
2449 		return NULL;
2450 
2451 	if (!ipv6_addr_any(&cfg->fc_prefsrc))
2452 		flags |= RT6_LOOKUP_F_HAS_SADDR;
2453 
2454 	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
2455 
2456 	/* if table lookup failed, fall back to full lookup */
2457 	if (rt == net->ipv6.ip6_null_entry) {
2458 		ip6_rt_put(rt);
2459 		rt = NULL;
2460 	}
2461 
2462 	return rt;
2463 }
2464 
/*
 * Build an rt6_info from a route configuration without inserting it into
 * the FIB.
 *
 * @cfg:    route request.  fc_metric and fc_protocol are replaced by
 *          defaults when they are zero/unspecified, and on success
 *          fc_nlinfo.nl_net is overwritten with the namespace of the
 *          device the route ends up on.
 * @extack: receives a message for several of the validation failures.
 *
 * On success the returned route carries the device and inet6_dev references
 * acquired here (stored in dst.dev and rt6i_idev).  On failure an ERR_PTR()
 * is returned and the device, inet6_dev and route acquired so far are
 * released at the "out" label.
 */
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
					      struct netlink_ext_ack *extack)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;
	int err = -EINVAL;

	/* RTF_PCPU is an internal flag; can not be set by userspace */
	if (cfg->fc_flags & RTF_PCPU) {
		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
		goto out;
	}

	/* RTF_CACHE is an internal flag; can not be set by userspace */
	if (cfg->fc_flags & RTF_CACHE) {
		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
		goto out;
	}

	if (cfg->fc_dst_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid prefix length");
		goto out;
	}
	if (cfg->fc_src_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid source address length");
		goto out;
	}
#ifndef CONFIG_IPV6_SUBTREES
	if (cfg->fc_src_len) {
		NL_SET_ERR_MSG(extack,
			       "Specifying source address requires IPV6_SUBTREES to be enabled");
		goto out;
	}
#endif
	/* An explicit interface: take references on it and its inet6_dev. */
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	/* Without NLM_F_CREATE an existing table is preferred, but a missing
	 * one is still created (with a warning).
	 */
	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	/* Routes not flagged RTF_ADDRCONF are allocated with DST_NOCOUNT. */
	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the destination type / RTF_LOCAL. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	/* Optional lightweight tunnel state; it may take over input and/or
	 * output, remembering the handlers it replaces.
	 */
	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate, extack);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* The route type selects the error code and drop handlers. */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_out;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		/* Reject routes skip the gateway and prefsrc handling. */
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0)) {
			NL_SET_ERR_MSG(extack, "Invalid gateway address");
			goto out;
		}
		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt = NULL;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			   We allow IPv4-mapped nexthops to support RFC4798-type
			   addressing
			 */
			if (!(gwa_type & (IPV6_ADDR_UNICAST |
					  IPV6_ADDR_MAPPED))) {
				NL_SET_ERR_MSG(extack,
					       "Invalid gateway address");
				goto out;
			}

			/* First try the route's own table; discard the result
			 * if it is itself a gateway route or sits on a
			 * different device than the one requested.
			 */
			if (cfg->fc_table) {
				grt = ip6_nh_lookup_table(net, cfg, gw_addr);

				if (grt) {
					if (grt->rt6i_flags & RTF_GATEWAY ||
					    (dev && dev != grt->dst.dev)) {
						ip6_rt_put(grt);
						grt = NULL;
					}
				}
			}

			/* Otherwise fall back to a full lookup. */
			if (!grt)
				grt = rt6_lookup(net, gw_addr, NULL,
						 cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* No device given: inherit the one the gateway
				 * is reached through, with our own references.
				 */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* The gateway must not itself be behind a gateway. */
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev) {
			NL_SET_ERR_MSG(extack, "Egress device not specified");
			goto out;
		} else if (dev->flags & IFF_LOOPBACK) {
			NL_SET_ERR_MSG(extack,
				       "Egress device can not be loopback device for this route");
			goto out;
		}
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	/* A preferred source address must pass ipv6_chk_addr() for dev. */
	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			NL_SET_ERR_MSG(extack, "Invalid source address");
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* The dev/idev references held above now belong to the route. */
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return rt;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_release_immediate(&rt->dst);

	return ERR_PTR(err);
}
2760 
2761 int ip6_route_add(struct fib6_config *cfg,
2762 		  struct netlink_ext_ack *extack)
2763 {
2764 	struct mx6_config mxc = { .mx = NULL, };
2765 	struct rt6_info *rt;
2766 	int err;
2767 
2768 	rt = ip6_route_info_create(cfg, extack);
2769 	if (IS_ERR(rt)) {
2770 		err = PTR_ERR(rt);
2771 		rt = NULL;
2772 		goto out;
2773 	}
2774 
2775 	err = ip6_convert_metrics(&mxc, cfg);
2776 	if (err)
2777 		goto out;
2778 
2779 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
2780 
2781 	kfree(mxc.mx);
2782 
2783 	return err;
2784 out:
2785 	if (rt)
2786 		dst_release_immediate(&rt->dst);
2787 
2788 	return err;
2789 }
2790 
2791 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2792 {
2793 	int err;
2794 	struct fib6_table *table;
2795 	struct net *net = dev_net(rt->dst.dev);
2796 
2797 	if (rt == net->ipv6.ip6_null_entry) {
2798 		err = -ENOENT;
2799 		goto out;
2800 	}
2801 
2802 	table = rt->rt6i_table;
2803 	spin_lock_bh(&table->tb6_lock);
2804 	err = fib6_del(rt, info);
2805 	spin_unlock_bh(&table->tb6_lock);
2806 
2807 out:
2808 	ip6_rt_put(rt);
2809 	return err;
2810 }
2811 
2812 int ip6_del_rt(struct rt6_info *rt)
2813 {
2814 	struct nl_info info = {
2815 		.nl_net = dev_net(rt->dst.dev),
2816 	};
2817 	return __ip6_del_rt(rt, &info);
2818 }
2819 
2820 static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2821 {
2822 	struct nl_info *info = &cfg->fc_nlinfo;
2823 	struct net *net = info->nl_net;
2824 	struct sk_buff *skb = NULL;
2825 	struct fib6_table *table;
2826 	int err = -ENOENT;
2827 
2828 	if (rt == net->ipv6.ip6_null_entry)
2829 		goto out_put;
2830 	table = rt->rt6i_table;
2831 	spin_lock_bh(&table->tb6_lock);
2832 
2833 	if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2834 		struct rt6_info *sibling, *next_sibling;
2835 
2836 		/* prefer to send a single notification with all hops */
2837 		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
2838 		if (skb) {
2839 			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2840 
2841 			if (rt6_fill_node(net, skb, rt,
2842 					  NULL, NULL, 0, RTM_DELROUTE,
2843 					  info->portid, seq, 0) < 0) {
2844 				kfree_skb(skb);
2845 				skb = NULL;
2846 			} else
2847 				info->skip_notify = 1;
2848 		}
2849 
2850 		list_for_each_entry_safe(sibling, next_sibling,
2851 					 &rt->rt6i_siblings,
2852 					 rt6i_siblings) {
2853 			err = fib6_del(sibling, info);
2854 			if (err)
2855 				goto out_unlock;
2856 		}
2857 	}
2858 
2859 	err = fib6_del(rt, info);
2860 out_unlock:
2861 	spin_unlock_bh(&table->tb6_lock);
2862 out_put:
2863 	ip6_rt_put(rt);
2864 
2865 	if (skb) {
2866 		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2867 			    info->nlh, gfp_any());
2868 	}
2869 	return err;
2870 }
2871 
2872 static int ip6_route_del(struct fib6_config *cfg,
2873 			 struct netlink_ext_ack *extack)
2874 {
2875 	struct rt6_info *rt, *rt_cache;
2876 	struct fib6_table *table;
2877 	struct fib6_node *fn;
2878 	int err = -ESRCH;
2879 
2880 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2881 	if (!table) {
2882 		NL_SET_ERR_MSG(extack, "FIB table does not exist");
2883 		return err;
2884 	}
2885 
2886 	rcu_read_lock();
2887 
2888 	fn = fib6_locate(&table->tb6_root,
2889 			 &cfg->fc_dst, cfg->fc_dst_len,
2890 			 &cfg->fc_src, cfg->fc_src_len,
2891 			 !(cfg->fc_flags & RTF_CACHE));
2892 
2893 	if (fn) {
2894 		for_each_fib6_node_rt_rcu(fn) {
2895 			if (cfg->fc_flags & RTF_CACHE) {
2896 				rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
2897 							      &cfg->fc_src);
2898 				if (!rt_cache)
2899 					continue;
2900 				rt = rt_cache;
2901 			}
2902 			if (cfg->fc_ifindex &&
2903 			    (!rt->dst.dev ||
2904 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
2905 				continue;
2906 			if (cfg->fc_flags & RTF_GATEWAY &&
2907 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2908 				continue;
2909 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2910 				continue;
2911 			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2912 				continue;
2913 			if (!dst_hold_safe(&rt->dst))
2914 				break;
2915 			rcu_read_unlock();
2916 
2917 			/* if gateway was specified only delete the one hop */
2918 			if (cfg->fc_flags & RTF_GATEWAY)
2919 				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2920 
2921 			return __ip6_del_rt_siblings(rt, cfg);
2922 		}
2923 	}
2924 	rcu_read_unlock();
2925 
2926 	return err;
2927 }
2928 
/*
 * Validate an ICMPv6 redirect carried in @skb and, if acceptable, install
 * a cached exception route for the redirected destination.
 *
 * @dst: route currently used towards the destination; redirects are
 *       rejected when it is an RTF_REJECT route
 * @sk:  not used by this function
 * @skb: packet whose transport header is the redirect message
 *
 * The message is silently dropped (with a rate-limited debug message in
 * most cases) when it is too short, names a multicast destination, has a
 * target that is neither the destination nor link-local unicast, arrives
 * on an interface that forwards or has accept_redirects disabled, or has
 * invalid ND options.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	/* Bytes following the fixed redirect header are ND options. */
	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* target == destination means the destination is on-link; otherwise
	 * the target must be a link-local unicast address.
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	/* The target link-layer address option is optional, but must be
	 * well-formed when present.
	 */
	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt->rt6i_flags & RTF_REJECT) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);

	/* Last argument 1: create the neighbour entry if it does not exist. */
	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	/* The target is only marked as a router when it is not on-link. */
	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER)),
		     NDISC_REDIRECT, &ndopts);

	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_protocol = RTPROT_REDIRECT;
	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	/* No need to remove rt from the exception table if rt is
	 * a cached route because rt6_insert_exception() will
	 * takes care of it
	 */
	if (rt6_insert_exception(nrt, rt)) {
		dst_release_immediate(&nrt->dst);
		goto out;
	}

	/* Tell netevent listeners about the old and the new route. */
	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

out:
	neigh_release(neigh);
}
3046 
3047 /*
3048  *	Misc support functions
3049  */
3050 
3051 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
3052 {
3053 	BUG_ON(from->dst.from);
3054 
3055 	rt->rt6i_flags &= ~RTF_EXPIRES;
3056 	dst_hold(&from->dst);
3057 	rt->dst.from = &from->dst;
3058 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
3059 }
3060 
3061 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
3062 {
3063 	rt->dst.input = ort->dst.input;
3064 	rt->dst.output = ort->dst.output;
3065 	rt->rt6i_dst = ort->rt6i_dst;
3066 	rt->dst.error = ort->dst.error;
3067 	rt->rt6i_idev = ort->rt6i_idev;
3068 	if (rt->rt6i_idev)
3069 		in6_dev_hold(rt->rt6i_idev);
3070 	rt->dst.lastuse = jiffies;
3071 	rt->rt6i_gateway = ort->rt6i_gateway;
3072 	rt->rt6i_flags = ort->rt6i_flags;
3073 	rt6_set_from(rt, ort);
3074 	rt->rt6i_metric = ort->rt6i_metric;
3075 #ifdef CONFIG_IPV6_SUBTREES
3076 	rt->rt6i_src = ort->rt6i_src;
3077 #endif
3078 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
3079 	rt->rt6i_table = ort->rt6i_table;
3080 	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
3081 }
3082 
3083 #ifdef CONFIG_IPV6_ROUTE_INFO
3084 static struct rt6_info *rt6_get_route_info(struct net *net,
3085 					   const struct in6_addr *prefix, int prefixlen,
3086 					   const struct in6_addr *gwaddr,
3087 					   struct net_device *dev)
3088 {
3089 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3090 	int ifindex = dev->ifindex;
3091 	struct fib6_node *fn;
3092 	struct rt6_info *rt = NULL;
3093 	struct fib6_table *table;
3094 
3095 	table = fib6_get_table(net, tb_id);
3096 	if (!table)
3097 		return NULL;
3098 
3099 	rcu_read_lock();
3100 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
3101 	if (!fn)
3102 		goto out;
3103 
3104 	for_each_fib6_node_rt_rcu(fn) {
3105 		if (rt->dst.dev->ifindex != ifindex)
3106 			continue;
3107 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3108 			continue;
3109 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
3110 			continue;
3111 		ip6_hold_safe(NULL, &rt, false);
3112 		break;
3113 	}
3114 out:
3115 	rcu_read_unlock();
3116 	return rt;
3117 }
3118 
3119 static struct rt6_info *rt6_add_route_info(struct net *net,
3120 					   const struct in6_addr *prefix, int prefixlen,
3121 					   const struct in6_addr *gwaddr,
3122 					   struct net_device *dev,
3123 					   unsigned int pref)
3124 {
3125 	struct fib6_config cfg = {
3126 		.fc_metric	= IP6_RT_PRIO_USER,
3127 		.fc_ifindex	= dev->ifindex,
3128 		.fc_dst_len	= prefixlen,
3129 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3130 				  RTF_UP | RTF_PREF(pref),
3131 		.fc_protocol = RTPROT_RA,
3132 		.fc_nlinfo.portid = 0,
3133 		.fc_nlinfo.nlh = NULL,
3134 		.fc_nlinfo.nl_net = net,
3135 	};
3136 
3137 	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
3138 	cfg.fc_dst = *prefix;
3139 	cfg.fc_gateway = *gwaddr;
3140 
3141 	/* We should treat it as a default route if prefix length is 0. */
3142 	if (!prefixlen)
3143 		cfg.fc_flags |= RTF_DEFAULT;
3144 
3145 	ip6_route_add(&cfg, NULL);
3146 
3147 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
3148 }
3149 #endif
3150 
3151 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
3152 {
3153 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
3154 	struct rt6_info *rt;
3155 	struct fib6_table *table;
3156 
3157 	table = fib6_get_table(dev_net(dev), tb_id);
3158 	if (!table)
3159 		return NULL;
3160 
3161 	rcu_read_lock();
3162 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
3163 		if (dev == rt->dst.dev &&
3164 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
3165 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
3166 			break;
3167 	}
3168 	if (rt)
3169 		ip6_hold_safe(NULL, &rt, false);
3170 	rcu_read_unlock();
3171 	return rt;
3172 }
3173 
3174 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
3175 				     struct net_device *dev,
3176 				     unsigned int pref)
3177 {
3178 	struct fib6_config cfg = {
3179 		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
3180 		.fc_metric	= IP6_RT_PRIO_USER,
3181 		.fc_ifindex	= dev->ifindex,
3182 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3183 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
3184 		.fc_protocol = RTPROT_RA,
3185 		.fc_nlinfo.portid = 0,
3186 		.fc_nlinfo.nlh = NULL,
3187 		.fc_nlinfo.nl_net = dev_net(dev),
3188 	};
3189 
3190 	cfg.fc_gateway = *gwaddr;
3191 
3192 	if (!ip6_route_add(&cfg, NULL)) {
3193 		struct fib6_table *table;
3194 
3195 		table = fib6_get_table(dev_net(dev), cfg.fc_table);
3196 		if (table)
3197 			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3198 	}
3199 
3200 	return rt6_get_dflt_router(gwaddr, dev);
3201 }
3202 
3203 static void __rt6_purge_dflt_routers(struct fib6_table *table)
3204 {
3205 	struct rt6_info *rt;
3206 
3207 restart:
3208 	rcu_read_lock();
3209 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
3210 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3211 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
3212 			if (dst_hold_safe(&rt->dst)) {
3213 				rcu_read_unlock();
3214 				ip6_del_rt(rt);
3215 			} else {
3216 				rcu_read_unlock();
3217 			}
3218 			goto restart;
3219 		}
3220 	}
3221 	rcu_read_unlock();
3222 
3223 	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3224 }
3225 
3226 void rt6_purge_dflt_routers(struct net *net)
3227 {
3228 	struct fib6_table *table;
3229 	struct hlist_head *head;
3230 	unsigned int h;
3231 
3232 	rcu_read_lock();
3233 
3234 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3235 		head = &net->ipv6.fib_table_hash[h];
3236 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3237 			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3238 				__rt6_purge_dflt_routers(table);
3239 		}
3240 	}
3241 
3242 	rcu_read_unlock();
3243 }
3244 
3245 static void rtmsg_to_fib6_config(struct net *net,
3246 				 struct in6_rtmsg *rtmsg,
3247 				 struct fib6_config *cfg)
3248 {
3249 	memset(cfg, 0, sizeof(*cfg));
3250 
3251 	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3252 			 : RT6_TABLE_MAIN;
3253 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3254 	cfg->fc_metric = rtmsg->rtmsg_metric;
3255 	cfg->fc_expires = rtmsg->rtmsg_info;
3256 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3257 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
3258 	cfg->fc_flags = rtmsg->rtmsg_flags;
3259 
3260 	cfg->fc_nlinfo.nl_net = net;
3261 
3262 	cfg->fc_dst = rtmsg->rtmsg_dst;
3263 	cfg->fc_src = rtmsg->rtmsg_src;
3264 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
3265 }
3266 
3267 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3268 {
3269 	struct fib6_config cfg;
3270 	struct in6_rtmsg rtmsg;
3271 	int err;
3272 
3273 	switch (cmd) {
3274 	case SIOCADDRT:		/* Add a route */
3275 	case SIOCDELRT:		/* Delete a route */
3276 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
3277 			return -EPERM;
3278 		err = copy_from_user(&rtmsg, arg,
3279 				     sizeof(struct in6_rtmsg));
3280 		if (err)
3281 			return -EFAULT;
3282 
3283 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
3284 
3285 		rtnl_lock();
3286 		switch (cmd) {
3287 		case SIOCADDRT:
3288 			err = ip6_route_add(&cfg, NULL);
3289 			break;
3290 		case SIOCDELRT:
3291 			err = ip6_route_del(&cfg, NULL);
3292 			break;
3293 		default:
3294 			err = -EINVAL;
3295 		}
3296 		rtnl_unlock();
3297 
3298 		return err;
3299 	}
3300 
3301 	return -EINVAL;
3302 }
3303 
3304 /*
3305  *	Drop the packet on the floor
3306  */
3307 
3308 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
3309 {
3310 	int type;
3311 	struct dst_entry *dst = skb_dst(skb);
3312 	switch (ipstats_mib_noroutes) {
3313 	case IPSTATS_MIB_INNOROUTES:
3314 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
3315 		if (type == IPV6_ADDR_ANY) {
3316 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3317 				      IPSTATS_MIB_INADDRERRORS);
3318 			break;
3319 		}
3320 		/* FALLTHROUGH */
3321 	case IPSTATS_MIB_OUTNOROUTES:
3322 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3323 			      ipstats_mib_noroutes);
3324 		break;
3325 	}
3326 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
3327 	kfree_skb(skb);
3328 	return 0;
3329 }
3330 
3331 static int ip6_pkt_discard(struct sk_buff *skb)
3332 {
3333 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
3334 }
3335 
3336 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3337 {
3338 	skb->dev = skb_dst(skb)->dev;
3339 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
3340 }
3341 
3342 static int ip6_pkt_prohibit(struct sk_buff *skb)
3343 {
3344 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
3345 }
3346 
3347 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3348 {
3349 	skb->dev = skb_dst(skb)->dev;
3350 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
3351 }
3352 
3353 /*
3354  *	Allocate a dst for local (unicast / anycast) address.
3355  */
3356 
3357 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
3358 				    const struct in6_addr *addr,
3359 				    bool anycast)
3360 {
3361 	u32 tb_id;
3362 	struct net *net = dev_net(idev->dev);
3363 	struct net_device *dev = idev->dev;
3364 	struct rt6_info *rt;
3365 
3366 	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
3367 	if (!rt)
3368 		return ERR_PTR(-ENOMEM);
3369 
3370 	in6_dev_hold(idev);
3371 
3372 	rt->dst.flags |= DST_HOST;
3373 	rt->dst.input = ip6_input;
3374 	rt->dst.output = ip6_output;
3375 	rt->rt6i_idev = idev;
3376 
3377 	rt->rt6i_protocol = RTPROT_KERNEL;
3378 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
3379 	if (anycast)
3380 		rt->rt6i_flags |= RTF_ANYCAST;
3381 	else
3382 		rt->rt6i_flags |= RTF_LOCAL;
3383 
3384 	rt->rt6i_gateway  = *addr;
3385 	rt->rt6i_dst.addr = *addr;
3386 	rt->rt6i_dst.plen = 128;
3387 	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3388 	rt->rt6i_table = fib6_get_table(net, tb_id);
3389 
3390 	return rt;
3391 }
3392 
/* remove deleted ip from prefsrc entries
 *
 * Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all().
 */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device filter; NULL matches any device */
	struct net *net;	/* namespace whose null entry is skipped */
	struct in6_addr *addr;	/* address being removed */
};
3399 
3400 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3401 {
3402 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3403 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3404 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3405 
3406 	if (((void *)rt->dst.dev == dev || !dev) &&
3407 	    rt != net->ipv6.ip6_null_entry &&
3408 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
3409 		spin_lock_bh(&rt6_exception_lock);
3410 		/* remove prefsrc entry */
3411 		rt->rt6i_prefsrc.plen = 0;
3412 		/* need to update cache as well */
3413 		rt6_exceptions_remove_prefsrc(rt);
3414 		spin_unlock_bh(&rt6_exception_lock);
3415 	}
3416 	return 0;
3417 }
3418 
3419 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3420 {
3421 	struct net *net = dev_net(ifp->idev->dev);
3422 	struct arg_dev_net_ip adni = {
3423 		.dev = ifp->idev->dev,
3424 		.net = net,
3425 		.addr = &ifp->addr,
3426 	};
3427 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
3428 }
3429 
3430 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
3431 
3432 /* Remove routers and update dst entries when gateway turn into host. */
3433 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
3434 {
3435 	struct in6_addr *gateway = (struct in6_addr *)arg;
3436 
3437 	if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3438 	    ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
3439 		return -1;
3440 	}
3441 
3442 	/* Further clean up cached routes in exception table.
3443 	 * This is needed because cached route may have a different
3444 	 * gateway than its 'parent' in the case of an ip redirect.
3445 	 */
3446 	rt6_exceptions_clean_tohost(rt, gateway);
3447 
3448 	return 0;
3449 }
3450 
3451 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3452 {
3453 	fib6_clean_all(net, fib6_clean_tohost, gateway);
3454 }
3455 
/* Argument bundle handed to fib6_ifdown() through fib6_clean_all(). */
struct arg_dev_net {
	struct net_device *dev;	/* device filter; NULL matches any device */
	struct net *net;	/* namespace whose null entry is skipped */
};
3460 
3461 /* called with write lock held for table with rt */
3462 static int fib6_ifdown(struct rt6_info *rt, void *arg)
3463 {
3464 	const struct arg_dev_net *adn = arg;
3465 	const struct net_device *dev = adn->dev;
3466 
3467 	if ((rt->dst.dev == dev || !dev) &&
3468 	    rt != adn->net->ipv6.ip6_null_entry &&
3469 	    (rt->rt6i_nsiblings == 0 ||
3470 	     (dev && netdev_unregistering(dev)) ||
3471 	     !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
3472 		return -1;
3473 
3474 	return 0;
3475 }
3476 
3477 void rt6_ifdown(struct net *net, struct net_device *dev)
3478 {
3479 	struct arg_dev_net adn = {
3480 		.dev = dev,
3481 		.net = net,
3482 	};
3483 
3484 	fib6_clean_all(net, fib6_ifdown, &adn);
3485 	if (dev)
3486 		rt6_uncached_list_flush_dev(net, dev);
3487 }
3488 
/* Argument bundle handed to rt6_mtu_change_route() through fib6_clean_all(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
3493 
3494 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
3495 {
3496 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
3497 	struct inet6_dev *idev;
3498 
3499 	/* In IPv6 pmtu discovery is not optional,
3500 	   so that RTAX_MTU lock cannot disable it.
3501 	   We still use this lock to block changes
3502 	   caused by addrconf/ndisc.
3503 	*/
3504 
3505 	idev = __in6_dev_get(arg->dev);
3506 	if (!idev)
3507 		return 0;
3508 
3509 	/* For administrative MTU increase, there is no way to discover
3510 	   IPv6 PMTU increase, so PMTU increase should be updated here.
3511 	   Since RFC 1981 doesn't include administrative MTU increase
3512 	   update PMTU increase is a MUST. (i.e. jumbo frame)
3513 	 */
3514 	/*
3515 	   If new MTU is less than route PMTU, this new MTU will be the
3516 	   lowest MTU in the path, update the route PMTU to reflect PMTU
3517 	   decreases; if new MTU is greater than route PMTU, and the
3518 	   old MTU is the lowest MTU in the path, update the route PMTU
3519 	   to reflect the increase. In this case if the other nodes' MTU
3520 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
3521 	   PMTU discovery.
3522 	 */
3523 	if (rt->dst.dev == arg->dev &&
3524 	    dst_metric_raw(&rt->dst, RTAX_MTU) &&
3525 	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
3526 		spin_lock_bh(&rt6_exception_lock);
3527 		if (dst_mtu(&rt->dst) >= arg->mtu ||
3528 		    (dst_mtu(&rt->dst) < arg->mtu &&
3529 		     dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
3530 			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
3531 		}
3532 		rt6_exceptions_update_pmtu(rt, arg->mtu);
3533 		spin_unlock_bh(&rt6_exception_lock);
3534 	}
3535 	return 0;
3536 }
3537 
3538 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
3539 {
3540 	struct rt6_mtu_change_arg arg = {
3541 		.dev = dev,
3542 		.mtu = mtu,
3543 	};
3544 
3545 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
3546 }
3547 
3548 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
3549 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
3550 	[RTA_OIF]               = { .type = NLA_U32 },
3551 	[RTA_IIF]		= { .type = NLA_U32 },
3552 	[RTA_PRIORITY]          = { .type = NLA_U32 },
3553 	[RTA_METRICS]           = { .type = NLA_NESTED },
3554 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
3555 	[RTA_PREF]              = { .type = NLA_U8 },
3556 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
3557 	[RTA_ENCAP]		= { .type = NLA_NESTED },
3558 	[RTA_EXPIRES]		= { .type = NLA_U32 },
3559 	[RTA_UID]		= { .type = NLA_U32 },
3560 	[RTA_MARK]		= { .type = NLA_U32 },
3561 };
3562 
3563 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
3564 			      struct fib6_config *cfg,
3565 			      struct netlink_ext_ack *extack)
3566 {
3567 	struct rtmsg *rtm;
3568 	struct nlattr *tb[RTA_MAX+1];
3569 	unsigned int pref;
3570 	int err;
3571 
3572 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
3573 			  NULL);
3574 	if (err < 0)
3575 		goto errout;
3576 
3577 	err = -EINVAL;
3578 	rtm = nlmsg_data(nlh);
3579 	memset(cfg, 0, sizeof(*cfg));
3580 
3581 	cfg->fc_table = rtm->rtm_table;
3582 	cfg->fc_dst_len = rtm->rtm_dst_len;
3583 	cfg->fc_src_len = rtm->rtm_src_len;
3584 	cfg->fc_flags = RTF_UP;
3585 	cfg->fc_protocol = rtm->rtm_protocol;
3586 	cfg->fc_type = rtm->rtm_type;
3587 
3588 	if (rtm->rtm_type == RTN_UNREACHABLE ||
3589 	    rtm->rtm_type == RTN_BLACKHOLE ||
3590 	    rtm->rtm_type == RTN_PROHIBIT ||
3591 	    rtm->rtm_type == RTN_THROW)
3592 		cfg->fc_flags |= RTF_REJECT;
3593 
3594 	if (rtm->rtm_type == RTN_LOCAL)
3595 		cfg->fc_flags |= RTF_LOCAL;
3596 
3597 	if (rtm->rtm_flags & RTM_F_CLONED)
3598 		cfg->fc_flags |= RTF_CACHE;
3599 
3600 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
3601 	cfg->fc_nlinfo.nlh = nlh;
3602 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
3603 
3604 	if (tb[RTA_GATEWAY]) {
3605 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
3606 		cfg->fc_flags |= RTF_GATEWAY;
3607 	}
3608 
3609 	if (tb[RTA_DST]) {
3610 		int plen = (rtm->rtm_dst_len + 7) >> 3;
3611 
3612 		if (nla_len(tb[RTA_DST]) < plen)
3613 			goto errout;
3614 
3615 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
3616 	}
3617 
3618 	if (tb[RTA_SRC]) {
3619 		int plen = (rtm->rtm_src_len + 7) >> 3;
3620 
3621 		if (nla_len(tb[RTA_SRC]) < plen)
3622 			goto errout;
3623 
3624 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
3625 	}
3626 
3627 	if (tb[RTA_PREFSRC])
3628 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
3629 
3630 	if (tb[RTA_OIF])
3631 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
3632 
3633 	if (tb[RTA_PRIORITY])
3634 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
3635 
3636 	if (tb[RTA_METRICS]) {
3637 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
3638 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
3639 	}
3640 
3641 	if (tb[RTA_TABLE])
3642 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
3643 
3644 	if (tb[RTA_MULTIPATH]) {
3645 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
3646 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
3647 
3648 		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
3649 						     cfg->fc_mp_len, extack);
3650 		if (err < 0)
3651 			goto errout;
3652 	}
3653 
3654 	if (tb[RTA_PREF]) {
3655 		pref = nla_get_u8(tb[RTA_PREF]);
3656 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
3657 		    pref != ICMPV6_ROUTER_PREF_HIGH)
3658 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
3659 		cfg->fc_flags |= RTF_PREF(pref);
3660 	}
3661 
3662 	if (tb[RTA_ENCAP])
3663 		cfg->fc_encap = tb[RTA_ENCAP];
3664 
3665 	if (tb[RTA_ENCAP_TYPE]) {
3666 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
3667 
3668 		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
3669 		if (err < 0)
3670 			goto errout;
3671 	}
3672 
3673 	if (tb[RTA_EXPIRES]) {
3674 		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
3675 
3676 		if (addrconf_finite_timeout(timeout)) {
3677 			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
3678 			cfg->fc_flags |= RTF_EXPIRES;
3679 		}
3680 	}
3681 
3682 	err = 0;
3683 errout:
3684 	return err;
3685 }
3686 
3687 struct rt6_nh {
3688 	struct rt6_info *rt6_info;
3689 	struct fib6_config r_cfg;
3690 	struct mx6_config mxc;
3691 	struct list_head next;
3692 };
3693 
3694 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3695 {
3696 	struct rt6_nh *nh;
3697 
3698 	list_for_each_entry(nh, rt6_nh_list, next) {
3699 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
3700 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
3701 		        nh->r_cfg.fc_ifindex);
3702 	}
3703 }
3704 
3705 static int ip6_route_info_append(struct list_head *rt6_nh_list,
3706 				 struct rt6_info *rt, struct fib6_config *r_cfg)
3707 {
3708 	struct rt6_nh *nh;
3709 	int err = -EEXIST;
3710 
3711 	list_for_each_entry(nh, rt6_nh_list, next) {
3712 		/* check if rt6_info already exists */
3713 		if (rt6_duplicate_nexthop(nh->rt6_info, rt))
3714 			return err;
3715 	}
3716 
3717 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
3718 	if (!nh)
3719 		return -ENOMEM;
3720 	nh->rt6_info = rt;
3721 	err = ip6_convert_metrics(&nh->mxc, r_cfg);
3722 	if (err) {
3723 		kfree(nh);
3724 		return err;
3725 	}
3726 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3727 	list_add_tail(&nh->next, rt6_nh_list);
3728 
3729 	return 0;
3730 }
3731 
3732 static void ip6_route_mpath_notify(struct rt6_info *rt,
3733 				   struct rt6_info *rt_last,
3734 				   struct nl_info *info,
3735 				   __u16 nlflags)
3736 {
3737 	/* if this is an APPEND route, then rt points to the first route
3738 	 * inserted and rt_last points to last route inserted. Userspace
3739 	 * wants a consistent dump of the route which starts at the first
3740 	 * nexthop. Since sibling routes are always added at the end of
3741 	 * the list, find the first sibling of the last route appended
3742 	 */
3743 	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
3744 		rt = list_first_entry(&rt_last->rt6i_siblings,
3745 				      struct rt6_info,
3746 				      rt6i_siblings);
3747 	}
3748 
3749 	if (rt)
3750 		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
3751 }
3752 
3753 static int ip6_route_multipath_add(struct fib6_config *cfg,
3754 				   struct netlink_ext_ack *extack)
3755 {
3756 	struct rt6_info *rt_notif = NULL, *rt_last = NULL;
3757 	struct nl_info *info = &cfg->fc_nlinfo;
3758 	struct fib6_config r_cfg;
3759 	struct rtnexthop *rtnh;
3760 	struct rt6_info *rt;
3761 	struct rt6_nh *err_nh;
3762 	struct rt6_nh *nh, *nh_safe;
3763 	__u16 nlflags;
3764 	int remaining;
3765 	int attrlen;
3766 	int err = 1;
3767 	int nhn = 0;
3768 	int replace = (cfg->fc_nlinfo.nlh &&
3769 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3770 	LIST_HEAD(rt6_nh_list);
3771 
3772 	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
3773 	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
3774 		nlflags |= NLM_F_APPEND;
3775 
3776 	remaining = cfg->fc_mp_len;
3777 	rtnh = (struct rtnexthop *)cfg->fc_mp;
3778 
3779 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
3780 	 * rt6_info structs per nexthop
3781 	 */
3782 	while (rtnh_ok(rtnh, remaining)) {
3783 		memcpy(&r_cfg, cfg, sizeof(*cfg));
3784 		if (rtnh->rtnh_ifindex)
3785 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3786 
3787 		attrlen = rtnh_attrlen(rtnh);
3788 		if (attrlen > 0) {
3789 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3790 
3791 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3792 			if (nla) {
3793 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
3794 				r_cfg.fc_flags |= RTF_GATEWAY;
3795 			}
3796 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3797 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3798 			if (nla)
3799 				r_cfg.fc_encap_type = nla_get_u16(nla);
3800 		}
3801 
3802 		rt = ip6_route_info_create(&r_cfg, extack);
3803 		if (IS_ERR(rt)) {
3804 			err = PTR_ERR(rt);
3805 			rt = NULL;
3806 			goto cleanup;
3807 		}
3808 
3809 		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
3810 		if (err) {
3811 			dst_release_immediate(&rt->dst);
3812 			goto cleanup;
3813 		}
3814 
3815 		rtnh = rtnh_next(rtnh, &remaining);
3816 	}
3817 
3818 	/* for add and replace send one notification with all nexthops.
3819 	 * Skip the notification in fib6_add_rt2node and send one with
3820 	 * the full route when done
3821 	 */
3822 	info->skip_notify = 1;
3823 
3824 	err_nh = NULL;
3825 	list_for_each_entry(nh, &rt6_nh_list, next) {
3826 		rt_last = nh->rt6_info;
3827 		err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
3828 		/* save reference to first route for notification */
3829 		if (!rt_notif && !err)
3830 			rt_notif = nh->rt6_info;
3831 
3832 		/* nh->rt6_info is used or freed at this point, reset to NULL*/
3833 		nh->rt6_info = NULL;
3834 		if (err) {
3835 			if (replace && nhn)
3836 				ip6_print_replace_route_err(&rt6_nh_list);
3837 			err_nh = nh;
3838 			goto add_errout;
3839 		}
3840 
3841 		/* Because each route is added like a single route we remove
3842 		 * these flags after the first nexthop: if there is a collision,
3843 		 * we have already failed to add the first nexthop:
3844 		 * fib6_add_rt2node() has rejected it; when replacing, old
3845 		 * nexthops have been replaced by first new, the rest should
3846 		 * be added to it.
3847 		 */
3848 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3849 						     NLM_F_REPLACE);
3850 		nhn++;
3851 	}
3852 
3853 	/* success ... tell user about new route */
3854 	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3855 	goto cleanup;
3856 
3857 add_errout:
3858 	/* send notification for routes that were added so that
3859 	 * the delete notifications sent by ip6_route_del are
3860 	 * coherent
3861 	 */
3862 	if (rt_notif)
3863 		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3864 
3865 	/* Delete routes that were already added */
3866 	list_for_each_entry(nh, &rt6_nh_list, next) {
3867 		if (err_nh == nh)
3868 			break;
3869 		ip6_route_del(&nh->r_cfg, extack);
3870 	}
3871 
3872 cleanup:
3873 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
3874 		if (nh->rt6_info)
3875 			dst_release_immediate(&nh->rt6_info->dst);
3876 		kfree(nh->mxc.mx);
3877 		list_del(&nh->next);
3878 		kfree(nh);
3879 	}
3880 
3881 	return err;
3882 }
3883 
3884 static int ip6_route_multipath_del(struct fib6_config *cfg,
3885 				   struct netlink_ext_ack *extack)
3886 {
3887 	struct fib6_config r_cfg;
3888 	struct rtnexthop *rtnh;
3889 	int remaining;
3890 	int attrlen;
3891 	int err = 1, last_err = 0;
3892 
3893 	remaining = cfg->fc_mp_len;
3894 	rtnh = (struct rtnexthop *)cfg->fc_mp;
3895 
3896 	/* Parse a Multipath Entry */
3897 	while (rtnh_ok(rtnh, remaining)) {
3898 		memcpy(&r_cfg, cfg, sizeof(*cfg));
3899 		if (rtnh->rtnh_ifindex)
3900 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3901 
3902 		attrlen = rtnh_attrlen(rtnh);
3903 		if (attrlen > 0) {
3904 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3905 
3906 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3907 			if (nla) {
3908 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3909 				r_cfg.fc_flags |= RTF_GATEWAY;
3910 			}
3911 		}
3912 		err = ip6_route_del(&r_cfg, extack);
3913 		if (err)
3914 			last_err = err;
3915 
3916 		rtnh = rtnh_next(rtnh, &remaining);
3917 	}
3918 
3919 	return last_err;
3920 }
3921 
3922 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3923 			      struct netlink_ext_ack *extack)
3924 {
3925 	struct fib6_config cfg;
3926 	int err;
3927 
3928 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
3929 	if (err < 0)
3930 		return err;
3931 
3932 	if (cfg.fc_mp)
3933 		return ip6_route_multipath_del(&cfg, extack);
3934 	else {
3935 		cfg.fc_delete_all_nh = 1;
3936 		return ip6_route_del(&cfg, extack);
3937 	}
3938 }
3939 
3940 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3941 			      struct netlink_ext_ack *extack)
3942 {
3943 	struct fib6_config cfg;
3944 	int err;
3945 
3946 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
3947 	if (err < 0)
3948 		return err;
3949 
3950 	if (cfg.fc_mp)
3951 		return ip6_route_multipath_add(&cfg, extack);
3952 	else
3953 		return ip6_route_add(&cfg, extack);
3954 }
3955 
3956 static size_t rt6_nlmsg_size(struct rt6_info *rt)
3957 {
3958 	int nexthop_len = 0;
3959 
3960 	if (rt->rt6i_nsiblings) {
3961 		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
3962 			    + NLA_ALIGN(sizeof(struct rtnexthop))
3963 			    + nla_total_size(16) /* RTA_GATEWAY */
3964 			    + lwtunnel_get_encap_size(rt->dst.lwtstate);
3965 
3966 		nexthop_len *= rt->rt6i_nsiblings;
3967 	}
3968 
3969 	return NLMSG_ALIGN(sizeof(struct rtmsg))
3970 	       + nla_total_size(16) /* RTA_SRC */
3971 	       + nla_total_size(16) /* RTA_DST */
3972 	       + nla_total_size(16) /* RTA_GATEWAY */
3973 	       + nla_total_size(16) /* RTA_PREFSRC */
3974 	       + nla_total_size(4) /* RTA_TABLE */
3975 	       + nla_total_size(4) /* RTA_IIF */
3976 	       + nla_total_size(4) /* RTA_OIF */
3977 	       + nla_total_size(4) /* RTA_PRIORITY */
3978 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3979 	       + nla_total_size(sizeof(struct rta_cacheinfo))
3980 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3981 	       + nla_total_size(1) /* RTA_PREF */
3982 	       + lwtunnel_get_encap_size(rt->dst.lwtstate)
3983 	       + nexthop_len;
3984 }
3985 
3986 static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
3987 			    unsigned int *flags, bool skip_oif)
3988 {
3989 	if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
3990 		*flags |= RTNH_F_LINKDOWN;
3991 		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3992 			*flags |= RTNH_F_DEAD;
3993 	}
3994 
3995 	if (rt->rt6i_flags & RTF_GATEWAY) {
3996 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3997 			goto nla_put_failure;
3998 	}
3999 
4000 	if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
4001 		*flags |= RTNH_F_OFFLOAD;
4002 
4003 	/* not needed for multipath encoding b/c it has a rtnexthop struct */
4004 	if (!skip_oif && rt->dst.dev &&
4005 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
4006 		goto nla_put_failure;
4007 
4008 	if (rt->dst.lwtstate &&
4009 	    lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
4010 		goto nla_put_failure;
4011 
4012 	return 0;
4013 
4014 nla_put_failure:
4015 	return -EMSGSIZE;
4016 }
4017 
4018 /* add multipath next hop */
4019 static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
4020 {
4021 	struct rtnexthop *rtnh;
4022 	unsigned int flags = 0;
4023 
4024 	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4025 	if (!rtnh)
4026 		goto nla_put_failure;
4027 
4028 	rtnh->rtnh_hops = 0;
4029 	rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
4030 
4031 	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
4032 		goto nla_put_failure;
4033 
4034 	rtnh->rtnh_flags = flags;
4035 
4036 	/* length of rtnetlink header + attributes */
4037 	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4038 
4039 	return 0;
4040 
4041 nla_put_failure:
4042 	return -EMSGSIZE;
4043 }
4044 
4045 static int rt6_fill_node(struct net *net,
4046 			 struct sk_buff *skb, struct rt6_info *rt,
4047 			 struct in6_addr *dst, struct in6_addr *src,
4048 			 int iif, int type, u32 portid, u32 seq,
4049 			 unsigned int flags)
4050 {
4051 	u32 metrics[RTAX_MAX];
4052 	struct rtmsg *rtm;
4053 	struct nlmsghdr *nlh;
4054 	long expires;
4055 	u32 table;
4056 
4057 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
4058 	if (!nlh)
4059 		return -EMSGSIZE;
4060 
4061 	rtm = nlmsg_data(nlh);
4062 	rtm->rtm_family = AF_INET6;
4063 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
4064 	rtm->rtm_src_len = rt->rt6i_src.plen;
4065 	rtm->rtm_tos = 0;
4066 	if (rt->rt6i_table)
4067 		table = rt->rt6i_table->tb6_id;
4068 	else
4069 		table = RT6_TABLE_UNSPEC;
4070 	rtm->rtm_table = table;
4071 	if (nla_put_u32(skb, RTA_TABLE, table))
4072 		goto nla_put_failure;
4073 	if (rt->rt6i_flags & RTF_REJECT) {
4074 		switch (rt->dst.error) {
4075 		case -EINVAL:
4076 			rtm->rtm_type = RTN_BLACKHOLE;
4077 			break;
4078 		case -EACCES:
4079 			rtm->rtm_type = RTN_PROHIBIT;
4080 			break;
4081 		case -EAGAIN:
4082 			rtm->rtm_type = RTN_THROW;
4083 			break;
4084 		default:
4085 			rtm->rtm_type = RTN_UNREACHABLE;
4086 			break;
4087 		}
4088 	}
4089 	else if (rt->rt6i_flags & RTF_LOCAL)
4090 		rtm->rtm_type = RTN_LOCAL;
4091 	else if (rt->rt6i_flags & RTF_ANYCAST)
4092 		rtm->rtm_type = RTN_ANYCAST;
4093 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
4094 		rtm->rtm_type = RTN_LOCAL;
4095 	else
4096 		rtm->rtm_type = RTN_UNICAST;
4097 	rtm->rtm_flags = 0;
4098 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4099 	rtm->rtm_protocol = rt->rt6i_protocol;
4100 
4101 	if (rt->rt6i_flags & RTF_CACHE)
4102 		rtm->rtm_flags |= RTM_F_CLONED;
4103 
4104 	if (dst) {
4105 		if (nla_put_in6_addr(skb, RTA_DST, dst))
4106 			goto nla_put_failure;
4107 		rtm->rtm_dst_len = 128;
4108 	} else if (rtm->rtm_dst_len)
4109 		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
4110 			goto nla_put_failure;
4111 #ifdef CONFIG_IPV6_SUBTREES
4112 	if (src) {
4113 		if (nla_put_in6_addr(skb, RTA_SRC, src))
4114 			goto nla_put_failure;
4115 		rtm->rtm_src_len = 128;
4116 	} else if (rtm->rtm_src_len &&
4117 		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
4118 		goto nla_put_failure;
4119 #endif
4120 	if (iif) {
4121 #ifdef CONFIG_IPV6_MROUTE
4122 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
4123 			int err = ip6mr_get_route(net, skb, rtm, portid);
4124 
4125 			if (err == 0)
4126 				return 0;
4127 			if (err < 0)
4128 				goto nla_put_failure;
4129 		} else
4130 #endif
4131 			if (nla_put_u32(skb, RTA_IIF, iif))
4132 				goto nla_put_failure;
4133 	} else if (dst) {
4134 		struct in6_addr saddr_buf;
4135 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
4136 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4137 			goto nla_put_failure;
4138 	}
4139 
4140 	if (rt->rt6i_prefsrc.plen) {
4141 		struct in6_addr saddr_buf;
4142 		saddr_buf = rt->rt6i_prefsrc.addr;
4143 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4144 			goto nla_put_failure;
4145 	}
4146 
4147 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
4148 	if (rt->rt6i_pmtu)
4149 		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
4150 	if (rtnetlink_put_metrics(skb, metrics) < 0)
4151 		goto nla_put_failure;
4152 
4153 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
4154 		goto nla_put_failure;
4155 
4156 	/* For multipath routes, walk the siblings list and add
4157 	 * each as a nexthop within RTA_MULTIPATH.
4158 	 */
4159 	if (rt->rt6i_nsiblings) {
4160 		struct rt6_info *sibling, *next_sibling;
4161 		struct nlattr *mp;
4162 
4163 		mp = nla_nest_start(skb, RTA_MULTIPATH);
4164 		if (!mp)
4165 			goto nla_put_failure;
4166 
4167 		if (rt6_add_nexthop(skb, rt) < 0)
4168 			goto nla_put_failure;
4169 
4170 		list_for_each_entry_safe(sibling, next_sibling,
4171 					 &rt->rt6i_siblings, rt6i_siblings) {
4172 			if (rt6_add_nexthop(skb, sibling) < 0)
4173 				goto nla_put_failure;
4174 		}
4175 
4176 		nla_nest_end(skb, mp);
4177 	} else {
4178 		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
4179 			goto nla_put_failure;
4180 	}
4181 
4182 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
4183 
4184 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
4185 		goto nla_put_failure;
4186 
4187 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
4188 		goto nla_put_failure;
4189 
4190 
4191 	nlmsg_end(skb, nlh);
4192 	return 0;
4193 
4194 nla_put_failure:
4195 	nlmsg_cancel(skb, nlh);
4196 	return -EMSGSIZE;
4197 }
4198 
4199 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
4200 {
4201 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
4202 	struct net *net = arg->net;
4203 
4204 	if (rt == net->ipv6.ip6_null_entry)
4205 		return 0;
4206 
4207 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4208 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
4209 
4210 		/* user wants prefix routes only */
4211 		if (rtm->rtm_flags & RTM_F_PREFIX &&
4212 		    !(rt->rt6i_flags & RTF_PREFIX_RT)) {
4213 			/* success since this is not a prefix route */
4214 			return 1;
4215 		}
4216 	}
4217 
4218 	return rt6_fill_node(net,
4219 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
4220 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
4221 		     NLM_F_MULTI);
4222 }
4223 
4224 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4225 			      struct netlink_ext_ack *extack)
4226 {
4227 	struct net *net = sock_net(in_skb->sk);
4228 	struct nlattr *tb[RTA_MAX+1];
4229 	int err, iif = 0, oif = 0;
4230 	struct dst_entry *dst;
4231 	struct rt6_info *rt;
4232 	struct sk_buff *skb;
4233 	struct rtmsg *rtm;
4234 	struct flowi6 fl6;
4235 	bool fibmatch;
4236 
4237 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4238 			  extack);
4239 	if (err < 0)
4240 		goto errout;
4241 
4242 	err = -EINVAL;
4243 	memset(&fl6, 0, sizeof(fl6));
4244 	rtm = nlmsg_data(nlh);
4245 	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
4246 	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
4247 
4248 	if (tb[RTA_SRC]) {
4249 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4250 			goto errout;
4251 
4252 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
4253 	}
4254 
4255 	if (tb[RTA_DST]) {
4256 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4257 			goto errout;
4258 
4259 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
4260 	}
4261 
4262 	if (tb[RTA_IIF])
4263 		iif = nla_get_u32(tb[RTA_IIF]);
4264 
4265 	if (tb[RTA_OIF])
4266 		oif = nla_get_u32(tb[RTA_OIF]);
4267 
4268 	if (tb[RTA_MARK])
4269 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4270 
4271 	if (tb[RTA_UID])
4272 		fl6.flowi6_uid = make_kuid(current_user_ns(),
4273 					   nla_get_u32(tb[RTA_UID]));
4274 	else
4275 		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4276 
4277 	if (iif) {
4278 		struct net_device *dev;
4279 		int flags = 0;
4280 
4281 		rcu_read_lock();
4282 
4283 		dev = dev_get_by_index_rcu(net, iif);
4284 		if (!dev) {
4285 			rcu_read_unlock();
4286 			err = -ENODEV;
4287 			goto errout;
4288 		}
4289 
4290 		fl6.flowi6_iif = iif;
4291 
4292 		if (!ipv6_addr_any(&fl6.saddr))
4293 			flags |= RT6_LOOKUP_F_HAS_SADDR;
4294 
4295 		if (!fibmatch)
4296 			dst = ip6_route_input_lookup(net, dev, &fl6, flags);
4297 		else
4298 			dst = ip6_route_lookup(net, &fl6, 0);
4299 
4300 		rcu_read_unlock();
4301 	} else {
4302 		fl6.flowi6_oif = oif;
4303 
4304 		if (!fibmatch)
4305 			dst = ip6_route_output(net, NULL, &fl6);
4306 		else
4307 			dst = ip6_route_lookup(net, &fl6, 0);
4308 	}
4309 
4310 
4311 	rt = container_of(dst, struct rt6_info, dst);
4312 	if (rt->dst.error) {
4313 		err = rt->dst.error;
4314 		ip6_rt_put(rt);
4315 		goto errout;
4316 	}
4317 
4318 	if (rt == net->ipv6.ip6_null_entry) {
4319 		err = rt->dst.error;
4320 		ip6_rt_put(rt);
4321 		goto errout;
4322 	}
4323 
4324 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
4325 	if (!skb) {
4326 		ip6_rt_put(rt);
4327 		err = -ENOBUFS;
4328 		goto errout;
4329 	}
4330 
4331 	skb_dst_set(skb, &rt->dst);
4332 	if (fibmatch)
4333 		err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
4334 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4335 				    nlh->nlmsg_seq, 0);
4336 	else
4337 		err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
4338 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4339 				    nlh->nlmsg_seq, 0);
4340 	if (err < 0) {
4341 		kfree_skb(skb);
4342 		goto errout;
4343 	}
4344 
4345 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
4346 errout:
4347 	return err;
4348 }
4349 
4350 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
4351 		     unsigned int nlm_flags)
4352 {
4353 	struct sk_buff *skb;
4354 	struct net *net = info->nl_net;
4355 	u32 seq;
4356 	int err;
4357 
4358 	err = -ENOBUFS;
4359 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
4360 
4361 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
4362 	if (!skb)
4363 		goto errout;
4364 
4365 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
4366 				event, info->portid, seq, nlm_flags);
4367 	if (err < 0) {
4368 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4369 		WARN_ON(err == -EMSGSIZE);
4370 		kfree_skb(skb);
4371 		goto errout;
4372 	}
4373 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
4374 		    info->nlh, gfp_any());
4375 	return;
4376 errout:
4377 	if (err < 0)
4378 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
4379 }
4380 
4381 static int ip6_route_dev_notify(struct notifier_block *this,
4382 				unsigned long event, void *ptr)
4383 {
4384 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
4385 	struct net *net = dev_net(dev);
4386 
4387 	if (!(dev->flags & IFF_LOOPBACK))
4388 		return NOTIFY_OK;
4389 
4390 	if (event == NETDEV_REGISTER) {
4391 		net->ipv6.ip6_null_entry->dst.dev = dev;
4392 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4393 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4394 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
4395 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
4396 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
4397 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
4398 #endif
4399 	 } else if (event == NETDEV_UNREGISTER &&
4400 		    dev->reg_state != NETREG_UNREGISTERED) {
4401 		/* NETDEV_UNREGISTER could be fired for multiple times by
4402 		 * netdev_wait_allrefs(). Make sure we only call this once.
4403 		 */
4404 		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
4405 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4406 		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4407 		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
4408 #endif
4409 	}
4410 
4411 	return NOTIFY_OK;
4412 }
4413 
4414 /*
4415  *	/proc
4416  */
4417 
4418 #ifdef CONFIG_PROC_FS
4419 
4420 static const struct file_operations ipv6_route_proc_fops = {
4421 	.owner		= THIS_MODULE,
4422 	.open		= ipv6_route_open,
4423 	.read		= seq_read,
4424 	.llseek		= seq_lseek,
4425 	.release	= seq_release_net,
4426 };
4427 
4428 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4429 {
4430 	struct net *net = (struct net *)seq->private;
4431 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
4432 		   net->ipv6.rt6_stats->fib_nodes,
4433 		   net->ipv6.rt6_stats->fib_route_nodes,
4434 		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
4435 		   net->ipv6.rt6_stats->fib_rt_entries,
4436 		   net->ipv6.rt6_stats->fib_rt_cache,
4437 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
4438 		   net->ipv6.rt6_stats->fib_discarded_routes);
4439 
4440 	return 0;
4441 }
4442 
4443 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4444 {
4445 	return single_open_net(inode, file, rt6_stats_seq_show);
4446 }
4447 
4448 static const struct file_operations rt6_stats_seq_fops = {
4449 	.owner	 = THIS_MODULE,
4450 	.open	 = rt6_stats_seq_open,
4451 	.read	 = seq_read,
4452 	.llseek	 = seq_lseek,
4453 	.release = single_release_net,
4454 };
4455 #endif	/* CONFIG_PROC_FS */
4456 
4457 #ifdef CONFIG_SYSCTL
4458 
4459 static
4460 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
4461 			      void __user *buffer, size_t *lenp, loff_t *ppos)
4462 {
4463 	struct net *net;
4464 	int delay;
4465 	if (!write)
4466 		return -EINVAL;
4467 
4468 	net = (struct net *)ctl->extra1;
4469 	delay = net->ipv6.sysctl.flush_delay;
4470 	proc_dointvec(ctl, write, buffer, lenp, ppos);
4471 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
4472 	return 0;
4473 }
4474 
4475 struct ctl_table ipv6_route_table_template[] = {
4476 	{
4477 		.procname	=	"flush",
4478 		.data		=	&init_net.ipv6.sysctl.flush_delay,
4479 		.maxlen		=	sizeof(int),
4480 		.mode		=	0200,
4481 		.proc_handler	=	ipv6_sysctl_rtcache_flush
4482 	},
4483 	{
4484 		.procname	=	"gc_thresh",
4485 		.data		=	&ip6_dst_ops_template.gc_thresh,
4486 		.maxlen		=	sizeof(int),
4487 		.mode		=	0644,
4488 		.proc_handler	=	proc_dointvec,
4489 	},
4490 	{
4491 		.procname	=	"max_size",
4492 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
4493 		.maxlen		=	sizeof(int),
4494 		.mode		=	0644,
4495 		.proc_handler	=	proc_dointvec,
4496 	},
4497 	{
4498 		.procname	=	"gc_min_interval",
4499 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
4500 		.maxlen		=	sizeof(int),
4501 		.mode		=	0644,
4502 		.proc_handler	=	proc_dointvec_jiffies,
4503 	},
4504 	{
4505 		.procname	=	"gc_timeout",
4506 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
4507 		.maxlen		=	sizeof(int),
4508 		.mode		=	0644,
4509 		.proc_handler	=	proc_dointvec_jiffies,
4510 	},
4511 	{
4512 		.procname	=	"gc_interval",
4513 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
4514 		.maxlen		=	sizeof(int),
4515 		.mode		=	0644,
4516 		.proc_handler	=	proc_dointvec_jiffies,
4517 	},
4518 	{
4519 		.procname	=	"gc_elasticity",
4520 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
4521 		.maxlen		=	sizeof(int),
4522 		.mode		=	0644,
4523 		.proc_handler	=	proc_dointvec,
4524 	},
4525 	{
4526 		.procname	=	"mtu_expires",
4527 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
4528 		.maxlen		=	sizeof(int),
4529 		.mode		=	0644,
4530 		.proc_handler	=	proc_dointvec_jiffies,
4531 	},
4532 	{
4533 		.procname	=	"min_adv_mss",
4534 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
4535 		.maxlen		=	sizeof(int),
4536 		.mode		=	0644,
4537 		.proc_handler	=	proc_dointvec,
4538 	},
4539 	{
4540 		.procname	=	"gc_min_interval_ms",
4541 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
4542 		.maxlen		=	sizeof(int),
4543 		.mode		=	0644,
4544 		.proc_handler	=	proc_dointvec_ms_jiffies,
4545 	},
4546 	{ }
4547 };
4548 
4549 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
4550 {
4551 	struct ctl_table *table;
4552 
4553 	table = kmemdup(ipv6_route_table_template,
4554 			sizeof(ipv6_route_table_template),
4555 			GFP_KERNEL);
4556 
4557 	if (table) {
4558 		table[0].data = &net->ipv6.sysctl.flush_delay;
4559 		table[0].extra1 = net;
4560 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
4561 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
4562 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
4563 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
4564 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
4565 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
4566 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
4567 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
4568 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
4569 
4570 		/* Don't export sysctls to unprivileged users */
4571 		if (net->user_ns != &init_user_ns)
4572 			table[0].procname = NULL;
4573 	}
4574 
4575 	return table;
4576 }
4577 #endif
4578 
4579 static int __net_init ip6_route_net_init(struct net *net)
4580 {
4581 	int ret = -ENOMEM;
4582 
4583 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
4584 	       sizeof(net->ipv6.ip6_dst_ops));
4585 
4586 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
4587 		goto out_ip6_dst_ops;
4588 
4589 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
4590 					   sizeof(*net->ipv6.ip6_null_entry),
4591 					   GFP_KERNEL);
4592 	if (!net->ipv6.ip6_null_entry)
4593 		goto out_ip6_dst_entries;
4594 	net->ipv6.ip6_null_entry->dst.path =
4595 		(struct dst_entry *)net->ipv6.ip6_null_entry;
4596 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
4597 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
4598 			 ip6_template_metrics, true);
4599 
4600 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4601 	net->ipv6.fib6_has_custom_rules = false;
4602 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
4603 					       sizeof(*net->ipv6.ip6_prohibit_entry),
4604 					       GFP_KERNEL);
4605 	if (!net->ipv6.ip6_prohibit_entry)
4606 		goto out_ip6_null_entry;
4607 	net->ipv6.ip6_prohibit_entry->dst.path =
4608 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
4609 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
4610 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
4611 			 ip6_template_metrics, true);
4612 
4613 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
4614 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
4615 					       GFP_KERNEL);
4616 	if (!net->ipv6.ip6_blk_hole_entry)
4617 		goto out_ip6_prohibit_entry;
4618 	net->ipv6.ip6_blk_hole_entry->dst.path =
4619 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
4620 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
4621 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
4622 			 ip6_template_metrics, true);
4623 #endif
4624 
4625 	net->ipv6.sysctl.flush_delay = 0;
4626 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
4627 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
4628 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
4629 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
4630 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
4631 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
4632 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
4633 
4634 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
4635 
4636 	ret = 0;
4637 out:
4638 	return ret;
4639 
4640 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4641 out_ip6_prohibit_entry:
4642 	kfree(net->ipv6.ip6_prohibit_entry);
4643 out_ip6_null_entry:
4644 	kfree(net->ipv6.ip6_null_entry);
4645 #endif
4646 out_ip6_dst_entries:
4647 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
4648 out_ip6_dst_ops:
4649 	goto out;
4650 }
4651 
4652 static void __net_exit ip6_route_net_exit(struct net *net)
4653 {
4654 	kfree(net->ipv6.ip6_null_entry);
4655 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4656 	kfree(net->ipv6.ip6_prohibit_entry);
4657 	kfree(net->ipv6.ip6_blk_hole_entry);
4658 #endif
4659 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
4660 }
4661 
4662 static int __net_init ip6_route_net_init_late(struct net *net)
4663 {
4664 #ifdef CONFIG_PROC_FS
4665 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
4666 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
4667 #endif
4668 	return 0;
4669 }
4670 
4671 static void __net_exit ip6_route_net_exit_late(struct net *net)
4672 {
4673 #ifdef CONFIG_PROC_FS
4674 	remove_proc_entry("ipv6_route", net->proc_net);
4675 	remove_proc_entry("rt6_stats", net->proc_net);
4676 #endif
4677 }
4678 
4679 static struct pernet_operations ip6_route_net_ops = {
4680 	.init = ip6_route_net_init,
4681 	.exit = ip6_route_net_exit,
4682 };
4683 
4684 static int __net_init ipv6_inetpeer_init(struct net *net)
4685 {
4686 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4687 
4688 	if (!bp)
4689 		return -ENOMEM;
4690 	inet_peer_base_init(bp);
4691 	net->ipv6.peers = bp;
4692 	return 0;
4693 }
4694 
4695 static void __net_exit ipv6_inetpeer_exit(struct net *net)
4696 {
4697 	struct inet_peer_base *bp = net->ipv6.peers;
4698 
4699 	net->ipv6.peers = NULL;
4700 	inetpeer_invalidate_tree(bp);
4701 	kfree(bp);
4702 }
4703 
4704 static struct pernet_operations ipv6_inetpeer_ops = {
4705 	.init	=	ipv6_inetpeer_init,
4706 	.exit	=	ipv6_inetpeer_exit,
4707 };
4708 
4709 static struct pernet_operations ip6_route_net_late_ops = {
4710 	.init = ip6_route_net_init_late,
4711 	.exit = ip6_route_net_exit_late,
4712 };
4713 
4714 static struct notifier_block ip6_route_dev_notifier = {
4715 	.notifier_call = ip6_route_dev_notify,
4716 	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
4717 };
4718 
4719 void __init ip6_route_init_special_entries(void)
4720 {
4721 	/* Registering of the loopback is done before this portion of code,
4722 	 * the loopback reference in rt6_info will not be taken, do it
4723 	 * manually for init_net */
4724 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
4725 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4726   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4727 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
4728 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4729 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
4730 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4731   #endif
4732 }
4733 
4734 int __init ip6_route_init(void)
4735 {
4736 	int ret;
4737 	int cpu;
4738 
4739 	ret = -ENOMEM;
4740 	ip6_dst_ops_template.kmem_cachep =
4741 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
4742 				  SLAB_HWCACHE_ALIGN, NULL);
4743 	if (!ip6_dst_ops_template.kmem_cachep)
4744 		goto out;
4745 
4746 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
4747 	if (ret)
4748 		goto out_kmem_cache;
4749 
4750 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
4751 	if (ret)
4752 		goto out_dst_entries;
4753 
4754 	ret = register_pernet_subsys(&ip6_route_net_ops);
4755 	if (ret)
4756 		goto out_register_inetpeer;
4757 
4758 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
4759 
4760 	ret = fib6_init();
4761 	if (ret)
4762 		goto out_register_subsys;
4763 
4764 	ret = xfrm6_init();
4765 	if (ret)
4766 		goto out_fib6_init;
4767 
4768 	ret = fib6_rules_init();
4769 	if (ret)
4770 		goto xfrm6_init;
4771 
4772 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
4773 	if (ret)
4774 		goto fib6_rules_init;
4775 
4776 	ret = -ENOBUFS;
4777 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
4778 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
4779 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
4780 			    RTNL_FLAG_DOIT_UNLOCKED))
4781 		goto out_register_late_subsys;
4782 
4783 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
4784 	if (ret)
4785 		goto out_register_late_subsys;
4786 
4787 	for_each_possible_cpu(cpu) {
4788 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
4789 
4790 		INIT_LIST_HEAD(&ul->head);
4791 		spin_lock_init(&ul->lock);
4792 	}
4793 
4794 out:
4795 	return ret;
4796 
4797 out_register_late_subsys:
4798 	unregister_pernet_subsys(&ip6_route_net_late_ops);
4799 fib6_rules_init:
4800 	fib6_rules_cleanup();
4801 xfrm6_init:
4802 	xfrm6_fini();
4803 out_fib6_init:
4804 	fib6_gc_cleanup();
4805 out_register_subsys:
4806 	unregister_pernet_subsys(&ip6_route_net_ops);
4807 out_register_inetpeer:
4808 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
4809 out_dst_entries:
4810 	dst_entries_destroy(&ip6_dst_blackhole_ops);
4811 out_kmem_cache:
4812 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
4813 	goto out;
4814 }
4815 
4816 void ip6_route_cleanup(void)
4817 {
4818 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
4819 	unregister_pernet_subsys(&ip6_route_net_late_ops);
4820 	fib6_rules_cleanup();
4821 	xfrm6_fini();
4822 	fib6_gc_cleanup();
4823 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
4824 	unregister_pernet_subsys(&ip6_route_net_ops);
4825 	dst_entries_destroy(&ip6_dst_blackhole_ops);
4826 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
4827 }
4828