xref: /openbmc/linux/net/ipv6/route.c (revision 951f788a80ff8b6339c5c1ab888b0d4b4352efd8)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <linux/jhash.h>
48 #include <net/net_namespace.h>
49 #include <net/snmp.h>
50 #include <net/ipv6.h>
51 #include <net/ip6_fib.h>
52 #include <net/ip6_route.h>
53 #include <net/ndisc.h>
54 #include <net/addrconf.h>
55 #include <net/tcp.h>
56 #include <linux/rtnetlink.h>
57 #include <net/dst.h>
58 #include <net/dst_metadata.h>
59 #include <net/xfrm.h>
60 #include <net/netevent.h>
61 #include <net/netlink.h>
62 #include <net/nexthop.h>
63 #include <net/lwtunnel.h>
64 #include <net/ip_tunnels.h>
65 #include <net/l3mdev.h>
66 #include <trace/events/fib6.h>
67 
68 #include <linux/uaccess.h>
69 
70 #ifdef CONFIG_SYSCTL
71 #include <linux/sysctl.h>
72 #endif
73 
74 enum rt6_nud_state {
75 	RT6_NUD_FAIL_HARD = -3,
76 	RT6_NUD_FAIL_PROBE = -2,
77 	RT6_NUD_FAIL_DO_RR = -1,
78 	RT6_NUD_SUCCEED = 1
79 };
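
/* Editor's note: the negative values above are ordered failure codes that
 * rt6_score_route() returns in place of a score. find_match() treats
 * RT6_NUD_FAIL_HARD as "skip this route", RT6_NUD_FAIL_DO_RR as "lowest
 * valid score, then rotate fn->rr_ptr afterwards", while RT6_NUD_FAIL_PROBE
 * still allows a reachability probe to be scheduled but scores below any
 * reachable route.
 */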
80 
81 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
82 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
83 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
84 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void		ip6_dst_destroy(struct dst_entry *);
87 static void		ip6_dst_ifdown(struct dst_entry *,
88 				       struct net_device *dev, int how);
89 static int		 ip6_dst_gc(struct dst_ops *ops);
90 
91 static int		ip6_pkt_discard(struct sk_buff *skb);
92 static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
93 static int		ip6_pkt_prohibit(struct sk_buff *skb);
94 static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
95 static void		ip6_link_failure(struct sk_buff *skb);
96 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
97 					   struct sk_buff *skb, u32 mtu);
98 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
99 					struct sk_buff *skb);
100 static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
101 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
102 static size_t rt6_nlmsg_size(struct rt6_info *rt);
103 static int rt6_fill_node(struct net *net,
104 			 struct sk_buff *skb, struct rt6_info *rt,
105 			 struct in6_addr *dst, struct in6_addr *src,
106 			 int iif, int type, u32 portid, u32 seq,
107 			 unsigned int flags);
108 static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
109 					   struct in6_addr *daddr,
110 					   struct in6_addr *saddr);
111 
112 #ifdef CONFIG_IPV6_ROUTE_INFO
113 static struct rt6_info *rt6_add_route_info(struct net *net,
114 					   const struct in6_addr *prefix, int prefixlen,
115 					   const struct in6_addr *gwaddr,
116 					   struct net_device *dev,
117 					   unsigned int pref);
118 static struct rt6_info *rt6_get_route_info(struct net *net,
119 					   const struct in6_addr *prefix, int prefixlen,
120 					   const struct in6_addr *gwaddr,
121 					   struct net_device *dev);
122 #endif
123 
124 struct uncached_list {
125 	spinlock_t		lock;
126 	struct list_head	head;
127 };
128 
129 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
130 
131 static void rt6_uncached_list_add(struct rt6_info *rt)
132 {
133 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
134 
135 	rt->rt6i_uncached_list = ul;
136 
137 	spin_lock_bh(&ul->lock);
138 	list_add_tail(&rt->rt6i_uncached, &ul->head);
139 	spin_unlock_bh(&ul->lock);
140 }
141 
142 static void rt6_uncached_list_del(struct rt6_info *rt)
143 {
144 	if (!list_empty(&rt->rt6i_uncached)) {
145 		struct uncached_list *ul = rt->rt6i_uncached_list;
146 		struct net *net = dev_net(rt->dst.dev);
147 
148 		spin_lock_bh(&ul->lock);
149 		list_del(&rt->rt6i_uncached);
150 		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
151 		spin_unlock_bh(&ul->lock);
152 	}
153 }
154 
155 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
156 {
157 	struct net_device *loopback_dev = net->loopback_dev;
158 	int cpu;
159 
160 	if (dev == loopback_dev)
161 		return;
162 
163 	for_each_possible_cpu(cpu) {
164 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
165 		struct rt6_info *rt;
166 
167 		spin_lock_bh(&ul->lock);
168 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
169 			struct inet6_dev *rt_idev = rt->rt6i_idev;
170 			struct net_device *rt_dev = rt->dst.dev;
171 
172 			if (rt_idev->dev == dev) {
173 				rt->rt6i_idev = in6_dev_get(loopback_dev);
174 				in6_dev_put(rt_idev);
175 			}
176 
177 			if (rt_dev == dev) {
178 				rt->dst.dev = loopback_dev;
179 				dev_hold(rt->dst.dev);
180 				dev_put(rt_dev);
181 			}
182 		}
183 		spin_unlock_bh(&ul->lock);
184 	}
185 }
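
/* Editor's sketch (illustrative): the per-cpu lists above track RTF_CACHE
 * clones that live outside the fib6 tree (see the FLOWI_FLAG_KNOWN_NH path
 * in ip6_pol_route()), so an unregistering device can have such dsts
 * retargeted at the loopback device:
 *
 *	uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
 *	rt6_uncached_list_add(uncached_rt);	-- findable at ifdown time
 *	...
 *	rt6_uncached_list_del(uncached_rt);	-- done by ip6_dst_destroy()
 */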
186 
187 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
188 {
189 	return dst_metrics_write_ptr(rt->dst.from);
190 }
191 
192 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
193 {
194 	struct rt6_info *rt = (struct rt6_info *)dst;
195 
196 	if (rt->rt6i_flags & RTF_PCPU)
197 		return rt6_pcpu_cow_metrics(rt);
198 	else if (rt->rt6i_flags & RTF_CACHE)
199 		return NULL;
200 	else
201 		return dst_cow_metrics_generic(dst, old);
202 }
203 
204 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
205 					     struct sk_buff *skb,
206 					     const void *daddr)
207 {
208 	struct in6_addr *p = &rt->rt6i_gateway;
209 
210 	if (!ipv6_addr_any(p))
211 		return (const void *) p;
212 	else if (skb)
213 		return &ipv6_hdr(skb)->daddr;
214 	return daddr;
215 }
216 
217 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
218 					  struct sk_buff *skb,
219 					  const void *daddr)
220 {
221 	struct rt6_info *rt = (struct rt6_info *) dst;
222 	struct neighbour *n;
223 
224 	daddr = choose_neigh_daddr(rt, skb, daddr);
225 	n = __ipv6_neigh_lookup(dst->dev, daddr);
226 	if (n)
227 		return n;
228 	return neigh_create(&nd_tbl, daddr, dst->dev);
229 }
230 
231 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
232 {
233 	struct net_device *dev = dst->dev;
234 	struct rt6_info *rt = (struct rt6_info *)dst;
235 
236 	daddr = choose_neigh_daddr(rt, NULL, daddr);
237 	if (!daddr)
238 		return;
239 	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
240 		return;
241 	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
242 		return;
243 	__ipv6_confirm_neigh(dev, daddr);
244 }
245 
246 static struct dst_ops ip6_dst_ops_template = {
247 	.family			=	AF_INET6,
248 	.gc			=	ip6_dst_gc,
249 	.gc_thresh		=	1024,
250 	.check			=	ip6_dst_check,
251 	.default_advmss		=	ip6_default_advmss,
252 	.mtu			=	ip6_mtu,
253 	.cow_metrics		=	ipv6_cow_metrics,
254 	.destroy		=	ip6_dst_destroy,
255 	.ifdown			=	ip6_dst_ifdown,
256 	.negative_advice	=	ip6_negative_advice,
257 	.link_failure		=	ip6_link_failure,
258 	.update_pmtu		=	ip6_rt_update_pmtu,
259 	.redirect		=	rt6_do_redirect,
260 	.local_out		=	__ip6_local_out,
261 	.neigh_lookup		=	ip6_neigh_lookup,
262 	.confirm_neigh		=	ip6_confirm_neigh,
263 };
264 
265 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
266 {
267 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
268 
269 	return mtu ? : dst->dev->mtu;
270 }
271 
272 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
273 					 struct sk_buff *skb, u32 mtu)
274 {
275 }
276 
277 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
278 				      struct sk_buff *skb)
279 {
280 }
281 
282 static struct dst_ops ip6_dst_blackhole_ops = {
283 	.family			=	AF_INET6,
284 	.destroy		=	ip6_dst_destroy,
285 	.check			=	ip6_dst_check,
286 	.mtu			=	ip6_blackhole_mtu,
287 	.default_advmss		=	ip6_default_advmss,
288 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
289 	.redirect		=	ip6_rt_blackhole_redirect,
290 	.cow_metrics		=	dst_cow_metrics_generic,
291 	.neigh_lookup		=	ip6_neigh_lookup,
292 };
293 
294 static const u32 ip6_template_metrics[RTAX_MAX] = {
295 	[RTAX_HOPLIMIT - 1] = 0,
296 };
297 
298 static const struct rt6_info ip6_null_entry_template = {
299 	.dst = {
300 		.__refcnt	= ATOMIC_INIT(1),
301 		.__use		= 1,
302 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
303 		.error		= -ENETUNREACH,
304 		.input		= ip6_pkt_discard,
305 		.output		= ip6_pkt_discard_out,
306 	},
307 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
308 	.rt6i_protocol  = RTPROT_KERNEL,
309 	.rt6i_metric	= ~(u32) 0,
310 	.rt6i_ref	= ATOMIC_INIT(1),
311 };
312 
313 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
314 
315 static const struct rt6_info ip6_prohibit_entry_template = {
316 	.dst = {
317 		.__refcnt	= ATOMIC_INIT(1),
318 		.__use		= 1,
319 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
320 		.error		= -EACCES,
321 		.input		= ip6_pkt_prohibit,
322 		.output		= ip6_pkt_prohibit_out,
323 	},
324 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
325 	.rt6i_protocol  = RTPROT_KERNEL,
326 	.rt6i_metric	= ~(u32) 0,
327 	.rt6i_ref	= ATOMIC_INIT(1),
328 };
329 
330 static const struct rt6_info ip6_blk_hole_entry_template = {
331 	.dst = {
332 		.__refcnt	= ATOMIC_INIT(1),
333 		.__use		= 1,
334 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
335 		.error		= -EINVAL,
336 		.input		= dst_discard,
337 		.output		= dst_discard_out,
338 	},
339 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
340 	.rt6i_protocol  = RTPROT_KERNEL,
341 	.rt6i_metric	= ~(u32) 0,
342 	.rt6i_ref	= ATOMIC_INIT(1),
343 };
344 
345 #endif
346 
347 static void rt6_info_init(struct rt6_info *rt)
348 {
349 	struct dst_entry *dst = &rt->dst;
350 
351 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
352 	INIT_LIST_HEAD(&rt->rt6i_siblings);
353 	INIT_LIST_HEAD(&rt->rt6i_uncached);
354 }
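
/* Editor's note: the memset in rt6_info_init() relies on 'dst' being the
 * first member of struct rt6_info: "dst + 1" points just past it, so all
 * rt6-specific fields are zeroed while the dst_entry that dst_alloc() has
 * already initialised is left intact.
 */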
355 
356 /* allocate dst with ip6_dst_ops */
357 static struct rt6_info *__ip6_dst_alloc(struct net *net,
358 					struct net_device *dev,
359 					int flags)
360 {
361 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
362 					1, DST_OBSOLETE_FORCE_CHK, flags);
363 
364 	if (rt) {
365 		rt6_info_init(rt);
366 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
367 	}
368 
369 	return rt;
370 }
371 
372 struct rt6_info *ip6_dst_alloc(struct net *net,
373 			       struct net_device *dev,
374 			       int flags)
375 {
376 	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
377 
378 	if (rt) {
379 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
380 		if (rt->rt6i_pcpu) {
381 			int cpu;
382 
383 			for_each_possible_cpu(cpu) {
384 				struct rt6_info **p;
385 
386 				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
387 				/* no one shares rt */
388 				*p = NULL;
389 			}
390 		} else {
391 			dst_release_immediate(&rt->dst);
392 			return NULL;
393 		}
394 	}
395 
396 	return rt;
397 }
398 EXPORT_SYMBOL(ip6_dst_alloc);
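
/* Editor's sketch (hypothetical caller, not from this file): a typical user
 * allocates a route, fills in the lookup key, then inserts it:
 *
 *	struct rt6_info *rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
 *
 *	if (!rt)
 *		return -ENOMEM;
 *	rt->rt6i_dst.addr = *daddr;
 *	rt->rt6i_dst.plen = 128;
 *	err = ip6_ins_rt(rt);	-- ip6_ins_rt() takes the tree's dst_hold()
 */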
399 
400 static void ip6_dst_destroy(struct dst_entry *dst)
401 {
402 	struct rt6_info *rt = (struct rt6_info *)dst;
403 	struct rt6_exception_bucket *bucket;
404 	struct dst_entry *from = dst->from;
405 	struct inet6_dev *idev;
406 
407 	dst_destroy_metrics_generic(dst);
408 	free_percpu(rt->rt6i_pcpu);
409 	rt6_uncached_list_del(rt);
410 
411 	idev = rt->rt6i_idev;
412 	if (idev) {
413 		rt->rt6i_idev = NULL;
414 		in6_dev_put(idev);
415 	}
416 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
417 	if (bucket) {
418 		rt->rt6i_exception_bucket = NULL;
419 		kfree(bucket);
420 	}
421 
422 	dst->from = NULL;
423 	dst_release(from);
424 }
425 
426 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
427 			   int how)
428 {
429 	struct rt6_info *rt = (struct rt6_info *)dst;
430 	struct inet6_dev *idev = rt->rt6i_idev;
431 	struct net_device *loopback_dev =
432 		dev_net(dev)->loopback_dev;
433 
434 	if (idev && idev->dev != loopback_dev) {
435 		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
436 		if (loopback_idev) {
437 			rt->rt6i_idev = loopback_idev;
438 			in6_dev_put(idev);
439 		}
440 	}
441 }
442 
443 static bool __rt6_check_expired(const struct rt6_info *rt)
444 {
445 	if (rt->rt6i_flags & RTF_EXPIRES)
446 		return time_after(jiffies, rt->dst.expires);
447 	else
448 		return false;
449 }
450 
451 static bool rt6_check_expired(const struct rt6_info *rt)
452 {
453 	if (rt->rt6i_flags & RTF_EXPIRES) {
454 		if (time_after(jiffies, rt->dst.expires))
455 			return true;
456 	} else if (rt->dst.from) {
457 		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
458 		       rt6_check_expired((struct rt6_info *)rt->dst.from);
459 	}
460 	return false;
461 }
462 
463 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
464 					     struct flowi6 *fl6, int oif,
465 					     int strict)
466 {
467 	struct rt6_info *sibling, *next_sibling;
468 	int route_chosen;
469 
470 	/* We might have already computed the hash for ICMPv6 errors. In such
471 	 * a case it will always be non-zero; otherwise now is the time to do it.
472 	 */
473 	if (!fl6->mp_hash)
474 		fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
475 
476 	route_chosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
477 	/* Don't change the route if route_chosen == 0
478 	 * (the siblings list does not include ourselves)
479 	 */
480 	if (route_chosen)
481 		list_for_each_entry_safe(sibling, next_sibling,
482 				&match->rt6i_siblings, rt6i_siblings) {
483 			route_chosen--;
484 			if (route_chosen == 0) {
485 				if (rt6_score_route(sibling, oif, strict) < 0)
486 					break;
487 				match = sibling;
488 				break;
489 			}
490 		}
491 	return match;
492 }
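
/* Editor's worked example (illustrative): with three ECMP routes, each route
 * has rt6i_nsiblings == 2, so mp_hash % 3 yields 0, 1 or 2. Zero keeps
 * 'match' itself; 1 or 2 walk that many entries into match->rt6i_siblings.
 * If the selected sibling scores negative (e.g. its next hop is known to be
 * unreachable) the loop breaks without replacing 'match', so a dead leg
 * falls back to the original route.
 */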
493 
494 /*
495  *	Route lookup. rcu_read_lock() should be held.
496  */
497 
498 static inline struct rt6_info *rt6_device_match(struct net *net,
499 						    struct rt6_info *rt,
500 						    const struct in6_addr *saddr,
501 						    int oif,
502 						    int flags)
503 {
504 	struct rt6_info *local = NULL;
505 	struct rt6_info *sprt;
506 
507 	if (!oif && ipv6_addr_any(saddr))
508 		goto out;
509 
510 	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->dst.rt6_next)) {
511 		struct net_device *dev = sprt->dst.dev;
512 
513 		if (oif) {
514 			if (dev->ifindex == oif)
515 				return sprt;
516 			if (dev->flags & IFF_LOOPBACK) {
517 				if (!sprt->rt6i_idev ||
518 				    sprt->rt6i_idev->dev->ifindex != oif) {
519 					if (flags & RT6_LOOKUP_F_IFACE)
520 						continue;
521 					if (local &&
522 					    local->rt6i_idev->dev->ifindex == oif)
523 						continue;
524 				}
525 				local = sprt;
526 			}
527 		} else {
528 			if (ipv6_chk_addr(net, saddr, dev,
529 					  flags & RT6_LOOKUP_F_IFACE))
530 				return sprt;
531 		}
532 	}
533 
534 	if (oif) {
535 		if (local)
536 			return local;
537 
538 		if (flags & RT6_LOOKUP_F_IFACE)
539 			return net->ipv6.ip6_null_entry;
540 	}
541 out:
542 	return rt;
543 }
544 
545 #ifdef CONFIG_IPV6_ROUTER_PREF
546 struct __rt6_probe_work {
547 	struct work_struct work;
548 	struct in6_addr target;
549 	struct net_device *dev;
550 };
551 
552 static void rt6_probe_deferred(struct work_struct *w)
553 {
554 	struct in6_addr mcaddr;
555 	struct __rt6_probe_work *work =
556 		container_of(w, struct __rt6_probe_work, work);
557 
558 	addrconf_addr_solict_mult(&work->target, &mcaddr);
559 	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
560 	dev_put(work->dev);
561 	kfree(work);
562 }
563 
564 static void rt6_probe(struct rt6_info *rt)
565 {
566 	struct __rt6_probe_work *work;
567 	struct neighbour *neigh;
568 	/*
569 	 * Okay, this does not seem to be appropriate for now;
570 	 * however, we need to check whether it really is, aka
571 	 * Router Reachability Probing.
572 	 *
573 	 * Router Reachability Probes MUST be rate-limited
574 	 * to no more than one per minute.
575 	 */
576 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
577 		return;
578 	rcu_read_lock_bh();
579 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
580 	if (neigh) {
581 		if (neigh->nud_state & NUD_VALID)
582 			goto out;
583 
584 		work = NULL;
585 		write_lock(&neigh->lock);
586 		if (!(neigh->nud_state & NUD_VALID) &&
587 		    time_after(jiffies,
588 			       neigh->updated +
589 			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
590 			work = kmalloc(sizeof(*work), GFP_ATOMIC);
591 			if (work)
592 				__neigh_set_probe_once(neigh);
593 		}
594 		write_unlock(&neigh->lock);
595 	} else {
596 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
597 	}
598 
599 	if (work) {
600 		INIT_WORK(&work->work, rt6_probe_deferred);
601 		work->target = rt->rt6i_gateway;
602 		dev_hold(rt->dst.dev);
603 		work->dev = rt->dst.dev;
604 		schedule_work(&work->work);
605 	}
606 
607 out:
608 	rcu_read_unlock_bh();
609 }
610 #else
611 static inline void rt6_probe(struct rt6_info *rt)
612 {
613 }
614 #endif
615 
616 /*
617  * Default Router Selection (RFC 2461 6.3.6)
618  */
619 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
620 {
621 	struct net_device *dev = rt->dst.dev;
622 	if (!oif || dev->ifindex == oif)
623 		return 2;
624 	if ((dev->flags & IFF_LOOPBACK) &&
625 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
626 		return 1;
627 	return 0;
628 }
629 
630 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
631 {
632 	struct neighbour *neigh;
633 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
634 
635 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
636 	    !(rt->rt6i_flags & RTF_GATEWAY))
637 		return RT6_NUD_SUCCEED;
638 
639 	rcu_read_lock_bh();
640 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
641 	if (neigh) {
642 		read_lock(&neigh->lock);
643 		if (neigh->nud_state & NUD_VALID)
644 			ret = RT6_NUD_SUCCEED;
645 #ifdef CONFIG_IPV6_ROUTER_PREF
646 		else if (!(neigh->nud_state & NUD_FAILED))
647 			ret = RT6_NUD_SUCCEED;
648 		else
649 			ret = RT6_NUD_FAIL_PROBE;
650 #endif
651 		read_unlock(&neigh->lock);
652 	} else {
653 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
654 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
655 	}
656 	rcu_read_unlock_bh();
657 
658 	return ret;
659 }
660 
661 static int rt6_score_route(struct rt6_info *rt, int oif,
662 			   int strict)
663 {
664 	int m;
665 
666 	m = rt6_check_dev(rt, oif);
667 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
668 		return RT6_NUD_FAIL_HARD;
669 #ifdef CONFIG_IPV6_ROUTER_PREF
670 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
671 #endif
672 	if (strict & RT6_LOOKUP_F_REACHABLE) {
673 		int n = rt6_check_neigh(rt);
674 		if (n < 0)
675 			return n;
676 	}
677 	return m;
678 }
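
/* Editor's worked example (illustrative, assuming the IPV6_DECODE_PREF()
 * mapping of 1 = low, 2 = medium, 3 = high): the score packs the device
 * match (0..2) into the low two bits and, with CONFIG_IPV6_ROUTER_PREF, the
 * decoded RA preference into bits 2 and up. A route on the requested oif
 * announcing "high" preference scores 2 | (3 << 2) == 14, while a
 * medium-preference route on the same oif scores 2 | (2 << 2) == 10.
 */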
679 
680 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
681 				   int *mpri, struct rt6_info *match,
682 				   bool *do_rr)
683 {
684 	int m;
685 	bool match_do_rr = false;
686 	struct inet6_dev *idev = rt->rt6i_idev;
687 	struct net_device *dev = rt->dst.dev;
688 
689 	if (dev && !netif_carrier_ok(dev) &&
690 	    idev->cnf.ignore_routes_with_linkdown &&
691 	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
692 		goto out;
693 
694 	if (rt6_check_expired(rt))
695 		goto out;
696 
697 	m = rt6_score_route(rt, oif, strict);
698 	if (m == RT6_NUD_FAIL_DO_RR) {
699 		match_do_rr = true;
700 		m = 0; /* lowest valid score */
701 	} else if (m == RT6_NUD_FAIL_HARD) {
702 		goto out;
703 	}
704 
705 	if (strict & RT6_LOOKUP_F_REACHABLE)
706 		rt6_probe(rt);
707 
708 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
709 	if (m > *mpri) {
710 		*do_rr = match_do_rr;
711 		*mpri = m;
712 		match = rt;
713 	}
714 out:
715 	return match;
716 }
717 
718 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
719 				     struct rt6_info *leaf,
720 				     struct rt6_info *rr_head,
721 				     u32 metric, int oif, int strict,
722 				     bool *do_rr)
723 {
724 	struct rt6_info *rt, *match, *cont;
725 	int mpri = -1;
726 
727 	match = NULL;
728 	cont = NULL;
729 	for (rt = rr_head; rt; rt = rcu_dereference(rt->dst.rt6_next)) {
730 		if (rt->rt6i_metric != metric) {
731 			cont = rt;
732 			break;
733 		}
734 
735 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
736 	}
737 
738 	for (rt = leaf; rt && rt != rr_head;
739 	     rt = rcu_dereference(rt->dst.rt6_next)) {
740 		if (rt->rt6i_metric != metric) {
741 			cont = rt;
742 			break;
743 		}
744 
745 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
746 	}
747 
748 	if (match || !cont)
749 		return match;
750 
751 	for (rt = cont; rt; rt = rcu_dereference(rt->dst.rt6_next))
752 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
753 
754 	return match;
755 }
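
/* Editor's note: the two loops above scan the current-metric group
 * circularly - first from rr_head to the end of the group, then from the
 * leaf back up to rr_head. 'cont' remembers where routes with a different
 * (worse) metric begin; those are only scored if the preferred group
 * produced no match at all.
 */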
756 
757 static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
758 				   int oif, int strict)
759 {
760 	struct rt6_info *leaf = rcu_dereference(fn->leaf);
761 	struct rt6_info *match, *rt0;
762 	bool do_rr = false;
763 	int key_plen;
764 
765 	if (!leaf)
766 		return net->ipv6.ip6_null_entry;
767 
768 	rt0 = rcu_dereference(fn->rr_ptr);
769 	if (!rt0)
770 		rt0 = leaf;
771 
772 	/* Double check to make sure fn is not an intermediate node
773 	 * and fn->leaf does not point to its child's leaf
774 	 * (This might happen if all routes under fn are deleted from
775 	 * the tree and fib6_repair_tree() is called on the node.)
776 	 */
777 	key_plen = rt0->rt6i_dst.plen;
778 #ifdef CONFIG_IPV6_SUBTREES
779 	if (rt0->rt6i_src.plen)
780 		key_plen = rt0->rt6i_src.plen;
781 #endif
782 	if (fn->fn_bit != key_plen)
783 		return net->ipv6.ip6_null_entry;
784 
785 	match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
786 			     &do_rr);
787 
788 	if (do_rr) {
789 		struct rt6_info *next = rcu_dereference(rt0->dst.rt6_next);
790 
791 		/* no entries matched; do round-robin */
792 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
793 			next = leaf;
794 
795 		if (next != rt0) {
796 			spin_lock_bh(&leaf->rt6i_table->tb6_lock);
797 			/* make sure next is not being deleted from the tree */
798 			if (next->rt6i_node)
799 				rcu_assign_pointer(fn->rr_ptr, next);
800 			spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
801 		}
802 	}
803 
804 	return match ? match : net->ipv6.ip6_null_entry;
805 }
806 
807 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
808 {
809 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
810 }
811 
812 #ifdef CONFIG_IPV6_ROUTE_INFO
813 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
814 		  const struct in6_addr *gwaddr)
815 {
816 	struct net *net = dev_net(dev);
817 	struct route_info *rinfo = (struct route_info *) opt;
818 	struct in6_addr prefix_buf, *prefix;
819 	unsigned int pref;
820 	unsigned long lifetime;
821 	struct rt6_info *rt;
822 
823 	if (len < sizeof(struct route_info)) {
824 		return -EINVAL;
825 	}
826 
827 	/* Sanity check for prefix_len and length */
828 	if (rinfo->length > 3) {
829 		return -EINVAL;
830 	} else if (rinfo->prefix_len > 128) {
831 		return -EINVAL;
832 	} else if (rinfo->prefix_len > 64) {
833 		if (rinfo->length < 2) {
834 			return -EINVAL;
835 		}
836 	} else if (rinfo->prefix_len > 0) {
837 		if (rinfo->length < 1) {
838 			return -EINVAL;
839 		}
840 	}
841 
842 	pref = rinfo->route_pref;
843 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
844 		return -EINVAL;
845 
846 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
847 
848 	if (rinfo->length == 3)
849 		prefix = (struct in6_addr *)rinfo->prefix;
850 	else {
851 		/* ipv6_addr_prefix() is safe: it copies only prefix_len bits */
852 		ipv6_addr_prefix(&prefix_buf,
853 				 (struct in6_addr *)rinfo->prefix,
854 				 rinfo->prefix_len);
855 		prefix = &prefix_buf;
856 	}
857 
858 	if (rinfo->prefix_len == 0)
859 		rt = rt6_get_dflt_router(gwaddr, dev);
860 	else
861 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
862 					gwaddr, dev);
863 
864 	if (rt && !lifetime) {
865 		ip6_del_rt(rt);
866 		rt = NULL;
867 	}
868 
869 	if (!rt && lifetime)
870 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
871 					dev, pref);
872 	else if (rt)
873 		rt->rt6i_flags = RTF_ROUTEINFO |
874 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
875 
876 	if (rt) {
877 		if (!addrconf_finite_timeout(lifetime))
878 			rt6_clean_expires(rt);
879 		else
880 			rt6_set_expires(rt, jiffies + HZ * lifetime);
881 
882 		ip6_rt_put(rt);
883 	}
884 	return 0;
885 }
886 #endif
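
/* Editor's note (RFC 4191 refresher): the Route Information Option 'length'
 * checked in rt6_route_rcv() counts units of 8 octets - length 1 carries no
 * prefix bytes, length 2 carries 8 (enough for prefix_len <= 64) and
 * length 3 carries the full 16-byte prefix; anything larger is rejected.
 */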
887 
888 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
889 					struct in6_addr *saddr)
890 {
891 	struct fib6_node *pn, *sn;
892 	while (1) {
893 		if (fn->fn_flags & RTN_TL_ROOT)
894 			return NULL;
895 		pn = rcu_dereference(fn->parent);
896 		sn = FIB6_SUBTREE(pn);
897 		if (sn && sn != fn)
898 			fn = fib6_lookup(sn, NULL, saddr);
899 		else
900 			fn = pn;
901 		if (fn->fn_flags & RTN_RTINFO)
902 			return fn;
903 	}
904 }
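
/* Editor's note: when a lookup dead-ends, the walk above climbs towards the
 * root, re-descending into a parent's source-address subtree
 * (FIB6_SUBTREE()) where one exists, and stops at the first ancestor that
 * actually carries routes (RTN_RTINFO). Reaching the table root
 * (RTN_TL_ROOT) means nothing matched and NULL is returned.
 */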
905 
906 static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
907 			  bool null_fallback)
908 {
909 	struct rt6_info *rt = *prt;
910 
911 	if (dst_hold_safe(&rt->dst))
912 		return true;
913 	if (null_fallback) {
914 		rt = net->ipv6.ip6_null_entry;
915 		dst_hold(&rt->dst);
916 	} else {
917 		rt = NULL;
918 	}
919 	*prt = rt;
920 	return false;
921 }
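
/* Editor's note: dst_hold_safe() fails when the refcount has already hit
 * zero, i.e. the entry is being freed. Callers that must return something
 * usable pass null_fallback=true and receive a held ip6_null_entry instead;
 * rt6_get_pcpu_route() passes false and treats the resulting NULL as a
 * per-cpu cache miss.
 */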
922 
923 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
924 					     struct fib6_table *table,
925 					     struct flowi6 *fl6, int flags)
926 {
927 	struct rt6_info *rt, *rt_cache;
928 	struct fib6_node *fn;
929 
930 	rcu_read_lock();
931 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
932 restart:
933 	rt = rcu_dereference(fn->leaf);
934 	if (!rt) {
935 		rt = net->ipv6.ip6_null_entry;
936 	} else {
937 		rt = rt6_device_match(net, rt, &fl6->saddr,
938 				      fl6->flowi6_oif, flags);
939 		if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
940 			rt = rt6_multipath_select(rt, fl6,
941 						  fl6->flowi6_oif, flags);
942 	}
943 	if (rt == net->ipv6.ip6_null_entry) {
944 		fn = fib6_backtrack(fn, &fl6->saddr);
945 		if (fn)
946 			goto restart;
947 	}
948 	/* Search through exception table */
949 	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
950 	if (rt_cache)
951 		rt = rt_cache;
952 
953 	if (ip6_hold_safe(net, &rt, true))
954 		dst_use_noref(&rt->dst, jiffies);
955 
956 	rcu_read_unlock();
957 
958 	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
959 
960 	return rt;
961 
962 }
963 
964 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
965 				    int flags)
966 {
967 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
968 }
969 EXPORT_SYMBOL_GPL(ip6_route_lookup);
970 
971 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
972 			    const struct in6_addr *saddr, int oif, int strict)
973 {
974 	struct flowi6 fl6 = {
975 		.flowi6_oif = oif,
976 		.daddr = *daddr,
977 	};
978 	struct dst_entry *dst;
979 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
980 
981 	if (saddr) {
982 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
983 		flags |= RT6_LOOKUP_F_HAS_SADDR;
984 	}
985 
986 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
987 	if (dst->error == 0)
988 		return (struct rt6_info *) dst;
989 
990 	dst_release(dst);
991 
992 	return NULL;
993 }
994 EXPORT_SYMBOL(rt6_lookup);
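
/* Editor's sketch (hypothetical caller): rt6_lookup() returns NULL rather
 * than an error route, and the caller owns a reference:
 *
 *	struct rt6_info *rt = rt6_lookup(net, &daddr, NULL, 0, 0);
 *
 *	if (rt) {
 *		... use rt->dst.dev, rt->rt6i_gateway, etc. ...
 *		ip6_rt_put(rt);
 *	}
 */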
995 
996 /* ip6_ins_rt is called without table->tb6_lock held (the lock is free).
997  * It takes a new route entry; if the addition fails for any reason, the
998  * route is released.
999  * Caller must hold dst before calling it.
1000  */
1001 
1002 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
1003 			struct mx6_config *mxc,
1004 			struct netlink_ext_ack *extack)
1005 {
1006 	int err;
1007 	struct fib6_table *table;
1008 
1009 	table = rt->rt6i_table;
1010 	spin_lock_bh(&table->tb6_lock);
1011 	err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
1012 	spin_unlock_bh(&table->tb6_lock);
1013 
1014 	return err;
1015 }
1016 
1017 int ip6_ins_rt(struct rt6_info *rt)
1018 {
1019 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
1020 	struct mx6_config mxc = { .mx = NULL, };
1021 
1022 	/* Hold dst to account for the reference from the fib6 tree */
1023 	dst_hold(&rt->dst);
1024 	return __ip6_ins_rt(rt, &info, &mxc, NULL);
1025 }
1026 
1027 /* called with rcu_read_lock() held */
1028 static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
1029 {
1030 	struct net_device *dev = rt->dst.dev;
1031 
1032 	if (rt->rt6i_flags & RTF_LOCAL) {
1033 		/* for copies of local routes, dst->dev needs to be the
1034 		 * device itself if it is a master device, the master device
1035 		 * if the device is enslaved, and the loopback device by default
1036 		 */
1037 		if (netif_is_l3_slave(dev) &&
1038 		    !rt6_need_strict(&rt->rt6i_dst.addr))
1039 			dev = l3mdev_master_dev_rcu(dev);
1040 		else if (!netif_is_l3_master(dev))
1041 			dev = dev_net(dev)->loopback_dev;
1042 		/* last case is netif_is_l3_master(dev) is true in which
1043 		 * case we want dev returned to be dev
1044 		 */
1045 	}
1046 
1047 	return dev;
1048 }
1049 
1050 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
1051 					   const struct in6_addr *daddr,
1052 					   const struct in6_addr *saddr)
1053 {
1054 	struct net_device *dev;
1055 	struct rt6_info *rt;
1056 
1057 	/*
1058 	 *	Clone the route.
1059 	 */
1060 
1061 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
1062 		ort = (struct rt6_info *)ort->dst.from;
1063 
1064 	rcu_read_lock();
1065 	dev = ip6_rt_get_dev_rcu(ort);
1066 	rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1067 	rcu_read_unlock();
1068 	if (!rt)
1069 		return NULL;
1070 
1071 	ip6_rt_copy_init(rt, ort);
1072 	rt->rt6i_flags |= RTF_CACHE;
1073 	rt->rt6i_metric = 0;
1074 	rt->dst.flags |= DST_HOST;
1075 	rt->rt6i_dst.addr = *daddr;
1076 	rt->rt6i_dst.plen = 128;
1077 
1078 	if (!rt6_is_gw_or_nonexthop(ort)) {
1079 		if (ort->rt6i_dst.plen != 128 &&
1080 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1081 			rt->rt6i_flags |= RTF_ANYCAST;
1082 #ifdef CONFIG_IPV6_SUBTREES
1083 		if (rt->rt6i_src.plen && saddr) {
1084 			rt->rt6i_src.addr = *saddr;
1085 			rt->rt6i_src.plen = 128;
1086 		}
1087 #endif
1088 	}
1089 
1090 	return rt;
1091 }
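
/* Editor's note: the clone above is always a host route - rt6i_dst is
 * forced to daddr/128 (and, under CONFIG_IPV6_SUBTREES, rt6i_src to
 * saddr/128 when the parent carries a source prefix) - so it can be keyed
 * by exact addresses in the exception table instead of living in the fib6
 * tree.
 */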
1092 
1093 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1094 {
1095 	struct net_device *dev;
1096 	struct rt6_info *pcpu_rt;
1097 
1098 	rcu_read_lock();
1099 	dev = ip6_rt_get_dev_rcu(rt);
1100 	pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
1101 	rcu_read_unlock();
1102 	if (!pcpu_rt)
1103 		return NULL;
1104 	ip6_rt_copy_init(pcpu_rt, rt);
1105 	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1106 	pcpu_rt->rt6i_flags |= RTF_PCPU;
1107 	return pcpu_rt;
1108 }
1109 
1110 /* It should be called with rcu_read_lock() acquired */
1111 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1112 {
1113 	struct rt6_info *pcpu_rt, **p;
1114 
1115 	p = this_cpu_ptr(rt->rt6i_pcpu);
1116 	pcpu_rt = *p;
1117 
1118 	if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
1119 		rt6_dst_from_metrics_check(pcpu_rt);
1120 
1121 	return pcpu_rt;
1122 }
1123 
1124 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1125 {
1126 	struct rt6_info *pcpu_rt, *prev, **p;
1127 
1128 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1129 	if (!pcpu_rt) {
1130 		struct net *net = dev_net(rt->dst.dev);
1131 
1132 		dst_hold(&net->ipv6.ip6_null_entry->dst);
1133 		return net->ipv6.ip6_null_entry;
1134 	}
1135 
1136 	dst_hold(&pcpu_rt->dst);
1137 	p = this_cpu_ptr(rt->rt6i_pcpu);
1138 	prev = cmpxchg(p, NULL, pcpu_rt);
1139 	BUG_ON(prev);
1140 
1141 	rt6_dst_from_metrics_check(pcpu_rt);
1142 	return pcpu_rt;
1143 }
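
/* Editor's note: the cmpxchg() above publishes the clone into this CPU's
 * slot. BUG_ON(prev) holds because ip6_pol_route() runs the empty-slot
 * check and the fill with BHs disabled, so nothing can populate the same
 * slot in between on this CPU.
 */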
1144 
1145 /* exception hash table implementation
1146  */
1147 static DEFINE_SPINLOCK(rt6_exception_lock);
1148 
1149 /* Remove rt6_ex from hash table and free the memory
1150  * Caller must hold rt6_exception_lock
1151  */
1152 static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1153 				 struct rt6_exception *rt6_ex)
1154 {
1155 	struct net *net = dev_net(rt6_ex->rt6i->dst.dev);
1156 
1157 	if (!bucket || !rt6_ex)
1158 		return;
1159 	rt6_ex->rt6i->rt6i_node = NULL;
1160 	hlist_del_rcu(&rt6_ex->hlist);
1161 	rt6_release(rt6_ex->rt6i);
1162 	kfree_rcu(rt6_ex, rcu);
1163 	WARN_ON_ONCE(!bucket->depth);
1164 	bucket->depth--;
1165 	net->ipv6.rt6_stats->fib_rt_cache--;
1166 }
1167 
1168 /* Remove oldest rt6_ex in bucket and free the memory
1169  * Caller must hold rt6_exception_lock
1170  */
1171 static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1172 {
1173 	struct rt6_exception *rt6_ex, *oldest = NULL;
1174 
1175 	if (!bucket)
1176 		return;
1177 
1178 	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1179 		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1180 			oldest = rt6_ex;
1181 	}
1182 	rt6_remove_exception(bucket, oldest);
1183 }
1184 
1185 static u32 rt6_exception_hash(const struct in6_addr *dst,
1186 			      const struct in6_addr *src)
1187 {
1188 	static u32 seed __read_mostly;
1189 	u32 val;
1190 
1191 	net_get_random_once(&seed, sizeof(seed));
1192 	val = jhash(dst, sizeof(*dst), seed);
1193 
1194 #ifdef CONFIG_IPV6_SUBTREES
1195 	if (src)
1196 		val = jhash(src, sizeof(*src), val);
1197 #endif
1198 	return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1199 }
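
/* Editor's note: hash_32(val, n) keeps the top n bits, so the value
 * returned above is a bucket index in [0, FIB6_EXCEPTION_BUCKET_SIZE).
 * The __rt6_find_exception_*() helpers then advance the base pointer with
 * "*bucket += hval" to reach the chain for this (daddr, saddr) pair.
 */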
1200 
1201 /* Helper function to find the cached rt in the hash table
1202  * and update bucket pointer to point to the bucket for this
1203  * (daddr, saddr) pair
1204  * Caller must hold rt6_exception_lock
1205  */
1206 static struct rt6_exception *
1207 __rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1208 			      const struct in6_addr *daddr,
1209 			      const struct in6_addr *saddr)
1210 {
1211 	struct rt6_exception *rt6_ex;
1212 	u32 hval;
1213 
1214 	if (!(*bucket) || !daddr)
1215 		return NULL;
1216 
1217 	hval = rt6_exception_hash(daddr, saddr);
1218 	*bucket += hval;
1219 
1220 	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1221 		struct rt6_info *rt6 = rt6_ex->rt6i;
1222 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1223 
1224 #ifdef CONFIG_IPV6_SUBTREES
1225 		if (matched && saddr)
1226 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1227 #endif
1228 		if (matched)
1229 			return rt6_ex;
1230 	}
1231 	return NULL;
1232 }
1233 
1234 /* Helper function to find the cached rt in the hash table
1235  * and update bucket pointer to point to the bucket for this
1236  * (daddr, saddr) pair
1237  * Caller must hold rcu_read_lock()
1238  */
1239 static struct rt6_exception *
1240 __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1241 			 const struct in6_addr *daddr,
1242 			 const struct in6_addr *saddr)
1243 {
1244 	struct rt6_exception *rt6_ex;
1245 	u32 hval;
1246 
1247 	WARN_ON_ONCE(!rcu_read_lock_held());
1248 
1249 	if (!(*bucket) || !daddr)
1250 		return NULL;
1251 
1252 	hval = rt6_exception_hash(daddr, saddr);
1253 	*bucket += hval;
1254 
1255 	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1256 		struct rt6_info *rt6 = rt6_ex->rt6i;
1257 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1258 
1259 #ifdef CONFIG_IPV6_SUBTREES
1260 		if (matched && saddr)
1261 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1262 #endif
1263 		if (matched)
1264 			return rt6_ex;
1265 	}
1266 	return NULL;
1267 }
1268 
1269 static int rt6_insert_exception(struct rt6_info *nrt,
1270 				struct rt6_info *ort)
1271 {
1272 	struct net *net = dev_net(ort->dst.dev);
1273 	struct rt6_exception_bucket *bucket;
1274 	struct in6_addr *src_key = NULL;
1275 	struct rt6_exception *rt6_ex;
1276 	int err = 0;
1277 
1278 	/* ort can't be a cache or pcpu route */
1279 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
1280 		ort = (struct rt6_info *)ort->dst.from;
1281 	WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
1282 
1283 	spin_lock_bh(&rt6_exception_lock);
1284 
1285 	if (ort->exception_bucket_flushed) {
1286 		err = -EINVAL;
1287 		goto out;
1288 	}
1289 
1290 	bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1291 					lockdep_is_held(&rt6_exception_lock));
1292 	if (!bucket) {
1293 		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1294 				 GFP_ATOMIC);
1295 		if (!bucket) {
1296 			err = -ENOMEM;
1297 			goto out;
1298 		}
1299 		rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1300 	}
1301 
1302 #ifdef CONFIG_IPV6_SUBTREES
1303 	/* rt6i_src.plen != 0 indicates ort is in subtree
1304 	 * and exception table is indexed by a hash of
1305 	 * both rt6i_dst and rt6i_src.
1306 	 * Otherwise, the exception table is indexed by
1307 	 * a hash of only rt6i_dst.
1308 	 */
1309 	if (ort->rt6i_src.plen)
1310 		src_key = &nrt->rt6i_src.addr;
1311 #endif
1312 
1313 	/* Update rt6i_prefsrc as it could be changed
1314 	 * in rt6_remove_prefsrc()
1315 	 */
1316 	nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
1317 	/* rt6_mtu_change() might lower mtu on ort.
1318 	 * Only insert this exception route if its mtu
1319 	 * is less than ort's mtu value.
1320 	 */
1321 	if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
1322 		err = -EINVAL;
1323 		goto out;
1324 	}
1325 
1326 	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1327 					       src_key);
1328 	if (rt6_ex)
1329 		rt6_remove_exception(bucket, rt6_ex);
1330 
1331 	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1332 	if (!rt6_ex) {
1333 		err = -ENOMEM;
1334 		goto out;
1335 	}
1336 	rt6_ex->rt6i = nrt;
1337 	rt6_ex->stamp = jiffies;
1338 	atomic_inc(&nrt->rt6i_ref);
1339 	nrt->rt6i_node = ort->rt6i_node;
1340 	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1341 	bucket->depth++;
1342 	net->ipv6.rt6_stats->fib_rt_cache++;
1343 
1344 	if (bucket->depth > FIB6_MAX_DEPTH)
1345 		rt6_exception_remove_oldest(bucket);
1346 
1347 out:
1348 	spin_unlock_bh(&rt6_exception_lock);
1349 
1350 	/* Update fn->fn_sernum to invalidate all cached dst */
1351 	if (!err)
1352 		fib6_update_sernum(ort);
1353 
1354 	return err;
1355 }
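
/* Editor's sketch (illustrative, mirroring the PMTU-update path later in
 * this file): an exception is an RTF_CACHE clone hung off its parent route,
 * and a failed insert must drop the clone:
 *
 *	nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
 *	if (nrt6) {
 *		rt6_do_update_pmtu(nrt6, mtu);
 *		if (rt6_insert_exception(nrt6, rt6))
 *			dst_release_immediate(&nrt6->dst);
 *	}
 */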
1356 
1357 void rt6_flush_exceptions(struct rt6_info *rt)
1358 {
1359 	struct rt6_exception_bucket *bucket;
1360 	struct rt6_exception *rt6_ex;
1361 	struct hlist_node *tmp;
1362 	int i;
1363 
1364 	spin_lock_bh(&rt6_exception_lock);
1365 	/* Prevent rt6_insert_exception() from recreating the bucket list */
1366 	rt->exception_bucket_flushed = 1;
1367 
1368 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1369 				    lockdep_is_held(&rt6_exception_lock));
1370 	if (!bucket)
1371 		goto out;
1372 
1373 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1374 		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1375 			rt6_remove_exception(bucket, rt6_ex);
1376 		WARN_ON_ONCE(bucket->depth);
1377 		bucket++;
1378 	}
1379 
1380 out:
1381 	spin_unlock_bh(&rt6_exception_lock);
1382 }
1383 
1384 /* Find the cached rt in the hash table inside the passed-in rt
1385  * Caller has to hold rcu_read_lock()
1386  */
1387 static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1388 					   struct in6_addr *daddr,
1389 					   struct in6_addr *saddr)
1390 {
1391 	struct rt6_exception_bucket *bucket;
1392 	struct in6_addr *src_key = NULL;
1393 	struct rt6_exception *rt6_ex;
1394 	struct rt6_info *res = NULL;
1395 
1396 	bucket = rcu_dereference(rt->rt6i_exception_bucket);
1397 
1398 #ifdef CONFIG_IPV6_SUBTREES
1399 	/* rt6i_src.plen != 0 indicates rt is in subtree
1400 	 * and exception table is indexed by a hash of
1401 	 * both rt6i_dst and rt6i_src.
1402 	 * Otherwise, the exception table is indexed by
1403 	 * a hash of only rt6i_dst.
1404 	 */
1405 	if (rt->rt6i_src.plen)
1406 		src_key = saddr;
1407 #endif
1408 	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1409 
1410 	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1411 		res = rt6_ex->rt6i;
1412 
1413 	return res;
1414 }
1415 
1416 /* Remove the passed-in cached rt from the hash table that contains it */
1417 int rt6_remove_exception_rt(struct rt6_info *rt)
1418 {
1419 	struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1420 	struct rt6_exception_bucket *bucket;
1421 	struct in6_addr *src_key = NULL;
1422 	struct rt6_exception *rt6_ex;
1423 	int err;
1424 
1425 	if (!from ||
1426 	    !(rt->rt6i_flags & RTF_CACHE))
1427 		return -EINVAL;
1428 
1429 	if (!rcu_access_pointer(from->rt6i_exception_bucket))
1430 		return -ENOENT;
1431 
1432 	spin_lock_bh(&rt6_exception_lock);
1433 	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1434 				    lockdep_is_held(&rt6_exception_lock));
1435 #ifdef CONFIG_IPV6_SUBTREES
1436 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1437 	 * and exception table is indexed by a hash of
1438 	 * both rt6i_dst and rt6i_src.
1439 	 * Otherwise, the exception table is indexed by
1440 	 * a hash of only rt6i_dst.
1441 	 */
1442 	if (from->rt6i_src.plen)
1443 		src_key = &rt->rt6i_src.addr;
1444 #endif
1445 	rt6_ex = __rt6_find_exception_spinlock(&bucket,
1446 					       &rt->rt6i_dst.addr,
1447 					       src_key);
1448 	if (rt6_ex) {
1449 		rt6_remove_exception(bucket, rt6_ex);
1450 		err = 0;
1451 	} else {
1452 		err = -ENOENT;
1453 	}
1454 
1455 	spin_unlock_bh(&rt6_exception_lock);
1456 	return err;
1457 }
1458 
1459 /* Find the rt6_ex which contains the passed-in rt cache and
1460  * refresh its stamp
1461  */
1462 static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1463 {
1464 	struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1465 	struct rt6_exception_bucket *bucket;
1466 	struct in6_addr *src_key = NULL;
1467 	struct rt6_exception *rt6_ex;
1468 
1469 	if (!from ||
1470 	    !(rt->rt6i_flags & RTF_CACHE))
1471 		return;
1472 
1473 	rcu_read_lock();
1474 	bucket = rcu_dereference(from->rt6i_exception_bucket);
1475 
1476 #ifdef CONFIG_IPV6_SUBTREES
1477 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1478 	 * and exception table is indexed by a hash of
1479 	 * both rt6i_dst and rt6i_src.
1480 	 * Otherwise, the exception table is indexed by
1481 	 * a hash of only rt6i_dst.
1482 	 */
1483 	if (from->rt6i_src.plen)
1484 		src_key = &rt->rt6i_src.addr;
1485 #endif
1486 	rt6_ex = __rt6_find_exception_rcu(&bucket,
1487 					  &rt->rt6i_dst.addr,
1488 					  src_key);
1489 	if (rt6_ex)
1490 		rt6_ex->stamp = jiffies;
1491 
1492 	rcu_read_unlock();
1493 }
1494 
1495 static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1496 {
1497 	struct rt6_exception_bucket *bucket;
1498 	struct rt6_exception *rt6_ex;
1499 	int i;
1500 
1501 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1502 					lockdep_is_held(&rt6_exception_lock));
1503 
1504 	if (bucket) {
1505 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1506 			hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1507 				rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1508 			}
1509 			bucket++;
1510 		}
1511 	}
1512 }
1513 
1514 static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
1515 {
1516 	struct rt6_exception_bucket *bucket;
1517 	struct rt6_exception *rt6_ex;
1518 	int i;
1519 
1520 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1521 					lockdep_is_held(&rt6_exception_lock));
1522 
1523 	if (bucket) {
1524 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1525 			hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1526 				struct rt6_info *entry = rt6_ex->rt6i;
1527 				/* For RTF_CACHE with rt6i_pmtu == 0
1528 				 * (i.e. a redirected route),
1529 				 * the metrics of its rt->dst.from have
1530 				 * already been updated.
1531 				 */
1532 				if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
1533 					entry->rt6i_pmtu = mtu;
1534 			}
1535 			bucket++;
1536 		}
1537 	}
1538 }
1539 
1540 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
1541 
1542 static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
1543 					struct in6_addr *gateway)
1544 {
1545 	struct rt6_exception_bucket *bucket;
1546 	struct rt6_exception *rt6_ex;
1547 	struct hlist_node *tmp;
1548 	int i;
1549 
1550 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1551 		return;
1552 
1553 	spin_lock_bh(&rt6_exception_lock);
1554 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1555 				     lockdep_is_held(&rt6_exception_lock));
1556 
1557 	if (bucket) {
1558 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1559 			hlist_for_each_entry_safe(rt6_ex, tmp,
1560 						  &bucket->chain, hlist) {
1561 				struct rt6_info *entry = rt6_ex->rt6i;
1562 
1563 				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1564 				    RTF_CACHE_GATEWAY &&
1565 				    ipv6_addr_equal(gateway,
1566 						    &entry->rt6i_gateway)) {
1567 					rt6_remove_exception(bucket, rt6_ex);
1568 				}
1569 			}
1570 			bucket++;
1571 		}
1572 	}
1573 
1574 	spin_unlock_bh(&rt6_exception_lock);
1575 }
1576 
1577 static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1578 				      struct rt6_exception *rt6_ex,
1579 				      struct fib6_gc_args *gc_args,
1580 				      unsigned long now)
1581 {
1582 	struct rt6_info *rt = rt6_ex->rt6i;
1583 
1584 	if (atomic_read(&rt->dst.__refcnt) == 1 &&
1585 	    time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1586 		RT6_TRACE("aging clone %p\n", rt);
1587 		rt6_remove_exception(bucket, rt6_ex);
1588 		return;
1589 	} else if (rt->rt6i_flags & RTF_GATEWAY) {
1590 		struct neighbour *neigh;
1591 		__u8 neigh_flags = 0;
1592 
1593 		neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
1594 		if (neigh) {
1595 			neigh_flags = neigh->flags;
1596 			neigh_release(neigh);
1597 		}
1598 		if (!(neigh_flags & NTF_ROUTER)) {
1599 			RT6_TRACE("purging route %p via non-router but gateway\n",
1600 				  rt);
1601 			rt6_remove_exception(bucket, rt6_ex);
1602 			return;
1603 		}
1604 	}
1605 	gc_args->more++;
1606 }
1607 
1608 void rt6_age_exceptions(struct rt6_info *rt,
1609 			struct fib6_gc_args *gc_args,
1610 			unsigned long now)
1611 {
1612 	struct rt6_exception_bucket *bucket;
1613 	struct rt6_exception *rt6_ex;
1614 	struct hlist_node *tmp;
1615 	int i;
1616 
1617 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1618 		return;
1619 
1620 	spin_lock_bh(&rt6_exception_lock);
1621 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1622 				    lockdep_is_held(&rt6_exception_lock));
1623 
1624 	if (bucket) {
1625 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1626 			hlist_for_each_entry_safe(rt6_ex, tmp,
1627 						  &bucket->chain, hlist) {
1628 				rt6_age_examine_exception(bucket, rt6_ex,
1629 							  gc_args, now);
1630 			}
1631 			bucket++;
1632 		}
1633 	}
1634 	spin_unlock_bh(&rt6_exception_lock);
1635 }
1636 
1637 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1638 			       int oif, struct flowi6 *fl6, int flags)
1639 {
1640 	struct fib6_node *fn, *saved_fn;
1641 	struct rt6_info *rt, *rt_cache;
1642 	int strict = 0;
1643 
1644 	strict |= flags & RT6_LOOKUP_F_IFACE;
1645 	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1646 	if (net->ipv6.devconf_all->forwarding == 0)
1647 		strict |= RT6_LOOKUP_F_REACHABLE;
1648 
1649 	rcu_read_lock();
1650 
1651 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1652 	saved_fn = fn;
1653 
1654 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1655 		oif = 0;
1656 
1657 redo_rt6_select:
1658 	rt = rt6_select(net, fn, oif, strict);
1659 	if (rt->rt6i_nsiblings)
1660 		rt = rt6_multipath_select(rt, fl6, oif, strict);
1661 	if (rt == net->ipv6.ip6_null_entry) {
1662 		fn = fib6_backtrack(fn, &fl6->saddr);
1663 		if (fn)
1664 			goto redo_rt6_select;
1665 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1666 			/* also consider unreachable route */
1667 			strict &= ~RT6_LOOKUP_F_REACHABLE;
1668 			fn = saved_fn;
1669 			goto redo_rt6_select;
1670 		}
1671 	}
1672 
1673 	/* Search through exception table */
1674 	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
1675 	if (rt_cache)
1676 		rt = rt_cache;
1677 
1678 	if (rt == net->ipv6.ip6_null_entry) {
1679 		rcu_read_unlock();
1680 		dst_hold(&rt->dst);
1681 		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1682 		return rt;
1683 	} else if (rt->rt6i_flags & RTF_CACHE) {
1684 		if (ip6_hold_safe(net, &rt, true)) {
1685 			dst_use_noref(&rt->dst, jiffies);
1686 			rt6_dst_from_metrics_check(rt);
1687 		}
1688 		rcu_read_unlock();
1689 		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1690 		return rt;
1691 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1692 			    !(rt->rt6i_flags & RTF_GATEWAY))) {
1693 		/* Create an RTF_CACHE clone which will not be
1694 		 * owned by the fib6 tree.  It is for the special case where
1695 		 * the daddr in the skb during the neighbor look-up is different
1696 		 * from the fl6->daddr used to look up the route here.
1697 		 */
1698 
1699 		struct rt6_info *uncached_rt;
1700 
1701 		if (ip6_hold_safe(net, &rt, true)) {
1702 			dst_use_noref(&rt->dst, jiffies);
1703 		} else {
1704 			rcu_read_unlock();
1705 			uncached_rt = rt;
1706 			goto uncached_rt_out;
1707 		}
1708 		rcu_read_unlock();
1709 
1710 		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1711 		dst_release(&rt->dst);
1712 
1713 		if (uncached_rt) {
1714 			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1715 			 * No need for another dst_hold()
1716 			 */
1717 			rt6_uncached_list_add(uncached_rt);
1718 			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1719 		} else {
1720 			uncached_rt = net->ipv6.ip6_null_entry;
1721 			dst_hold(&uncached_rt->dst);
1722 		}
1723 
1724 uncached_rt_out:
1725 		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
1726 		return uncached_rt;
1727 
1728 	} else {
1729 		/* Get a percpu copy */
1730 
1731 		struct rt6_info *pcpu_rt;
1732 
1733 		dst_use_noref(&rt->dst, jiffies);
1734 		local_bh_disable();
1735 		pcpu_rt = rt6_get_pcpu_route(rt);
1736 
1737 		if (!pcpu_rt) {
1738 			/* atomic_inc_not_zero() is needed when using rcu */
1739 			if (atomic_inc_not_zero(&rt->rt6i_ref)) {
1740 				/* No dst_hold() on rt is needed because grabbing
1741 				 * rt->rt6i_ref makes sure rt can't be released.
1742 				 */
1743 				pcpu_rt = rt6_make_pcpu_route(rt);
1744 				rt6_release(rt);
1745 			} else {
1746 				/* rt is already removed from tree */
1747 				pcpu_rt = net->ipv6.ip6_null_entry;
1748 				dst_hold(&pcpu_rt->dst);
1749 			}
1750 		}
1751 		local_bh_enable();
1752 		rcu_read_unlock();
1753 		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
1754 		return pcpu_rt;
1755 	}
1756 }
1757 EXPORT_SYMBOL_GPL(ip6_pol_route);
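
/* Editor's note: ip6_pol_route() hands back one of three kinds of dst: a
 * matching RTF_CACHE exception, a one-off uncached clone for the
 * FLOWI_FLAG_KNOWN_NH case, or - on the common path - a per-cpu copy of the
 * fib6 entry (or the held ip6_null_entry when nothing matches), so the
 * shared tree entry's refcount is touched as rarely as possible.
 */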
1758 
1759 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1760 					    struct flowi6 *fl6, int flags)
1761 {
1762 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1763 }
1764 
1765 struct dst_entry *ip6_route_input_lookup(struct net *net,
1766 					 struct net_device *dev,
1767 					 struct flowi6 *fl6, int flags)
1768 {
1769 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1770 		flags |= RT6_LOOKUP_F_IFACE;
1771 
1772 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1773 }
1774 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1775 
1776 static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1777 				  struct flow_keys *keys)
1778 {
1779 	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1780 	const struct ipv6hdr *key_iph = outer_iph;
1781 	const struct ipv6hdr *inner_iph;
1782 	const struct icmp6hdr *icmph;
1783 	struct ipv6hdr _inner_iph;
1784 
1785 	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1786 		goto out;
1787 
1788 	icmph = icmp6_hdr(skb);
1789 	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1790 	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1791 	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1792 	    icmph->icmp6_type != ICMPV6_PARAMPROB)
1793 		goto out;
1794 
1795 	inner_iph = skb_header_pointer(skb,
1796 				       skb_transport_offset(skb) + sizeof(*icmph),
1797 				       sizeof(_inner_iph), &_inner_iph);
1798 	if (!inner_iph)
1799 		goto out;
1800 
1801 	key_iph = inner_iph;
1802 out:
1803 	memset(keys, 0, sizeof(*keys));
1804 	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1805 	keys->addrs.v6addrs.src = key_iph->saddr;
1806 	keys->addrs.v6addrs.dst = key_iph->daddr;
1807 	keys->tags.flow_label = ip6_flowinfo(key_iph);
1808 	keys->basic.ip_proto = key_iph->nexthdr;
1809 }
1810 
1811 /* if skb is set it will be used and fl6 can be NULL */
1812 u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
1813 {
1814 	struct flow_keys hash_keys;
1815 
1816 	if (skb) {
1817 		ip6_multipath_l3_keys(skb, &hash_keys);
1818 		return flow_hash_from_keys(&hash_keys);
1819 	}
1820 
1821 	return get_hash_from_flowi6(fl6);
1822 }
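
/* Editor's note: for ICMPv6 errors the skb path above hashes the *inner*
 * (offending) header via ip6_multipath_l3_keys(), so the error travels the
 * same ECMP leg as the flow that triggered it; ip6_route_input() below does
 *
 *	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
 *		fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
 */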
1823 
1824 void ip6_route_input(struct sk_buff *skb)
1825 {
1826 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1827 	struct net *net = dev_net(skb->dev);
1828 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1829 	struct ip_tunnel_info *tun_info;
1830 	struct flowi6 fl6 = {
1831 		.flowi6_iif = skb->dev->ifindex,
1832 		.daddr = iph->daddr,
1833 		.saddr = iph->saddr,
1834 		.flowlabel = ip6_flowinfo(iph),
1835 		.flowi6_mark = skb->mark,
1836 		.flowi6_proto = iph->nexthdr,
1837 	};
1838 
1839 	tun_info = skb_tunnel_info(skb);
1840 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1841 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1842 	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
1843 		fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
1844 	skb_dst_drop(skb);
1845 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1846 }
1847 
1848 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1849 					     struct flowi6 *fl6, int flags)
1850 {
1851 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1852 }
1853 
1854 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1855 					 struct flowi6 *fl6, int flags)
1856 {
1857 	bool any_src;
1858 
1859 	if (rt6_need_strict(&fl6->daddr)) {
1860 		struct dst_entry *dst;
1861 
1862 		dst = l3mdev_link_scope_lookup(net, fl6);
1863 		if (dst)
1864 			return dst;
1865 	}
1866 
1867 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1868 
1869 	any_src = ipv6_addr_any(&fl6->saddr);
1870 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1871 	    (fl6->flowi6_oif && any_src))
1872 		flags |= RT6_LOOKUP_F_IFACE;
1873 
1874 	if (!any_src)
1875 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1876 	else if (sk)
1877 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1878 
1879 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1880 }
1881 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1882 
1883 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1884 {
1885 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1886 	struct net_device *loopback_dev = net->loopback_dev;
1887 	struct dst_entry *new = NULL;
1888 
1889 	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
1890 		       DST_OBSOLETE_NONE, 0);
1891 	if (rt) {
1892 		rt6_info_init(rt);
1893 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
1894 
1895 		new = &rt->dst;
1896 		new->__use = 1;
1897 		new->input = dst_discard;
1898 		new->output = dst_discard_out;
1899 
1900 		dst_copy_metrics(new, &ort->dst);
1901 
1902 		rt->rt6i_idev = in6_dev_get(loopback_dev);
1903 		rt->rt6i_gateway = ort->rt6i_gateway;
1904 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1905 		rt->rt6i_metric = 0;
1906 
1907 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1908 #ifdef CONFIG_IPV6_SUBTREES
1909 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1910 #endif
1911 	}
1912 
1913 	dst_release(dst_orig);
1914 	return new ? new : ERR_PTR(-ENOMEM);
1915 }
1916 
1917 /*
1918  *	Destination cache support functions
1919  */
1920 
1921 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1922 {
1923 	if (rt->dst.from &&
1924 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1925 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1926 }
1927 
1928 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1929 {
1930 	u32 rt_cookie = 0;
1931 
1932 	if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
1933 		return NULL;
1934 
1935 	if (rt6_check_expired(rt))
1936 		return NULL;
1937 
1938 	return &rt->dst;
1939 }
1940 
1941 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1942 {
1943 	if (!__rt6_check_expired(rt) &&
1944 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1945 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1946 		return &rt->dst;
1947 	else
1948 		return NULL;
1949 }
1950 
1951 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1952 {
1953 	struct rt6_info *rt;
1954 
1955 	rt = (struct rt6_info *) dst;
1956 
1957 	/* All IPV6 dsts are created with ->obsolete set to
1958 	 * DST_OBSOLETE_FORCE_CHK, which forces validation calls to
1959 	 * always come down into this function.
1960 	 */
1961 
1962 	rt6_dst_from_metrics_check(rt);
1963 
1964 	if (rt->rt6i_flags & RTF_PCPU ||
1965 	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
1966 		return rt6_dst_from_check(rt, cookie);
1967 	else
1968 		return rt6_check(rt, cookie);
1969 }
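
/* The consumer side of this validation is the cookie-checked socket
 * dst cache; see ip6_sk_update_pmtu() below, which revalidates via
 * dst->ops->check(). A minimal sketch:
 *
 *	dst = __sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
 *	if (!dst)
 *		... route again and cache the fresh dst ...
 */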
1970 
1971 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1972 {
1973 	struct rt6_info *rt = (struct rt6_info *) dst;
1974 
1975 	if (rt) {
1976 		if (rt->rt6i_flags & RTF_CACHE) {
1977 			if (rt6_check_expired(rt)) {
1978 				ip6_del_rt(rt);
1979 				dst = NULL;
1980 			}
1981 		} else {
1982 			dst_release(dst);
1983 			dst = NULL;
1984 		}
1985 	}
1986 	return dst;
1987 }
1988 
1989 static void ip6_link_failure(struct sk_buff *skb)
1990 {
1991 	struct rt6_info *rt;
1992 
1993 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1994 
1995 	rt = (struct rt6_info *) skb_dst(skb);
1996 	if (rt) {
1997 		if (rt->rt6i_flags & RTF_CACHE) {
1998 			if (dst_hold_safe(&rt->dst))
1999 				ip6_del_rt(rt);
2000 		} else {
2001 			struct fib6_node *fn;
2002 
2003 			rcu_read_lock();
2004 			fn = rcu_dereference(rt->rt6i_node);
2005 			if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2006 				fn->fn_sernum = -1;
2007 			rcu_read_unlock();
2008 		}
2009 	}
2010 }
2011 
2012 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2013 {
2014 	struct net *net = dev_net(rt->dst.dev);
2015 
2016 	rt->rt6i_flags |= RTF_MODIFIED;
2017 	rt->rt6i_pmtu = mtu;
2018 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2019 }
2020 
2021 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2022 {
2023 	return !(rt->rt6i_flags & RTF_CACHE) &&
2024 		(rt->rt6i_flags & RTF_PCPU ||
2025 		 rcu_access_pointer(rt->rt6i_node));
2026 }
2027 
2028 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2029 				 const struct ipv6hdr *iph, u32 mtu)
2030 {
2031 	const struct in6_addr *daddr, *saddr;
2032 	struct rt6_info *rt6 = (struct rt6_info *)dst;
2033 
2034 	if (rt6->rt6i_flags & RTF_LOCAL)
2035 		return;
2036 
2037 	if (dst_metric_locked(dst, RTAX_MTU))
2038 		return;
2039 
2040 	if (iph) {
2041 		daddr = &iph->daddr;
2042 		saddr = &iph->saddr;
2043 	} else if (sk) {
2044 		daddr = &sk->sk_v6_daddr;
2045 		saddr = &inet6_sk(sk)->saddr;
2046 	} else {
2047 		daddr = NULL;
2048 		saddr = NULL;
2049 	}
2050 	dst_confirm_neigh(dst, daddr);
2051 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2052 	if (mtu >= dst_mtu(dst))
2053 		return;
2054 
2055 	if (!rt6_cache_allowed_for_pmtu(rt6)) {
2056 		rt6_do_update_pmtu(rt6, mtu);
2057 		/* update rt6_ex->stamp for cache */
2058 		if (rt6->rt6i_flags & RTF_CACHE)
2059 			rt6_update_exception_stamp_rt(rt6);
2060 	} else if (daddr) {
2061 		struct rt6_info *nrt6;
2062 
2063 		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
2064 		if (nrt6) {
2065 			rt6_do_update_pmtu(nrt6, mtu);
2066 			if (rt6_insert_exception(nrt6, rt6))
2067 				dst_release_immediate(&nrt6->dst);
2068 		}
2069 	}
2070 }
2071 
2072 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2073 			       struct sk_buff *skb, u32 mtu)
2074 {
2075 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2076 }
2077 
2078 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2079 		     int oif, u32 mark, kuid_t uid)
2080 {
2081 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2082 	struct dst_entry *dst;
2083 	struct flowi6 fl6;
2084 
2085 	memset(&fl6, 0, sizeof(fl6));
2086 	fl6.flowi6_oif = oif;
2087 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
2088 	fl6.daddr = iph->daddr;
2089 	fl6.saddr = iph->saddr;
2090 	fl6.flowlabel = ip6_flowinfo(iph);
2091 	fl6.flowi6_uid = uid;
2092 
2093 	dst = ip6_route_output(net, NULL, &fl6);
2094 	if (!dst->error)
2095 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
2096 	dst_release(dst);
2097 }
2098 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
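
/* Sketch of a caller, e.g. an ICMPv6 error handler propagating the MTU
 * reported in a Packet Too Big message; note the mtu argument is
 * big-endian (the raw ICMPv6 info field) and is converted with ntohl()
 * above:
 *
 *	if (type == ICMPV6_PKT_TOOBIG)
 *		ip6_update_pmtu(skb, net, info, 0, 0,
 *				sock_net_uid(net, NULL));
 */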
2099 
2100 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2101 {
2102 	struct dst_entry *dst;
2103 
2104 	ip6_update_pmtu(skb, sock_net(sk), mtu,
2105 			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
2106 
2107 	dst = __sk_dst_get(sk);
2108 	if (!dst || !dst->obsolete ||
2109 	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2110 		return;
2111 
2112 	bh_lock_sock(sk);
2113 	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2114 		ip6_datagram_dst_update(sk, false);
2115 	bh_unlock_sock(sk);
2116 }
2117 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2118 
2119 /* Handle redirects */
2120 struct ip6rd_flowi {
2121 	struct flowi6 fl6;
2122 	struct in6_addr gateway;
2123 };
2124 
2125 static struct rt6_info *__ip6_route_redirect(struct net *net,
2126 					     struct fib6_table *table,
2127 					     struct flowi6 *fl6,
2128 					     int flags)
2129 {
2130 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2131 	struct rt6_info *rt, *rt_cache;
2132 	struct fib6_node *fn;
2133 
2134 	/* Get the "current" route for this destination and
2135 	 * check if the redirect has come from an appropriate router.
2136 	 *
2137 	 * RFC 4861 specifies that redirects should only be
2138 	 * accepted if they come from the nexthop to the target.
2139 	 * Due to the way the routes are chosen, this notion
2140 	 * is a bit fuzzy and one might need to check all possible
2141 	 * routes.
2142 	 */
2143 
2144 	rcu_read_lock();
2145 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2146 restart:
2147 	for_each_fib6_node_rt_rcu(fn) {
2148 		if (rt6_check_expired(rt))
2149 			continue;
2150 		if (rt->dst.error)
2151 			break;
2152 		if (!(rt->rt6i_flags & RTF_GATEWAY))
2153 			continue;
2154 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
2155 			continue;
2156 		/* rt_cache's gateway might be different from its 'parent'
2157 		 * in the case of an ip redirect.
2158 		 * So we keep searching in the exception table if the gateway
2159 		 * is different.
2160 		 */
2161 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
2162 			rt_cache = rt6_find_cached_rt(rt,
2163 						      &fl6->daddr,
2164 						      &fl6->saddr);
2165 			if (rt_cache &&
2166 			    ipv6_addr_equal(&rdfl->gateway,
2167 					    &rt_cache->rt6i_gateway)) {
2168 				rt = rt_cache;
2169 				break;
2170 			}
2171 			continue;
2172 		}
2173 		break;
2174 	}
2175 
2176 	if (!rt)
2177 		rt = net->ipv6.ip6_null_entry;
2178 	else if (rt->dst.error) {
2179 		rt = net->ipv6.ip6_null_entry;
2180 		goto out;
2181 	}
2182 
2183 	if (rt == net->ipv6.ip6_null_entry) {
2184 		fn = fib6_backtrack(fn, &fl6->saddr);
2185 		if (fn)
2186 			goto restart;
2187 	}
2188 
2189 out:
2190 	ip6_hold_safe(net, &rt, true);
2191 
2192 	rcu_read_unlock();
2193 
2194 	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
2195 	return rt;
2196 }
2197 
2198 static struct dst_entry *ip6_route_redirect(struct net *net,
2199 					const struct flowi6 *fl6,
2200 					const struct in6_addr *gateway)
2201 {
2202 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2203 	struct ip6rd_flowi rdfl;
2204 
2205 	rdfl.fl6 = *fl6;
2206 	rdfl.gateway = *gateway;
2207 
2208 	return fib6_rule_lookup(net, &rdfl.fl6,
2209 				flags, __ip6_route_redirect);
2210 }
2211 
2212 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2213 		  kuid_t uid)
2214 {
2215 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2216 	struct dst_entry *dst;
2217 	struct flowi6 fl6;
2218 
2219 	memset(&fl6, 0, sizeof(fl6));
2220 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
2221 	fl6.flowi6_oif = oif;
2222 	fl6.flowi6_mark = mark;
2223 	fl6.daddr = iph->daddr;
2224 	fl6.saddr = iph->saddr;
2225 	fl6.flowlabel = ip6_flowinfo(iph);
2226 	fl6.flowi6_uid = uid;
2227 
2228 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
2229 	rt6_do_redirect(dst, NULL, skb);
2230 	dst_release(dst);
2231 }
2232 EXPORT_SYMBOL_GPL(ip6_redirect);
2233 
2234 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2235 			    u32 mark)
2236 {
2237 	const struct ipv6hdr *iph = ipv6_hdr(skb);
2238 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2239 	struct dst_entry *dst;
2240 	struct flowi6 fl6;
2241 
2242 	memset(&fl6, 0, sizeof(fl6));
2243 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
2244 	fl6.flowi6_oif = oif;
2245 	fl6.flowi6_mark = mark;
2246 	fl6.daddr = msg->dest;
2247 	fl6.saddr = iph->daddr;
2248 	fl6.flowi6_uid = sock_net_uid(net, NULL);
2249 
2250 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
2251 	rt6_do_redirect(dst, NULL, skb);
2252 	dst_release(dst);
2253 }
2254 
2255 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2256 {
2257 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2258 		     sk->sk_uid);
2259 }
2260 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2261 
2262 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
2263 {
2264 	struct net_device *dev = dst->dev;
2265 	unsigned int mtu = dst_mtu(dst);
2266 	struct net *net = dev_net(dev);
2267 
2268 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2269 
2270 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2271 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
2272 
2273 	/*
2274 	 * The maximal non-jumbo IPv6 payload is IPV6_MAXPLEN, and the
2275 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2276 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
2277 	 * rely only on pmtu discovery"
2278 	 */
2279 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2280 		mtu = IPV6_MAXPLEN;
2281 	return mtu;
2282 }
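
/* Worked example: on a link with a 1500-byte MTU the advertised MSS is
 * 1500 - sizeof(struct ipv6hdr) - sizeof(struct tcphdr)
 *	= 1500 - 40 - 20 = 1440 bytes.
 */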
2283 
2284 static unsigned int ip6_mtu(const struct dst_entry *dst)
2285 {
2286 	const struct rt6_info *rt = (const struct rt6_info *)dst;
2287 	unsigned int mtu = rt->rt6i_pmtu;
2288 	struct inet6_dev *idev;
2289 
2290 	if (mtu)
2291 		goto out;
2292 
2293 	mtu = dst_metric_raw(dst, RTAX_MTU);
2294 	if (mtu)
2295 		goto out;
2296 
2297 	mtu = IPV6_MIN_MTU;
2298 
2299 	rcu_read_lock();
2300 	idev = __in6_dev_get(dst->dev);
2301 	if (idev)
2302 		mtu = idev->cnf.mtu6;
2303 	rcu_read_unlock();
2304 
2305 out:
2306 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2307 
2308 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
2309 }
2310 
2311 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
2312 				  struct flowi6 *fl6)
2313 {
2314 	struct dst_entry *dst;
2315 	struct rt6_info *rt;
2316 	struct inet6_dev *idev = in6_dev_get(dev);
2317 	struct net *net = dev_net(dev);
2318 
2319 	if (unlikely(!idev))
2320 		return ERR_PTR(-ENODEV);
2321 
2322 	rt = ip6_dst_alloc(net, dev, 0);
2323 	if (unlikely(!rt)) {
2324 		in6_dev_put(idev);
2325 		dst = ERR_PTR(-ENOMEM);
2326 		goto out;
2327 	}
2328 
2329 	rt->dst.flags |= DST_HOST;
2330 	rt->dst.output  = ip6_output;
2331 	rt->rt6i_gateway  = fl6->daddr;
2332 	rt->rt6i_dst.addr = fl6->daddr;
2333 	rt->rt6i_dst.plen = 128;
2334 	rt->rt6i_idev     = idev;
2335 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
2336 
2337 	/* Add this dst into uncached_list so that rt6_ifdown() can
2338 	 * properly release the net_device
2339 	 */
2340 	rt6_uncached_list_add(rt);
2341 	atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
2342 
2343 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2344 
2345 out:
2346 	return dst;
2347 }
2348 
2349 static int ip6_dst_gc(struct dst_ops *ops)
2350 {
2351 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
2352 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2353 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2354 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2355 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2356 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
2357 	int entries;
2358 
2359 	entries = dst_entries_get_fast(ops);
2360 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
2361 	    entries <= rt_max_size)
2362 		goto out;
2363 
2364 	net->ipv6.ip6_rt_gc_expire++;
2365 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
2366 	entries = dst_entries_get_slow(ops);
2367 	if (entries < ops->gc_thresh)
2368 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
2369 out:
2370 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
2371 	return entries > rt_max_size;
2372 }
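
/* The thresholds consulted above are runtime-tunable through the
 * net.ipv6.route.{gc_min_interval,max_size,gc_elasticity,gc_timeout}
 * sysctls, exposed under /proc/sys/net/ipv6/route/.
 */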
2373 
2374 static int ip6_convert_metrics(struct mx6_config *mxc,
2375 			       const struct fib6_config *cfg)
2376 {
2377 	bool ecn_ca = false;
2378 	struct nlattr *nla;
2379 	int remaining;
2380 	u32 *mp;
2381 
2382 	if (!cfg->fc_mx)
2383 		return 0;
2384 
2385 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
2386 	if (unlikely(!mp))
2387 		return -ENOMEM;
2388 
2389 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
2390 		int type = nla_type(nla);
2391 		u32 val;
2392 
2393 		if (!type)
2394 			continue;
2395 		if (unlikely(type > RTAX_MAX))
2396 			goto err;
2397 
2398 		if (type == RTAX_CC_ALGO) {
2399 			char tmp[TCP_CA_NAME_MAX];
2400 
2401 			nla_strlcpy(tmp, nla, sizeof(tmp));
2402 			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
2403 			if (val == TCP_CA_UNSPEC)
2404 				goto err;
2405 		} else {
2406 			val = nla_get_u32(nla);
2407 		}
2408 		if (type == RTAX_HOPLIMIT && val > 255)
2409 			val = 255;
2410 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
2411 			goto err;
2412 
2413 		mp[type - 1] = val;
2414 		__set_bit(type - 1, mxc->mx_valid);
2415 	}
2416 
2417 	if (ecn_ca) {
2418 		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
2419 		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
2420 	}
2421 
2422 	mxc->mx = mp;
2423 	return 0;
2424  err:
2425 	kfree(mp);
2426 	return -EINVAL;
2427 }
2428 
2429 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2430 					    struct fib6_config *cfg,
2431 					    const struct in6_addr *gw_addr)
2432 {
2433 	struct flowi6 fl6 = {
2434 		.flowi6_oif = cfg->fc_ifindex,
2435 		.daddr = *gw_addr,
2436 		.saddr = cfg->fc_prefsrc,
2437 	};
2438 	struct fib6_table *table;
2439 	struct rt6_info *rt;
2440 	int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
2441 
2442 	table = fib6_get_table(net, cfg->fc_table);
2443 	if (!table)
2444 		return NULL;
2445 
2446 	if (!ipv6_addr_any(&cfg->fc_prefsrc))
2447 		flags |= RT6_LOOKUP_F_HAS_SADDR;
2448 
2449 	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
2450 
2451 	/* if table lookup failed, fall back to full lookup */
2452 	if (rt == net->ipv6.ip6_null_entry) {
2453 		ip6_rt_put(rt);
2454 		rt = NULL;
2455 	}
2456 
2457 	return rt;
2458 }
2459 
2460 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2461 					      struct netlink_ext_ack *extack)
2462 {
2463 	struct net *net = cfg->fc_nlinfo.nl_net;
2464 	struct rt6_info *rt = NULL;
2465 	struct net_device *dev = NULL;
2466 	struct inet6_dev *idev = NULL;
2467 	struct fib6_table *table;
2468 	int addr_type;
2469 	int err = -EINVAL;
2470 
2471 	/* RTF_PCPU is an internal flag; cannot be set by userspace */
2472 	if (cfg->fc_flags & RTF_PCPU) {
2473 		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
2474 		goto out;
2475 	}
2476 
2477 	if (cfg->fc_dst_len > 128) {
2478 		NL_SET_ERR_MSG(extack, "Invalid prefix length");
2479 		goto out;
2480 	}
2481 	if (cfg->fc_src_len > 128) {
2482 		NL_SET_ERR_MSG(extack, "Invalid source address length");
2483 		goto out;
2484 	}
2485 #ifndef CONFIG_IPV6_SUBTREES
2486 	if (cfg->fc_src_len) {
2487 		NL_SET_ERR_MSG(extack,
2488 			       "Specifying source address requires IPV6_SUBTREES to be enabled");
2489 		goto out;
2490 	}
2491 #endif
2492 	if (cfg->fc_ifindex) {
2493 		err = -ENODEV;
2494 		dev = dev_get_by_index(net, cfg->fc_ifindex);
2495 		if (!dev)
2496 			goto out;
2497 		idev = in6_dev_get(dev);
2498 		if (!idev)
2499 			goto out;
2500 	}
2501 
2502 	if (cfg->fc_metric == 0)
2503 		cfg->fc_metric = IP6_RT_PRIO_USER;
2504 
2505 	err = -ENOBUFS;
2506 	if (cfg->fc_nlinfo.nlh &&
2507 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
2508 		table = fib6_get_table(net, cfg->fc_table);
2509 		if (!table) {
2510 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
2511 			table = fib6_new_table(net, cfg->fc_table);
2512 		}
2513 	} else {
2514 		table = fib6_new_table(net, cfg->fc_table);
2515 	}
2516 
2517 	if (!table)
2518 		goto out;
2519 
2520 	rt = ip6_dst_alloc(net, NULL,
2521 			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
2522 
2523 	if (!rt) {
2524 		err = -ENOMEM;
2525 		goto out;
2526 	}
2527 
2528 	if (cfg->fc_flags & RTF_EXPIRES)
2529 		rt6_set_expires(rt, jiffies +
2530 				clock_t_to_jiffies(cfg->fc_expires));
2531 	else
2532 		rt6_clean_expires(rt);
2533 
2534 	if (cfg->fc_protocol == RTPROT_UNSPEC)
2535 		cfg->fc_protocol = RTPROT_BOOT;
2536 	rt->rt6i_protocol = cfg->fc_protocol;
2537 
2538 	addr_type = ipv6_addr_type(&cfg->fc_dst);
2539 
2540 	if (addr_type & IPV6_ADDR_MULTICAST)
2541 		rt->dst.input = ip6_mc_input;
2542 	else if (cfg->fc_flags & RTF_LOCAL)
2543 		rt->dst.input = ip6_input;
2544 	else
2545 		rt->dst.input = ip6_forward;
2546 
2547 	rt->dst.output = ip6_output;
2548 
2549 	if (cfg->fc_encap) {
2550 		struct lwtunnel_state *lwtstate;
2551 
2552 		err = lwtunnel_build_state(cfg->fc_encap_type,
2553 					   cfg->fc_encap, AF_INET6, cfg,
2554 					   &lwtstate, extack);
2555 		if (err)
2556 			goto out;
2557 		rt->dst.lwtstate = lwtstate_get(lwtstate);
2558 		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
2559 			rt->dst.lwtstate->orig_output = rt->dst.output;
2560 			rt->dst.output = lwtunnel_output;
2561 		}
2562 		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
2563 			rt->dst.lwtstate->orig_input = rt->dst.input;
2564 			rt->dst.input = lwtunnel_input;
2565 		}
2566 	}
2567 
2568 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2569 	rt->rt6i_dst.plen = cfg->fc_dst_len;
2570 	if (rt->rt6i_dst.plen == 128)
2571 		rt->dst.flags |= DST_HOST;
2572 
2573 #ifdef CONFIG_IPV6_SUBTREES
2574 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
2575 	rt->rt6i_src.plen = cfg->fc_src_len;
2576 #endif
2577 
2578 	rt->rt6i_metric = cfg->fc_metric;
2579 
2580 	/* We cannot add true routes via loopback here;
2581 	   they would result in kernel looping, so promote them to reject routes
2582 	 */
2583 	if ((cfg->fc_flags & RTF_REJECT) ||
2584 	    (dev && (dev->flags & IFF_LOOPBACK) &&
2585 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
2586 	     !(cfg->fc_flags & RTF_LOCAL))) {
2587 		/* hold loopback dev/idev if we haven't done so. */
2588 		if (dev != net->loopback_dev) {
2589 			if (dev) {
2590 				dev_put(dev);
2591 				in6_dev_put(idev);
2592 			}
2593 			dev = net->loopback_dev;
2594 			dev_hold(dev);
2595 			idev = in6_dev_get(dev);
2596 			if (!idev) {
2597 				err = -ENODEV;
2598 				goto out;
2599 			}
2600 		}
2601 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
2602 		switch (cfg->fc_type) {
2603 		case RTN_BLACKHOLE:
2604 			rt->dst.error = -EINVAL;
2605 			rt->dst.output = dst_discard_out;
2606 			rt->dst.input = dst_discard;
2607 			break;
2608 		case RTN_PROHIBIT:
2609 			rt->dst.error = -EACCES;
2610 			rt->dst.output = ip6_pkt_prohibit_out;
2611 			rt->dst.input = ip6_pkt_prohibit;
2612 			break;
2613 		case RTN_THROW:
2614 		case RTN_UNREACHABLE:
2615 		default:
2616 			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
2617 					: (cfg->fc_type == RTN_UNREACHABLE)
2618 					? -EHOSTUNREACH : -ENETUNREACH;
2619 			rt->dst.output = ip6_pkt_discard_out;
2620 			rt->dst.input = ip6_pkt_discard;
2621 			break;
2622 		}
2623 		goto install_route;
2624 	}
2625 
2626 	if (cfg->fc_flags & RTF_GATEWAY) {
2627 		const struct in6_addr *gw_addr;
2628 		int gwa_type;
2629 
2630 		gw_addr = &cfg->fc_gateway;
2631 		gwa_type = ipv6_addr_type(gw_addr);
2632 
2633 		/* if gw_addr is local we will fail to detect this in case
2634 		 * the address is still TENTATIVE (DAD in progress). rt6_lookup()
2635 		 * will return the already-added prefix route via the interface
2636 		 * the prefix route was assigned to, which might be non-loopback.
2637 		 */
2638 		err = -EINVAL;
2639 		if (ipv6_chk_addr_and_flags(net, gw_addr,
2640 					    gwa_type & IPV6_ADDR_LINKLOCAL ?
2641 					    dev : NULL, 0, 0)) {
2642 			NL_SET_ERR_MSG(extack, "Invalid gateway address");
2643 			goto out;
2644 		}
2645 		rt->rt6i_gateway = *gw_addr;
2646 
2647 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
2648 			struct rt6_info *grt = NULL;
2649 
2650 			/* IPv6 strictly inhibits using non-link-local
2651 			   addresses as nexthop addresses.
2652 			   Otherwise, the router will not be able to send redirects.
2653 			   That is very good, but in some (rare!) circumstances
2654 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
2655 			   some exceptions. --ANK
2656 			   We allow IPv4-mapped nexthops to support RFC4798-style
2657 			   addressing
2658 			 */
2659 			if (!(gwa_type & (IPV6_ADDR_UNICAST |
2660 					  IPV6_ADDR_MAPPED))) {
2661 				NL_SET_ERR_MSG(extack,
2662 					       "Invalid gateway address");
2663 				goto out;
2664 			}
2665 
2666 			if (cfg->fc_table) {
2667 				grt = ip6_nh_lookup_table(net, cfg, gw_addr);
2668 
2669 				if (grt) {
2670 					if (grt->rt6i_flags & RTF_GATEWAY ||
2671 					    (dev && dev != grt->dst.dev)) {
2672 						ip6_rt_put(grt);
2673 						grt = NULL;
2674 					}
2675 				}
2676 			}
2677 
2678 			if (!grt)
2679 				grt = rt6_lookup(net, gw_addr, NULL,
2680 						 cfg->fc_ifindex, 1);
2681 
2682 			err = -EHOSTUNREACH;
2683 			if (!grt)
2684 				goto out;
2685 			if (dev) {
2686 				if (dev != grt->dst.dev) {
2687 					ip6_rt_put(grt);
2688 					goto out;
2689 				}
2690 			} else {
2691 				dev = grt->dst.dev;
2692 				idev = grt->rt6i_idev;
2693 				dev_hold(dev);
2694 				in6_dev_hold(grt->rt6i_idev);
2695 			}
2696 			if (!(grt->rt6i_flags & RTF_GATEWAY))
2697 				err = 0;
2698 			ip6_rt_put(grt);
2699 
2700 			if (err)
2701 				goto out;
2702 		}
2703 		err = -EINVAL;
2704 		if (!dev) {
2705 			NL_SET_ERR_MSG(extack, "Egress device not specified");
2706 			goto out;
2707 		} else if (dev->flags & IFF_LOOPBACK) {
2708 			NL_SET_ERR_MSG(extack,
2709 				       "Egress device can not be loopback device for this route");
2710 			goto out;
2711 		}
2712 	}
2713 
2714 	err = -ENODEV;
2715 	if (!dev)
2716 		goto out;
2717 
2718 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2719 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
2720 			NL_SET_ERR_MSG(extack, "Invalid source address");
2721 			err = -EINVAL;
2722 			goto out;
2723 		}
2724 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
2725 		rt->rt6i_prefsrc.plen = 128;
2726 	} else
2727 		rt->rt6i_prefsrc.plen = 0;
2728 
2729 	rt->rt6i_flags = cfg->fc_flags;
2730 
2731 install_route:
2732 	rt->dst.dev = dev;
2733 	rt->rt6i_idev = idev;
2734 	rt->rt6i_table = table;
2735 
2736 	cfg->fc_nlinfo.nl_net = dev_net(dev);
2737 
2738 	return rt;
2739 out:
2740 	if (dev)
2741 		dev_put(dev);
2742 	if (idev)
2743 		in6_dev_put(idev);
2744 	if (rt)
2745 		dst_release_immediate(&rt->dst);
2746 
2747 	return ERR_PTR(err);
2748 }
2749 
2750 int ip6_route_add(struct fib6_config *cfg,
2751 		  struct netlink_ext_ack *extack)
2752 {
2753 	struct mx6_config mxc = { .mx = NULL, };
2754 	struct rt6_info *rt;
2755 	int err;
2756 
2757 	rt = ip6_route_info_create(cfg, extack);
2758 	if (IS_ERR(rt)) {
2759 		err = PTR_ERR(rt);
2760 		rt = NULL;
2761 		goto out;
2762 	}
2763 
2764 	err = ip6_convert_metrics(&mxc, cfg);
2765 	if (err)
2766 		goto out;
2767 
2768 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
2769 
2770 	kfree(mxc.mx);
2771 
2772 	return err;
2773 out:
2774 	if (rt)
2775 		dst_release_immediate(&rt->dst);
2776 
2777 	return err;
2778 }
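
/* Minimal in-kernel usage sketch (hypothetical caller holding valid
 * 'net', 'dev' and an in6_addr 'prefix'): install a plain /64 route,
 * much as addrconf does for on-link prefixes:
 *
 *	struct fib6_config cfg = {
 *		.fc_table	= RT6_TABLE_MAIN,
 *		.fc_metric	= IP6_RT_PRIO_USER,
 *		.fc_ifindex	= dev->ifindex,
 *		.fc_dst		= prefix,
 *		.fc_dst_len	= 64,
 *		.fc_flags	= RTF_UP,
 *		.fc_nlinfo.nl_net = net,
 *	};
 *	int err = ip6_route_add(&cfg, NULL);
 */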
2779 
2780 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2781 {
2782 	int err;
2783 	struct fib6_table *table;
2784 	struct net *net = dev_net(rt->dst.dev);
2785 
2786 	if (rt == net->ipv6.ip6_null_entry) {
2787 		err = -ENOENT;
2788 		goto out;
2789 	}
2790 
2791 	table = rt->rt6i_table;
2792 	spin_lock_bh(&table->tb6_lock);
2793 	err = fib6_del(rt, info);
2794 	spin_unlock_bh(&table->tb6_lock);
2795 
2796 out:
2797 	ip6_rt_put(rt);
2798 	return err;
2799 }
2800 
2801 int ip6_del_rt(struct rt6_info *rt)
2802 {
2803 	struct nl_info info = {
2804 		.nl_net = dev_net(rt->dst.dev),
2805 	};
2806 	return __ip6_del_rt(rt, &info);
2807 }
2808 
2809 static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2810 {
2811 	struct nl_info *info = &cfg->fc_nlinfo;
2812 	struct net *net = info->nl_net;
2813 	struct sk_buff *skb = NULL;
2814 	struct fib6_table *table;
2815 	int err = -ENOENT;
2816 
2817 	if (rt == net->ipv6.ip6_null_entry)
2818 		goto out_put;
2819 	table = rt->rt6i_table;
2820 	spin_lock_bh(&table->tb6_lock);
2821 
2822 	if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2823 		struct rt6_info *sibling, *next_sibling;
2824 
2825 		/* prefer to send a single notification with all hops */
2826 		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
2827 		if (skb) {
2828 			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2829 
2830 			if (rt6_fill_node(net, skb, rt,
2831 					  NULL, NULL, 0, RTM_DELROUTE,
2832 					  info->portid, seq, 0) < 0) {
2833 				kfree_skb(skb);
2834 				skb = NULL;
2835 			} else
2836 				info->skip_notify = 1;
2837 		}
2838 
2839 		list_for_each_entry_safe(sibling, next_sibling,
2840 					 &rt->rt6i_siblings,
2841 					 rt6i_siblings) {
2842 			err = fib6_del(sibling, info);
2843 			if (err)
2844 				goto out_unlock;
2845 		}
2846 	}
2847 
2848 	err = fib6_del(rt, info);
2849 out_unlock:
2850 	spin_unlock_bh(&table->tb6_lock);
2851 out_put:
2852 	ip6_rt_put(rt);
2853 
2854 	if (skb) {
2855 		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2856 			    info->nlh, gfp_any());
2857 	}
2858 	return err;
2859 }
2860 
2861 static int ip6_route_del(struct fib6_config *cfg,
2862 			 struct netlink_ext_ack *extack)
2863 {
2864 	struct rt6_info *rt, *rt_cache;
2865 	struct fib6_table *table;
2866 	struct fib6_node *fn;
2867 	int err = -ESRCH;
2868 
2869 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2870 	if (!table) {
2871 		NL_SET_ERR_MSG(extack, "FIB table does not exist");
2872 		return err;
2873 	}
2874 
2875 	rcu_read_lock();
2876 
2877 	fn = fib6_locate(&table->tb6_root,
2878 			 &cfg->fc_dst, cfg->fc_dst_len,
2879 			 &cfg->fc_src, cfg->fc_src_len,
2880 			 !(cfg->fc_flags & RTF_CACHE));
2881 
2882 	if (fn) {
2883 		for_each_fib6_node_rt_rcu(fn) {
2884 			if (cfg->fc_flags & RTF_CACHE) {
2885 				rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
2886 							      &cfg->fc_src);
2887 				if (!rt_cache)
2888 					continue;
2889 				rt = rt_cache;
2890 			}
2891 			if (cfg->fc_ifindex &&
2892 			    (!rt->dst.dev ||
2893 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
2894 				continue;
2895 			if (cfg->fc_flags & RTF_GATEWAY &&
2896 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2897 				continue;
2898 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2899 				continue;
2900 			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2901 				continue;
2902 			if (!dst_hold_safe(&rt->dst))
2903 				break;
2904 			rcu_read_unlock();
2905 
2906 			/* if a gateway was specified, only delete the one hop */
2907 			if (cfg->fc_flags & RTF_GATEWAY)
2908 				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2909 
2910 			return __ip6_del_rt_siblings(rt, cfg);
2911 		}
2912 	}
2913 	rcu_read_unlock();
2914 
2915 	return err;
2916 }
2917 
2918 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2919 {
2920 	struct netevent_redirect netevent;
2921 	struct rt6_info *rt, *nrt = NULL;
2922 	struct ndisc_options ndopts;
2923 	struct inet6_dev *in6_dev;
2924 	struct neighbour *neigh;
2925 	struct rd_msg *msg;
2926 	int optlen, on_link;
2927 	u8 *lladdr;
2928 
2929 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2930 	optlen -= sizeof(*msg);
2931 
2932 	if (optlen < 0) {
2933 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2934 		return;
2935 	}
2936 
2937 	msg = (struct rd_msg *)icmp6_hdr(skb);
2938 
2939 	if (ipv6_addr_is_multicast(&msg->dest)) {
2940 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2941 		return;
2942 	}
2943 
2944 	on_link = 0;
2945 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2946 		on_link = 1;
2947 	} else if (ipv6_addr_type(&msg->target) !=
2948 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2949 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2950 		return;
2951 	}
2952 
2953 	in6_dev = __in6_dev_get(skb->dev);
2954 	if (!in6_dev)
2955 		return;
2956 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2957 		return;
2958 
2959 	/* RFC2461 8.1:
2960 	 *	The IP source address of the Redirect MUST be the same as the current
2961 	 *	first-hop router for the specified ICMP Destination Address.
2962 	 */
2963 
2964 	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
2965 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2966 		return;
2967 	}
2968 
2969 	lladdr = NULL;
2970 	if (ndopts.nd_opts_tgt_lladdr) {
2971 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2972 					     skb->dev);
2973 		if (!lladdr) {
2974 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2975 			return;
2976 		}
2977 	}
2978 
2979 	rt = (struct rt6_info *) dst;
2980 	if (rt->rt6i_flags & RTF_REJECT) {
2981 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2982 		return;
2983 	}
2984 
2985 	/* Redirect received -> path was valid.
2986 	 * Look, redirects are sent only in response to data packets,
2987 	 * so this nexthop is apparently reachable. --ANK
2988 	 */
2989 	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
2990 
2991 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2992 	if (!neigh)
2993 		return;
2994 
2995 	/*
2996 	 *	We have finally decided to accept it.
2997 	 */
2998 
2999 	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
3000 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
3001 		     NEIGH_UPDATE_F_OVERRIDE|
3002 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
3003 				     NEIGH_UPDATE_F_ISROUTER)),
3004 		     NDISC_REDIRECT, &ndopts);
3005 
3006 	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
3007 	if (!nrt)
3008 		goto out;
3009 
3010 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3011 	if (on_link)
3012 		nrt->rt6i_flags &= ~RTF_GATEWAY;
3013 
3014 	nrt->rt6i_protocol = RTPROT_REDIRECT;
3015 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
3016 
3017 	/* No need to remove rt from the exception table if rt is
3018 	 * a cached route because rt6_insert_exception() will
3019 	 * take care of it
3020 	 */
3021 	if (rt6_insert_exception(nrt, rt)) {
3022 		dst_release_immediate(&nrt->dst);
3023 		goto out;
3024 	}
3025 
3026 	netevent.old = &rt->dst;
3027 	netevent.new = &nrt->dst;
3028 	netevent.daddr = &msg->dest;
3029 	netevent.neigh = neigh;
3030 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3031 
3032 out:
3033 	neigh_release(neigh);
3034 }
3035 
3036 /*
3037  *	Misc support functions
3038  */
3039 
3040 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
3041 {
3042 	BUG_ON(from->dst.from);
3043 
3044 	rt->rt6i_flags &= ~RTF_EXPIRES;
3045 	dst_hold(&from->dst);
3046 	rt->dst.from = &from->dst;
3047 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
3048 }
3049 
3050 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
3051 {
3052 	rt->dst.input = ort->dst.input;
3053 	rt->dst.output = ort->dst.output;
3054 	rt->rt6i_dst = ort->rt6i_dst;
3055 	rt->dst.error = ort->dst.error;
3056 	rt->rt6i_idev = ort->rt6i_idev;
3057 	if (rt->rt6i_idev)
3058 		in6_dev_hold(rt->rt6i_idev);
3059 	rt->dst.lastuse = jiffies;
3060 	rt->rt6i_gateway = ort->rt6i_gateway;
3061 	rt->rt6i_flags = ort->rt6i_flags;
3062 	rt6_set_from(rt, ort);
3063 	rt->rt6i_metric = ort->rt6i_metric;
3064 #ifdef CONFIG_IPV6_SUBTREES
3065 	rt->rt6i_src = ort->rt6i_src;
3066 #endif
3067 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
3068 	rt->rt6i_table = ort->rt6i_table;
3069 	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
3070 }
3071 
3072 #ifdef CONFIG_IPV6_ROUTE_INFO
3073 static struct rt6_info *rt6_get_route_info(struct net *net,
3074 					   const struct in6_addr *prefix, int prefixlen,
3075 					   const struct in6_addr *gwaddr,
3076 					   struct net_device *dev)
3077 {
3078 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3079 	int ifindex = dev->ifindex;
3080 	struct fib6_node *fn;
3081 	struct rt6_info *rt = NULL;
3082 	struct fib6_table *table;
3083 
3084 	table = fib6_get_table(net, tb_id);
3085 	if (!table)
3086 		return NULL;
3087 
3088 	rcu_read_lock();
3089 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
3090 	if (!fn)
3091 		goto out;
3092 
3093 	for_each_fib6_node_rt_rcu(fn) {
3094 		if (rt->dst.dev->ifindex != ifindex)
3095 			continue;
3096 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3097 			continue;
3098 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
3099 			continue;
3100 		ip6_hold_safe(NULL, &rt, false);
3101 		break;
3102 	}
3103 out:
3104 	rcu_read_unlock();
3105 	return rt;
3106 }
3107 
3108 static struct rt6_info *rt6_add_route_info(struct net *net,
3109 					   const struct in6_addr *prefix, int prefixlen,
3110 					   const struct in6_addr *gwaddr,
3111 					   struct net_device *dev,
3112 					   unsigned int pref)
3113 {
3114 	struct fib6_config cfg = {
3115 		.fc_metric	= IP6_RT_PRIO_USER,
3116 		.fc_ifindex	= dev->ifindex,
3117 		.fc_dst_len	= prefixlen,
3118 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3119 				  RTF_UP | RTF_PREF(pref),
3120 		.fc_protocol = RTPROT_RA,
3121 		.fc_nlinfo.portid = 0,
3122 		.fc_nlinfo.nlh = NULL,
3123 		.fc_nlinfo.nl_net = net,
3124 	};
3125 
3126 	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3127 	cfg.fc_dst = *prefix;
3128 	cfg.fc_gateway = *gwaddr;
3129 
3130 	/* We should treat it as a default route if prefix length is 0. */
3131 	if (!prefixlen)
3132 		cfg.fc_flags |= RTF_DEFAULT;
3133 
3134 	ip6_route_add(&cfg, NULL);
3135 
3136 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
3137 }
3138 #endif
3139 
3140 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
3141 {
3142 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
3143 	struct rt6_info *rt;
3144 	struct fib6_table *table;
3145 
3146 	table = fib6_get_table(dev_net(dev), tb_id);
3147 	if (!table)
3148 		return NULL;
3149 
3150 	rcu_read_lock();
3151 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
3152 		if (dev == rt->dst.dev &&
3153 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
3154 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
3155 			break;
3156 	}
3157 	if (rt)
3158 		ip6_hold_safe(NULL, &rt, false);
3159 	rcu_read_unlock();
3160 	return rt;
3161 }
3162 
3163 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
3164 				     struct net_device *dev,
3165 				     unsigned int pref)
3166 {
3167 	struct fib6_config cfg = {
3168 		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
3169 		.fc_metric	= IP6_RT_PRIO_USER,
3170 		.fc_ifindex	= dev->ifindex,
3171 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3172 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
3173 		.fc_protocol = RTPROT_RA,
3174 		.fc_nlinfo.portid = 0,
3175 		.fc_nlinfo.nlh = NULL,
3176 		.fc_nlinfo.nl_net = dev_net(dev),
3177 	};
3178 
3179 	cfg.fc_gateway = *gwaddr;
3180 
3181 	if (!ip6_route_add(&cfg, NULL)) {
3182 		struct fib6_table *table;
3183 
3184 		table = fib6_get_table(dev_net(dev), cfg.fc_table);
3185 		if (table)
3186 			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3187 	}
3188 
3189 	return rt6_get_dflt_router(gwaddr, dev);
3190 }
3191 
3192 static void __rt6_purge_dflt_routers(struct fib6_table *table)
3193 {
3194 	struct rt6_info *rt;
3195 
3196 restart:
3197 	rcu_read_lock();
3198 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
3199 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3200 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
3201 			if (dst_hold_safe(&rt->dst)) {
3202 				rcu_read_unlock();
3203 				ip6_del_rt(rt);
3204 			} else {
3205 				rcu_read_unlock();
3206 			}
3207 			goto restart;
3208 		}
3209 	}
3210 	rcu_read_unlock();
3211 
3212 	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3213 }
3214 
3215 void rt6_purge_dflt_routers(struct net *net)
3216 {
3217 	struct fib6_table *table;
3218 	struct hlist_head *head;
3219 	unsigned int h;
3220 
3221 	rcu_read_lock();
3222 
3223 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3224 		head = &net->ipv6.fib_table_hash[h];
3225 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3226 			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3227 				__rt6_purge_dflt_routers(table);
3228 		}
3229 	}
3230 
3231 	rcu_read_unlock();
3232 }
3233 
3234 static void rtmsg_to_fib6_config(struct net *net,
3235 				 struct in6_rtmsg *rtmsg,
3236 				 struct fib6_config *cfg)
3237 {
3238 	memset(cfg, 0, sizeof(*cfg));
3239 
3240 	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3241 			 : RT6_TABLE_MAIN;
3242 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3243 	cfg->fc_metric = rtmsg->rtmsg_metric;
3244 	cfg->fc_expires = rtmsg->rtmsg_info;
3245 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3246 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
3247 	cfg->fc_flags = rtmsg->rtmsg_flags;
3248 
3249 	cfg->fc_nlinfo.nl_net = net;
3250 
3251 	cfg->fc_dst = rtmsg->rtmsg_dst;
3252 	cfg->fc_src = rtmsg->rtmsg_src;
3253 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
3254 }
3255 
3256 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3257 {
3258 	struct fib6_config cfg;
3259 	struct in6_rtmsg rtmsg;
3260 	int err;
3261 
3262 	switch (cmd) {
3263 	case SIOCADDRT:		/* Add a route */
3264 	case SIOCDELRT:		/* Delete a route */
3265 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
3266 			return -EPERM;
3267 		err = copy_from_user(&rtmsg, arg,
3268 				     sizeof(struct in6_rtmsg));
3269 		if (err)
3270 			return -EFAULT;
3271 
3272 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
3273 
3274 		rtnl_lock();
3275 		switch (cmd) {
3276 		case SIOCADDRT:
3277 			err = ip6_route_add(&cfg, NULL);
3278 			break;
3279 		case SIOCDELRT:
3280 			err = ip6_route_del(&cfg, NULL);
3281 			break;
3282 		default:
3283 			err = -EINVAL;
3284 		}
3285 		rtnl_unlock();
3286 
3287 		return err;
3288 	}
3289 
3290 	return -EINVAL;
3291 }
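
/* Userspace counterpart sketch (assumes the usual includes:
 * <linux/ipv6_route.h>, <sys/ioctl.h>, <net/if.h>, <arpa/inet.h>):
 * adding a route through this legacy ioctl interface.
 *
 *	struct in6_rtmsg rt = { 0 };
 *	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *
 *	inet_pton(AF_INET6, "2001:db8::", &rt.rtmsg_dst);
 *	rt.rtmsg_dst_len = 32;
 *	rt.rtmsg_metric = 1;
 *	rt.rtmsg_flags = RTF_UP;
 *	rt.rtmsg_ifindex = if_nametoindex("eth0");
 *	if (ioctl(fd, SIOCADDRT, &rt) < 0)
 *		perror("SIOCADDRT");
 */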
3292 
3293 /*
3294  *	Drop the packet on the floor
3295  */
3296 
3297 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
3298 {
3299 	int type;
3300 	struct dst_entry *dst = skb_dst(skb);
3301 	switch (ipstats_mib_noroutes) {
3302 	case IPSTATS_MIB_INNOROUTES:
3303 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
3304 		if (type == IPV6_ADDR_ANY) {
3305 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3306 				      IPSTATS_MIB_INADDRERRORS);
3307 			break;
3308 		}
3309 		/* FALLTHROUGH */
3310 	case IPSTATS_MIB_OUTNOROUTES:
3311 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3312 			      ipstats_mib_noroutes);
3313 		break;
3314 	}
3315 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
3316 	kfree_skb(skb);
3317 	return 0;
3318 }
3319 
3320 static int ip6_pkt_discard(struct sk_buff *skb)
3321 {
3322 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
3323 }
3324 
3325 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3326 {
3327 	skb->dev = skb_dst(skb)->dev;
3328 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
3329 }
3330 
3331 static int ip6_pkt_prohibit(struct sk_buff *skb)
3332 {
3333 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
3334 }
3335 
3336 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3337 {
3338 	skb->dev = skb_dst(skb)->dev;
3339 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
3340 }
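
/* These handlers back the reject route types set up in
 * ip6_route_info_create() above: e.g. "ip -6 route add prohibit
 * 2001:db8::/32" installs a route whose hooks are ip6_pkt_prohibit and
 * ip6_pkt_prohibit_out, so matching traffic is answered with an ICMPv6
 * administratively-prohibited error (and -EACCES locally).
 */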
3341 
3342 /*
3343  *	Allocate a dst for local (unicast / anycast) address.
3344  */
3345 
3346 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
3347 				    const struct in6_addr *addr,
3348 				    bool anycast)
3349 {
3350 	u32 tb_id;
3351 	struct net *net = dev_net(idev->dev);
3352 	struct net_device *dev = idev->dev;
3353 	struct rt6_info *rt;
3354 
3355 	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
3356 	if (!rt)
3357 		return ERR_PTR(-ENOMEM);
3358 
3359 	in6_dev_hold(idev);
3360 
3361 	rt->dst.flags |= DST_HOST;
3362 	rt->dst.input = ip6_input;
3363 	rt->dst.output = ip6_output;
3364 	rt->rt6i_idev = idev;
3365 
3366 	rt->rt6i_protocol = RTPROT_KERNEL;
3367 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
3368 	if (anycast)
3369 		rt->rt6i_flags |= RTF_ANYCAST;
3370 	else
3371 		rt->rt6i_flags |= RTF_LOCAL;
3372 
3373 	rt->rt6i_gateway  = *addr;
3374 	rt->rt6i_dst.addr = *addr;
3375 	rt->rt6i_dst.plen = 128;
3376 	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3377 	rt->rt6i_table = fib6_get_table(net, tb_id);
3378 
3379 	return rt;
3380 }
3381 
3382 /* remove deleted ip from prefsrc entries */
3383 struct arg_dev_net_ip {
3384 	struct net_device *dev;
3385 	struct net *net;
3386 	struct in6_addr *addr;
3387 };
3388 
3389 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3390 {
3391 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3392 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3393 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3394 
3395 	if (((void *)rt->dst.dev == dev || !dev) &&
3396 	    rt != net->ipv6.ip6_null_entry &&
3397 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
3398 		spin_lock_bh(&rt6_exception_lock);
3399 		/* remove prefsrc entry */
3400 		rt->rt6i_prefsrc.plen = 0;
3401 		/* need to update cache as well */
3402 		rt6_exceptions_remove_prefsrc(rt);
3403 		spin_unlock_bh(&rt6_exception_lock);
3404 	}
3405 	return 0;
3406 }
3407 
3408 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3409 {
3410 	struct net *net = dev_net(ifp->idev->dev);
3411 	struct arg_dev_net_ip adni = {
3412 		.dev = ifp->idev->dev,
3413 		.net = net,
3414 		.addr = &ifp->addr,
3415 	};
3416 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
3417 }
3418 
3419 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
3420 
3421 /* Remove routers and update dst entries when a gateway turns into a host. */
3422 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
3423 {
3424 	struct in6_addr *gateway = (struct in6_addr *)arg;
3425 
3426 	if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3427 	    ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
3428 		return -1;
3429 	}
3430 
3431 	/* Further clean up cached routes in the exception table.
3432 	 * This is needed because a cached route may have a different
3433 	 * gateway than its 'parent' in the case of an ip redirect.
3434 	 */
3435 	rt6_exceptions_clean_tohost(rt, gateway);
3436 
3437 	return 0;
3438 }
3439 
3440 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3441 {
3442 	fib6_clean_all(net, fib6_clean_tohost, gateway);
3443 }
3444 
3445 struct arg_dev_net {
3446 	struct net_device *dev;
3447 	struct net *net;
3448 };
3449 
3450 /* called with the write lock held for the table containing rt */
3451 static int fib6_ifdown(struct rt6_info *rt, void *arg)
3452 {
3453 	const struct arg_dev_net *adn = arg;
3454 	const struct net_device *dev = adn->dev;
3455 
3456 	if ((rt->dst.dev == dev || !dev) &&
3457 	    rt != adn->net->ipv6.ip6_null_entry &&
3458 	    (rt->rt6i_nsiblings == 0 ||
3459 	     (dev && netdev_unregistering(dev)) ||
3460 	     !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
3461 		return -1;
3462 
3463 	return 0;
3464 }
3465 
3466 void rt6_ifdown(struct net *net, struct net_device *dev)
3467 {
3468 	struct arg_dev_net adn = {
3469 		.dev = dev,
3470 		.net = net,
3471 	};
3472 
3473 	fib6_clean_all(net, fib6_ifdown, &adn);
3474 	if (dev)
3475 		rt6_uncached_list_flush_dev(net, dev);
3476 }
3477 
3478 struct rt6_mtu_change_arg {
3479 	struct net_device *dev;
3480 	unsigned int mtu;
3481 };
3482 
3483 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
3484 {
3485 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
3486 	struct inet6_dev *idev;
3487 
3488 	/* In IPv6, pmtu discovery is not optional,
3489 	   so the RTAX_MTU lock cannot disable it.
3490 	   We still use this lock to block changes
3491 	   caused by addrconf/ndisc.
3492 	*/
3493 
3494 	idev = __in6_dev_get(arg->dev);
3495 	if (!idev)
3496 		return 0;
3497 
3498 	/* For an administrative MTU increase, there is no way to discover
3499 	   an IPv6 PMTU increase, so the PMTU increase should be updated here.
3500 	   Since RFC 1981 doesn't include administrative MTU increase,
3501 	   updating for a PMTU increase is a MUST. (i.e. jumbo frame)
3502 	 */
3503 	/*
3504 	   If the new MTU is less than the route PMTU, this new MTU will be the
3505 	   lowest MTU in the path; update the route PMTU to reflect PMTU
3506 	   decreases. If the new MTU is greater than the route PMTU, and the
3507 	   old MTU is the lowest MTU in the path, update the route PMTU
3508 	   to reflect the increase. In this case, if the other nodes' MTU
3509 	   is also the lowest MTU in the path, a Packet Too Big message will
3510 	   lead to PMTU discovery.
3511 	 */
3512 	if (rt->dst.dev == arg->dev &&
3513 	    dst_metric_raw(&rt->dst, RTAX_MTU) &&
3514 	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
3515 		spin_lock_bh(&rt6_exception_lock);
3516 		if (dst_mtu(&rt->dst) >= arg->mtu ||
3517 		    (dst_mtu(&rt->dst) < arg->mtu &&
3518 		     dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
3519 			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
3520 		}
3521 		rt6_exceptions_update_pmtu(rt, arg->mtu);
3522 		spin_unlock_bh(&rt6_exception_lock);
3523 	}
3524 	return 0;
3525 }
3526 
3527 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
3528 {
3529 	struct rt6_mtu_change_arg arg = {
3530 		.dev = dev,
3531 		.mtu = mtu,
3532 	};
3533 
3534 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
3535 }
3536 
3537 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
3538 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
3539 	[RTA_OIF]               = { .type = NLA_U32 },
3540 	[RTA_IIF]		= { .type = NLA_U32 },
3541 	[RTA_PRIORITY]          = { .type = NLA_U32 },
3542 	[RTA_METRICS]           = { .type = NLA_NESTED },
3543 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
3544 	[RTA_PREF]              = { .type = NLA_U8 },
3545 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
3546 	[RTA_ENCAP]		= { .type = NLA_NESTED },
3547 	[RTA_EXPIRES]		= { .type = NLA_U32 },
3548 	[RTA_UID]		= { .type = NLA_U32 },
3549 	[RTA_MARK]		= { .type = NLA_U32 },
3550 };
3551 
3552 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
3553 			      struct fib6_config *cfg,
3554 			      struct netlink_ext_ack *extack)
3555 {
3556 	struct rtmsg *rtm;
3557 	struct nlattr *tb[RTA_MAX+1];
3558 	unsigned int pref;
3559 	int err;
3560 
3561 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
3562 			  NULL);
3563 	if (err < 0)
3564 		goto errout;
3565 
3566 	err = -EINVAL;
3567 	rtm = nlmsg_data(nlh);
3568 	memset(cfg, 0, sizeof(*cfg));
3569 
3570 	cfg->fc_table = rtm->rtm_table;
3571 	cfg->fc_dst_len = rtm->rtm_dst_len;
3572 	cfg->fc_src_len = rtm->rtm_src_len;
3573 	cfg->fc_flags = RTF_UP;
3574 	cfg->fc_protocol = rtm->rtm_protocol;
3575 	cfg->fc_type = rtm->rtm_type;
3576 
3577 	if (rtm->rtm_type == RTN_UNREACHABLE ||
3578 	    rtm->rtm_type == RTN_BLACKHOLE ||
3579 	    rtm->rtm_type == RTN_PROHIBIT ||
3580 	    rtm->rtm_type == RTN_THROW)
3581 		cfg->fc_flags |= RTF_REJECT;
3582 
3583 	if (rtm->rtm_type == RTN_LOCAL)
3584 		cfg->fc_flags |= RTF_LOCAL;
3585 
3586 	if (rtm->rtm_flags & RTM_F_CLONED)
3587 		cfg->fc_flags |= RTF_CACHE;
3588 
3589 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
3590 	cfg->fc_nlinfo.nlh = nlh;
3591 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
3592 
3593 	if (tb[RTA_GATEWAY]) {
3594 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
3595 		cfg->fc_flags |= RTF_GATEWAY;
3596 	}
3597 
3598 	if (tb[RTA_DST]) {
3599 		int plen = (rtm->rtm_dst_len + 7) >> 3;
3600 
3601 		if (nla_len(tb[RTA_DST]) < plen)
3602 			goto errout;
3603 
3604 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
3605 	}
3606 
3607 	if (tb[RTA_SRC]) {
3608 		int plen = (rtm->rtm_src_len + 7) >> 3;
3609 
3610 		if (nla_len(tb[RTA_SRC]) < plen)
3611 			goto errout;
3612 
3613 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
3614 	}
3615 
3616 	if (tb[RTA_PREFSRC])
3617 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
3618 
3619 	if (tb[RTA_OIF])
3620 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
3621 
3622 	if (tb[RTA_PRIORITY])
3623 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
3624 
3625 	if (tb[RTA_METRICS]) {
3626 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
3627 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
3628 	}
3629 
3630 	if (tb[RTA_TABLE])
3631 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
3632 
3633 	if (tb[RTA_MULTIPATH]) {
3634 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
3635 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
3636 
3637 		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
3638 						     cfg->fc_mp_len, extack);
3639 		if (err < 0)
3640 			goto errout;
3641 	}
3642 
3643 	if (tb[RTA_PREF]) {
3644 		pref = nla_get_u8(tb[RTA_PREF]);
3645 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
3646 		    pref != ICMPV6_ROUTER_PREF_HIGH)
3647 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
3648 		cfg->fc_flags |= RTF_PREF(pref);
3649 	}
3650 
3651 	if (tb[RTA_ENCAP])
3652 		cfg->fc_encap = tb[RTA_ENCAP];
3653 
3654 	if (tb[RTA_ENCAP_TYPE]) {
3655 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
3656 
3657 		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
3658 		if (err < 0)
3659 			goto errout;
3660 	}
3661 
3662 	if (tb[RTA_EXPIRES]) {
3663 		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
3664 
3665 		if (addrconf_finite_timeout(timeout)) {
3666 			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
3667 			cfg->fc_flags |= RTF_EXPIRES;
3668 		}
3669 	}
3670 
3671 	err = 0;
3672 errout:
3673 	return err;
3674 }
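
/* For reference, a command such as
 *	ip -6 route add 2001:db8::/64 via fe80::1 dev eth0 metric 1024
 * arrives here as RTA_DST (with rtm_dst_len), RTA_GATEWAY, RTA_OIF and
 * RTA_PRIORITY attributes, respectively.
 */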
3675 
3676 struct rt6_nh {
3677 	struct rt6_info *rt6_info;
3678 	struct fib6_config r_cfg;
3679 	struct mx6_config mxc;
3680 	struct list_head next;
3681 };
3682 
3683 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3684 {
3685 	struct rt6_nh *nh;
3686 
3687 	list_for_each_entry(nh, rt6_nh_list, next) {
3688 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
3689 		pr_warn("multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
3690 		        nh->r_cfg.fc_ifindex);
3691 	}
3692 }
3693 
3694 static int ip6_route_info_append(struct list_head *rt6_nh_list,
3695 				 struct rt6_info *rt, struct fib6_config *r_cfg)
3696 {
3697 	struct rt6_nh *nh;
3698 	int err = -EEXIST;
3699 
3700 	list_for_each_entry(nh, rt6_nh_list, next) {
3701 		/* check if rt6_info already exists */
3702 		if (rt6_duplicate_nexthop(nh->rt6_info, rt))
3703 			return err;
3704 	}
3705 
3706 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
3707 	if (!nh)
3708 		return -ENOMEM;
3709 	nh->rt6_info = rt;
3710 	err = ip6_convert_metrics(&nh->mxc, r_cfg);
3711 	if (err) {
3712 		kfree(nh);
3713 		return err;
3714 	}
3715 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3716 	list_add_tail(&nh->next, rt6_nh_list);
3717 
3718 	return 0;
3719 }
3720 
3721 static void ip6_route_mpath_notify(struct rt6_info *rt,
3722 				   struct rt6_info *rt_last,
3723 				   struct nl_info *info,
3724 				   __u16 nlflags)
3725 {
3726 	/* if this is an APPEND route, then rt points to the first route
3727 	 * inserted and rt_last points to the last route inserted. Userspace
3728 	 * wants a consistent dump of the route which starts at the first
3729 	 * nexthop. Since sibling routes are always added at the end of
3730 	 * the list, find the first sibling of the last route appended
3731 	 */
3732 	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
3733 		rt = list_first_entry(&rt_last->rt6i_siblings,
3734 				      struct rt6_info,
3735 				      rt6i_siblings);
3736 	}
3737 
3738 	if (rt)
3739 		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
3740 }
3741 
3742 static int ip6_route_multipath_add(struct fib6_config *cfg,
3743 				   struct netlink_ext_ack *extack)
3744 {
3745 	struct rt6_info *rt_notif = NULL, *rt_last = NULL;
3746 	struct nl_info *info = &cfg->fc_nlinfo;
3747 	struct fib6_config r_cfg;
3748 	struct rtnexthop *rtnh;
3749 	struct rt6_info *rt;
3750 	struct rt6_nh *err_nh;
3751 	struct rt6_nh *nh, *nh_safe;
3752 	__u16 nlflags;
3753 	int remaining;
3754 	int attrlen;
3755 	int err = 1;
3756 	int nhn = 0;
3757 	int replace = (cfg->fc_nlinfo.nlh &&
3758 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3759 	LIST_HEAD(rt6_nh_list);
3760 
3761 	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
3762 	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
3763 		nlflags |= NLM_F_APPEND;
3764 
3765 	remaining = cfg->fc_mp_len;
3766 	rtnh = (struct rtnexthop *)cfg->fc_mp;
3767 
3768 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
3769 	 * rt6_info structs, one per nexthop
3770 	 */
3771 	while (rtnh_ok(rtnh, remaining)) {
3772 		memcpy(&r_cfg, cfg, sizeof(*cfg));
3773 		if (rtnh->rtnh_ifindex)
3774 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3775 
3776 		attrlen = rtnh_attrlen(rtnh);
3777 		if (attrlen > 0) {
3778 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3779 
3780 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3781 			if (nla) {
3782 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
3783 				r_cfg.fc_flags |= RTF_GATEWAY;
3784 			}
3785 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3786 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3787 			if (nla)
3788 				r_cfg.fc_encap_type = nla_get_u16(nla);
3789 		}
3790 
3791 		rt = ip6_route_info_create(&r_cfg, extack);
3792 		if (IS_ERR(rt)) {
3793 			err = PTR_ERR(rt);
3794 			rt = NULL;
3795 			goto cleanup;
3796 		}
3797 
3798 		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
3799 		if (err) {
3800 			dst_release_immediate(&rt->dst);
3801 			goto cleanup;
3802 		}
3803 
3804 		rtnh = rtnh_next(rtnh, &remaining);
3805 	}
3806 
3807 	/* for add and replace send one notification with all nexthops.
3808 	 * Skip the notification in fib6_add_rt2node and send one with
3809 	 * the full route when done
3810 	 */
3811 	info->skip_notify = 1;
3812 
3813 	err_nh = NULL;
3814 	list_for_each_entry(nh, &rt6_nh_list, next) {
3815 		rt_last = nh->rt6_info;
3816 		err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
3817 		/* save reference to first route for notification */
3818 		if (!rt_notif && !err)
3819 			rt_notif = nh->rt6_info;
3820 
3821 		/* nh->rt6_info is used or freed at this point, reset to NULL*/
3822 		/* nh->rt6_info is used or freed at this point, reset to NULL */
3823 		if (err) {
3824 			if (replace && nhn)
3825 				ip6_print_replace_route_err(&rt6_nh_list);
3826 			err_nh = nh;
3827 			goto add_errout;
3828 		}
3829 
3830 		/* Because each route is added like a single route, we remove
3831 		 * these flags after the first nexthop: if there is a collision,
3832 		 * we have already failed to add the first nexthop:
3833 		 * fib6_add_rt2node() has rejected it; when replacing, the old
3834 		 * nexthops have been replaced by the first new one, and the rest
3835 		 * should be added to it.
3836 		 */
3837 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3838 						     NLM_F_REPLACE);
3839 		nhn++;
3840 	}
3841 
3842 	/* success ... tell user about new route */
3843 	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3844 	goto cleanup;
3845 
3846 add_errout:
3847 	/* Send a notification for the routes that were added, so that
3848 	 * the delete notifications sent by ip6_route_del below are
3849 	 * coherent.
3850 	 */
3851 	if (rt_notif)
3852 		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3853 
3854 	/* Delete routes that were already added */
3855 	list_for_each_entry(nh, &rt6_nh_list, next) {
3856 		if (err_nh == nh)
3857 			break;
3858 		ip6_route_del(&nh->r_cfg, extack);
3859 	}
3860 
3861 cleanup:
3862 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
3863 		if (nh->rt6_info)
3864 			dst_release_immediate(&nh->rt6_info->dst);
3865 		kfree(nh->mxc.mx);
3866 		list_del(&nh->next);
3867 		kfree(nh);
3868 	}
3869 
3870 	return err;
3871 }
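
/* Editor's note (illustrative only, not part of the original source):
 * the path above is exercised from userspace by a multipath RTM_NEWROUTE,
 * e.g. with iproute2 (prefix, gateways and devices are placeholders):
 *
 *   ip -6 route add 2001:db8::/64 \
 *           nexthop via fe80::1 dev eth0 \
 *           nexthop via fe80::2 dev eth1
 *
 * Each "nexthop" clause becomes one struct rtnexthop inside RTA_MULTIPATH,
 * which is what the rtnh_ok()/rtnh_next() loop above walks.
 */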
3872 
3873 static int ip6_route_multipath_del(struct fib6_config *cfg,
3874 				   struct netlink_ext_ack *extack)
3875 {
3876 	struct fib6_config r_cfg;
3877 	struct rtnexthop *rtnh;
3878 	int remaining;
3879 	int attrlen;
3880 	int err = 1, last_err = 0;
3881 
3882 	remaining = cfg->fc_mp_len;
3883 	rtnh = (struct rtnexthop *)cfg->fc_mp;
3884 
3885 	/* Parse a Multipath Entry */
3886 	while (rtnh_ok(rtnh, remaining)) {
3887 		memcpy(&r_cfg, cfg, sizeof(*cfg));
3888 		if (rtnh->rtnh_ifindex)
3889 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3890 
3891 		attrlen = rtnh_attrlen(rtnh);
3892 		if (attrlen > 0) {
3893 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3894 
3895 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3896 			if (nla) {
3897 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3898 				r_cfg.fc_flags |= RTF_GATEWAY;
3899 			}
3900 		}
3901 		err = ip6_route_del(&r_cfg, extack);
3902 		if (err)
3903 			last_err = err;
3904 
3905 		rtnh = rtnh_next(rtnh, &remaining);
3906 	}
3907 
3908 	return last_err;
3909 }
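
/* Editor's note (illustrative only): the matching multipath delete, e.g.
 *
 *   ip -6 route del 2001:db8::/64 \
 *           nexthop via fe80::1 dev eth0 \
 *           nexthop via fe80::2 dev eth1
 *
 * is processed nexthop by nexthop above, and the last failing
 * ip6_route_del() result, if any, is what the caller sees.
 */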
3910 
3911 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3912 			      struct netlink_ext_ack *extack)
3913 {
3914 	struct fib6_config cfg;
3915 	int err;
3916 
3917 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
3918 	if (err < 0)
3919 		return err;
3920 
3921 	if (cfg.fc_mp) {
3922 		return ip6_route_multipath_del(&cfg, extack);
3923 	} else {
3924 		cfg.fc_delete_all_nh = 1;
3925 		return ip6_route_del(&cfg, extack);
3926 	}
3927 }
3928 
3929 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3930 			      struct netlink_ext_ack *extack)
3931 {
3932 	struct fib6_config cfg;
3933 	int err;
3934 
3935 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
3936 	if (err < 0)
3937 		return err;
3938 
3939 	if (cfg.fc_mp)
3940 		return ip6_route_multipath_add(&cfg, extack);
3941 	else
3942 		return ip6_route_add(&cfg, extack);
3943 }
3944 
3945 static size_t rt6_nlmsg_size(struct rt6_info *rt)
3946 {
3947 	int nexthop_len = 0;
3948 
3949 	if (rt->rt6i_nsiblings) {
3950 		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
3951 			    + NLA_ALIGN(sizeof(struct rtnexthop))
3952 			    + nla_total_size(16) /* RTA_GATEWAY */
3953 			    + lwtunnel_get_encap_size(rt->dst.lwtstate);
3954 
3955 		nexthop_len *= rt->rt6i_nsiblings;
3956 	}
3957 
3958 	return NLMSG_ALIGN(sizeof(struct rtmsg))
3959 	       + nla_total_size(16) /* RTA_SRC */
3960 	       + nla_total_size(16) /* RTA_DST */
3961 	       + nla_total_size(16) /* RTA_GATEWAY */
3962 	       + nla_total_size(16) /* RTA_PREFSRC */
3963 	       + nla_total_size(4) /* RTA_TABLE */
3964 	       + nla_total_size(4) /* RTA_IIF */
3965 	       + nla_total_size(4) /* RTA_OIF */
3966 	       + nla_total_size(4) /* RTA_PRIORITY */
3967 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3968 	       + nla_total_size(sizeof(struct rta_cacheinfo))
3969 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3970 	       + nla_total_size(1) /* RTA_PREF */
3971 	       + lwtunnel_get_encap_size(rt->dst.lwtstate)
3972 	       + nexthop_len;
3973 }
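
/* Editor's note (worked example, assuming the standard netlink helpers):
 * nla_total_size(n) == NLA_ALIGN(NLA_HDRLEN + n) with NLA_HDRLEN == 4, so
 * nla_total_size(16) == 20 and nla_total_size(4) == 8 bytes. The sum above
 * is a worst-case bound: every attribute rt6_fill_node() might emit is
 * accounted for, whether or not it ends up in the message.
 */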
3974 
3975 static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
3976 			    unsigned int *flags, bool skip_oif)
3977 {
3978 	if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
3979 		*flags |= RTNH_F_LINKDOWN;
3980 		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3981 			*flags |= RTNH_F_DEAD;
3982 	}
3983 
3984 	if (rt->rt6i_flags & RTF_GATEWAY) {
3985 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3986 			goto nla_put_failure;
3987 	}
3988 
3989 	if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
3990 		*flags |= RTNH_F_OFFLOAD;
3991 
3992 	/* not needed for multipath encoding: rtnexthop carries the ifindex */
3993 	if (!skip_oif && rt->dst.dev &&
3994 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3995 		goto nla_put_failure;
3996 
3997 	if (rt->dst.lwtstate &&
3998 	    lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3999 		goto nla_put_failure;
4000 
4001 	return 0;
4002 
4003 nla_put_failure:
4004 	return -EMSGSIZE;
4005 }
4006 
4007 /* add multipath next hop */
4008 static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
4009 {
4010 	struct rtnexthop *rtnh;
4011 	unsigned int flags = 0;
4012 
4013 	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4014 	if (!rtnh)
4015 		goto nla_put_failure;
4016 
4017 	rtnh->rtnh_hops = 0;
4018 	rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
4019 
4020 	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
4021 		goto nla_put_failure;
4022 
4023 	rtnh->rtnh_flags = flags;
4024 
4025 	/* length of rtnetlink header + attributes */
4026 	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4027 
4028 	return 0;
4029 
4030 nla_put_failure:
4031 	return -EMSGSIZE;
4032 }
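
/* Editor's note: rtnh_len is derived from the current skb write position,
 * so it covers the rtnexthop header plus whatever attributes
 * rt6_nexthop_info() appended (RTA_GATEWAY and/or encap), with no manual
 * per-attribute size bookkeeping.
 */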
4033 
4034 static int rt6_fill_node(struct net *net,
4035 			 struct sk_buff *skb, struct rt6_info *rt,
4036 			 struct in6_addr *dst, struct in6_addr *src,
4037 			 int iif, int type, u32 portid, u32 seq,
4038 			 unsigned int flags)
4039 {
4040 	u32 metrics[RTAX_MAX];
4041 	struct rtmsg *rtm;
4042 	struct nlmsghdr *nlh;
4043 	long expires;
4044 	u32 table;
4045 
4046 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
4047 	if (!nlh)
4048 		return -EMSGSIZE;
4049 
4050 	rtm = nlmsg_data(nlh);
4051 	rtm->rtm_family = AF_INET6;
4052 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
4053 	rtm->rtm_src_len = rt->rt6i_src.plen;
4054 	rtm->rtm_tos = 0;
4055 	if (rt->rt6i_table)
4056 		table = rt->rt6i_table->tb6_id;
4057 	else
4058 		table = RT6_TABLE_UNSPEC;
4059 	rtm->rtm_table = table;
4060 	if (nla_put_u32(skb, RTA_TABLE, table))
4061 		goto nla_put_failure;
4062 	if (rt->rt6i_flags & RTF_REJECT) {
4063 		switch (rt->dst.error) {
4064 		case -EINVAL:
4065 			rtm->rtm_type = RTN_BLACKHOLE;
4066 			break;
4067 		case -EACCES:
4068 			rtm->rtm_type = RTN_PROHIBIT;
4069 			break;
4070 		case -EAGAIN:
4071 			rtm->rtm_type = RTN_THROW;
4072 			break;
4073 		default:
4074 			rtm->rtm_type = RTN_UNREACHABLE;
4075 			break;
4076 		}
4077 	} else if (rt->rt6i_flags & RTF_LOCAL)
4079 		rtm->rtm_type = RTN_LOCAL;
4080 	else if (rt->rt6i_flags & RTF_ANYCAST)
4081 		rtm->rtm_type = RTN_ANYCAST;
4082 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
4083 		rtm->rtm_type = RTN_LOCAL;
4084 	else
4085 		rtm->rtm_type = RTN_UNICAST;
4086 	rtm->rtm_flags = 0;
4087 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4088 	rtm->rtm_protocol = rt->rt6i_protocol;
4089 
4090 	if (rt->rt6i_flags & RTF_CACHE)
4091 		rtm->rtm_flags |= RTM_F_CLONED;
4092 
4093 	if (dst) {
4094 		if (nla_put_in6_addr(skb, RTA_DST, dst))
4095 			goto nla_put_failure;
4096 		rtm->rtm_dst_len = 128;
4097 	} else if (rtm->rtm_dst_len &&
4098 		   nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
4099 		goto nla_put_failure;
4100 #ifdef CONFIG_IPV6_SUBTREES
4101 	if (src) {
4102 		if (nla_put_in6_addr(skb, RTA_SRC, src))
4103 			goto nla_put_failure;
4104 		rtm->rtm_src_len = 128;
4105 	} else if (rtm->rtm_src_len &&
4106 		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
4107 		goto nla_put_failure;
4108 #endif
4109 	if (iif) {
4110 #ifdef CONFIG_IPV6_MROUTE
4111 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
4112 			int err = ip6mr_get_route(net, skb, rtm, portid);
4113 
4114 			if (err == 0)
4115 				return 0;
4116 			if (err < 0)
4117 				goto nla_put_failure;
4118 		} else
4119 #endif
4120 			if (nla_put_u32(skb, RTA_IIF, iif))
4121 				goto nla_put_failure;
4122 	} else if (dst) {
4123 		struct in6_addr saddr_buf;
4124 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
4125 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4126 			goto nla_put_failure;
4127 	}
4128 
4129 	if (rt->rt6i_prefsrc.plen) {
4130 		struct in6_addr saddr_buf;
4131 		saddr_buf = rt->rt6i_prefsrc.addr;
4132 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4133 			goto nla_put_failure;
4134 	}
4135 
4136 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
4137 	if (rt->rt6i_pmtu)
4138 		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
4139 	if (rtnetlink_put_metrics(skb, metrics) < 0)
4140 		goto nla_put_failure;
4141 
4142 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
4143 		goto nla_put_failure;
4144 
4145 	/* For multipath routes, walk the siblings list and add
4146 	 * each as a nexthop within RTA_MULTIPATH.
4147 	 */
4148 	if (rt->rt6i_nsiblings) {
4149 		struct rt6_info *sibling, *next_sibling;
4150 		struct nlattr *mp;
4151 
4152 		mp = nla_nest_start(skb, RTA_MULTIPATH);
4153 		if (!mp)
4154 			goto nla_put_failure;
4155 
4156 		if (rt6_add_nexthop(skb, rt) < 0)
4157 			goto nla_put_failure;
4158 
4159 		list_for_each_entry_safe(sibling, next_sibling,
4160 					 &rt->rt6i_siblings, rt6i_siblings) {
4161 			if (rt6_add_nexthop(skb, sibling) < 0)
4162 				goto nla_put_failure;
4163 		}
4164 
4165 		nla_nest_end(skb, mp);
4166 	} else {
4167 		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
4168 			goto nla_put_failure;
4169 	}
4170 
4171 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
4172 
4173 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
4174 		goto nla_put_failure;
4175 
4176 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
4177 		goto nla_put_failure;
4178 
4180 	nlmsg_end(skb, nlh);
4181 	return 0;
4182 
4183 nla_put_failure:
4184 	nlmsg_cancel(skb, nlh);
4185 	return -EMSGSIZE;
4186 }
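
/* Editor's note: a -EMSGSIZE from rt6_fill_node() on a buffer sized with
 * rt6_nlmsg_size() means the size estimate is too small; the WARN_ON in
 * inet6_rt_notify() below relies on exactly that contract.
 */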
4187 
4188 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
4189 {
4190 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
4191 	struct net *net = arg->net;
4192 
4193 	if (rt == net->ipv6.ip6_null_entry)
4194 		return 0;
4195 
4196 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4197 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
4198 
4199 		/* user wants prefix routes only */
4200 		if (rtm->rtm_flags & RTM_F_PREFIX &&
4201 		    !(rt->rt6i_flags & RTF_PREFIX_RT)) {
4202 			/* not a prefix route: skip it but keep dumping */
4203 			return 1;
4204 		}
4205 	}
4206 
4207 	return rt6_fill_node(net,
4208 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
4209 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
4210 		     NLM_F_MULTI);
4211 }
4212 
4213 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4214 			      struct netlink_ext_ack *extack)
4215 {
4216 	struct net *net = sock_net(in_skb->sk);
4217 	struct nlattr *tb[RTA_MAX+1];
4218 	int err, iif = 0, oif = 0;
4219 	struct dst_entry *dst;
4220 	struct rt6_info *rt;
4221 	struct sk_buff *skb;
4222 	struct rtmsg *rtm;
4223 	struct flowi6 fl6;
4224 	bool fibmatch;
4225 
4226 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4227 			  extack);
4228 	if (err < 0)
4229 		goto errout;
4230 
4231 	err = -EINVAL;
4232 	memset(&fl6, 0, sizeof(fl6));
4233 	rtm = nlmsg_data(nlh);
4234 	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
4235 	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
4236 
4237 	if (tb[RTA_SRC]) {
4238 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4239 			goto errout;
4240 
4241 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
4242 	}
4243 
4244 	if (tb[RTA_DST]) {
4245 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4246 			goto errout;
4247 
4248 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
4249 	}
4250 
4251 	if (tb[RTA_IIF])
4252 		iif = nla_get_u32(tb[RTA_IIF]);
4253 
4254 	if (tb[RTA_OIF])
4255 		oif = nla_get_u32(tb[RTA_OIF]);
4256 
4257 	if (tb[RTA_MARK])
4258 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4259 
4260 	if (tb[RTA_UID])
4261 		fl6.flowi6_uid = make_kuid(current_user_ns(),
4262 					   nla_get_u32(tb[RTA_UID]));
4263 	else
4264 		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4265 
4266 	if (iif) {
4267 		struct net_device *dev;
4268 		int flags = 0;
4269 
4270 		rcu_read_lock();
4271 
4272 		dev = dev_get_by_index_rcu(net, iif);
4273 		if (!dev) {
4274 			rcu_read_unlock();
4275 			err = -ENODEV;
4276 			goto errout;
4277 		}
4278 
4279 		fl6.flowi6_iif = iif;
4280 
4281 		if (!ipv6_addr_any(&fl6.saddr))
4282 			flags |= RT6_LOOKUP_F_HAS_SADDR;
4283 
4284 		if (!fibmatch)
4285 			dst = ip6_route_input_lookup(net, dev, &fl6, flags);
4286 		else
4287 			dst = ip6_route_lookup(net, &fl6, 0);
4288 
4289 		rcu_read_unlock();
4290 	} else {
4291 		fl6.flowi6_oif = oif;
4292 
4293 		if (!fibmatch)
4294 			dst = ip6_route_output(net, NULL, &fl6);
4295 		else
4296 			dst = ip6_route_lookup(net, &fl6, 0);
4297 	}
4298 
4300 	rt = container_of(dst, struct rt6_info, dst);
4301 	if (rt->dst.error) {
4302 		err = rt->dst.error;
4303 		ip6_rt_put(rt);
4304 		goto errout;
4305 	}
4306 
4307 	if (rt == net->ipv6.ip6_null_entry) {
4308 		err = rt->dst.error;
4309 		ip6_rt_put(rt);
4310 		goto errout;
4311 	}
4312 
4313 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
4314 	if (!skb) {
4315 		ip6_rt_put(rt);
4316 		err = -ENOBUFS;
4317 		goto errout;
4318 	}
4319 
4320 	skb_dst_set(skb, &rt->dst);
4321 	if (fibmatch)
4322 		err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
4323 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4324 				    nlh->nlmsg_seq, 0);
4325 	else
4326 		err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
4327 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4328 				    nlh->nlmsg_seq, 0);
4329 	if (err < 0) {
4330 		kfree_skb(skb);
4331 		goto errout;
4332 	}
4333 
4334 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
4335 errout:
4336 	return err;
4337 }
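
/* Editor's note (illustrative only): this handler backs "ip -6 route get",
 * e.g. (address is a placeholder):
 *
 *   ip -6 route get 2001:db8::1
 *   ip -6 route get fibmatch 2001:db8::1
 *
 * where "fibmatch" sets RTM_F_FIB_MATCH so the matched FIB entry is
 * returned rather than the dst produced for the lookup.
 */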
4338 
4339 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
4340 		     unsigned int nlm_flags)
4341 {
4342 	struct sk_buff *skb;
4343 	struct net *net = info->nl_net;
4344 	u32 seq;
4345 	int err;
4346 
4347 	err = -ENOBUFS;
4348 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
4349 
4350 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
4351 	if (!skb)
4352 		goto errout;
4353 
4354 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
4355 				event, info->portid, seq, nlm_flags);
4356 	if (err < 0) {
4357 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4358 		WARN_ON(err == -EMSGSIZE);
4359 		kfree_skb(skb);
4360 		goto errout;
4361 	}
4362 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
4363 		    info->nlh, gfp_any());
4364 	return;
4365 errout:
4366 	if (err < 0)
4367 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
4368 }
4369 
4370 static int ip6_route_dev_notify(struct notifier_block *this,
4371 				unsigned long event, void *ptr)
4372 {
4373 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
4374 	struct net *net = dev_net(dev);
4375 
4376 	if (!(dev->flags & IFF_LOOPBACK))
4377 		return NOTIFY_OK;
4378 
4379 	if (event == NETDEV_REGISTER) {
4380 		net->ipv6.ip6_null_entry->dst.dev = dev;
4381 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4382 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4383 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
4384 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
4385 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
4386 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
4387 #endif
4388 	} else if (event == NETDEV_UNREGISTER &&
4389 		    dev->reg_state != NETREG_UNREGISTERED) {
4390 		/* NETDEV_UNREGISTER can be fired multiple times by
4391 		 * netdev_wait_allrefs(). Make sure we only do this once.
4392 		 */
4393 		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
4394 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4395 		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4396 		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
4397 #endif
4398 	}
4399 
4400 	return NOTIFY_OK;
4401 }
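
/* Editor's note: only loopback events matter here because the special
 * null/prohibit/blackhole entries are bound to the per-netns loopback
 * device (see ip6_route_init_special_entries() below).
 */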
4402 
4403 /*
4404  *	/proc
4405  */
4406 
4407 #ifdef CONFIG_PROC_FS
4408 
4409 static const struct file_operations ipv6_route_proc_fops = {
4410 	.owner		= THIS_MODULE,
4411 	.open		= ipv6_route_open,
4412 	.read		= seq_read,
4413 	.llseek		= seq_lseek,
4414 	.release	= seq_release_net,
4415 };
4416 
4417 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4418 {
4419 	struct net *net = (struct net *)seq->private;

4420 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
4421 		   net->ipv6.rt6_stats->fib_nodes,
4422 		   net->ipv6.rt6_stats->fib_route_nodes,
4423 		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
4424 		   net->ipv6.rt6_stats->fib_rt_entries,
4425 		   net->ipv6.rt6_stats->fib_rt_cache,
4426 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
4427 		   net->ipv6.rt6_stats->fib_discarded_routes);
4428 
4429 	return 0;
4430 }
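
/* Editor's note: the above renders /proc/net/rt6_stats as seven
 * space-separated hex fields, in the order of the seq_printf() arguments,
 * e.g. (made-up values):
 *
 *   0043 008d 0000 0052 0002 0017 012c
 */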
4431 
4432 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4433 {
4434 	return single_open_net(inode, file, rt6_stats_seq_show);
4435 }
4436 
4437 static const struct file_operations rt6_stats_seq_fops = {
4438 	.owner	 = THIS_MODULE,
4439 	.open	 = rt6_stats_seq_open,
4440 	.read	 = seq_read,
4441 	.llseek	 = seq_lseek,
4442 	.release = single_release_net,
4443 };
4444 #endif	/* CONFIG_PROC_FS */
4445 
4446 #ifdef CONFIG_SYSCTL
4447 
4448 static
4449 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
4450 			      void __user *buffer, size_t *lenp, loff_t *ppos)
4451 {
4452 	struct net *net;
4453 	int delay;
4454 	if (!write)
4455 		return -EINVAL;
4456 
4457 	net = (struct net *)ctl->extra1;
4458 	delay = net->ipv6.sysctl.flush_delay;
4459 	proc_dointvec(ctl, write, buffer, lenp, ppos);
4460 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
4461 	return 0;
4462 }
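
/* Editor's note (illustrative only): the write-only handler above is
 * reached through /proc/sys/net/ipv6/route/flush, e.g.:
 *
 *   echo 1 > /proc/sys/net/ipv6/route/flush
 *
 * Note that "delay" is sampled before proc_dointvec() stores the newly
 * written value, so the GC run uses the previously stored flush_delay.
 */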
4463 
4464 struct ctl_table ipv6_route_table_template[] = {
4465 	{
4466 		.procname	=	"flush",
4467 		.data		=	&init_net.ipv6.sysctl.flush_delay,
4468 		.maxlen		=	sizeof(int),
4469 		.mode		=	0200,
4470 		.proc_handler	=	ipv6_sysctl_rtcache_flush
4471 	},
4472 	{
4473 		.procname	=	"gc_thresh",
4474 		.data		=	&ip6_dst_ops_template.gc_thresh,
4475 		.maxlen		=	sizeof(int),
4476 		.mode		=	0644,
4477 		.proc_handler	=	proc_dointvec,
4478 	},
4479 	{
4480 		.procname	=	"max_size",
4481 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
4482 		.maxlen		=	sizeof(int),
4483 		.mode		=	0644,
4484 		.proc_handler	=	proc_dointvec,
4485 	},
4486 	{
4487 		.procname	=	"gc_min_interval",
4488 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
4489 		.maxlen		=	sizeof(int),
4490 		.mode		=	0644,
4491 		.proc_handler	=	proc_dointvec_jiffies,
4492 	},
4493 	{
4494 		.procname	=	"gc_timeout",
4495 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
4496 		.maxlen		=	sizeof(int),
4497 		.mode		=	0644,
4498 		.proc_handler	=	proc_dointvec_jiffies,
4499 	},
4500 	{
4501 		.procname	=	"gc_interval",
4502 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
4503 		.maxlen		=	sizeof(int),
4504 		.mode		=	0644,
4505 		.proc_handler	=	proc_dointvec_jiffies,
4506 	},
4507 	{
4508 		.procname	=	"gc_elasticity",
4509 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
4510 		.maxlen		=	sizeof(int),
4511 		.mode		=	0644,
4512 		.proc_handler	=	proc_dointvec,
4513 	},
4514 	{
4515 		.procname	=	"mtu_expires",
4516 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
4517 		.maxlen		=	sizeof(int),
4518 		.mode		=	0644,
4519 		.proc_handler	=	proc_dointvec_jiffies,
4520 	},
4521 	{
4522 		.procname	=	"min_adv_mss",
4523 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
4524 		.maxlen		=	sizeof(int),
4525 		.mode		=	0644,
4526 		.proc_handler	=	proc_dointvec,
4527 	},
4528 	{
4529 		.procname	=	"gc_min_interval_ms",
4530 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
4531 		.maxlen		=	sizeof(int),
4532 		.mode		=	0644,
4533 		.proc_handler	=	proc_dointvec_ms_jiffies,
4534 	},
4535 	{ }
4536 };
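
/* Editor's note: with this template the knobs appear under
 * /proc/sys/net/ipv6/route/ as flush, gc_thresh, max_size, gc_min_interval,
 * gc_timeout, gc_interval, gc_elasticity, mtu_expires, min_adv_mss and
 * gc_min_interval_ms; the per-netns copies are wired up just below.
 */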
4537 
4538 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
4539 {
4540 	struct ctl_table *table;
4541 
4542 	table = kmemdup(ipv6_route_table_template,
4543 			sizeof(ipv6_route_table_template),
4544 			GFP_KERNEL);
4545 
4546 	if (table) {
4547 		table[0].data = &net->ipv6.sysctl.flush_delay;
4548 		table[0].extra1 = net;
4549 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
4550 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
4551 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
4552 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
4553 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
4554 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
4555 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
4556 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
4557 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
4558 
4559 		/* Don't export the writable "flush" sysctl to unprivileged users */
4560 		if (net->user_ns != &init_user_ns)
4561 			table[0].procname = NULL;
4562 	}
4563 
4564 	return table;
4565 }
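
/* Editor's note: the table[N].data fixups above depend on the index order
 * matching ipv6_route_table_template exactly; inserting an entry into the
 * template without renumbering here would silently point a sysctl at the
 * wrong per-netns field.
 */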
4566 #endif
4567 
4568 static int __net_init ip6_route_net_init(struct net *net)
4569 {
4570 	int ret = -ENOMEM;
4571 
4572 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
4573 	       sizeof(net->ipv6.ip6_dst_ops));
4574 
4575 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
4576 		goto out_ip6_dst_ops;
4577 
4578 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
4579 					   sizeof(*net->ipv6.ip6_null_entry),
4580 					   GFP_KERNEL);
4581 	if (!net->ipv6.ip6_null_entry)
4582 		goto out_ip6_dst_entries;
4583 	net->ipv6.ip6_null_entry->dst.path =
4584 		(struct dst_entry *)net->ipv6.ip6_null_entry;
4585 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
4586 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
4587 			 ip6_template_metrics, true);
4588 
4589 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4590 	net->ipv6.fib6_has_custom_rules = false;
4591 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
4592 					       sizeof(*net->ipv6.ip6_prohibit_entry),
4593 					       GFP_KERNEL);
4594 	if (!net->ipv6.ip6_prohibit_entry)
4595 		goto out_ip6_null_entry;
4596 	net->ipv6.ip6_prohibit_entry->dst.path =
4597 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
4598 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
4599 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
4600 			 ip6_template_metrics, true);
4601 
4602 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
4603 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
4604 					       GFP_KERNEL);
4605 	if (!net->ipv6.ip6_blk_hole_entry)
4606 		goto out_ip6_prohibit_entry;
4607 	net->ipv6.ip6_blk_hole_entry->dst.path =
4608 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
4609 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
4610 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
4611 			 ip6_template_metrics, true);
4612 #endif
4613 
4614 	net->ipv6.sysctl.flush_delay = 0;
4615 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
4616 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
4617 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
4618 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
4619 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
4620 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
4621 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
4622 
4623 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
4624 
4625 	ret = 0;
4626 out:
4627 	return ret;
4628 
4629 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4630 out_ip6_prohibit_entry:
4631 	kfree(net->ipv6.ip6_prohibit_entry);
4632 out_ip6_null_entry:
4633 	kfree(net->ipv6.ip6_null_entry);
4634 #endif
4635 out_ip6_dst_entries:
4636 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
4637 out_ip6_dst_ops:
4638 	goto out;
4639 }
4640 
4641 static void __net_exit ip6_route_net_exit(struct net *net)
4642 {
4643 	kfree(net->ipv6.ip6_null_entry);
4644 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4645 	kfree(net->ipv6.ip6_prohibit_entry);
4646 	kfree(net->ipv6.ip6_blk_hole_entry);
4647 #endif
4648 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
4649 }
4650 
4651 static int __net_init ip6_route_net_init_late(struct net *net)
4652 {
4653 #ifdef CONFIG_PROC_FS
4654 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
4655 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
4656 #endif
4657 	return 0;
4658 }
4659 
4660 static void __net_exit ip6_route_net_exit_late(struct net *net)
4661 {
4662 #ifdef CONFIG_PROC_FS
4663 	remove_proc_entry("ipv6_route", net->proc_net);
4664 	remove_proc_entry("rt6_stats", net->proc_net);
4665 #endif
4666 }
4667 
4668 static struct pernet_operations ip6_route_net_ops = {
4669 	.init = ip6_route_net_init,
4670 	.exit = ip6_route_net_exit,
4671 };
4672 
4673 static int __net_init ipv6_inetpeer_init(struct net *net)
4674 {
4675 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4676 
4677 	if (!bp)
4678 		return -ENOMEM;
4679 	inet_peer_base_init(bp);
4680 	net->ipv6.peers = bp;
4681 	return 0;
4682 }
4683 
4684 static void __net_exit ipv6_inetpeer_exit(struct net *net)
4685 {
4686 	struct inet_peer_base *bp = net->ipv6.peers;
4687 
4688 	net->ipv6.peers = NULL;
4689 	inetpeer_invalidate_tree(bp);
4690 	kfree(bp);
4691 }
4692 
4693 static struct pernet_operations ipv6_inetpeer_ops = {
4694 	.init	=	ipv6_inetpeer_init,
4695 	.exit	=	ipv6_inetpeer_exit,
4696 };
4697 
4698 static struct pernet_operations ip6_route_net_late_ops = {
4699 	.init = ip6_route_net_init_late,
4700 	.exit = ip6_route_net_exit_late,
4701 };
4702 
4703 static struct notifier_block ip6_route_dev_notifier = {
4704 	.notifier_call = ip6_route_dev_notify,
4705 	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
4706 };
4707 
4708 void __init ip6_route_init_special_entries(void)
4709 {
4710 	/* Registering of the loopback is done before this portion of code,
4711 	 * so the loopback reference in rt6_info will not be taken; do it
4712 	 * manually for init_net */
4713 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
4714 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4715 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4716 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
4717 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4718 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
4719 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4720 #endif
4721 }
4722 
4723 int __init ip6_route_init(void)
4724 {
4725 	int ret;
4726 	int cpu;
4727 
4728 	ret = -ENOMEM;
4729 	ip6_dst_ops_template.kmem_cachep =
4730 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
4731 				  SLAB_HWCACHE_ALIGN, NULL);
4732 	if (!ip6_dst_ops_template.kmem_cachep)
4733 		goto out;
4734 
4735 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
4736 	if (ret)
4737 		goto out_kmem_cache;
4738 
4739 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
4740 	if (ret)
4741 		goto out_dst_entries;
4742 
4743 	ret = register_pernet_subsys(&ip6_route_net_ops);
4744 	if (ret)
4745 		goto out_register_inetpeer;
4746 
4747 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
4748 
4749 	ret = fib6_init();
4750 	if (ret)
4751 		goto out_register_subsys;
4752 
4753 	ret = xfrm6_init();
4754 	if (ret)
4755 		goto out_fib6_init;
4756 
4757 	ret = fib6_rules_init();
4758 	if (ret)
4759 		goto xfrm6_init;
4760 
4761 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
4762 	if (ret)
4763 		goto fib6_rules_init;
4764 
4765 	ret = -ENOBUFS;
4766 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
4767 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
4768 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
4769 			    RTNL_FLAG_DOIT_UNLOCKED))
4770 		goto out_register_late_subsys;
4771 
4772 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
4773 	if (ret)
4774 		goto out_register_late_subsys;
4775 
4776 	for_each_possible_cpu(cpu) {
4777 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
4778 
4779 		INIT_LIST_HEAD(&ul->head);
4780 		spin_lock_init(&ul->lock);
4781 	}
4782 
4783 out:
4784 	return ret;
4785 
4786 out_register_late_subsys:
4787 	unregister_pernet_subsys(&ip6_route_net_late_ops);
4788 fib6_rules_init:
4789 	fib6_rules_cleanup();
4790 xfrm6_init:
4791 	xfrm6_fini();
4792 out_fib6_init:
4793 	fib6_gc_cleanup();
4794 out_register_subsys:
4795 	unregister_pernet_subsys(&ip6_route_net_ops);
4796 out_register_inetpeer:
4797 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
4798 out_dst_entries:
4799 	dst_entries_destroy(&ip6_dst_blackhole_ops);
4800 out_kmem_cache:
4801 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
4802 	goto out;
4803 }
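
/* Editor's note: the error labels above unwind strictly in reverse order of
 * the setup steps that succeeded (the usual kernel goto-cleanup idiom);
 * ip6_route_cleanup() below performs the same teardown in full.
 */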
4804 
4805 void ip6_route_cleanup(void)
4806 {
4807 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
4808 	unregister_pernet_subsys(&ip6_route_net_late_ops);
4809 	fib6_rules_cleanup();
4810 	xfrm6_fini();
4811 	fib6_gc_cleanup();
4812 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
4813 	unregister_pernet_subsys(&ip6_route_net_ops);
4814 	dst_entries_destroy(&ip6_dst_blackhole_ops);
4815 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
4816 }
4817