xref: /openbmc/linux/net/ipv6/route.c (revision 38fbeeeeccdb38d0635398e8e344d245f6d8dc52)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <linux/jhash.h>
48 #include <net/net_namespace.h>
49 #include <net/snmp.h>
50 #include <net/ipv6.h>
51 #include <net/ip6_fib.h>
52 #include <net/ip6_route.h>
53 #include <net/ndisc.h>
54 #include <net/addrconf.h>
55 #include <net/tcp.h>
56 #include <linux/rtnetlink.h>
57 #include <net/dst.h>
58 #include <net/dst_metadata.h>
59 #include <net/xfrm.h>
60 #include <net/netevent.h>
61 #include <net/netlink.h>
62 #include <net/nexthop.h>
63 #include <net/lwtunnel.h>
64 #include <net/ip_tunnels.h>
65 #include <net/l3mdev.h>
66 #include <trace/events/fib6.h>
67 
68 #include <linux/uaccess.h>
69 
70 #ifdef CONFIG_SYSCTL
71 #include <linux/sysctl.h>
72 #endif
73 
/* Neighbour-reachability verdicts used when scoring routes (see
 * rt6_check_neigh()/rt6_score_route()): negative values are failures,
 * RT6_NUD_FAIL_HARD excludes the route, RT6_NUD_FAIL_DO_RR requests
 * round-robin to the next candidate (see find_match()).
 */
74 enum rt6_nud_state {
75 	RT6_NUD_FAIL_HARD = -3,
76 	RT6_NUD_FAIL_PROBE = -2,	/* neighbour is in NUD_FAILED */
77 	RT6_NUD_FAIL_DO_RR = -1,
78 	RT6_NUD_SUCCEED = 1
79 };
80 
81 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
82 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
83 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
84 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void		ip6_dst_destroy(struct dst_entry *);
87 static void		ip6_dst_ifdown(struct dst_entry *,
88 				       struct net_device *dev, int how);
89 static int		 ip6_dst_gc(struct dst_ops *ops);
90 
91 static int		ip6_pkt_discard(struct sk_buff *skb);
92 static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
93 static int		ip6_pkt_prohibit(struct sk_buff *skb);
94 static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
95 static void		ip6_link_failure(struct sk_buff *skb);
96 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
97 					   struct sk_buff *skb, u32 mtu);
98 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
99 					struct sk_buff *skb);
100 static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
101 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
102 static size_t rt6_nlmsg_size(struct rt6_info *rt);
103 static int rt6_fill_node(struct net *net,
104 			 struct sk_buff *skb, struct rt6_info *rt,
105 			 struct in6_addr *dst, struct in6_addr *src,
106 			 int iif, int type, u32 portid, u32 seq,
107 			 unsigned int flags);
108 static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
109 					   struct in6_addr *daddr,
110 					   struct in6_addr *saddr);
111 
112 #ifdef CONFIG_IPV6_ROUTE_INFO
113 static struct rt6_info *rt6_add_route_info(struct net *net,
114 					   const struct in6_addr *prefix, int prefixlen,
115 					   const struct in6_addr *gwaddr,
116 					   struct net_device *dev,
117 					   unsigned int pref);
118 static struct rt6_info *rt6_get_route_info(struct net *net,
119 					   const struct in6_addr *prefix, int prefixlen,
120 					   const struct in6_addr *gwaddr,
121 					   struct net_device *dev);
122 #endif
123 
/* Per-cpu list of "uncached" rt6_info (routes not linked into the fib6
 * tree).  Each route records which cpu's list it is on so that deletion
 * can take the right lock (see rt6_uncached_list_add/del).
 */
124 struct uncached_list {
125 	spinlock_t		lock;	/* protects @head */
126 	struct list_head	head;
127 };
128 
129 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
130 
/* Link @rt onto the current cpu's uncached list, remembering the list
 * in rt->rt6i_uncached_list so the remover can find the matching lock.
 */
131 static void rt6_uncached_list_add(struct rt6_info *rt)
132 {
133 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
134 
135 	rt->rt6i_uncached_list = ul;
136 
137 	spin_lock_bh(&ul->lock);
138 	list_add_tail(&rt->rt6i_uncached, &ul->head);
139 	spin_unlock_bh(&ul->lock);
140 }
141 
/* Unlink @rt from the uncached list it was added to, if any.  Safe to
 * call on a route that was never added (list head stays empty).
 */
142 static void rt6_uncached_list_del(struct rt6_info *rt)
143 {
144 	if (!list_empty(&rt->rt6i_uncached)) {
145 		struct uncached_list *ul = rt->rt6i_uncached_list;
146 
147 		spin_lock_bh(&ul->lock);
148 		list_del(&rt->rt6i_uncached);
149 		spin_unlock_bh(&ul->lock);
150 	}
151 }
152 
/* Device-unregister handler for uncached routes: retarget every uncached
 * route that still references @dev (either via rt6i_idev or dst.dev) to
 * the netns loopback device, fixing up the reference counts.
 *
 * NOTE(review): in6_dev_get(loopback_dev) can in principle return NULL
 * (no inet6_dev on loopback); that result is stored unchecked here —
 * confirm loopback always has an inet6_dev at this point.
 */
153 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
154 {
155 	struct net_device *loopback_dev = net->loopback_dev;
156 	int cpu;
157 
158 	if (dev == loopback_dev)
159 		return;
160 
161 	for_each_possible_cpu(cpu) {
162 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
163 		struct rt6_info *rt;
164 
165 		spin_lock_bh(&ul->lock);
166 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
167 			struct inet6_dev *rt_idev = rt->rt6i_idev;
168 			struct net_device *rt_dev = rt->dst.dev;
169 
170 			if (rt_idev->dev == dev) {
171 				rt->rt6i_idev = in6_dev_get(loopback_dev);
172 				in6_dev_put(rt_idev);
173 			}
174 
175 			if (rt_dev == dev) {
176 				rt->dst.dev = loopback_dev;
177 				dev_hold(rt->dst.dev);
178 				dev_put(rt_dev);
179 			}
180 		}
181 		spin_unlock_bh(&ul->lock);
182 	}
183 }
184 
/* Per-cpu route clones write metrics through to the route they were
 * copied from (rt->dst.from).
 */
185 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
186 {
187 	return dst_metrics_write_ptr(rt->dst.from);
188 }
189 
190 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
191 {
192 	struct rt6_info *rt = (struct rt6_info *)dst;
193 
194 	if (rt->rt6i_flags & RTF_PCPU)
195 		return rt6_pcpu_cow_metrics(rt);
196 	else if (rt->rt6i_flags & RTF_CACHE)
197 		return NULL;
198 	else
199 		return dst_cow_metrics_generic(dst, old);
200 }
201 
202 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
203 					     struct sk_buff *skb,
204 					     const void *daddr)
205 {
206 	struct in6_addr *p = &rt->rt6i_gateway;
207 
208 	if (!ipv6_addr_any(p))
209 		return (const void *) p;
210 	else if (skb)
211 		return &ipv6_hdr(skb)->daddr;
212 	return daddr;
213 }
214 
/* dst_ops->neigh_lookup hook: find the ndisc neighbour entry for this
 * route (keyed per choose_neigh_daddr()), creating it if absent.
 */
215 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
216 					  struct sk_buff *skb,
217 					  const void *daddr)
218 {
219 	struct rt6_info *rt = (struct rt6_info *) dst;
220 	struct neighbour *n;
221 
222 	daddr = choose_neigh_daddr(rt, skb, daddr);
223 	n = __ipv6_neigh_lookup(dst->dev, daddr);
224 	if (n)
225 		return n;
226 	return neigh_create(&nd_tbl, daddr, dst->dev);
227 }
228 
/* dst_ops->confirm_neigh hook: mark the next hop's neighbour entry as
 * recently confirmed.  Skipped when no usable address can be derived,
 * on devices without neighbour resolution (NOARP/loopback), and for
 * multicast destinations.
 */
229 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
230 {
231 	struct net_device *dev = dst->dev;
232 	struct rt6_info *rt = (struct rt6_info *)dst;
233 
234 	daddr = choose_neigh_daddr(rt, NULL, daddr);
235 	if (!daddr)
236 		return;
237 	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
238 		return;
239 	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
240 		return;
241 	__ipv6_confirm_neigh(dev, daddr);
242 }
243 
/* dst_ops template for regular IPv6 routes; copied into each netns
 * (net->ipv6.ip6_dst_ops).
 */
244 static struct dst_ops ip6_dst_ops_template = {
245 	.family			=	AF_INET6,
246 	.gc			=	ip6_dst_gc,
247 	.gc_thresh		=	1024,
248 	.check			=	ip6_dst_check,
249 	.default_advmss		=	ip6_default_advmss,
250 	.mtu			=	ip6_mtu,
251 	.cow_metrics		=	ipv6_cow_metrics,
252 	.destroy		=	ip6_dst_destroy,
253 	.ifdown			=	ip6_dst_ifdown,
254 	.negative_advice	=	ip6_negative_advice,
255 	.link_failure		=	ip6_link_failure,
256 	.update_pmtu		=	ip6_rt_update_pmtu,
257 	.redirect		=	rt6_do_redirect,
258 	.local_out		=	__ip6_local_out,
259 	.neigh_lookup		=	ip6_neigh_lookup,
260 	.confirm_neigh		=	ip6_confirm_neigh,
261 };
262 
263 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
264 {
265 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
266 
267 	return mtu ? : dst->dev->mtu;
268 }
269 
/* Blackhole dst ops: PMTU updates and redirects are deliberately
 * ignored (empty hooks below).
 */
270 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
271 					 struct sk_buff *skb, u32 mtu)
272 {
273 }
274 
275 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
276 				      struct sk_buff *skb)
277 {
278 }
279 
280 static struct dst_ops ip6_dst_blackhole_ops = {
281 	.family			=	AF_INET6,
282 	.destroy		=	ip6_dst_destroy,
283 	.check			=	ip6_dst_check,
284 	.mtu			=	ip6_blackhole_mtu,
285 	.default_advmss		=	ip6_default_advmss,
286 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
287 	.redirect		=	ip6_rt_blackhole_redirect,
288 	.cow_metrics		=	dst_cow_metrics_generic,
289 	.neigh_lookup		=	ip6_neigh_lookup,
290 };
291 
/* Default metrics template and the per-netns null route template: the
 * null entry rejects traffic with -ENETUNREACH via ip6_pkt_discard*.
 */
292 static const u32 ip6_template_metrics[RTAX_MAX] = {
293 	[RTAX_HOPLIMIT - 1] = 0,
294 };
295 
296 static const struct rt6_info ip6_null_entry_template = {
297 	.dst = {
298 		.__refcnt	= ATOMIC_INIT(1),
299 		.__use		= 1,
300 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
301 		.error		= -ENETUNREACH,
302 		.input		= ip6_pkt_discard,
303 		.output		= ip6_pkt_discard_out,
304 	},
305 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
306 	.rt6i_protocol  = RTPROT_KERNEL,
307 	.rt6i_metric	= ~(u32) 0,	/* worst possible metric */
308 	.rt6i_ref	= ATOMIC_INIT(1),
309 };
310 
311 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
312 
/* Policy-routing reject templates: "prohibit" answers with -EACCES,
 * "blackhole" silently discards (-EINVAL, dst_discard).
 */
313 static const struct rt6_info ip6_prohibit_entry_template = {
314 	.dst = {
315 		.__refcnt	= ATOMIC_INIT(1),
316 		.__use		= 1,
317 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
318 		.error		= -EACCES,
319 		.input		= ip6_pkt_prohibit,
320 		.output		= ip6_pkt_prohibit_out,
321 	},
322 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
323 	.rt6i_protocol  = RTPROT_KERNEL,
324 	.rt6i_metric	= ~(u32) 0,
325 	.rt6i_ref	= ATOMIC_INIT(1),
326 };
327 
328 static const struct rt6_info ip6_blk_hole_entry_template = {
329 	.dst = {
330 		.__refcnt	= ATOMIC_INIT(1),
331 		.__use		= 1,
332 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
333 		.error		= -EINVAL,
334 		.input		= dst_discard,
335 		.output		= dst_discard_out,
336 	},
337 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
338 	.rt6i_protocol  = RTPROT_KERNEL,
339 	.rt6i_metric	= ~(u32) 0,
340 	.rt6i_ref	= ATOMIC_INIT(1),
341 };
342 
343 #endif
344 
/* Zero the rt6_info-specific part of a freshly allocated route (every
 * byte after the embedded dst_entry) and init its list heads.
 */
345 static void rt6_info_init(struct rt6_info *rt)
346 {
347 	struct dst_entry *dst = &rt->dst;
348 
349 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));	/* clear past dst */
350 	INIT_LIST_HEAD(&rt->rt6i_siblings);
351 	INIT_LIST_HEAD(&rt->rt6i_uncached);
352 }
353 
354 /* allocate dst with ip6_dst_ops
355  * Returns a route with refcnt 1 and DST_OBSOLETE_FORCE_CHK set, or
356  * NULL on allocation failure.
357  */
358 static struct rt6_info *__ip6_dst_alloc(struct net *net,
359 					struct net_device *dev,
360 					int flags)
361 {
362 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
363 					1, DST_OBSOLETE_FORCE_CHK, flags);
364 
365 	if (rt)
366 		rt6_info_init(rt);
367 
368 	return rt;
369 }
367 
/* Public route allocator: like __ip6_dst_alloc() but also sets up the
 * per-cpu clone array (rt6i_pcpu), each slot starting out NULL.  The
 * route is released and NULL returned if the per-cpu alloc fails.
 */
368 struct rt6_info *ip6_dst_alloc(struct net *net,
369 			       struct net_device *dev,
370 			       int flags)
371 {
372 	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
373 
374 	if (rt) {
375 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
376 		if (rt->rt6i_pcpu) {
377 			int cpu;
378 
379 			for_each_possible_cpu(cpu) {
380 				struct rt6_info **p;
381 
382 				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
383 				/* no one shares rt */
384 				*p =  NULL;
385 			}
386 		} else {
387 			dst_release_immediate(&rt->dst);
388 			return NULL;
389 		}
390 	}
391 
392 	return rt;
393 }
394 EXPORT_SYMBOL(ip6_dst_alloc);
395 
/* dst_ops->destroy hook: free everything the route owns — metrics,
 * per-cpu clones, uncached-list membership, idev reference, exception
 * bucket — then drop the reference on the parent route (dst->from).
 */
396 static void ip6_dst_destroy(struct dst_entry *dst)
397 {
398 	struct rt6_info *rt = (struct rt6_info *)dst;
399 	struct rt6_exception_bucket *bucket;
400 	struct dst_entry *from = dst->from;
401 	struct inet6_dev *idev;
402 
403 	dst_destroy_metrics_generic(dst);
404 	free_percpu(rt->rt6i_pcpu);
405 	rt6_uncached_list_del(rt);
406 
407 	idev = rt->rt6i_idev;
408 	if (idev) {
409 		rt->rt6i_idev = NULL;
410 		in6_dev_put(idev);
411 	}
412 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
413 	if (bucket) {
414 		rt->rt6i_exception_bucket = NULL;
415 		kfree(bucket);
416 	}
417 
418 	dst->from = NULL;
419 	dst_release(from);
420 }
421 
/* dst_ops->ifdown hook: when @dev goes away, repoint the route's idev
 * at the netns loopback device (keeping the old idev if loopback has
 * no inet6_dev).
 */
422 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
423 			   int how)
424 {
425 	struct rt6_info *rt = (struct rt6_info *)dst;
426 	struct inet6_dev *idev = rt->rt6i_idev;
427 	struct net_device *loopback_dev =
428 		dev_net(dev)->loopback_dev;
429 
430 	if (idev && idev->dev != loopback_dev) {
431 		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
432 		if (loopback_idev) {
433 			rt->rt6i_idev = loopback_idev;
434 			in6_dev_put(idev);
435 		}
436 	}
437 }
438 
439 static bool __rt6_check_expired(const struct rt6_info *rt)
440 {
441 	if (rt->rt6i_flags & RTF_EXPIRES)
442 		return time_after(jiffies, rt->dst.expires);
443 	else
444 		return false;
445 }
446 
/* Like __rt6_check_expired() but for clones: a route without its own
 * expiry is considered expired when its parent (dst.from) is expired
 * or obsolete.
 */
447 static bool rt6_check_expired(const struct rt6_info *rt)
448 {
449 	if (rt->rt6i_flags & RTF_EXPIRES) {
450 		if (time_after(jiffies, rt->dst.expires))
451 			return true;
452 	} else if (rt->dst.from) {
453 		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
454 		       rt6_check_expired((struct rt6_info *)rt->dst.from);
455 	}
456 	return false;
457 }
458 
459 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
460 					     struct flowi6 *fl6, int oif,
461 					     int strict)
462 {
463 	struct rt6_info *sibling, *next_sibling;
464 	int route_choosen;
465 
466 	/* We might have already computed the hash for ICMPv6 errors. In such
467 	 * case it will always be non-zero. Otherwise now is the time to do it.
468 	 */
469 	if (!fl6->mp_hash)
470 		fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
471 
472 	route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
473 	/* Don't change the route, if route_choosen == 0
474 	 * (siblings does not include ourself)
475 	 */
476 	if (route_choosen)
477 		list_for_each_entry_safe(sibling, next_sibling,
478 				&match->rt6i_siblings, rt6i_siblings) {
479 			route_choosen--;
480 			if (route_choosen == 0) {
481 				if (rt6_score_route(sibling, oif, strict) < 0)
482 					break;
483 				match = sibling;
484 				break;
485 			}
486 		}
487 	return match;
488 }
489 
490 /*
491  *	Route lookup. Any table->tb6_lock is implied.
492  */
493 
/* Walk the chain of same-prefix routes starting at @rt and pick the one
 * matching the requested output interface (@oif) or, with no oif, the
 * one whose device owns @saddr.  Loopback routes aliasing @oif are kept
 * as a fallback in @local.  With RT6_LOOKUP_F_IFACE set and no match,
 * the null entry is returned instead of @rt.
 */
494 static inline struct rt6_info *rt6_device_match(struct net *net,
495 						    struct rt6_info *rt,
496 						    const struct in6_addr *saddr,
497 						    int oif,
498 						    int flags)
499 {
500 	struct rt6_info *local = NULL;
501 	struct rt6_info *sprt;
502 
503 	if (!oif && ipv6_addr_any(saddr))
504 		goto out;
505 
506 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
507 		struct net_device *dev = sprt->dst.dev;
508 
509 		if (oif) {
510 			if (dev->ifindex == oif)
511 				return sprt;
512 			if (dev->flags & IFF_LOOPBACK) {
513 				if (!sprt->rt6i_idev ||
514 				    sprt->rt6i_idev->dev->ifindex != oif) {
515 					if (flags & RT6_LOOKUP_F_IFACE)
516 						continue;
517 					if (local &&
518 					    local->rt6i_idev->dev->ifindex == oif)
519 						continue;
520 				}
521 				local = sprt;
522 			}
523 		} else {
524 			if (ipv6_chk_addr(net, saddr, dev,
525 					  flags & RT6_LOOKUP_F_IFACE))
526 				return sprt;
527 		}
528 	}
529 
530 	if (oif) {
531 		if (local)
532 			return local;
533 
534 		if (flags & RT6_LOOKUP_F_IFACE)
535 			return net->ipv6.ip6_null_entry;
536 	}
537 out:
538 	return rt;
539 }
540 
541 #ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred router-reachability probe: rt6_probe() queues one of these,
 * and the work item sends a neighbour solicitation to the target's
 * solicited-node multicast address, then drops the device ref.
 */
542 struct __rt6_probe_work {
543 	struct work_struct work;
544 	struct in6_addr target;		/* gateway being probed */
545 	struct net_device *dev;		/* held by rt6_probe(), put here */
546 };
547 
548 static void rt6_probe_deferred(struct work_struct *w)
549 {
550 	struct in6_addr mcaddr;
551 	struct __rt6_probe_work *work =
552 		container_of(w, struct __rt6_probe_work, work);
553 
554 	addrconf_addr_solict_mult(&work->target, &mcaddr);
555 	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
556 	dev_put(work->dev);
557 	kfree(work);
558 }
559 
/* Schedule a reachability probe for @rt's gateway, rate-limited via
 * the neighbour's 'updated' timestamp and rtr_probe_interval; probes
 * only gateway routes.  The actual NS is sent from process context by
 * rt6_probe_deferred().
 */
560 static void rt6_probe(struct rt6_info *rt)
561 {
562 	struct __rt6_probe_work *work;
563 	struct neighbour *neigh;
564 	/*
565 	 * Okay, this does not seem to be appropriate
566 	 * for now, however, we need to check if it
567 	 * is really so; aka Router Reachability Probing.
568 	 *
569 	 * Router Reachability Probe MUST be rate-limited
570 	 * to no more than one per minute.
571 	 */
572 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
573 		return;
574 	rcu_read_lock_bh();
575 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
576 	if (neigh) {
577 		if (neigh->nud_state & NUD_VALID)
578 			goto out;
579 
580 		work = NULL;
581 		write_lock(&neigh->lock);
582 		if (!(neigh->nud_state & NUD_VALID) &&
583 		    time_after(jiffies,
584 			       neigh->updated +
585 			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
586 			work = kmalloc(sizeof(*work), GFP_ATOMIC);
587 			if (work)
588 				__neigh_set_probe_once(neigh);
589 		}
590 		write_unlock(&neigh->lock);
591 	} else {
592 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
593 	}
594 
595 	if (work) {
596 		INIT_WORK(&work->work, rt6_probe_deferred);
597 		work->target = rt->rt6i_gateway;
598 		dev_hold(rt->dst.dev);	/* released by rt6_probe_deferred() */
599 		work->dev = rt->dst.dev;
600 		schedule_work(&work->work);
601 	}
602 
603 out:
604 	rcu_read_unlock_bh();
605 }
606 #else
/* Probing is a no-op without CONFIG_IPV6_ROUTER_PREF. */
607 static inline void rt6_probe(struct rt6_info *rt)
608 {
609 }
610 #endif
611 
612 /*
613  * Default Router Selection (RFC 2461 6.3.6)
614  */
615 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
616 {
617 	struct net_device *dev = rt->dst.dev;
618 	if (!oif || dev->ifindex == oif)
619 		return 2;
620 	if ((dev->flags & IFF_LOOPBACK) &&
621 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
622 		return 1;
623 	return 0;
624 }
625 
/* Classify the reachability of @rt's next hop for route scoring.
 * Non-gateway routes always succeed; otherwise the neighbour's NUD
 * state decides (with ROUTER_PREF, anything not NUD_FAILED counts as
 * reachable, and a missing neighbour still succeeds).
 */
626 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
627 {
628 	struct neighbour *neigh;
629 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
630 
631 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
632 	    !(rt->rt6i_flags & RTF_GATEWAY))
633 		return RT6_NUD_SUCCEED;
634 
635 	rcu_read_lock_bh();
636 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
637 	if (neigh) {
638 		read_lock(&neigh->lock);
639 		if (neigh->nud_state & NUD_VALID)
640 			ret = RT6_NUD_SUCCEED;
641 #ifdef CONFIG_IPV6_ROUTER_PREF
642 		else if (!(neigh->nud_state & NUD_FAILED))
643 			ret = RT6_NUD_SUCCEED;
644 		else
645 			ret = RT6_NUD_FAIL_PROBE;
646 #endif
647 		read_unlock(&neigh->lock);
648 	} else {
649 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
650 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
651 	}
652 	rcu_read_unlock_bh();
653 
654 	return ret;
655 }
656 
/* Compute a comparable score for @rt: device match in the low bits,
 * RFC 4191 router preference (when configured) shifted above it.
 * Returns a negative rt6_nud_state on failure (hard interface
 * mismatch, or unreachable next hop under RT6_LOOKUP_F_REACHABLE).
 */
657 static int rt6_score_route(struct rt6_info *rt, int oif,
658 			   int strict)
659 {
660 	int m;
661 
662 	m = rt6_check_dev(rt, oif);
663 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
664 		return RT6_NUD_FAIL_HARD;
665 #ifdef CONFIG_IPV6_ROUTER_PREF
666 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
667 #endif
668 	if (strict & RT6_LOOKUP_F_REACHABLE) {
669 		int n = rt6_check_neigh(rt);
670 		if (n < 0)
671 			return n;
672 	}
673 	return m;
674 }
675 
/* Compare @rt against the current best candidate @match (whose score is
 * *mpri) and return the better of the two.  Routes that are expired,
 * link-down (unless ignored), or hard-failing are skipped; a
 * RT6_NUD_FAIL_DO_RR score requests round-robin via *do_rr.
 */
676 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
677 				   int *mpri, struct rt6_info *match,
678 				   bool *do_rr)
679 {
680 	int m;
681 	bool match_do_rr = false;
682 	struct inet6_dev *idev = rt->rt6i_idev;
683 	struct net_device *dev = rt->dst.dev;
684 
685 	if (dev && !netif_carrier_ok(dev) &&
686 	    idev->cnf.ignore_routes_with_linkdown &&
687 	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
688 		goto out;
689 
690 	if (rt6_check_expired(rt))
691 		goto out;
692 
693 	m = rt6_score_route(rt, oif, strict);
694 	if (m == RT6_NUD_FAIL_DO_RR) {
695 		match_do_rr = true;
696 		m = 0; /* lowest valid score */
697 	} else if (m == RT6_NUD_FAIL_HARD) {
698 		goto out;
699 	}
700 
701 	if (strict & RT6_LOOKUP_F_REACHABLE)
702 		rt6_probe(rt);
703 
704 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
705 	if (m > *mpri) {
706 		*do_rr = match_do_rr;
707 		*mpri = m;
708 		match = rt;
709 	}
710 out:
711 	return match;
712 }
713 
/* Scan the routes of @fn with metric @metric, starting from the
 * round-robin head @rr_head and wrapping around via fn->leaf, keeping
 * the best find_match() candidate.  If nothing in that metric group
 * matched, fall back to scanning the remaining (higher-metric) routes
 * from @cont.
 */
714 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
715 				     struct rt6_info *rr_head,
716 				     u32 metric, int oif, int strict,
717 				     bool *do_rr)
718 {
719 	struct rt6_info *rt, *match, *cont;
720 	int mpri = -1;
721 
722 	match = NULL;
723 	cont = NULL;
724 	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
725 		if (rt->rt6i_metric != metric) {
726 			cont = rt;
727 			break;
728 		}
729 
730 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
731 	}
732 
733 	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
734 		if (rt->rt6i_metric != metric) {
735 			cont = rt;
736 			break;
737 		}
738 
739 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
740 	}
741 
742 	if (match || !cont)
743 		return match;
744 
745 	for (rt = cont; rt; rt = rt->dst.rt6_next)
746 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
747 
748 	return match;
749 }
750 
/* Pick the best route in fib6 node @fn, advancing the node's
 * round-robin pointer when find_rr_leaf() requested it.  Falls back to
 * the netns null entry when nothing matched.
 *
 * NOTE(review): rt0 (fn->rr_ptr or fn->leaf) is dereferenced without a
 * NULL check — confirm fn->leaf cannot be NULL here in this revision.
 */
751 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
752 {
753 	struct rt6_info *match, *rt0;
754 	struct net *net;
755 	bool do_rr = false;
756 
757 	rt0 = fn->rr_ptr;
758 	if (!rt0)
759 		fn->rr_ptr = rt0 = fn->leaf;
760 
761 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
762 			     &do_rr);
763 
764 	if (do_rr) {
765 		struct rt6_info *next = rt0->dst.rt6_next;
766 
767 		/* no entries matched; do round-robin */
768 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
769 			next = fn->leaf;
770 
771 		if (next != rt0)
772 			fn->rr_ptr = next;
773 	}
774 
775 	net = dev_net(rt0->dst.dev);
776 	return match ? match : net->ipv6.ip6_null_entry;
777 }
778 
779 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
780 {
781 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
782 }
783 
784 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Process a Route Information option received in a Router Advertisement
 * (RFC 4191): validate length/prefix-len/preference, then add, refresh
 * or delete (lifetime 0) the corresponding RTF_ROUTEINFO route.
 * Returns 0 on success, -EINVAL on malformed input.
 */
785 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
786 		  const struct in6_addr *gwaddr)
787 {
788 	struct net *net = dev_net(dev);
789 	struct route_info *rinfo = (struct route_info *) opt;
790 	struct in6_addr prefix_buf, *prefix;
791 	unsigned int pref;
792 	unsigned long lifetime;
793 	struct rt6_info *rt;
794 
795 	if (len < sizeof(struct route_info)) {
796 		return -EINVAL;
797 	}
798 
799 	/* Sanity check for prefix_len and length */
800 	if (rinfo->length > 3) {
801 		return -EINVAL;
802 	} else if (rinfo->prefix_len > 128) {
803 		return -EINVAL;
804 	} else if (rinfo->prefix_len > 64) {
805 		if (rinfo->length < 2) {
806 			return -EINVAL;
807 		}
808 	} else if (rinfo->prefix_len > 0) {
809 		if (rinfo->length < 1) {
810 			return -EINVAL;
811 		}
812 	}
813 
814 	pref = rinfo->route_pref;
815 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
816 		return -EINVAL;
817 
818 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
819 
820 	if (rinfo->length == 3)
821 		prefix = (struct in6_addr *)rinfo->prefix;
822 	else {
823 		/* this function is safe */
824 		ipv6_addr_prefix(&prefix_buf,
825 				 (struct in6_addr *)rinfo->prefix,
826 				 rinfo->prefix_len);
827 		prefix = &prefix_buf;
828 	}
829 
830 	if (rinfo->prefix_len == 0)
831 		rt = rt6_get_dflt_router(gwaddr, dev);
832 	else
833 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
834 					gwaddr, dev);
835 
836 	if (rt && !lifetime) {
837 		ip6_del_rt(rt);
838 		rt = NULL;
839 	}
840 
841 	if (!rt && lifetime)
842 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
843 					dev, pref);
844 	else if (rt)
845 		rt->rt6i_flags = RTF_ROUTEINFO |
846 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
847 
848 	if (rt) {
849 		if (!addrconf_finite_timeout(lifetime))
850 			rt6_clean_expires(rt);
851 		else
852 			rt6_set_expires(rt, jiffies + HZ * lifetime);
853 
854 		ip6_rt_put(rt);
855 	}
856 	return 0;
857 }
858 #endif
859 
/* Walk back up the fib6 tree from @fn until a node carrying route info
 * (RTN_RTINFO) is found, descending into source subtrees on the way;
 * returns NULL when the tree root is reached.
 */
860 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
861 					struct in6_addr *saddr)
862 {
863 	struct fib6_node *pn;
864 	while (1) {
865 		if (fn->fn_flags & RTN_TL_ROOT)
866 			return NULL;
867 		pn = fn->parent;
868 		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
869 			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
870 		else
871 			fn = pn;
872 		if (fn->fn_flags & RTN_RTINFO)
873 			return fn;
874 	}
875 }
876 
/* Table lookup used by fib6_rule_lookup()/ip6_route_lookup(): find the
 * fib6 node for the flow, match on device and ECMP, backtracking up the
 * tree while only the null entry is found.  Returns a held dst (never
 * NULL; may be the null entry).
 */
877 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
878 					     struct fib6_table *table,
879 					     struct flowi6 *fl6, int flags)
880 {
881 	struct fib6_node *fn;
882 	struct rt6_info *rt;
883 
884 	read_lock_bh(&table->tb6_lock);
885 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
886 restart:
887 	rt = fn->leaf;
888 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
889 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
890 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
891 	if (rt == net->ipv6.ip6_null_entry) {
892 		fn = fib6_backtrack(fn, &fl6->saddr);
893 		if (fn)
894 			goto restart;
895 	}
896 	dst_use(&rt->dst, jiffies);	/* take ref + touch lastuse/__use */
897 	read_unlock_bh(&table->tb6_lock);
898 
899 	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
900 
901 	return rt;
902 
903 }
904 
/* Policy-routed lookup entry point: dispatch to ip6_pol_route_lookup()
 * through the fib rules engine.
 */
905 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
906 				    int flags)
907 {
908 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
909 }
910 EXPORT_SYMBOL_GPL(ip6_route_lookup);
911 
912 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
913 			    const struct in6_addr *saddr, int oif, int strict)
914 {
915 	struct flowi6 fl6 = {
916 		.flowi6_oif = oif,
917 		.daddr = *daddr,
918 	};
919 	struct dst_entry *dst;
920 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
921 
922 	if (saddr) {
923 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
924 		flags |= RT6_LOOKUP_F_HAS_SADDR;
925 	}
926 
927 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
928 	if (dst->error == 0)
929 		return (struct rt6_info *) dst;
930 
931 	dst_release(dst);
932 
933 	return NULL;
934 }
935 EXPORT_SYMBOL(rt6_lookup);
936 
937 /* ip6_ins_rt is called with FREE table->tb6_lock.
938  * It takes new route entry, the addition fails by any reason the
939  * route is released.
940  * Caller must hold dst before calling it.
941  */
942 
/* Insert @rt into its fib6 table under the table write lock; returns
 * the fib6_add() result.  See the comment above: caller holds a dst
 * reference, and the route is released on failure inside fib6_add().
 */
943 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
944 			struct mx6_config *mxc,
945 			struct netlink_ext_ack *extack)
946 {
947 	int err;
948 	struct fib6_table *table;
949 
950 	table = rt->rt6i_table;
951 	write_lock_bh(&table->tb6_lock);
952 	err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
953 	write_unlock_bh(&table->tb6_lock);
954 
955 	return err;
956 }
957 
/* Public insert wrapper: take the fib6 tree's dst reference and insert
 * with empty netlink/metrics info.
 */
958 int ip6_ins_rt(struct rt6_info *rt)
959 {
960 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
961 	struct mx6_config mxc = { .mx = NULL, };
962 
963 	/* Hold dst to account for the reference from the fib6 tree */
964 	dst_hold(&rt->dst);
965 	return __ip6_ins_rt(rt, &info, &mxc, NULL);
966 }
967 
968 /* called with rcu_lock held */
969 static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
970 {
971 	struct net_device *dev = rt->dst.dev;
972 
973 	if (rt->rt6i_flags & RTF_LOCAL) {
974 		/* for copies of local routes, dst->dev needs to be the
975 		 * device if it is a master device, the master device if
976 		 * device is enslaved, and the loopback as the default
977 		 */
978 		if (netif_is_l3_slave(dev) &&
979 		    !rt6_need_strict(&rt->rt6i_dst.addr))
980 			dev = l3mdev_master_dev_rcu(dev);
981 		else if (!netif_is_l3_master(dev))
982 			dev = dev_net(dev)->loopback_dev;
983 		/* last case is netif_is_l3_master(dev) is true in which
984 		 * case we want dev returned to be dev
985 		 */
986 	}
987 
988 	return dev;
989 }
990 
/* Create an RTF_CACHE clone of @ort for destination @daddr (and @saddr
 * under CONFIG_IPV6_SUBTREES): a /128 host route with metric 0.  If
 * @ort is itself a clone, its parent (dst.from) is cloned instead.
 * Returns NULL on allocation failure.
 */
991 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
992 					   const struct in6_addr *daddr,
993 					   const struct in6_addr *saddr)
994 {
995 	struct net_device *dev;
996 	struct rt6_info *rt;
997 
998 	/*
999 	 *	Clone the route.
1000 	 */
1001 
1002 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
1003 		ort = (struct rt6_info *)ort->dst.from;
1004 
1005 	rcu_read_lock();
1006 	dev = ip6_rt_get_dev_rcu(ort);
1007 	rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1008 	rcu_read_unlock();
1009 	if (!rt)
1010 		return NULL;
1011 
1012 	ip6_rt_copy_init(rt, ort);
1013 	rt->rt6i_flags |= RTF_CACHE;
1014 	rt->rt6i_metric = 0;
1015 	rt->dst.flags |= DST_HOST;
1016 	rt->rt6i_dst.addr = *daddr;
1017 	rt->rt6i_dst.plen = 128;
1018 
1019 	if (!rt6_is_gw_or_nonexthop(ort)) {
1020 		if (ort->rt6i_dst.plen != 128 &&
1021 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1022 			rt->rt6i_flags |= RTF_ANYCAST;
1023 #ifdef CONFIG_IPV6_SUBTREES
1024 		if (rt->rt6i_src.plen && saddr) {
1025 			rt->rt6i_src.addr = *saddr;
1026 			rt->rt6i_src.plen = 128;
1027 		}
1028 #endif
1029 	}
1030 
1031 	return rt;
1032 }
1033 
/* Allocate an RTF_PCPU clone of @rt for the per-cpu cache; returns NULL
 * on allocation failure.
 */
1034 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1035 {
1036 	struct net_device *dev;
1037 	struct rt6_info *pcpu_rt;
1038 
1039 	rcu_read_lock();
1040 	dev = ip6_rt_get_dev_rcu(rt);
1041 	pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
1042 	rcu_read_unlock();
1043 	if (!pcpu_rt)
1044 		return NULL;
1045 	ip6_rt_copy_init(pcpu_rt, rt);
1046 	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1047 	pcpu_rt->rt6i_flags |= RTF_PCPU;
1048 	return pcpu_rt;
1049 }
1050 
1051 /* It should be called with read_lock_bh(&tb6_lock) acquired */
1051 /* It should be called with read_lock_bh(&tb6_lock) acquired
1052  * Returns this cpu's cached clone of @rt with a reference taken, or
1053  * NULL when no clone exists yet.
1054  */
1055 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1056 {
1057 	struct rt6_info *pcpu_rt, **p;
1058 
1059 	p = this_cpu_ptr(rt->rt6i_pcpu);
1060 	pcpu_rt = *p;
1061 
1062 	if (pcpu_rt) {
1063 		dst_hold(&pcpu_rt->dst);
1064 		rt6_dst_from_metrics_check(pcpu_rt);
1065 	}
1066 	return pcpu_rt;
1067 }
1065 
/* Create and install this cpu's clone of @rt.  The cmpxchg() resolves
 * the race with a concurrent creator; if @rt was removed from the tree
 * in the meantime (rt6i_pcpu freed), fall back to @rt itself.  Returns
 * a held route; on allocation failure, the held null entry.
 */
1066 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1067 {
1068 	struct fib6_table *table = rt->rt6i_table;
1069 	struct rt6_info *pcpu_rt, *prev, **p;
1070 
1071 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1072 	if (!pcpu_rt) {
1073 		struct net *net = dev_net(rt->dst.dev);
1074 
1075 		dst_hold(&net->ipv6.ip6_null_entry->dst);
1076 		return net->ipv6.ip6_null_entry;
1077 	}
1078 
1079 	read_lock_bh(&table->tb6_lock);
1080 	if (rt->rt6i_pcpu) {
1081 		p = this_cpu_ptr(rt->rt6i_pcpu);
1082 		prev = cmpxchg(p, NULL, pcpu_rt);
1083 		if (prev) {
1084 			/* If someone did it before us, return prev instead */
1085 			dst_release_immediate(&pcpu_rt->dst);
1086 			pcpu_rt = prev;
1087 		}
1088 	} else {
1089 		/* rt has been removed from the fib6 tree
1090 		 * before we have a chance to acquire the read_lock.
1091 		 * In this case, don't brother to create a pcpu rt
1092 		 * since rt is going away anyway.  The next
1093 		 * dst_check() will trigger a re-lookup.
1094 		 */
1095 		dst_release_immediate(&pcpu_rt->dst);
1096 		pcpu_rt = rt;
1097 	}
1098 	dst_hold(&pcpu_rt->dst);
1099 	rt6_dst_from_metrics_check(pcpu_rt);
1100 	read_unlock_bh(&table->tb6_lock);
1101 	return pcpu_rt;
1102 }
1103 
1104 /* exception hash table implementation
1105  */
1106 static DEFINE_SPINLOCK(rt6_exception_lock);
1107 
1108 /* Remove rt6_ex from hash table and free the memory
1109  * Caller must hold rt6_exception_lock
1110  */
1111 static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1112 				 struct rt6_exception *rt6_ex)
1113 {
1114 	if (!bucket || !rt6_ex)
1115 		return;
1116 	rt6_ex->rt6i->rt6i_node = NULL;	/* detach route from fib6 node */
1117 	hlist_del_rcu(&rt6_ex->hlist);
1118 	rt6_release(rt6_ex->rt6i);
1119 	kfree_rcu(rt6_ex, rcu);		/* readers may still be traversing */
1120 	WARN_ON_ONCE(!bucket->depth);
1121 	bucket->depth--;
1122 }
1123 
1124 /* Remove oldest rt6_ex in bucket and free the memory
1125  * Caller must hold rt6_exception_lock
1126  */
1127 static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1128 {
1129 	struct rt6_exception *rt6_ex, *oldest = NULL;
1130 
1131 	if (!bucket)
1132 		return;
1133 
1134 	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1135 		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1136 			oldest = rt6_ex;
1137 	}
1138 	rt6_remove_exception(bucket, oldest);
1139 }
1140 
1141 static u32 rt6_exception_hash(const struct in6_addr *dst,
1142 			      const struct in6_addr *src)
1143 {
1144 	static u32 seed __read_mostly;
1145 	u32 val;
1146 
1147 	net_get_random_once(&seed, sizeof(seed));
1148 	val = jhash(dst, sizeof(*dst), seed);
1149 
1150 #ifdef CONFIG_IPV6_SUBTREES
1151 	if (src)
1152 		val = jhash(src, sizeof(*src), val);
1153 #endif
1154 	return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1155 }
1156 
1157 /* Helper function to find the cached rt in the hash table
1158  * and update bucket pointer to point to the bucket for this
1159  * (daddr, saddr) pair
1160  * Caller must hold rt6_exception_lock
1161  */
1162 static struct rt6_exception *
1163 __rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1164 			      const struct in6_addr *daddr,
1165 			      const struct in6_addr *saddr)
1166 {
1167 	struct rt6_exception *rt6_ex;
1168 	u32 hval;
1169 
1170 	if (!(*bucket) || !daddr)
1171 		return NULL;
1172 
1173 	hval = rt6_exception_hash(daddr, saddr);
1174 	*bucket += hval;
1175 
1176 	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1177 		struct rt6_info *rt6 = rt6_ex->rt6i;
1178 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1179 
1180 #ifdef CONFIG_IPV6_SUBTREES
1181 		if (matched && saddr)
1182 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1183 #endif
1184 		if (matched)
1185 			return rt6_ex;
1186 	}
1187 	return NULL;
1188 }
1189 
/* Helper function to find the cached rt in the hash table
 * and update bucket pointer to point to the bucket for this
 * (daddr, saddr) pair
 * Caller must hold rcu_read_lock()
 */
static struct rt6_exception *
__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
			 const struct in6_addr *daddr,
			 const struct in6_addr *saddr)
{
	struct rt6_exception *rt6_ex;
	u32 hval;

	WARN_ON_ONCE(!rcu_read_lock_held());

	if (!(*bucket) || !daddr)
		return NULL;

	/* Advance caller's bucket pointer to the hashed slot */
	hval = rt6_exception_hash(daddr, saddr);
	*bucket += hval;

	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
		struct rt6_info *rt6 = rt6_ex->rt6i;
		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);

#ifdef CONFIG_IPV6_SUBTREES
		/* With subtrees, a src match is also required when given */
		if (matched && saddr)
			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
#endif
		if (matched)
			return rt6_ex;
	}
	return NULL;
}
1224 
/* Insert the RTF_CACHE clone @nrt into the exception table of the
 * fib6 route @ort it was cloned from.
 * Returns 0 on success; -EINVAL if ort's table was already flushed or
 * nrt's pmtu does not undercut ort's mtu; -ENOMEM on allocation
 * failure.  On success, fn->fn_sernum is bumped so that stale cached
 * dsts fail their next dst_check() and re-look-up.
 */
static int rt6_insert_exception(struct rt6_info *nrt,
				struct rt6_info *ort)
{
	struct rt6_exception_bucket *bucket;
	struct in6_addr *src_key = NULL;
	struct rt6_exception *rt6_ex;
	int err = 0;

	/* ort can't be a cache or pcpu route */
	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;
	WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));

	spin_lock_bh(&rt6_exception_lock);

	/* rt6_flush_exceptions() has already run; don't re-create */
	if (ort->exception_bucket_flushed) {
		err = -EINVAL;
		goto out;
	}

	bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
					lockdep_is_held(&rt6_exception_lock));
	if (!bucket) {
		/* First exception for this route: allocate the bucket array */
		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
				 GFP_ATOMIC);
		if (!bucket) {
			err = -ENOMEM;
			goto out;
		}
		rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
	}

#ifdef CONFIG_IPV6_SUBTREES
	/* rt6i_src.plen != 0 indicates ort is in subtree
	 * and exception table is indexed by a hash of
	 * both rt6i_dst and rt6i_src.
	 * Otherwise, the exception table is indexed by
	 * a hash of only rt6i_dst.
	 */
	if (ort->rt6i_src.plen)
		src_key = &nrt->rt6i_src.addr;
#endif

	/* Update rt6i_prefsrc as it could be changed
	 * in rt6_remove_prefsrc()
	 */
	nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
	/* rt6_mtu_change() might lower mtu on ort.
	 * Only insert this exception route if its mtu
	 * is less than ort's mtu value.
	 */
	if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
		err = -EINVAL;
		goto out;
	}

	/* Replace any existing entry for the same (daddr, saddr) pair */
	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
					       src_key);
	if (rt6_ex)
		rt6_remove_exception(bucket, rt6_ex);

	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
	if (!rt6_ex) {
		err = -ENOMEM;
		goto out;
	}
	rt6_ex->rt6i = nrt;
	rt6_ex->stamp = jiffies;
	atomic_inc(&nrt->rt6i_ref);
	nrt->rt6i_node = ort->rt6i_node;
	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
	bucket->depth++;

	/* Cap chain length by evicting the least recently stamped entry */
	if (bucket->depth > FIB6_MAX_DEPTH)
		rt6_exception_remove_oldest(bucket);

out:
	spin_unlock_bh(&rt6_exception_lock);

	/* Update fn->fn_sernum to invalidate all cached dst */
	if (!err)
		fib6_update_sernum(ort);

	return err;
}
1310 
/* Drop every cached exception hanging off @rt and mark the route so
 * rt6_insert_exception() will refuse to re-create the bucket array.
 * Called when @rt is being removed; the bucket array itself is left
 * for the route's destructor.
 */
void rt6_flush_exceptions(struct rt6_info *rt)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct hlist_node *tmp;
	int i;

	spin_lock_bh(&rt6_exception_lock);
	/* Prevent rt6_insert_exception() to recreate the bucket list */
	rt->exception_bucket_flushed = 1;

	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
				    lockdep_is_held(&rt6_exception_lock));
	if (!bucket)
		goto out;

	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
			rt6_remove_exception(bucket, rt6_ex);
		/* Every entry must be gone once the chain is emptied */
		WARN_ON_ONCE(bucket->depth);
		bucket++;
	}

out:
	spin_unlock_bh(&rt6_exception_lock);
}
1337 
1338 /* Find cached rt in the hash table inside passed in rt
1339  * Caller has to hold rcu_read_lock()
1340  */
1341 static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1342 					   struct in6_addr *daddr,
1343 					   struct in6_addr *saddr)
1344 {
1345 	struct rt6_exception_bucket *bucket;
1346 	struct in6_addr *src_key = NULL;
1347 	struct rt6_exception *rt6_ex;
1348 	struct rt6_info *res = NULL;
1349 
1350 	bucket = rcu_dereference(rt->rt6i_exception_bucket);
1351 
1352 #ifdef CONFIG_IPV6_SUBTREES
1353 	/* rt6i_src.plen != 0 indicates rt is in subtree
1354 	 * and exception table is indexed by a hash of
1355 	 * both rt6i_dst and rt6i_src.
1356 	 * Otherwise, the exception table is indexed by
1357 	 * a hash of only rt6i_dst.
1358 	 */
1359 	if (rt->rt6i_src.plen)
1360 		src_key = saddr;
1361 #endif
1362 	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1363 
1364 	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1365 		res = rt6_ex->rt6i;
1366 
1367 	return res;
1368 }
1369 
1370 /* Remove the passed in cached rt from the hash table that contains it */
1371 int rt6_remove_exception_rt(struct rt6_info *rt)
1372 {
1373 	struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1374 	struct rt6_exception_bucket *bucket;
1375 	struct in6_addr *src_key = NULL;
1376 	struct rt6_exception *rt6_ex;
1377 	int err;
1378 
1379 	if (!from ||
1380 	    !(rt->rt6i_flags | RTF_CACHE))
1381 		return -EINVAL;
1382 
1383 	if (!rcu_access_pointer(from->rt6i_exception_bucket))
1384 		return -ENOENT;
1385 
1386 	spin_lock_bh(&rt6_exception_lock);
1387 	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1388 				    lockdep_is_held(&rt6_exception_lock));
1389 #ifdef CONFIG_IPV6_SUBTREES
1390 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1391 	 * and exception table is indexed by a hash of
1392 	 * both rt6i_dst and rt6i_src.
1393 	 * Otherwise, the exception table is indexed by
1394 	 * a hash of only rt6i_dst.
1395 	 */
1396 	if (from->rt6i_src.plen)
1397 		src_key = &rt->rt6i_src.addr;
1398 #endif
1399 	rt6_ex = __rt6_find_exception_spinlock(&bucket,
1400 					       &rt->rt6i_dst.addr,
1401 					       src_key);
1402 	if (rt6_ex) {
1403 		rt6_remove_exception(bucket, rt6_ex);
1404 		err = 0;
1405 	} else {
1406 		err = -ENOENT;
1407 	}
1408 
1409 	spin_unlock_bh(&rt6_exception_lock);
1410 	return err;
1411 }
1412 
1413 /* Find rt6_ex which contains the passed in rt cache and
1414  * refresh its stamp
1415  */
1416 static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1417 {
1418 	struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1419 	struct rt6_exception_bucket *bucket;
1420 	struct in6_addr *src_key = NULL;
1421 	struct rt6_exception *rt6_ex;
1422 
1423 	if (!from ||
1424 	    !(rt->rt6i_flags | RTF_CACHE))
1425 		return;
1426 
1427 	rcu_read_lock();
1428 	bucket = rcu_dereference(from->rt6i_exception_bucket);
1429 
1430 #ifdef CONFIG_IPV6_SUBTREES
1431 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1432 	 * and exception table is indexed by a hash of
1433 	 * both rt6i_dst and rt6i_src.
1434 	 * Otherwise, the exception table is indexed by
1435 	 * a hash of only rt6i_dst.
1436 	 */
1437 	if (from->rt6i_src.plen)
1438 		src_key = &rt->rt6i_src.addr;
1439 #endif
1440 	rt6_ex = __rt6_find_exception_rcu(&bucket,
1441 					  &rt->rt6i_dst.addr,
1442 					  src_key);
1443 	if (rt6_ex)
1444 		rt6_ex->stamp = jiffies;
1445 
1446 	rcu_read_unlock();
1447 }
1448 
1449 static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1450 {
1451 	struct rt6_exception_bucket *bucket;
1452 	struct rt6_exception *rt6_ex;
1453 	int i;
1454 
1455 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1456 					lockdep_is_held(&rt6_exception_lock));
1457 
1458 	if (bucket) {
1459 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1460 			hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1461 				rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1462 			}
1463 			bucket++;
1464 		}
1465 	}
1466 }
1467 
/* Lower the pmtu of every cached clone of @rt that currently exceeds
 * @mtu.  Caller must hold rt6_exception_lock (enforced via
 * rcu_dereference_protected below).
 */
static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	int i;

	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
					lockdep_is_held(&rt6_exception_lock));

	if (bucket) {
		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
			hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
				struct rt6_info *entry = rt6_ex->rt6i;
				/* For RTF_CACHE with rt6i_pmtu == 0
				 * (i.e. a redirected route),
				 * the metrics of its rt->dst.from has already
				 * been updated.
				 */
				if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
					entry->rt6i_pmtu = mtu;
			}
			bucket++;
		}
	}
}
1493 
#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)

/* Remove every cached gateway clone of @rt whose gateway equals
 * @gateway.  Used when a router becomes a plain host and its
 * redirect-learned routes must be dropped.
 */
static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
					struct in6_addr *gateway)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct hlist_node *tmp;
	int i;

	/* Cheap lockless check before taking the global lock */
	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
		return;

	spin_lock_bh(&rt6_exception_lock);
	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
				     lockdep_is_held(&rt6_exception_lock));

	if (bucket) {
		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
			hlist_for_each_entry_safe(rt6_ex, tmp,
						  &bucket->chain, hlist) {
				struct rt6_info *entry = rt6_ex->rt6i;

				/* Both RTF_GATEWAY and RTF_CACHE must be set */
				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
				    RTF_CACHE_GATEWAY &&
				    ipv6_addr_equal(gateway,
						    &entry->rt6i_gateway)) {
					rt6_remove_exception(bucket, rt6_ex);
				}
			}
			bucket++;
		}
	}

	spin_unlock_bh(&rt6_exception_lock);
}
1530 
/* GC helper: decide whether the cached route in @rt6_ex should be
 * removed now (idle past the timeout, or a gateway clone whose
 * neighbour is no longer flagged as a router), otherwise count it so
 * the GC knows more work remains.
 * Caller must hold rt6_exception_lock.
 */
static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
				      struct rt6_exception *rt6_ex,
				      struct fib6_gc_args *gc_args,
				      unsigned long now)
{
	struct rt6_info *rt = rt6_ex->rt6i;

	/* __refcnt == 1 means only the exception table holds it */
	if (atomic_read(&rt->dst.__refcnt) == 1 &&
	    time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
		RT6_TRACE("aging clone %p\n", rt);
		rt6_remove_exception(bucket, rt6_ex);
		return;
	} else if (rt->rt6i_flags & RTF_GATEWAY) {
		struct neighbour *neigh;
		__u8 neigh_flags = 0;

		neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
		if (neigh) {
			neigh_flags = neigh->flags;
			neigh_release(neigh);
		}
		if (!(neigh_flags & NTF_ROUTER)) {
			RT6_TRACE("purging route %p via non-router but gateway\n",
				  rt);
			rt6_remove_exception(bucket, rt6_ex);
			return;
		}
	}
	gc_args->more++;
}
1561 
/* Walk every exception bucket of @rt and let
 * rt6_age_examine_exception() age out stale cached clones.
 * Called from the fib6 garbage collector.
 */
void rt6_age_exceptions(struct rt6_info *rt,
			struct fib6_gc_args *gc_args,
			unsigned long now)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct hlist_node *tmp;
	int i;

	/* Cheap lockless check before taking the global lock */
	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
		return;

	spin_lock_bh(&rt6_exception_lock);
	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
				    lockdep_is_held(&rt6_exception_lock));

	if (bucket) {
		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
			hlist_for_each_entry_safe(rt6_ex, tmp,
						  &bucket->chain, hlist) {
				rt6_age_examine_exception(bucket, rt6_ex,
							  gc_args, now);
			}
			bucket++;
		}
	}
	spin_unlock_bh(&rt6_exception_lock);
}
1590 
/* Core policy-routing lookup used by both the input and output paths.
 *
 * Resolves (daddr, saddr) from @fl6 in @table and returns a dst-held
 * rt6_info.  Depending on the matched route this is:
 *  - the route itself (the null entry or an RTF_CACHE clone),
 *  - a one-off uncached clone (FLOWI_FLAG_KNOWN_NH on a non-gateway
 *    route), or
 *  - a per-cpu clone of the fib6 route.
 * The caller owns the returned reference.
 */
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
			       int oif, struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
	/* Hosts (forwarding disabled) prefer reachable routers first */
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		/* No match here: back up the tree, then retry once more
		 * without the reachability requirement.
		 */
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}


	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);

		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt) {
			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
			 * No need for another dst_hold()
			 */
			rt6_uncached_list_add(uncached_rt);
		} else {
			uncached_rt = net->ipv6.ip6_null_entry;
			dst_hold(&uncached_rt->dst);
		}

		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
		return pcpu_rt;

	}
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
1693 
/* fib6_rule_lookup() callback for the input path: the incoming
 * interface from the flow is used as the oif hint.
 */
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}
1699 
1700 struct dst_entry *ip6_route_input_lookup(struct net *net,
1701 					 struct net_device *dev,
1702 					 struct flowi6 *fl6, int flags)
1703 {
1704 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1705 		flags |= RT6_LOOKUP_F_IFACE;
1706 
1707 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1708 }
1709 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1710 
/* Fill @keys with the L3 fields used for multipath hashing.  For ICMPv6
 * error messages, hash on the embedded (offending) header when it can
 * be pulled, so errors follow the same path as the original flow.
 */
static void ip6_multipath_l3_keys(const struct sk_buff *skb,
				  struct flow_keys *keys)
{
	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
	const struct ipv6hdr *key_iph = outer_iph;
	const struct ipv6hdr *inner_iph;
	const struct icmp6hdr *icmph;
	struct ipv6hdr _inner_iph;

	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
		goto out;

	/* Only ICMPv6 error types carry an embedded IPv6 header */
	icmph = icmp6_hdr(skb);
	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
	    icmph->icmp6_type != ICMPV6_PARAMPROB)
		goto out;

	inner_iph = skb_header_pointer(skb,
				       skb_transport_offset(skb) + sizeof(*icmph),
				       sizeof(_inner_iph), &_inner_iph);
	if (!inner_iph)
		goto out;

	key_iph = inner_iph;
out:
	memset(keys, 0, sizeof(*keys));
	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	keys->addrs.v6addrs.src = key_iph->saddr;
	keys->addrs.v6addrs.dst = key_iph->daddr;
	keys->tags.flow_label = ip6_flowinfo(key_iph);
	keys->basic.ip_proto = key_iph->nexthdr;
}
1745 
1746 /* if skb is set it will be used and fl6 can be NULL */
1747 u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
1748 {
1749 	struct flow_keys hash_keys;
1750 
1751 	if (skb) {
1752 		ip6_multipath_l3_keys(skb, &hash_keys);
1753 		return flow_hash_from_keys(&hash_keys);
1754 	}
1755 
1756 	return get_hash_from_flowi6(fl6);
1757 }
1758 
/* Resolve the route for a received packet and attach it as the skb's
 * dst.  Builds the flow key from the IPv6 header and, for collected
 * tunnel metadata, carries the tunnel id into the lookup.
 */
void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip_tunnel_info *tun_info;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
	/* ICMPv6 errors hash on the embedded flow for multipath */
	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
		fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
	skb_dst_drop(skb);
	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
1782 
/* fib6_rule_lookup() callback for the output path: the flow's output
 * interface is used as the oif hint.
 */
static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}
1788 
/* Output-path route lookup.  Link-scoped destinations are first
 * offered to an l3mdev (VRF) lookup; otherwise lookup flags are
 * derived from the socket binding and source address before the
 * policy lookup runs.
 */
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
					 struct flowi6 *fl6, int flags)
{
	bool any_src;

	if (rt6_need_strict(&fl6->daddr)) {
		struct dst_entry *dst;

		dst = l3mdev_link_scope_lookup(net, fl6);
		if (dst)
			return dst;
	}

	/* Locally generated traffic: iif is the loopback index */
	fl6->flowi6_iif = LOOPBACK_IFINDEX;

	any_src = ipv6_addr_any(&fl6->saddr);
	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
	    (fl6->flowi6_oif && any_src))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!any_src)
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1817 
/* Build a blackhole copy of @dst_orig: same keys/metrics but with
 * input/output set to discard, bound to the loopback device.  Consumes
 * the reference on @dst_orig; returns the new dst or ERR_PTR(-ENOMEM).
 */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct net_device *loopback_dev = net->loopback_dev;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
		       DST_OBSOLETE_NONE, 0);
	if (rt) {
		rt6_info_init(rt);

		new = &rt->dst;
		new->__use = 1;
		/* Anything routed through this dst is silently dropped */
		new->input = dst_discard;
		new->output = dst_discard_out;

		dst_copy_metrics(new, &ort->dst);

		rt->rt6i_idev = in6_dev_get(loopback_dev);
		rt->rt6i_gateway = ort->rt6i_gateway;
		/* The copy is not a per-cpu clone of anything */
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
1850 
1851 /*
1852  *	Destination cache support functions
1853  */
1854 
1855 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1856 {
1857 	if (rt->dst.from &&
1858 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1859 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1860 }
1861 
1862 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1863 {
1864 	u32 rt_cookie = 0;
1865 
1866 	if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
1867 		return NULL;
1868 
1869 	if (rt6_check_expired(rt))
1870 		return NULL;
1871 
1872 	return &rt->dst;
1873 }
1874 
1875 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1876 {
1877 	if (!__rt6_check_expired(rt) &&
1878 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1879 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1880 		return &rt->dst;
1881 	else
1882 		return NULL;
1883 }
1884 
/* dst_ops->check implementation: decide whether a cached dst is still
 * usable.  Clones (pcpu or uncached) are validated through their
 * parent; plain routes are validated directly.
 */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */

	rt6_dst_from_metrics_check(rt);

	if (rt->rt6i_flags & RTF_PCPU ||
	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
		return rt6_dst_from_check(rt, cookie);
	else
		return rt6_check(rt, cookie);
}
1904 
/* dst_ops->negative_advice implementation: called when the stack
 * suspects the dst is bad.  Expired cache clones are deleted; other
 * routes are simply released so the caller re-looks-up.
 */
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				/* ip6_del_rt() consumes the reference */
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}
1922 
/* dst_ops->link_failure implementation: report destination unreachable
 * to the sender and invalidate the route that failed — cache clones
 * are deleted, default routes have their node's sernum poisoned so
 * cached dsts re-validate.
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* Only delete if we still own a reference */
			if (dst_hold_safe(&rt->dst))
				ip6_del_rt(rt);
		} else {
			struct fib6_node *fn;

			rcu_read_lock();
			fn = rcu_dereference(rt->rt6i_node);
			if (fn && (rt->rt6i_flags & RTF_DEFAULT))
				fn->fn_sernum = -1;
			rcu_read_unlock();
		}
	}
}
1945 
/* Record a learned path MTU on @rt and (re)arm its expiry using the
 * per-netns ip6_rt_mtu_expires interval.
 */
static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
{
	struct net *net = dev_net(rt->dst.dev);

	rt->rt6i_flags |= RTF_MODIFIED;
	rt->rt6i_pmtu = mtu;
	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
}
1954 
1955 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1956 {
1957 	return !(rt->rt6i_flags & RTF_CACHE) &&
1958 		(rt->rt6i_flags & RTF_PCPU ||
1959 		 rcu_access_pointer(rt->rt6i_node));
1960 }
1961 
/* Apply a path-MTU update to @dst.  Addresses come from @iph when
 * given, else from @sk.  Non-cache routes get a dedicated RTF_CACHE
 * clone so the fib6 route's own mtu is left untouched.
 */
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	const struct in6_addr *daddr, *saddr;
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	if (rt6->rt6i_flags & RTF_LOCAL)
		return;

	/* Administratively locked MTU must not be overridden */
	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	if (iph) {
		daddr = &iph->daddr;
		saddr = &iph->saddr;
	} else if (sk) {
		daddr = &sk->sk_v6_daddr;
		saddr = &inet6_sk(sk)->saddr;
	} else {
		daddr = NULL;
		saddr = NULL;
	}
	dst_confirm_neigh(dst, daddr);
	/* Never go below the IPv6 minimum MTU, and only ever shrink */
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	if (mtu >= dst_mtu(dst))
		return;

	if (!rt6_cache_allowed_for_pmtu(rt6)) {
		rt6_do_update_pmtu(rt6, mtu);
	} else if (daddr) {
		struct rt6_info *nrt6;

		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);

			/* ip6_ins_rt(nrt6) will bump the
			 * rt6->rt6i_node->fn_sernum
			 * which will fail the next rt6_check() and
			 * invalidate the sk->sk_dst_cache.
			 */
			ip6_ins_rt(nrt6);
			/* Release the reference taken in
			 * ip6_rt_cache_alloc()
			 */
			dst_release(&nrt6->dst);
		}
	}
}
2011 
/* dst_ops->update_pmtu implementation: thin wrapper that extracts the
 * IPv6 header from @skb (when present) for __ip6_rt_update_pmtu().
 */
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu)
{
	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
}
2017 
/* Update the path MTU for the flow described by the IPv6 header at the
 * start of @skb->data (e.g. from a received Packet Too Big), looking
 * up the route and applying the new mtu to it.
 */
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark, kuid_t uid)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	/* Fall back to the reply mark when no explicit mark was given */
	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);
	fl6.flowi6_uid = uid;

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2039 
/* Socket-level pmtu update: apply the new mtu for the socket's flow,
 * then, if the socket's cached dst has become invalid as a result,
 * refresh it for connected datagram sockets.
 */
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	struct dst_entry *dst;

	ip6_update_pmtu(skb, sock_net(sk), mtu,
			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);

	dst = __sk_dst_get(sk);
	if (!dst || !dst->obsolete ||
	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
		return;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
		ip6_datagram_dst_update(sk, false);
	bh_unlock_sock(sk);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2058 
/* Handle redirects */
/* Flow key extended with the redirecting gateway's address; passed to
 * __ip6_route_redirect() through the generic flowi6 pointer (fl6 must
 * stay the first member so the cast back is valid).
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
2064 
2065 static struct rt6_info *__ip6_route_redirect(struct net *net,
2066 					     struct fib6_table *table,
2067 					     struct flowi6 *fl6,
2068 					     int flags)
2069 {
2070 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2071 	struct rt6_info *rt;
2072 	struct fib6_node *fn;
2073 
2074 	/* Get the "current" route for this destination and
2075 	 * check if the redirect has come from appropriate router.
2076 	 *
2077 	 * RFC 4861 specifies that redirects should only be
2078 	 * accepted if they come from the nexthop to the target.
2079 	 * Due to the way the routes are chosen, this notion
2080 	 * is a bit fuzzy and one might need to check all possible
2081 	 * routes.
2082 	 */
2083 
2084 	read_lock_bh(&table->tb6_lock);
2085 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2086 restart:
2087 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2088 		if (rt6_check_expired(rt))
2089 			continue;
2090 		if (rt->dst.error)
2091 			break;
2092 		if (!(rt->rt6i_flags & RTF_GATEWAY))
2093 			continue;
2094 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
2095 			continue;
2096 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
2097 			continue;
2098 		break;
2099 	}
2100 
2101 	if (!rt)
2102 		rt = net->ipv6.ip6_null_entry;
2103 	else if (rt->dst.error) {
2104 		rt = net->ipv6.ip6_null_entry;
2105 		goto out;
2106 	}
2107 
2108 	if (rt == net->ipv6.ip6_null_entry) {
2109 		fn = fib6_backtrack(fn, &fl6->saddr);
2110 		if (fn)
2111 			goto restart;
2112 	}
2113 
2114 out:
2115 	dst_hold(&rt->dst);
2116 
2117 	read_unlock_bh(&table->tb6_lock);
2118 
2119 	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
2120 	return rt;
2121 };
2122 
2123 static struct dst_entry *ip6_route_redirect(struct net *net,
2124 					const struct flowi6 *fl6,
2125 					const struct in6_addr *gateway)
2126 {
2127 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2128 	struct ip6rd_flowi rdfl;
2129 
2130 	rdfl.fl6 = *fl6;
2131 	rdfl.gateway = *gateway;
2132 
2133 	return fib6_rule_lookup(net, &rdfl.fl6,
2134 				flags, __ip6_route_redirect);
2135 }
2136 
/* Process an ICMPv6 redirect for the packet whose IPv6 header sits at
 * @skb->data: look up the affected route and apply the redirect.
 */
void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
		  kuid_t uid)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);
	fl6.flowi6_uid = uid;

	/* The redirect must originate from the packet's source router */
	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);
2158 
/* Variant of ip6_redirect() for redirect messages that carry no
 * redirected header option: the flow is reconstructed from the rd_msg
 * destination and the outer IPv6 header.
 */
void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
			    u32 mark)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = msg->dest;
	fl6.saddr = iph->daddr;
	fl6.flowi6_uid = sock_net_uid(net, NULL);

	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
2179 
2180 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2181 {
2182 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2183 		     sk->sk_uid);
2184 }
2185 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2186 
2187 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
2188 {
2189 	struct net_device *dev = dst->dev;
2190 	unsigned int mtu = dst_mtu(dst);
2191 	struct net *net = dev_net(dev);
2192 
2193 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2194 
2195 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2196 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
2197 
2198 	/*
2199 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2200 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2201 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
2202 	 * rely only on pmtu discovery"
2203 	 */
2204 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2205 		mtu = IPV6_MAXPLEN;
2206 	return mtu;
2207 }
2208 
2209 static unsigned int ip6_mtu(const struct dst_entry *dst)
2210 {
2211 	const struct rt6_info *rt = (const struct rt6_info *)dst;
2212 	unsigned int mtu = rt->rt6i_pmtu;
2213 	struct inet6_dev *idev;
2214 
2215 	if (mtu)
2216 		goto out;
2217 
2218 	mtu = dst_metric_raw(dst, RTAX_MTU);
2219 	if (mtu)
2220 		goto out;
2221 
2222 	mtu = IPV6_MIN_MTU;
2223 
2224 	rcu_read_lock();
2225 	idev = __in6_dev_get(dst->dev);
2226 	if (idev)
2227 		mtu = idev->cnf.mtu6;
2228 	rcu_read_unlock();
2229 
2230 out:
2231 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2232 
2233 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
2234 }
2235 
/* Allocate a FIB-less host dst for sending an ICMPv6 packet to
 * fl6->daddr via @dev.  The route is placed on uncached_list (not the
 * fib6 tree) and run through xfrm_lookup() before being returned.
 * Returns an ERR_PTR on allocation/device failure.
 */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0);
	if (unlikely(!rt)) {
		/* drop the idev reference taken above before bailing out */
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output  = ip6_output;
	rt->rt6i_gateway  = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev     = idev;
	/* hop limit 0 here means "use the per-device default" */
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	/* Add this dst into uncached_list so that rt6_ifdown() can
	 * do proper release of the net_device
	 */
	rt6_uncached_list_add(rt);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}
2272 
/* dst_ops garbage collector for IPv6 routes.
 *
 * The expensive fib6 walk is skipped while the previous run is less
 * than ip6_rt_gc_min_interval old and the entry count stays within
 * ip6_rt_max_size.  ip6_rt_gc_expire grows on every forced run (so
 * sustained pressure ages entries more aggressively) and decays by
 * 1/2^elasticity at the end of each call.
 *
 * Returns nonzero while the table is still over rt_max_size, which
 * makes further dst allocations fail.
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}
2297 
2298 static int ip6_convert_metrics(struct mx6_config *mxc,
2299 			       const struct fib6_config *cfg)
2300 {
2301 	bool ecn_ca = false;
2302 	struct nlattr *nla;
2303 	int remaining;
2304 	u32 *mp;
2305 
2306 	if (!cfg->fc_mx)
2307 		return 0;
2308 
2309 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
2310 	if (unlikely(!mp))
2311 		return -ENOMEM;
2312 
2313 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
2314 		int type = nla_type(nla);
2315 		u32 val;
2316 
2317 		if (!type)
2318 			continue;
2319 		if (unlikely(type > RTAX_MAX))
2320 			goto err;
2321 
2322 		if (type == RTAX_CC_ALGO) {
2323 			char tmp[TCP_CA_NAME_MAX];
2324 
2325 			nla_strlcpy(tmp, nla, sizeof(tmp));
2326 			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
2327 			if (val == TCP_CA_UNSPEC)
2328 				goto err;
2329 		} else {
2330 			val = nla_get_u32(nla);
2331 		}
2332 		if (type == RTAX_HOPLIMIT && val > 255)
2333 			val = 255;
2334 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
2335 			goto err;
2336 
2337 		mp[type - 1] = val;
2338 		__set_bit(type - 1, mxc->mx_valid);
2339 	}
2340 
2341 	if (ecn_ca) {
2342 		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
2343 		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
2344 	}
2345 
2346 	mxc->mx = mp;
2347 	return 0;
2348  err:
2349 	kfree(mp);
2350 	return -EINVAL;
2351 }
2352 
2353 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2354 					    struct fib6_config *cfg,
2355 					    const struct in6_addr *gw_addr)
2356 {
2357 	struct flowi6 fl6 = {
2358 		.flowi6_oif = cfg->fc_ifindex,
2359 		.daddr = *gw_addr,
2360 		.saddr = cfg->fc_prefsrc,
2361 	};
2362 	struct fib6_table *table;
2363 	struct rt6_info *rt;
2364 	int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
2365 
2366 	table = fib6_get_table(net, cfg->fc_table);
2367 	if (!table)
2368 		return NULL;
2369 
2370 	if (!ipv6_addr_any(&cfg->fc_prefsrc))
2371 		flags |= RT6_LOOKUP_F_HAS_SADDR;
2372 
2373 	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
2374 
2375 	/* if table lookup failed, fall back to full lookup */
2376 	if (rt == net->ipv6.ip6_null_entry) {
2377 		ip6_rt_put(rt);
2378 		rt = NULL;
2379 	}
2380 
2381 	return rt;
2382 }
2383 
/* Build (but do not insert) a new rt6_info from a fib6_config.
 *
 * Validates prefix lengths, resolves the egress device/idev and the
 * gateway (if any), promotes impossible loopback routes to reject
 * routes, and attaches lwtunnel state when configured.  On success the
 * returned route carries references on its device, idev and table; on
 * failure all partially-taken references are dropped and an ERR_PTR
 * is returned, with a human-readable reason in @extack where one
 * applies.
 */
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
					      struct netlink_ext_ack *extack)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;
	int err = -EINVAL;

	/* RTF_PCPU is an internal flag; can not be set by userspace */
	if (cfg->fc_flags & RTF_PCPU) {
		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
		goto out;
	}

	if (cfg->fc_dst_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid prefix length");
		goto out;
	}
	if (cfg->fc_src_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid source address length");
		goto out;
	}
#ifndef CONFIG_IPV6_SUBTREES
	if (cfg->fc_src_len) {
		NL_SET_ERR_MSG(extack,
			       "Specifying source address requires IPV6_SUBTREES to be enabled");
		goto out;
	}
#endif
	if (cfg->fc_ifindex) {
		/* take dev + idev references; released on the error path */
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* choose the input handler from the destination type/flags */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		/* lightweight tunnel encap: interpose lwtunnel handlers
		 * around the chosen input/output functions
		 */
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate, extack);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_out;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0)) {
			NL_SET_ERR_MSG(extack, "Invalid gateway address");
			goto out;
		}
		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt = NULL;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			   We allow IPv4-mapped nexthops to support RFC4798-type
			   addressing
			 */
			if (!(gwa_type & (IPV6_ADDR_UNICAST |
					  IPV6_ADDR_MAPPED))) {
				NL_SET_ERR_MSG(extack,
					       "Invalid gateway address");
				goto out;
			}

			if (cfg->fc_table) {
				grt = ip6_nh_lookup_table(net, cfg, gw_addr);

				if (grt) {
					if (grt->rt6i_flags & RTF_GATEWAY ||
					    (dev && dev != grt->dst.dev)) {
						ip6_rt_put(grt);
						grt = NULL;
					}
				}
			}

			if (!grt)
				grt = rt6_lookup(net, gw_addr, NULL,
						 cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* adopt the nexthop route's device and idev,
				 * taking our own references on both
				 */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev) {
			NL_SET_ERR_MSG(extack, "Egress device not specified");
			goto out;
		} else if (dev->flags & IFF_LOOPBACK) {
			NL_SET_ERR_MSG(extack,
				       "Egress device can not be loopback device for this route");
			goto out;
		}
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			NL_SET_ERR_MSG(extack, "Invalid source address");
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* the dev/idev references taken above are transferred to rt here */
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return rt;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_release_immediate(&rt->dst);

	return ERR_PTR(err);
}
2673 
/* Create a route from @cfg and insert it into the FIB.
 *
 * Any metrics attribute is converted first and always freed before
 * return; a route that was built but could not be inserted is
 * released with dst_release_immediate().  Returns 0 or a negative
 * errno.
 */
int ip6_route_add(struct fib6_config *cfg,
		  struct netlink_ext_ack *extack)
{
	struct mx6_config mxc = { .mx = NULL, };
	struct rt6_info *rt;
	int err;

	rt = ip6_route_info_create(cfg, extack);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto out;
	}

	err = ip6_convert_metrics(&mxc, cfg);
	if (err)
		goto out;

	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);

	kfree(mxc.mx);

	return err;
out:
	if (rt)
		dst_release_immediate(&rt->dst);

	return err;
}
2703 
/* Remove @rt from its FIB table under the table write lock.
 * Always consumes the caller's reference on @rt (ip6_rt_put), even on
 * failure.  Deleting the null entry is rejected with -ENOENT.
 */
static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->dst.dev);

	if (rt == net->ipv6.ip6_null_entry) {
		err = -ENOENT;
		goto out;
	}

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_del(rt, info);
	write_unlock_bh(&table->tb6_lock);

out:
	ip6_rt_put(rt);
	return err;
}
2724 
2725 int ip6_del_rt(struct rt6_info *rt)
2726 {
2727 	struct nl_info info = {
2728 		.nl_net = dev_net(rt->dst.dev),
2729 	};
2730 	return __ip6_del_rt(rt, &info);
2731 }
2732 
/* Delete @rt and, when fc_delete_all_nh is set, all of its multipath
 * siblings in a single pass under the table write lock.
 *
 * A combined RTM_DELROUTE notification covering every hop is built up
 * front; if that succeeds, per-hop notifications from fib6_del() are
 * suppressed and the combined message is sent after the lock is
 * dropped.  The caller's reference on @rt is always consumed.
 */
static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
{
	struct nl_info *info = &cfg->fc_nlinfo;
	struct net *net = info->nl_net;
	struct sk_buff *skb = NULL;
	struct fib6_table *table;
	int err = -ENOENT;

	if (rt == net->ipv6.ip6_null_entry)
		goto out_put;
	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);

	if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
		struct rt6_info *sibling, *next_sibling;

		/* prefer to send a single notification with all hops */
		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
		if (skb) {
			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;

			if (rt6_fill_node(net, skb, rt,
					  NULL, NULL, 0, RTM_DELROUTE,
					  info->portid, seq, 0) < 0) {
				kfree_skb(skb);
				skb = NULL;
			} else
				info->skip_notify = 1;
		}

		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->rt6i_siblings,
					 rt6i_siblings) {
			err = fib6_del(sibling, info);
			if (err)
				goto out_unlock;
		}
	}

	err = fib6_del(rt, info);
out_unlock:
	write_unlock_bh(&table->tb6_lock);
out_put:
	ip6_rt_put(rt);

	if (skb) {
		/* combined notification goes out after the lock is dropped */
		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
			    info->nlh, gfp_any());
	}
	return err;
}
2784 
/* Delete the route described by @cfg.
 *
 * Walks the leaf chain of the matching fib6 node under the table read
 * lock, filtering by RTF_CACHE, ifindex, gateway, metric and protocol
 * as specified.  On a match a reference is taken and the read lock is
 * dropped before the actual (write-locked) deletion: only the matched
 * hop when a gateway was given, otherwise the route with all of its
 * multipath siblings.  Returns -ESRCH when nothing matches.
 */
static int ip6_route_del(struct fib6_config *cfg,
			 struct netlink_ext_ack *extack)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table) {
		NL_SET_ERR_MSG(extack, "FIB table does not exist");
		return err;
	}

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len,
			 true);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			/* cached clones are only deleted when explicitly
			 * requested via RTF_CACHE
			 */
			if ((rt->rt6i_flags & RTF_CACHE) &&
			    !(cfg->fc_flags & RTF_CACHE))
				continue;
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
				continue;
			/* hold the route, then drop the read lock before
			 * re-taking it for write in the delete helpers
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			/* if gateway was specified only delete the one hop */
			if (cfg->fc_flags & RTF_GATEWAY)
				return __ip6_del_rt(rt, &cfg->fc_nlinfo);

			return __ip6_del_rt_siblings(rt, cfg);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
2836 
/* Act on a validated-enough ICMPv6 Redirect (RFC 4861 section 8):
 * sanity-check the message and its ND options, update the neighbour
 * cache for the new first hop, and install a cloned RTF_CACHE route
 * towards msg->dest via the redirect target.  Silently returns on any
 * validation failure (ratelimited debug message only).
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* target == dest means the destination itself is on-link;
	 * otherwise the target must be a link-local router address
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* routers and interfaces with accept_redirects off ignore these */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt->rt6i_flags & RTF_REJECT) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER)),
		     NDISC_REDIRECT, &ndopts);

	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_protocol = RTPROT_REDIRECT;
	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out_release;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* a superseded cached clone is removed; ip6_del_rt consumes the
	 * reference taken by dst_clone()
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out_release:
	/* Release the reference taken in
	 * ip6_rt_cache_alloc()
	 */
	dst_release(&nrt->dst);

out:
	neigh_release(neigh);
}
2959 
2960 /*
2961  *	Misc support functions
2962  */
2963 
/* Link @rt to its parent route @from: take a reference on from->dst,
 * record it in rt->dst.from, and share @from's metrics read-only.
 * The BUG_ON guards against chaining through an already-derived route.
 */
static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
	BUG_ON(from->dst.from);

	rt->rt6i_flags &= ~RTF_EXPIRES;
	dst_hold(&from->dst);
	rt->dst.from = &from->dst;
	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}
2973 
/* Initialise @rt as a copy of @ort, taking references on the idev and
 * lwtunnel state, and linking back to @ort via rt6_set_from() so
 * metrics are shared with the parent route.
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
{
	rt->dst.input = ort->dst.input;
	rt->dst.output = ort->dst.output;
	rt->rt6i_dst = ort->rt6i_dst;
	rt->dst.error = ort->dst.error;
	rt->rt6i_idev = ort->rt6i_idev;
	if (rt->rt6i_idev)
		in6_dev_hold(rt->rt6i_idev);
	rt->dst.lastuse = jiffies;
	rt->rt6i_gateway = ort->rt6i_gateway;
	rt->rt6i_flags = ort->rt6i_flags;
	rt6_set_from(rt, ort);
	rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->rt6i_src;
#endif
	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
	rt->rt6i_table = ort->rt6i_table;
	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}
2995 
2996 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Find an existing RA route-information route (RTF_ROUTEINFO) for
 * prefix/prefixlen via @gwaddr on @dev.  Returns a held reference or
 * NULL when no matching table or route exists.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev)
{
	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
	int ifindex = dev->ifindex;
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, tb_id);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
	if (!fn)
		goto out;

	/* scan the leaf chain for a gatewayed route-info entry that
	 * matches both the egress interface and the gateway address
	 */
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);
		break;
	}
out:
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
3031 
3032 static struct rt6_info *rt6_add_route_info(struct net *net,
3033 					   const struct in6_addr *prefix, int prefixlen,
3034 					   const struct in6_addr *gwaddr,
3035 					   struct net_device *dev,
3036 					   unsigned int pref)
3037 {
3038 	struct fib6_config cfg = {
3039 		.fc_metric	= IP6_RT_PRIO_USER,
3040 		.fc_ifindex	= dev->ifindex,
3041 		.fc_dst_len	= prefixlen,
3042 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3043 				  RTF_UP | RTF_PREF(pref),
3044 		.fc_protocol = RTPROT_RA,
3045 		.fc_nlinfo.portid = 0,
3046 		.fc_nlinfo.nlh = NULL,
3047 		.fc_nlinfo.nl_net = net,
3048 	};
3049 
3050 	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
3051 	cfg.fc_dst = *prefix;
3052 	cfg.fc_gateway = *gwaddr;
3053 
3054 	/* We should treat it as a default route if prefix length is 0. */
3055 	if (!prefixlen)
3056 		cfg.fc_flags |= RTF_DEFAULT;
3057 
3058 	ip6_route_add(&cfg, NULL);
3059 
3060 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
3061 }
3062 #endif
3063 
/* Find the RA-learned default route via router @addr on @dev.
 * Returns a held reference, or NULL if the table or route is absent.
 */
struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
{
	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), tb_id);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		/* match only addrconf default routes with this gateway */
		if (dev == rt->dst.dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
3086 
/* Install an RA-learned default route via @gwaddr on @dev with the
 * given preference, marking the table as holding a default router.
 * Returns a held reference to the inserted route (via
 * rt6_get_dflt_router()) or NULL on failure.
 */
struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= dev->ifindex,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_protocol = RTPROT_RA,
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	cfg.fc_gateway = *gwaddr;

	if (!ip6_route_add(&cfg, NULL)) {
		struct fib6_table *table;

		/* flag the table so rt6_purge_dflt_routers() will scan it */
		table = fib6_get_table(dev_net(dev), cfg.fc_table);
		if (table)
			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
	}

	return rt6_get_dflt_router(gwaddr, dev);
}
3115 
/* Remove every RA-learned default route from @table, except on
 * interfaces with accept_ra == 2.  The read lock must be dropped
 * before ip6_del_rt() (which takes the write lock), so the walk
 * restarts from the top after every deletion.
 */
static void __rt6_purge_dflt_routers(struct fib6_table *table)
{
	struct rt6_info *rt;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
			/* hold the route across the lock drop; ip6_del_rt()
			 * consumes this reference
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);

	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
}
3135 
3136 void rt6_purge_dflt_routers(struct net *net)
3137 {
3138 	struct fib6_table *table;
3139 	struct hlist_head *head;
3140 	unsigned int h;
3141 
3142 	rcu_read_lock();
3143 
3144 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3145 		head = &net->ipv6.fib_table_hash[h];
3146 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3147 			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3148 				__rt6_purge_dflt_routers(table);
3149 		}
3150 	}
3151 
3152 	rcu_read_unlock();
3153 }
3154 
3155 static void rtmsg_to_fib6_config(struct net *net,
3156 				 struct in6_rtmsg *rtmsg,
3157 				 struct fib6_config *cfg)
3158 {
3159 	memset(cfg, 0, sizeof(*cfg));
3160 
3161 	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3162 			 : RT6_TABLE_MAIN;
3163 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3164 	cfg->fc_metric = rtmsg->rtmsg_metric;
3165 	cfg->fc_expires = rtmsg->rtmsg_info;
3166 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3167 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
3168 	cfg->fc_flags = rtmsg->rtmsg_flags;
3169 
3170 	cfg->fc_nlinfo.nl_net = net;
3171 
3172 	cfg->fc_dst = rtmsg->rtmsg_dst;
3173 	cfg->fc_src = rtmsg->rtmsg_src;
3174 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
3175 }
3176 
3177 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3178 {
3179 	struct fib6_config cfg;
3180 	struct in6_rtmsg rtmsg;
3181 	int err;
3182 
3183 	switch (cmd) {
3184 	case SIOCADDRT:		/* Add a route */
3185 	case SIOCDELRT:		/* Delete a route */
3186 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
3187 			return -EPERM;
3188 		err = copy_from_user(&rtmsg, arg,
3189 				     sizeof(struct in6_rtmsg));
3190 		if (err)
3191 			return -EFAULT;
3192 
3193 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
3194 
3195 		rtnl_lock();
3196 		switch (cmd) {
3197 		case SIOCADDRT:
3198 			err = ip6_route_add(&cfg, NULL);
3199 			break;
3200 		case SIOCDELRT:
3201 			err = ip6_route_del(&cfg, NULL);
3202 			break;
3203 		default:
3204 			err = -EINVAL;
3205 		}
3206 		rtnl_unlock();
3207 
3208 		return err;
3209 	}
3210 
3211 	return -EINVAL;
3212 }
3213 
3214 /*
3215  *	Drop the packet on the floor
3216  */
3217 
/* Common drop path for reject/blackhole-style routes: bump the
 * appropriate SNMP counter, send an ICMPv6 destination-unreachable
 * with @code, and free the skb.  @ipstats_mib_noroutes selects the
 * input or output "no route" counter.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		/* packets to the unspecified address count as address
		 * errors rather than "no route"
		 */
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY) {
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
3240 
/* dst.input handler for unreachable routes: drop with "no route". */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
3245 
3246 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3247 {
3248 	skb->dev = skb_dst(skb)->dev;
3249 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
3250 }
3251 
/* dst.input handler for prohibit routes: drop with "admin prohibited". */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
3256 
3257 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3258 {
3259 	skb->dev = skb_dst(skb)->dev;
3260 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
3261 }
3262 
3263 /*
3264  *	Allocate a dst for local (unicast / anycast) address.
3265  */
3266 
/* Build a /128 host route for a local unicast or anycast address on
 * @idev's device.  The route is returned un-inserted, holding a
 * reference on @idev, bound to the l3mdev (or local) table.
 * Returns ERR_PTR(-ENOMEM) on allocation failure.
 */
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    bool anycast)
{
	u32 tb_id;
	struct net *net = dev_net(idev->dev);
	struct net_device *dev = idev->dev;
	struct rt6_info *rt;

	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
	if (!rt)
		return ERR_PTR(-ENOMEM);

	/* reference transferred to rt->rt6i_idev below */
	in6_dev_hold(idev);

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_idev = idev;

	rt->rt6i_protocol = RTPROT_KERNEL;
	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;

	rt->rt6i_gateway  = *addr;
	rt->rt6i_dst.addr = *addr;
	rt->rt6i_dst.plen = 128;
	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
	rt->rt6i_table = fib6_get_table(net, tb_id);

	return rt;
}
3302 
3303 /* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict the walk to this device; NULL = any */
	struct net *net;	/* namespace whose tables are being walked */
	struct in6_addr *addr;	/* prefsrc address being removed */
};
3309 
3310 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3311 {
3312 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3313 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3314 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3315 
3316 	if (((void *)rt->dst.dev == dev || !dev) &&
3317 	    rt != net->ipv6.ip6_null_entry &&
3318 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
3319 		spin_lock_bh(&rt6_exception_lock);
3320 		/* remove prefsrc entry */
3321 		rt->rt6i_prefsrc.plen = 0;
3322 		/* need to update cache as well */
3323 		rt6_exceptions_remove_prefsrc(rt);
3324 		spin_unlock_bh(&rt6_exception_lock);
3325 	}
3326 	return 0;
3327 }
3328 
/* Walk all FIB tables and clear @ifp's address from any route's
 * preferred-source field; called when the address is removed.
 */
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net *net = dev_net(ifp->idev->dev);
	struct arg_dev_net_ip adni = {
		.dev = ifp->idev->dev,
		.net = net,
		.addr = &ifp->addr,
	};
	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}
3339 
3340 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
3341 
3342 /* Remove routers and update dst entries when gateway turn into host. */
/* fib6_clean_all() callback used when a gateway stops being a router.
 * Returning -1 tells the walker to delete the route; returning 0 keeps it.
 */
static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
{
	struct in6_addr *gateway = (struct in6_addr *)arg;

	/* RTF_CACHE_GATEWAY case will be removed once the exception
	 * table is hooked up to store all cached routes.
	 */
	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
		return -1;
	}

	/* Further clean up cached routes in exception table.
	 * This is needed because cached route may have a different
	 * gateway than its 'parent' in the case of an ip redirect.
	 */
	rt6_exceptions_clean_tohost(rt, gateway);

	return 0;
}
3364 
/* Purge routes/cached entries via @gateway after it ceased to be a router */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
3369 
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches every device */
	struct net *net;	/* owning network namespace */
};
3374 
3375 /* called with write lock held for table with rt */
/* fib6_clean_all() callback for device-down events.  Returning -1 asks
 * the walker to delete the route.  Multipath routes with live siblings
 * are kept when the ignore_routes_with_linkdown sysctl is set, unless
 * the device is actually being unregistered.
 */
static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
	const struct arg_dev_net *adn = arg;
	const struct net_device *dev = adn->dev;

	if ((rt->dst.dev == dev || !dev) &&
	    rt != adn->net->ipv6.ip6_null_entry &&
	    (rt->rt6i_nsiblings == 0 ||
	     (dev && netdev_unregistering(dev)) ||
	     !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
		return -1;

	return 0;
}
3390 
/* Remove routes through @dev (all devices when @dev is NULL) and flush
 * the uncached-route list for the device.
 */
void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, &adn);
	if (dev)
		rt6_uncached_list_flush_dev(net, dev);
}
3402 
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new device MTU */
};
3407 
/* fib6_clean_all() callback: propagate a device MTU change into route
 * metrics and cached PMTU values.  Always returns 0 (never deletes).
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discovery.
	 */
	if (rt->dst.dev == arg->dev &&
	    dst_metric_raw(&rt->dst, RTAX_MTU) &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
		spin_lock_bh(&rt6_exception_lock);
		/* This case will be removed once the exception table
		 * is hooked up.
		 */
		if (rt->rt6i_flags & RTF_CACHE) {
			/* For RTF_CACHE with rt6i_pmtu == 0
			 * (i.e. a redirected route),
			 * the metrics of its rt->dst.from has already
			 * been updated.
			 */
			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
				rt->rt6i_pmtu = arg->mtu;
		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
			   (dst_mtu(&rt->dst) < arg->mtu &&
			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		}
		rt6_exceptions_update_pmtu(rt, arg->mtu);
		spin_unlock_bh(&rt6_exception_lock);
	}
	return 0;
}
3462 
/* Update route MTU metrics after @dev's MTU changed to @mtu */
void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
{
	struct rt6_mtu_change_arg arg = {
		.dev = dev,
		.mtu = mtu,
	};

	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
}
3472 
/* Netlink attribute validation policy for IPv6 RTM_* route messages */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]              = { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
	[RTA_EXPIRES]		= { .type = NLA_U32 },
	[RTA_UID]		= { .type = NLA_U32 },
	[RTA_MARK]		= { .type = NLA_U32 },
};
3487 
/* Translate an RTM_NEWROUTE/RTM_DELROUTE netlink message into a
 * struct fib6_config.  Returns 0 on success or a negative errno.
 * cfg->fc_mx/fc_mp point into the message's attribute payload and are
 * only valid while @nlh is.
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg,
			      struct netlink_ext_ack *extack)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	unsigned int pref;
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
			  NULL);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_type = rtm->rtm_type;

	/* these route types all install reject-style dst entries */
	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	if (rtm->rtm_flags & RTM_F_CLONED)
		cfg->fc_flags |= RTF_CACHE;

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		/* prefix attributes may carry only rtm_dst_len bits */
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);

		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
						     cfg->fc_mp_len, extack);
		if (err < 0)
			goto errout;
	}

	if (tb[RTA_PREF]) {
		/* unknown preference values fall back to medium */
		pref = nla_get_u8(tb[RTA_PREF]);
		if (pref != ICMPV6_ROUTER_PREF_LOW &&
		    pref != ICMPV6_ROUTER_PREF_HIGH)
			pref = ICMPV6_ROUTER_PREF_MEDIUM;
		cfg->fc_flags |= RTF_PREF(pref);
	}

	if (tb[RTA_ENCAP])
		cfg->fc_encap = tb[RTA_ENCAP];

	if (tb[RTA_ENCAP_TYPE]) {
		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);

		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
		if (err < 0)
			goto errout;
	}

	if (tb[RTA_EXPIRES]) {
		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);

		if (addrconf_finite_timeout(timeout)) {
			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
			cfg->fc_flags |= RTF_EXPIRES;
		}
	}

	err = 0;
errout:
	return err;
}
3611 
/* Per-nexthop bookkeeping while building a multipath route */
struct rt6_nh {
	struct rt6_info *rt6_info;	/* route to insert; NULL once consumed */
	struct fib6_config r_cfg;	/* per-nexthop config, kept for rollback */
	struct mx6_config mxc;		/* converted metrics (mxc.mx is kfreed) */
	struct list_head next;		/* link on the local rt6_nh_list */
};
3618 
3619 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3620 {
3621 	struct rt6_nh *nh;
3622 
3623 	list_for_each_entry(nh, rt6_nh_list, next) {
3624 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
3625 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
3626 		        nh->r_cfg.fc_ifindex);
3627 	}
3628 }
3629 
3630 static int ip6_route_info_append(struct list_head *rt6_nh_list,
3631 				 struct rt6_info *rt, struct fib6_config *r_cfg)
3632 {
3633 	struct rt6_nh *nh;
3634 	int err = -EEXIST;
3635 
3636 	list_for_each_entry(nh, rt6_nh_list, next) {
3637 		/* check if rt6_info already exists */
3638 		if (rt6_duplicate_nexthop(nh->rt6_info, rt))
3639 			return err;
3640 	}
3641 
3642 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
3643 	if (!nh)
3644 		return -ENOMEM;
3645 	nh->rt6_info = rt;
3646 	err = ip6_convert_metrics(&nh->mxc, r_cfg);
3647 	if (err) {
3648 		kfree(nh);
3649 		return err;
3650 	}
3651 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3652 	list_add_tail(&nh->next, rt6_nh_list);
3653 
3654 	return 0;
3655 }
3656 
/* Send the single RTM_NEWROUTE notification for a multipath add/append.
 * @rt is the first route inserted, @rt_last the last; may be NULL when
 * nothing was inserted, in which case no notification is sent.
 */
static void ip6_route_mpath_notify(struct rt6_info *rt,
				   struct rt6_info *rt_last,
				   struct nl_info *info,
				   __u16 nlflags)
{
	/* if this is an APPEND route, then rt points to the first route
	 * inserted and rt_last points to last route inserted. Userspace
	 * wants a consistent dump of the route which starts at the first
	 * nexthop. Since sibling routes are always added at the end of
	 * the list, find the first sibling of the last route appended
	 */
	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
		rt = list_first_entry(&rt_last->rt6i_siblings,
				      struct rt6_info,
				      rt6i_siblings);
	}

	if (rt)
		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
}
3677 
/* Install a multipath route: parse every rtnexthop in cfg->fc_mp into a
 * local list, then insert them one by one as sibling routes.  On partial
 * failure, routes already inserted are deleted again so the operation is
 * all-or-nothing.  Returns 0 on success or a negative errno.
 */
static int ip6_route_multipath_add(struct fib6_config *cfg,
				   struct netlink_ext_ack *extack)
{
	struct rt6_info *rt_notif = NULL, *rt_last = NULL;
	struct nl_info *info = &cfg->fc_nlinfo;
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	struct rt6_info *rt;
	struct rt6_nh *err_nh;
	struct rt6_nh *nh, *nh_safe;
	__u16 nlflags;
	int remaining;
	int attrlen;
	int err = 1;
	int nhn = 0;
	int replace = (cfg->fc_nlinfo.nlh &&
		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
	LIST_HEAD(rt6_nh_list);

	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
		nlflags |= NLM_F_APPEND;

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
	 * rt6_info structs per nexthop
	 */
	while (rtnh_ok(rtnh, remaining)) {
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				r_cfg.fc_gateway = nla_get_in6_addr(nla);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
			if (nla)
				r_cfg.fc_encap_type = nla_get_u16(nla);
		}

		rt = ip6_route_info_create(&r_cfg, extack);
		if (IS_ERR(rt)) {
			err = PTR_ERR(rt);
			rt = NULL;
			goto cleanup;
		}

		/* on failure we still own rt and must release it */
		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
		if (err) {
			dst_release_immediate(&rt->dst);
			goto cleanup;
		}

		rtnh = rtnh_next(rtnh, &remaining);
	}

	/* for add and replace send one notification with all nexthops.
	 * Skip the notification in fib6_add_rt2node and send one with
	 * the full route when done
	 */
	info->skip_notify = 1;

	err_nh = NULL;
	list_for_each_entry(nh, &rt6_nh_list, next) {
		rt_last = nh->rt6_info;
		err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
		/* save reference to first route for notification */
		if (!rt_notif && !err)
			rt_notif = nh->rt6_info;

		/* nh->rt6_info is used or freed at this point, reset to NULL*/
		nh->rt6_info = NULL;
		if (err) {
			if (replace && nhn)
				ip6_print_replace_route_err(&rt6_nh_list);
			err_nh = nh;
			goto add_errout;
		}

		/* Because each route is added like a single route we remove
		 * these flags after the first nexthop: if there is a collision,
		 * we have already failed to add the first nexthop:
		 * fib6_add_rt2node() has rejected it; when replacing, old
		 * nexthops have been replaced by first new, the rest should
		 * be added to it.
		 */
		/* NOTE(review): nlh is dereferenced without a NULL check here,
		 * unlike the guarded uses above — presumably netlink callers
		 * always supply it; confirm before reusing this path.
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
						     NLM_F_REPLACE);
		nhn++;
	}

	/* success ... tell user about new route */
	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
	goto cleanup;

add_errout:
	/* send notification for routes that were added so that
	 * the delete notifications sent by ip6_route_del are
	 * coherent
	 */
	if (rt_notif)
		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);

	/* Delete routes that were already added */
	list_for_each_entry(nh, &rt6_nh_list, next) {
		if (err_nh == nh)
			break;
		ip6_route_del(&nh->r_cfg, extack);
	}

cleanup:
	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
		if (nh->rt6_info)
			dst_release_immediate(&nh->rt6_info->dst);
		kfree(nh->mxc.mx);
		list_del(&nh->next);
		kfree(nh);
	}

	return err;
}
3808 
/* Delete every nexthop of a multipath route, one rtnexthop at a time.
 * All entries are attempted even if some fail; the last error (if any)
 * is returned.
 */
static int ip6_route_multipath_del(struct fib6_config *cfg,
				   struct netlink_ext_ack *extack)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	int remaining;
	int attrlen;
	int err = 1, last_err = 0;

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry */
	while (rtnh_ok(rtnh, remaining)) {
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
		}
		err = ip6_route_del(&r_cfg, extack);
		if (err)
			last_err = err;

		rtnh = rtnh_next(rtnh, &remaining);
	}

	return last_err;
}
3846 
3847 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3848 			      struct netlink_ext_ack *extack)
3849 {
3850 	struct fib6_config cfg;
3851 	int err;
3852 
3853 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
3854 	if (err < 0)
3855 		return err;
3856 
3857 	if (cfg.fc_mp)
3858 		return ip6_route_multipath_del(&cfg, extack);
3859 	else {
3860 		cfg.fc_delete_all_nh = 1;
3861 		return ip6_route_del(&cfg, extack);
3862 	}
3863 }
3864 
3865 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3866 			      struct netlink_ext_ack *extack)
3867 {
3868 	struct fib6_config cfg;
3869 	int err;
3870 
3871 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
3872 	if (err < 0)
3873 		return err;
3874 
3875 	if (cfg.fc_mp)
3876 		return ip6_route_multipath_add(&cfg, extack);
3877 	else
3878 		return ip6_route_add(&cfg, extack);
3879 }
3880 
/* Worst-case netlink message size for one route notification, including
 * one RTA_MULTIPATH nexthop entry per sibling when the route is multipath.
 */
static size_t rt6_nlmsg_size(struct rt6_info *rt)
{
	int nexthop_len = 0;

	if (rt->rt6i_nsiblings) {
		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
			    + NLA_ALIGN(sizeof(struct rtnexthop))
			    + nla_total_size(16) /* RTA_GATEWAY */
			    + lwtunnel_get_encap_size(rt->dst.lwtstate);

		nexthop_len *= rt->rt6i_nsiblings;
	}

	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
	       + nla_total_size(sizeof(struct rta_cacheinfo))
	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
	       + nla_total_size(1) /* RTA_PREF */
	       + lwtunnel_get_encap_size(rt->dst.lwtstate)
	       + nexthop_len;
}
3910 
/* Emit the nexthop attributes (gateway, oif, encap) for @rt into @skb
 * and accumulate RTNH_F_* state into *@flags.  @skip_oif suppresses
 * RTA_OIF for multipath entries, where the ifindex lives in the
 * rtnexthop header instead.  Returns 0 or -EMSGSIZE.
 */
static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
			    unsigned int *flags, bool skip_oif)
{
	if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
		*flags |= RTNH_F_LINKDOWN;
		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
			*flags |= RTNH_F_DEAD;
	}

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
		*flags |= RTNH_F_OFFLOAD;

	/* not needed for multipath encoding b/c it has a rtnexthop struct */
	if (!skip_oif && rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;

	if (rt->dst.lwtstate &&
	    lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
3942 
3943 /* add multipath next hop */
/* Append one rtnexthop entry (header plus nested attributes) for @rt to
 * the RTA_MULTIPATH attribute being built in @skb.  Returns 0 or
 * -EMSGSIZE when the skb tailroom is exhausted.
 */
static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
{
	struct rtnexthop *rtnh;
	unsigned int flags = 0;

	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
	if (!rtnh)
		goto nla_put_failure;

	rtnh->rtnh_hops = 0;
	rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;

	/* skip_oif=true: ifindex already carried in the rtnexthop header */
	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
		goto nla_put_failure;

	rtnh->rtnh_flags = flags;

	/* length of rtnetlink header + attributes */
	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
3969 
/* Build one rtnetlink route message for @rt into @skb.
 *
 * @dst/@src: when non-NULL, report a full /128 match on that address
 *	instead of the route's own prefix (used for RTM_GETROUTE replies).
 * @iif: input interface index for input-path lookups, 0 otherwise.
 * @type/@portid/@seq/@flags: netlink header fields.
 *
 * Returns 0 on success or -EMSGSIZE; on failure the partially built
 * message is cancelled.
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags)
{
	u32 metrics[RTAX_MAX];
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* map the reject route's dst error back to its netlink route type */
	if (rt->rt6i_flags & RTF_REJECT) {
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->rt6i_flags & RTF_ANYCAST)
		rtm->rtm_type = RTN_ANYCAST;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		if (nla_put_in6_addr(skb, RTA_DST, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			/* err == 0 means the mroute code queued the reply
			 * itself; nothing more to add here.
			 */
			int err = ip6mr_get_route(net, skb, rtm, portid);

			if (err == 0)
				return 0;
			if (err < 0)
				goto nla_put_failure;
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	/* a cached PMTU overrides the stored RTAX_MTU metric */
	if (rt->rt6i_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	/* For multipath routes, walk the siblings list and add
	 * each as a nexthop within RTA_MULTIPATH.
	 */
	if (rt->rt6i_nsiblings) {
		struct rt6_info *sibling, *next_sibling;
		struct nlattr *mp;

		mp = nla_nest_start(skb, RTA_MULTIPATH);
		if (!mp)
			goto nla_put_failure;

		if (rt6_add_nexthop(skb, rt) < 0)
			goto nla_put_failure;

		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->rt6i_siblings, rt6i_siblings) {
			if (rt6_add_nexthop(skb, sibling) < 0)
				goto nla_put_failure;
		}

		nla_nest_end(skb, mp);
	} else {
		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
			goto nla_put_failure;
	}

	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
		goto nla_put_failure;


	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
4123 
/* fib6 dump callback for RTM_GETROUTE dumps: emit one route into the
 * dump skb.  Returns the rt6_fill_node() result; a positive return
 * skips the route without aborting the dump.
 */
int rt6_dump_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
	struct net *net = arg->net;

	if (rt == net->ipv6.ip6_null_entry)
		return 0;

	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);

		/* user wants prefix routes only */
		if (rtm->rtm_flags & RTM_F_PREFIX &&
		    !(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	return rt6_fill_node(net,
		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
		     NLM_F_MULTI);
}
4148 
/* RTM_GETROUTE handler: perform a route lookup described by the netlink
 * request and unicast the matching route back to the requester.  With
 * RTM_F_FIB_MATCH set, the FIB entry itself is reported rather than the
 * result of a full input/output lookup.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	int err, iif = 0, oif = 0;
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	bool fibmatch;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
			  extack);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));
	rtm = nlmsg_data(nlh);
	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_MARK])
		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);

	if (tb[RTA_UID])
		fl6.flowi6_uid = make_kuid(current_user_ns(),
					   nla_get_u32(tb[RTA_UID]));
	else
		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();

	if (iif) {
		/* input-path lookup: resolve the incoming device under RCU */
		struct net_device *dev;
		int flags = 0;

		rcu_read_lock();

		dev = dev_get_by_index_rcu(net, iif);
		if (!dev) {
			rcu_read_unlock();
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		if (!fibmatch)
			dst = ip6_route_input_lookup(net, dev, &fl6, flags);
		else
			dst = ip6_route_lookup(net, &fl6, 0);

		rcu_read_unlock();
	} else {
		fl6.flowi6_oif = oif;

		if (!fibmatch)
			dst = ip6_route_output(net, NULL, &fl6);
		else
			dst = ip6_route_lookup(net, &fl6, 0);
	}


	rt = container_of(dst, struct rt6_info, dst);
	if (rt->dst.error) {
		err = rt->dst.error;
		ip6_rt_put(rt);
		goto errout;
	}

	/* NOTE(review): defensive check — the null entry normally carries a
	 * dst.error and is caught above; confirm before removing.
	 */
	if (rt == net->ipv6.ip6_null_entry) {
		err = rt->dst.error;
		ip6_rt_put(rt);
		goto errout;
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		ip6_rt_put(rt);
		err = -ENOBUFS;
		goto errout;
	}

	/* skb now owns the rt reference via its dst */
	skb_dst_set(skb, &rt->dst);
	if (fibmatch)
		err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
				    nlh->nlmsg_seq, 0);
	else
		err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
				    nlh->nlmsg_seq, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
4274 
/* Broadcast an RTM_NEWROUTE/RTM_DELROUTE notification for @rt to the
 * RTNLGRP_IPV6_ROUTE multicast group.  On failure the group's socket
 * error is set instead.
 */
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
		     unsigned int nlm_flags)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	seq = info->nlh ? info->nlh->nlmsg_seq : 0;

	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
	if (!skb)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
				event, info->portid, seq, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
4305 
/* Netdevice notifier: attach the namespace's special routes (null /
 * prohibit / blackhole entries) to the loopback device on REGISTER and
 * release their idev references on UNREGISTER.
 */
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	/* only the loopback device hosts the special routes */
	if (!(dev->flags & IFF_LOOPBACK))
		return NOTIFY_OK;

	if (event == NETDEV_REGISTER) {
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	 } else if (event == NETDEV_UNREGISTER &&
		    dev->reg_state != NETREG_UNREGISTERED) {
		/* NETDEV_UNREGISTER could be fired for multiple times by
		 * netdev_wait_allrefs(). Make sure we only call this once.
		 */
		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
#endif
	}

	return NOTIFY_OK;
}
4338 
4339 /*
4340  *	/proc
4341  */
4342 
4343 #ifdef CONFIG_PROC_FS
4344 
/* File operations for /proc/net/ipv6_route: a netns-aware seq_file
 * dump of the IPv6 routing table (open routine defined elsewhere in
 * this file's proc support code).
 */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
4352 
4353 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4354 {
4355 	struct net *net = (struct net *)seq->private;
4356 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
4357 		   net->ipv6.rt6_stats->fib_nodes,
4358 		   net->ipv6.rt6_stats->fib_route_nodes,
4359 		   net->ipv6.rt6_stats->fib_rt_alloc,
4360 		   net->ipv6.rt6_stats->fib_rt_entries,
4361 		   net->ipv6.rt6_stats->fib_rt_cache,
4362 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
4363 		   net->ipv6.rt6_stats->fib_discarded_routes);
4364 
4365 	return 0;
4366 }
4367 
/* open() for /proc/net/rt6_stats: single-record, netns-aware seq file
 * backed by rt6_stats_seq_show().
 */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
4372 
/* File operations for /proc/net/rt6_stats (see rt6_stats_seq_show()). */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
4380 #endif	/* CONFIG_PROC_FS */
4381 
4382 #ifdef CONFIG_SYSCTL
4383 
4384 static
4385 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
4386 			      void __user *buffer, size_t *lenp, loff_t *ppos)
4387 {
4388 	struct net *net;
4389 	int delay;
4390 	if (!write)
4391 		return -EINVAL;
4392 
4393 	net = (struct net *)ctl->extra1;
4394 	delay = net->ipv6.sysctl.flush_delay;
4395 	proc_dointvec(ctl, write, buffer, lenp, ppos);
4396 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
4397 	return 0;
4398 }
4399 
/* Template for the per-netns net.ipv6.route.* sysctl table.  Each
 * netns gets a kmemdup()'d copy whose ->data pointers are repointed at
 * that netns' fields by ipv6_route_sysctl_init(); the &init_net
 * addresses here are placeholders.  "flush" is write-only (mode 0200)
 * and handled by ipv6_sysctl_rtcache_flush(); the *_jiffies and
 * *_ms_jiffies handlers convert between user-visible seconds/ms and
 * internal jiffies.
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* same backing field as gc_min_interval, exposed in ms */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
4473 
4474 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
4475 {
4476 	struct ctl_table *table;
4477 
4478 	table = kmemdup(ipv6_route_table_template,
4479 			sizeof(ipv6_route_table_template),
4480 			GFP_KERNEL);
4481 
4482 	if (table) {
4483 		table[0].data = &net->ipv6.sysctl.flush_delay;
4484 		table[0].extra1 = net;
4485 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
4486 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
4487 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
4488 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
4489 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
4490 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
4491 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
4492 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
4493 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
4494 
4495 		/* Don't export sysctls to unprivileged users */
4496 		if (net->user_ns != &init_user_ns)
4497 			table[0].procname = NULL;
4498 	}
4499 
4500 	return table;
4501 }
4502 #endif
4503 
/* Per-netns setup for the IPv6 routing engine: clone the dst_ops
 * template, allocate the special null (and, with multiple tables,
 * prohibit/blackhole) route entries, and seed the routing sysctl
 * defaults.
 *
 * Returns 0 on success or -ENOMEM; on failure the goto-cleanup chain
 * undoes any allocations completed so far, in reverse order.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	/* Special entries have no parent route: dst.path points back at
	 * the entry itself, and dst.ops at this netns' dst_ops copy.
	 */
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.fib6_has_custom_rules = false;
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* Defaults for the net.ipv6.route.* sysctls (see
	 * ipv6_route_table_template above).
	 */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
4576 
/* Per-netns teardown: free the special route entries allocated by
 * ip6_route_net_init() and release the dst entry counter.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
4586 
/* Late per-netns init: create the /proc/net/ipv6_route and
 * /proc/net/rt6_stats entries.  Creation failures are deliberately
 * ignored (proc entries are best-effort); always returns 0.
 */
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
#endif
	return 0;
}
4595 
/* Late per-netns teardown: remove the proc entries created by
 * ip6_route_net_init_late().
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
4603 
/* Core per-netns routing state (registered early in ip6_route_init()). */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
4608 
4609 static int __net_init ipv6_inetpeer_init(struct net *net)
4610 {
4611 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4612 
4613 	if (!bp)
4614 		return -ENOMEM;
4615 	inet_peer_base_init(bp);
4616 	net->ipv6.peers = bp;
4617 	return 0;
4618 }
4619 
4620 static void __net_exit ipv6_inetpeer_exit(struct net *net)
4621 {
4622 	struct inet_peer_base *bp = net->ipv6.peers;
4623 
4624 	net->ipv6.peers = NULL;
4625 	inetpeer_invalidate_tree(bp);
4626 	kfree(bp);
4627 }
4628 
/* Per-netns inet_peer base for IPv6 (registered before ip6_route_net_ops). */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
4633 
/* Late-stage per-netns ops: /proc entries, registered last in init. */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
4638 
/* Netdevice notifier for ip6_route_dev_notify().  Priority is set just
 * below addrconf's (notifier chains run higher priority first), so
 * addrconf sees the device event before this handler does.
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};
4643 
/* One-shot boot fixup: init_net's special route entries were allocated
 * before any netdevice existed, so attach them to the loopback device
 * here.  For other netns, ip6_route_dev_notify() does this on
 * NETDEV_REGISTER.
 */
void __init ip6_route_init_special_entries(void)
{
	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
}
4658 
/* Boot-time initialization of IPv6 routing: dst slab cache, blackhole
 * dst counter, pernet subsystems (inetpeer, core routing, late /proc),
 * fib6, xfrm6, policy rules, the rtnetlink route handlers, the device
 * notifier and the per-cpu uncached-route lists.
 *
 * Returns 0 on success or a negative errno; failures unwind every
 * prior registration in reverse order via the goto-cleanup chain.
 */
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	/* blackhole dsts share the regular rt6_info slab cache */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	/* __rtnl_register() reports failure only as "no buffers" */
	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
			    RTNL_FLAG_DOIT_UNLOCKED))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

	/* Set up the per-cpu lists used for uncached routes. */
	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
4740 
/* Subsystem teardown: unregister everything ip6_route_init() set up,
 * in reverse registration order.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
4753