xref: /openbmc/linux/net/ipv6/route.c (revision 711aab1d)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65 #include <trace/events/fib6.h>
66 
67 #include <linux/uaccess.h>
68 
69 #ifdef CONFIG_SYSCTL
70 #include <linux/sysctl.h>
71 #endif
72 
73 enum rt6_nud_state {
74 	RT6_NUD_FAIL_HARD = -3,
75 	RT6_NUD_FAIL_PROBE = -2,
76 	RT6_NUD_FAIL_DO_RR = -1,
77 	RT6_NUD_SUCCEED = 1
78 };
79 
80 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
81 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
82 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
83 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
84 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85 static void		ip6_dst_destroy(struct dst_entry *);
86 static void		ip6_dst_ifdown(struct dst_entry *,
87 				       struct net_device *dev, int how);
88 static int		 ip6_dst_gc(struct dst_ops *ops);
89 
90 static int		ip6_pkt_discard(struct sk_buff *skb);
91 static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
92 static int		ip6_pkt_prohibit(struct sk_buff *skb);
93 static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
94 static void		ip6_link_failure(struct sk_buff *skb);
95 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 					   struct sk_buff *skb, u32 mtu);
97 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 					struct sk_buff *skb);
99 static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
100 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
101 static size_t rt6_nlmsg_size(struct rt6_info *rt);
102 static int rt6_fill_node(struct net *net,
103 			 struct sk_buff *skb, struct rt6_info *rt,
104 			 struct in6_addr *dst, struct in6_addr *src,
105 			 int iif, int type, u32 portid, u32 seq,
106 			 unsigned int flags);
107 
108 #ifdef CONFIG_IPV6_ROUTE_INFO
109 static struct rt6_info *rt6_add_route_info(struct net *net,
110 					   const struct in6_addr *prefix, int prefixlen,
111 					   const struct in6_addr *gwaddr,
112 					   struct net_device *dev,
113 					   unsigned int pref);
114 static struct rt6_info *rt6_get_route_info(struct net *net,
115 					   const struct in6_addr *prefix, int prefixlen,
116 					   const struct in6_addr *gwaddr,
117 					   struct net_device *dev);
118 #endif
119 
120 struct uncached_list {
121 	spinlock_t		lock;
122 	struct list_head	head;
123 };
124 
125 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
126 
127 static void rt6_uncached_list_add(struct rt6_info *rt)
128 {
129 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
130 
131 	rt->rt6i_uncached_list = ul;
132 
133 	spin_lock_bh(&ul->lock);
134 	list_add_tail(&rt->rt6i_uncached, &ul->head);
135 	spin_unlock_bh(&ul->lock);
136 }
137 
138 static void rt6_uncached_list_del(struct rt6_info *rt)
139 {
140 	if (!list_empty(&rt->rt6i_uncached)) {
141 		struct uncached_list *ul = rt->rt6i_uncached_list;
142 
143 		spin_lock_bh(&ul->lock);
144 		list_del(&rt->rt6i_uncached);
145 		spin_unlock_bh(&ul->lock);
146 	}
147 }
148 
149 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
150 {
151 	struct net_device *loopback_dev = net->loopback_dev;
152 	int cpu;
153 
154 	if (dev == loopback_dev)
155 		return;
156 
157 	for_each_possible_cpu(cpu) {
158 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
159 		struct rt6_info *rt;
160 
161 		spin_lock_bh(&ul->lock);
162 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
163 			struct inet6_dev *rt_idev = rt->rt6i_idev;
164 			struct net_device *rt_dev = rt->dst.dev;
165 
166 			if (rt_idev->dev == dev) {
167 				rt->rt6i_idev = in6_dev_get(loopback_dev);
168 				in6_dev_put(rt_idev);
169 			}
170 
171 			if (rt_dev == dev) {
172 				rt->dst.dev = loopback_dev;
173 				dev_hold(rt->dst.dev);
174 				dev_put(rt_dev);
175 			}
176 		}
177 		spin_unlock_bh(&ul->lock);
178 	}
179 }
180 
181 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
182 {
183 	return dst_metrics_write_ptr(rt->dst.from);
184 }
185 
186 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
187 {
188 	struct rt6_info *rt = (struct rt6_info *)dst;
189 
190 	if (rt->rt6i_flags & RTF_PCPU)
191 		return rt6_pcpu_cow_metrics(rt);
192 	else if (rt->rt6i_flags & RTF_CACHE)
193 		return NULL;
194 	else
195 		return dst_cow_metrics_generic(dst, old);
196 }
197 
198 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
199 					     struct sk_buff *skb,
200 					     const void *daddr)
201 {
202 	struct in6_addr *p = &rt->rt6i_gateway;
203 
204 	if (!ipv6_addr_any(p))
205 		return (const void *) p;
206 	else if (skb)
207 		return &ipv6_hdr(skb)->daddr;
208 	return daddr;
209 }
210 
211 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
212 					  struct sk_buff *skb,
213 					  const void *daddr)
214 {
215 	struct rt6_info *rt = (struct rt6_info *) dst;
216 	struct neighbour *n;
217 
218 	daddr = choose_neigh_daddr(rt, skb, daddr);
219 	n = __ipv6_neigh_lookup(dst->dev, daddr);
220 	if (n)
221 		return n;
222 	return neigh_create(&nd_tbl, daddr, dst->dev);
223 }
224 
225 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
226 {
227 	struct net_device *dev = dst->dev;
228 	struct rt6_info *rt = (struct rt6_info *)dst;
229 
230 	daddr = choose_neigh_daddr(rt, NULL, daddr);
231 	if (!daddr)
232 		return;
233 	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
234 		return;
235 	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
236 		return;
237 	__ipv6_confirm_neigh(dev, daddr);
238 }
239 
240 static struct dst_ops ip6_dst_ops_template = {
241 	.family			=	AF_INET6,
242 	.gc			=	ip6_dst_gc,
243 	.gc_thresh		=	1024,
244 	.check			=	ip6_dst_check,
245 	.default_advmss		=	ip6_default_advmss,
246 	.mtu			=	ip6_mtu,
247 	.cow_metrics		=	ipv6_cow_metrics,
248 	.destroy		=	ip6_dst_destroy,
249 	.ifdown			=	ip6_dst_ifdown,
250 	.negative_advice	=	ip6_negative_advice,
251 	.link_failure		=	ip6_link_failure,
252 	.update_pmtu		=	ip6_rt_update_pmtu,
253 	.redirect		=	rt6_do_redirect,
254 	.local_out		=	__ip6_local_out,
255 	.neigh_lookup		=	ip6_neigh_lookup,
256 	.confirm_neigh		=	ip6_confirm_neigh,
257 };
258 
259 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
260 {
261 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
262 
263 	return mtu ? : dst->dev->mtu;
264 }
265 
266 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
267 					 struct sk_buff *skb, u32 mtu)
268 {
269 }
270 
271 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
272 				      struct sk_buff *skb)
273 {
274 }
275 
276 static struct dst_ops ip6_dst_blackhole_ops = {
277 	.family			=	AF_INET6,
278 	.destroy		=	ip6_dst_destroy,
279 	.check			=	ip6_dst_check,
280 	.mtu			=	ip6_blackhole_mtu,
281 	.default_advmss		=	ip6_default_advmss,
282 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
283 	.redirect		=	ip6_rt_blackhole_redirect,
284 	.cow_metrics		=	dst_cow_metrics_generic,
285 	.neigh_lookup		=	ip6_neigh_lookup,
286 };
287 
288 static const u32 ip6_template_metrics[RTAX_MAX] = {
289 	[RTAX_HOPLIMIT - 1] = 0,
290 };
291 
292 static const struct rt6_info ip6_null_entry_template = {
293 	.dst = {
294 		.__refcnt	= ATOMIC_INIT(1),
295 		.__use		= 1,
296 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
297 		.error		= -ENETUNREACH,
298 		.input		= ip6_pkt_discard,
299 		.output		= ip6_pkt_discard_out,
300 	},
301 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
302 	.rt6i_protocol  = RTPROT_KERNEL,
303 	.rt6i_metric	= ~(u32) 0,
304 	.rt6i_ref	= ATOMIC_INIT(1),
305 };
306 
307 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
308 
309 static const struct rt6_info ip6_prohibit_entry_template = {
310 	.dst = {
311 		.__refcnt	= ATOMIC_INIT(1),
312 		.__use		= 1,
313 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
314 		.error		= -EACCES,
315 		.input		= ip6_pkt_prohibit,
316 		.output		= ip6_pkt_prohibit_out,
317 	},
318 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
319 	.rt6i_protocol  = RTPROT_KERNEL,
320 	.rt6i_metric	= ~(u32) 0,
321 	.rt6i_ref	= ATOMIC_INIT(1),
322 };
323 
324 static const struct rt6_info ip6_blk_hole_entry_template = {
325 	.dst = {
326 		.__refcnt	= ATOMIC_INIT(1),
327 		.__use		= 1,
328 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
329 		.error		= -EINVAL,
330 		.input		= dst_discard,
331 		.output		= dst_discard_out,
332 	},
333 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
334 	.rt6i_protocol  = RTPROT_KERNEL,
335 	.rt6i_metric	= ~(u32) 0,
336 	.rt6i_ref	= ATOMIC_INIT(1),
337 };
338 
339 #endif
340 
341 static void rt6_info_init(struct rt6_info *rt)
342 {
343 	struct dst_entry *dst = &rt->dst;
344 
345 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
346 	INIT_LIST_HEAD(&rt->rt6i_siblings);
347 	INIT_LIST_HEAD(&rt->rt6i_uncached);
348 }
349 
350 /* allocate dst with ip6_dst_ops */
351 static struct rt6_info *__ip6_dst_alloc(struct net *net,
352 					struct net_device *dev,
353 					int flags)
354 {
355 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
356 					1, DST_OBSOLETE_FORCE_CHK, flags);
357 
358 	if (rt)
359 		rt6_info_init(rt);
360 
361 	return rt;
362 }
363 
364 struct rt6_info *ip6_dst_alloc(struct net *net,
365 			       struct net_device *dev,
366 			       int flags)
367 {
368 	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
369 
370 	if (rt) {
371 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
372 		if (rt->rt6i_pcpu) {
373 			int cpu;
374 
375 			for_each_possible_cpu(cpu) {
376 				struct rt6_info **p;
377 
378 				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
379 				/* no one shares rt */
380 				*p = NULL;
381 			}
382 		} else {
383 			dst_release_immediate(&rt->dst);
384 			return NULL;
385 		}
386 	}
387 
388 	return rt;
389 }
390 EXPORT_SYMBOL(ip6_dst_alloc);
391 
392 static void ip6_dst_destroy(struct dst_entry *dst)
393 {
394 	struct rt6_info *rt = (struct rt6_info *)dst;
395 	struct dst_entry *from = dst->from;
396 	struct inet6_dev *idev;
397 
398 	dst_destroy_metrics_generic(dst);
399 	free_percpu(rt->rt6i_pcpu);
400 	rt6_uncached_list_del(rt);
401 
402 	idev = rt->rt6i_idev;
403 	if (idev) {
404 		rt->rt6i_idev = NULL;
405 		in6_dev_put(idev);
406 	}
407 
408 	dst->from = NULL;
409 	dst_release(from);
410 }
411 
412 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
413 			   int how)
414 {
415 	struct rt6_info *rt = (struct rt6_info *)dst;
416 	struct inet6_dev *idev = rt->rt6i_idev;
417 	struct net_device *loopback_dev =
418 		dev_net(dev)->loopback_dev;
419 
420 	if (idev && idev->dev != loopback_dev) {
421 		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
422 		if (loopback_idev) {
423 			rt->rt6i_idev = loopback_idev;
424 			in6_dev_put(idev);
425 		}
426 	}
427 }
428 
429 static bool __rt6_check_expired(const struct rt6_info *rt)
430 {
431 	if (rt->rt6i_flags & RTF_EXPIRES)
432 		return time_after(jiffies, rt->dst.expires);
433 	else
434 		return false;
435 }
436 
437 static bool rt6_check_expired(const struct rt6_info *rt)
438 {
439 	if (rt->rt6i_flags & RTF_EXPIRES) {
440 		if (time_after(jiffies, rt->dst.expires))
441 			return true;
442 	} else if (rt->dst.from) {
443 		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
444 		       rt6_check_expired((struct rt6_info *)rt->dst.from);
445 	}
446 	return false;
447 }
448 
449 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
450 					     struct flowi6 *fl6, int oif,
451 					     int strict)
452 {
453 	struct rt6_info *sibling, *next_sibling;
454 	int route_chosen;
455 
456 	/* We might have already computed the hash for ICMPv6 errors. In such
457 	 * a case it will always be non-zero. Otherwise now is the time to do it.
458 	 */
459 	if (!fl6->mp_hash)
460 		fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
461 
462 	route_chosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
463 	/* Don't change the route if route_chosen == 0
464 	 * (the sibling list does not include 'match' itself).
465 	 */
466 	if (route_chosen)
467 		list_for_each_entry_safe(sibling, next_sibling,
468 				&match->rt6i_siblings, rt6i_siblings) {
469 			route_chosen--;
470 			if (route_chosen == 0) {
471 				if (rt6_score_route(sibling, oif, strict) < 0)
472 					break;
473 				match = sibling;
474 				break;
475 			}
476 		}
477 	return match;
478 }
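
/* Illustrative sketch (values hypothetical): with rt6i_nsiblings == 2 the
 * modulus is 3, so mp_hash selects among 'match' and its two siblings:
 *
 *	fl6->mp_hash = 7;	7 % 3 == 1 -> walk one entry into
 *				match->rt6i_siblings and use it (if its
 *				rt6_score_route() is non-negative);
 *	fl6->mp_hash = 6;	6 % 3 == 0 -> keep 'match' itself.
 */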
479 
480 /*
481  *	Route lookup. The relevant table->tb6_lock is assumed to be held.
482  */
483 
484 static inline struct rt6_info *rt6_device_match(struct net *net,
485 						    struct rt6_info *rt,
486 						    const struct in6_addr *saddr,
487 						    int oif,
488 						    int flags)
489 {
490 	struct rt6_info *local = NULL;
491 	struct rt6_info *sprt;
492 
493 	if (!oif && ipv6_addr_any(saddr))
494 		goto out;
495 
496 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
497 		struct net_device *dev = sprt->dst.dev;
498 
499 		if (oif) {
500 			if (dev->ifindex == oif)
501 				return sprt;
502 			if (dev->flags & IFF_LOOPBACK) {
503 				if (!sprt->rt6i_idev ||
504 				    sprt->rt6i_idev->dev->ifindex != oif) {
505 					if (flags & RT6_LOOKUP_F_IFACE)
506 						continue;
507 					if (local &&
508 					    local->rt6i_idev->dev->ifindex == oif)
509 						continue;
510 				}
511 				local = sprt;
512 			}
513 		} else {
514 			if (ipv6_chk_addr(net, saddr, dev,
515 					  flags & RT6_LOOKUP_F_IFACE))
516 				return sprt;
517 		}
518 	}
519 
520 	if (oif) {
521 		if (local)
522 			return local;
523 
524 		if (flags & RT6_LOOKUP_F_IFACE)
525 			return net->ipv6.ip6_null_entry;
526 	}
527 out:
528 	return rt;
529 }
530 
531 #ifdef CONFIG_IPV6_ROUTER_PREF
532 struct __rt6_probe_work {
533 	struct work_struct work;
534 	struct in6_addr target;
535 	struct net_device *dev;
536 };
537 
538 static void rt6_probe_deferred(struct work_struct *w)
539 {
540 	struct in6_addr mcaddr;
541 	struct __rt6_probe_work *work =
542 		container_of(w, struct __rt6_probe_work, work);
543 
544 	addrconf_addr_solict_mult(&work->target, &mcaddr);
545 	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
546 	dev_put(work->dev);
547 	kfree(work);
548 }
549 
550 static void rt6_probe(struct rt6_info *rt)
551 {
552 	struct __rt6_probe_work *work;
553 	struct neighbour *neigh;
554 	/*
555 	 * Okay, this does not seem to be appropriate for now;
556 	 * however, we need to check whether it really is,
557 	 * a.k.a. Router Reachability Probing.
558 	 *
559 	 * A Router Reachability Probe MUST be rate-limited
560 	 * to no more than one per minute.
561 	 */
562 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
563 		return;
564 	rcu_read_lock_bh();
565 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
566 	if (neigh) {
567 		if (neigh->nud_state & NUD_VALID)
568 			goto out;
569 
570 		work = NULL;
571 		write_lock(&neigh->lock);
572 		if (!(neigh->nud_state & NUD_VALID) &&
573 		    time_after(jiffies,
574 			       neigh->updated +
575 			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
576 			work = kmalloc(sizeof(*work), GFP_ATOMIC);
577 			if (work)
578 				__neigh_set_probe_once(neigh);
579 		}
580 		write_unlock(&neigh->lock);
581 	} else {
582 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
583 	}
584 
585 	if (work) {
586 		INIT_WORK(&work->work, rt6_probe_deferred);
587 		work->target = rt->rt6i_gateway;
588 		dev_hold(rt->dst.dev);
589 		work->dev = rt->dst.dev;
590 		schedule_work(&work->work);
591 	}
592 
593 out:
594 	rcu_read_unlock_bh();
595 }
596 #else
597 static inline void rt6_probe(struct rt6_info *rt)
598 {
599 }
600 #endif
601 
602 /*
603  * Default Router Selection (RFC 2461 6.3.6)
604  */
605 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
606 {
607 	struct net_device *dev = rt->dst.dev;
608 	if (!oif || dev->ifindex == oif)
609 		return 2;
610 	if ((dev->flags & IFF_LOOPBACK) &&
611 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
612 		return 1;
613 	return 0;
614 }
615 
616 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
617 {
618 	struct neighbour *neigh;
619 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
620 
621 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
622 	    !(rt->rt6i_flags & RTF_GATEWAY))
623 		return RT6_NUD_SUCCEED;
624 
625 	rcu_read_lock_bh();
626 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
627 	if (neigh) {
628 		read_lock(&neigh->lock);
629 		if (neigh->nud_state & NUD_VALID)
630 			ret = RT6_NUD_SUCCEED;
631 #ifdef CONFIG_IPV6_ROUTER_PREF
632 		else if (!(neigh->nud_state & NUD_FAILED))
633 			ret = RT6_NUD_SUCCEED;
634 		else
635 			ret = RT6_NUD_FAIL_PROBE;
636 #endif
637 		read_unlock(&neigh->lock);
638 	} else {
639 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
640 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
641 	}
642 	rcu_read_unlock_bh();
643 
644 	return ret;
645 }
646 
647 static int rt6_score_route(struct rt6_info *rt, int oif,
648 			   int strict)
649 {
650 	int m;
651 
652 	m = rt6_check_dev(rt, oif);
653 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
654 		return RT6_NUD_FAIL_HARD;
655 #ifdef CONFIG_IPV6_ROUTER_PREF
656 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
657 #endif
658 	if (strict & RT6_LOOKUP_F_REACHABLE) {
659 		int n = rt6_check_neigh(rt);
660 		if (n < 0)
661 			return n;
662 	}
663 	return m;
664 }
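
/* Worked example (assuming the router-preference macros decode medium
 * preference to 2): a gateway route whose device matches oif gets
 * m = 2 from rt6_check_dev(), ORed with the decoded preference shifted
 * left by two, i.e. m = 2 | (2 << 2) = 10.  A reachability failure
 * under RT6_LOOKUP_F_REACHABLE overrides this with a negative
 * RT6_NUD_* value instead.
 */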
665 
666 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
667 				   int *mpri, struct rt6_info *match,
668 				   bool *do_rr)
669 {
670 	int m;
671 	bool match_do_rr = false;
672 	struct inet6_dev *idev = rt->rt6i_idev;
673 	struct net_device *dev = rt->dst.dev;
674 
675 	if (dev && !netif_carrier_ok(dev) &&
676 	    idev->cnf.ignore_routes_with_linkdown &&
677 	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
678 		goto out;
679 
680 	if (rt6_check_expired(rt))
681 		goto out;
682 
683 	m = rt6_score_route(rt, oif, strict);
684 	if (m == RT6_NUD_FAIL_DO_RR) {
685 		match_do_rr = true;
686 		m = 0; /* lowest valid score */
687 	} else if (m == RT6_NUD_FAIL_HARD) {
688 		goto out;
689 	}
690 
691 	if (strict & RT6_LOOKUP_F_REACHABLE)
692 		rt6_probe(rt);
693 
694 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
695 	if (m > *mpri) {
696 		*do_rr = match_do_rr;
697 		*mpri = m;
698 		match = rt;
699 	}
700 out:
701 	return match;
702 }
703 
704 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
705 				     struct rt6_info *rr_head,
706 				     u32 metric, int oif, int strict,
707 				     bool *do_rr)
708 {
709 	struct rt6_info *rt, *match, *cont;
710 	int mpri = -1;
711 
712 	match = NULL;
713 	cont = NULL;
714 	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
715 		if (rt->rt6i_metric != metric) {
716 			cont = rt;
717 			break;
718 		}
719 
720 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
721 	}
722 
723 	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
724 		if (rt->rt6i_metric != metric) {
725 			cont = rt;
726 			break;
727 		}
728 
729 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
730 	}
731 
732 	if (match || !cont)
733 		return match;
734 
735 	for (rt = cont; rt; rt = rt->dst.rt6_next)
736 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
737 
738 	return match;
739 }
740 
741 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
742 {
743 	struct rt6_info *match, *rt0;
744 	struct net *net;
745 	bool do_rr = false;
746 
747 	rt0 = fn->rr_ptr;
748 	if (!rt0)
749 		fn->rr_ptr = rt0 = fn->leaf;
750 
751 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
752 			     &do_rr);
753 
754 	if (do_rr) {
755 		struct rt6_info *next = rt0->dst.rt6_next;
756 
757 		/* no entries matched; do round-robin */
758 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
759 			next = fn->leaf;
760 
761 		if (next != rt0)
762 			fn->rr_ptr = next;
763 	}
764 
765 	net = dev_net(rt0->dst.dev);
766 	return match ? match : net->ipv6.ip6_null_entry;
767 }
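
/* Round-robin sketch: with three equal-metric default routes A -> B -> C
 * on fn's leaf list and rr_ptr currently at A, a do_rr result advances
 * rr_ptr to B, so the next lookup starts scoring from B.  When the end
 * of the equal-metric run is reached, rr_ptr wraps back to fn->leaf.
 */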
768 
769 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
770 {
771 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
772 }
773 
774 #ifdef CONFIG_IPV6_ROUTE_INFO
775 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
776 		  const struct in6_addr *gwaddr)
777 {
778 	struct net *net = dev_net(dev);
779 	struct route_info *rinfo = (struct route_info *) opt;
780 	struct in6_addr prefix_buf, *prefix;
781 	unsigned int pref;
782 	unsigned long lifetime;
783 	struct rt6_info *rt;
784 
785 	if (len < sizeof(struct route_info)) {
786 		return -EINVAL;
787 	}
788 
789 	/* Sanity check for prefix_len and length */
790 	if (rinfo->length > 3) {
791 		return -EINVAL;
792 	} else if (rinfo->prefix_len > 128) {
793 		return -EINVAL;
794 	} else if (rinfo->prefix_len > 64) {
795 		if (rinfo->length < 2) {
796 			return -EINVAL;
797 		}
798 	} else if (rinfo->prefix_len > 0) {
799 		if (rinfo->length < 1) {
800 			return -EINVAL;
801 		}
802 	}
803 
804 	pref = rinfo->route_pref;
805 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
806 		return -EINVAL;
807 
808 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
809 
810 	if (rinfo->length == 3)
811 		prefix = (struct in6_addr *)rinfo->prefix;
812 	else {
813 		/* ipv6_addr_prefix() copies just prefix_len bits and zeroes the rest */
814 		ipv6_addr_prefix(&prefix_buf,
815 				 (struct in6_addr *)rinfo->prefix,
816 				 rinfo->prefix_len);
817 		prefix = &prefix_buf;
818 	}
819 
820 	if (rinfo->prefix_len == 0)
821 		rt = rt6_get_dflt_router(gwaddr, dev);
822 	else
823 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
824 					gwaddr, dev);
825 
826 	if (rt && !lifetime) {
827 		ip6_del_rt(rt);
828 		rt = NULL;
829 	}
830 
831 	if (!rt && lifetime)
832 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
833 					dev, pref);
834 	else if (rt)
835 		rt->rt6i_flags = RTF_ROUTEINFO |
836 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
837 
838 	if (rt) {
839 		if (!addrconf_finite_timeout(lifetime))
840 			rt6_clean_expires(rt);
841 		else
842 			rt6_set_expires(rt, jiffies + HZ * lifetime);
843 
844 		ip6_rt_put(rt);
845 	}
846 	return 0;
847 }
848 #endif
849 
850 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
851 					struct in6_addr *saddr)
852 {
853 	struct fib6_node *pn;
854 	while (1) {
855 		if (fn->fn_flags & RTN_TL_ROOT)
856 			return NULL;
857 		pn = fn->parent;
858 		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
859 			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
860 		else
861 			fn = pn;
862 		if (fn->fn_flags & RTN_RTINFO)
863 			return fn;
864 	}
865 }
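
/* Backtracking example (addresses illustrative): looking up
 * 2001:db8:1::1 may land in the fib6 node for 2001:db8:1::/64.  If no
 * usable route is found there, fib6_backtrack() walks up towards less
 * specific parents (e.g. 2001:db8::/32, then ::/0), re-descending into
 * any source-address subtree (FIB6_SUBTREE) it passes, until a node
 * carrying routes (RTN_RTINFO) or the tree root is reached.
 */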
866 
867 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
868 					     struct fib6_table *table,
869 					     struct flowi6 *fl6, int flags)
870 {
871 	struct fib6_node *fn;
872 	struct rt6_info *rt;
873 
874 	read_lock_bh(&table->tb6_lock);
875 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
876 restart:
877 	rt = fn->leaf;
878 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
879 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
880 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
881 	if (rt == net->ipv6.ip6_null_entry) {
882 		fn = fib6_backtrack(fn, &fl6->saddr);
883 		if (fn)
884 			goto restart;
885 	}
886 	dst_use(&rt->dst, jiffies);
887 	read_unlock_bh(&table->tb6_lock);
888 
889 	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
890 
891 	return rt;
893 }
894 
895 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
896 				    int flags)
897 {
898 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
899 }
900 EXPORT_SYMBOL_GPL(ip6_route_lookup);
901 
902 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
903 			    const struct in6_addr *saddr, int oif, int strict)
904 {
905 	struct flowi6 fl6 = {
906 		.flowi6_oif = oif,
907 		.daddr = *daddr,
908 	};
909 	struct dst_entry *dst;
910 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
911 
912 	if (saddr) {
913 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
914 		flags |= RT6_LOOKUP_F_HAS_SADDR;
915 	}
916 
917 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
918 	if (dst->error == 0)
919 		return (struct rt6_info *) dst;
920 
921 	dst_release(dst);
922 
923 	return NULL;
924 }
925 EXPORT_SYMBOL(rt6_lookup);
926 
927 /* ip6_ins_rt is called with table->tb6_lock NOT held.
928  * It takes a new route entry; if the addition fails for any reason,
929  * the route is released.
930  * The caller must hold a dst reference before calling it.
931  */
932 
933 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
934 			struct mx6_config *mxc,
935 			struct netlink_ext_ack *extack)
936 {
937 	int err;
938 	struct fib6_table *table;
939 
940 	table = rt->rt6i_table;
941 	write_lock_bh(&table->tb6_lock);
942 	err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
943 	write_unlock_bh(&table->tb6_lock);
944 
945 	return err;
946 }
947 
948 int ip6_ins_rt(struct rt6_info *rt)
949 {
950 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
951 	struct mx6_config mxc = { .mx = NULL, };
952 
953 	/* Hold dst to account for the reference from the fib6 tree */
954 	dst_hold(&rt->dst);
955 	return __ip6_ins_rt(rt, &info, &mxc, NULL);
956 }
957 
958 /* Called with rcu_read_lock held */
959 static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
960 {
961 	struct net_device *dev = rt->dst.dev;
962 
963 		/* For copies of local routes, dst->dev needs to be the device
964 		 * itself if it is a master device, the master device if the
965 		 * device is enslaved, and the loopback device by default.
966 		 */
967 		 */
968 		if (netif_is_l3_slave(dev) &&
969 		    !rt6_need_strict(&rt->rt6i_dst.addr))
970 			dev = l3mdev_master_dev_rcu(dev);
971 		else if (!netif_is_l3_master(dev))
972 		/* The last case is netif_is_l3_master(dev) being true, in
973 		 * which case we want dev returned unchanged.
974 		 */
975 		 */
976 	}
977 
978 	return dev;
979 }
980 
981 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
982 					   const struct in6_addr *daddr,
983 					   const struct in6_addr *saddr)
984 {
985 	struct net_device *dev;
986 	struct rt6_info *rt;
987 
988 	/*
989 	 *	Clone the route.
990 	 */
991 
992 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
993 		ort = (struct rt6_info *)ort->dst.from;
994 
995 	rcu_read_lock();
996 	dev = ip6_rt_get_dev_rcu(ort);
997 	rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
998 	rcu_read_unlock();
999 	if (!rt)
1000 		return NULL;
1001 
1002 	ip6_rt_copy_init(rt, ort);
1003 	rt->rt6i_flags |= RTF_CACHE;
1004 	rt->rt6i_metric = 0;
1005 	rt->dst.flags |= DST_HOST;
1006 	rt->rt6i_dst.addr = *daddr;
1007 	rt->rt6i_dst.plen = 128;
1008 
1009 	if (!rt6_is_gw_or_nonexthop(ort)) {
1010 		if (ort->rt6i_dst.plen != 128 &&
1011 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1012 			rt->rt6i_flags |= RTF_ANYCAST;
1013 #ifdef CONFIG_IPV6_SUBTREES
1014 		if (rt->rt6i_src.plen && saddr) {
1015 			rt->rt6i_src.addr = *saddr;
1016 			rt->rt6i_src.plen = 128;
1017 		}
1018 #endif
1019 	}
1020 
1021 	return rt;
1022 }
1023 
1024 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1025 {
1026 	struct net_device *dev;
1027 	struct rt6_info *pcpu_rt;
1028 
1029 	rcu_read_lock();
1030 	dev = ip6_rt_get_dev_rcu(rt);
1031 	pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
1032 	rcu_read_unlock();
1033 	if (!pcpu_rt)
1034 		return NULL;
1035 	ip6_rt_copy_init(pcpu_rt, rt);
1036 	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1037 	pcpu_rt->rt6i_flags |= RTF_PCPU;
1038 	return pcpu_rt;
1039 }
1040 
1041 /* It should be called with read_lock_bh(&tb6_lock) acquired */
1042 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1043 {
1044 	struct rt6_info *pcpu_rt, **p;
1045 
1046 	p = this_cpu_ptr(rt->rt6i_pcpu);
1047 	pcpu_rt = *p;
1048 
1049 	if (pcpu_rt) {
1050 		dst_hold(&pcpu_rt->dst);
1051 		rt6_dst_from_metrics_check(pcpu_rt);
1052 	}
1053 	return pcpu_rt;
1054 }
1055 
1056 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1057 {
1058 	struct fib6_table *table = rt->rt6i_table;
1059 	struct rt6_info *pcpu_rt, *prev, **p;
1060 
1061 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1062 	if (!pcpu_rt) {
1063 		struct net *net = dev_net(rt->dst.dev);
1064 
1065 		dst_hold(&net->ipv6.ip6_null_entry->dst);
1066 		return net->ipv6.ip6_null_entry;
1067 	}
1068 
1069 	read_lock_bh(&table->tb6_lock);
1070 	if (rt->rt6i_pcpu) {
1071 		p = this_cpu_ptr(rt->rt6i_pcpu);
1072 		prev = cmpxchg(p, NULL, pcpu_rt);
1073 		if (prev) {
1074 			/* If someone did it before us, return prev instead */
1075 			dst_release_immediate(&pcpu_rt->dst);
1076 			pcpu_rt = prev;
1077 		}
1078 	} else {
1079 		/* rt has been removed from the fib6 tree
1080 		 * before we have a chance to acquire the read_lock.
1081 		 * In this case, don't bother to create a pcpu rt
1082 		 * since rt is going away anyway.  The next
1083 		 * dst_check() will trigger a re-lookup.
1084 		 */
1085 		dst_release_immediate(&pcpu_rt->dst);
1086 		pcpu_rt = rt;
1087 	}
1088 	dst_hold(&pcpu_rt->dst);
1089 	rt6_dst_from_metrics_check(pcpu_rt);
1090 	read_unlock_bh(&table->tb6_lock);
1091 	return pcpu_rt;
1092 }
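
/* The cmpxchg() above closes the race in which this CPU's pcpu slot is
 * filled concurrently (e.g. by a lookup running in softirq context
 * preempting one in process context): only one caller installs its
 * clone; the loser releases its allocation and reuses 'prev'.
 */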
1093 
1094 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1095 			       int oif, struct flowi6 *fl6, int flags)
1096 {
1097 	struct fib6_node *fn, *saved_fn;
1098 	struct rt6_info *rt;
1099 	int strict = 0;
1100 
1101 	strict |= flags & RT6_LOOKUP_F_IFACE;
1102 	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1103 	if (net->ipv6.devconf_all->forwarding == 0)
1104 		strict |= RT6_LOOKUP_F_REACHABLE;
1105 
1106 	read_lock_bh(&table->tb6_lock);
1107 
1108 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1109 	saved_fn = fn;
1110 
1111 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1112 		oif = 0;
1113 
1114 redo_rt6_select:
1115 	rt = rt6_select(fn, oif, strict);
1116 	if (rt->rt6i_nsiblings)
1117 		rt = rt6_multipath_select(rt, fl6, oif, strict);
1118 	if (rt == net->ipv6.ip6_null_entry) {
1119 		fn = fib6_backtrack(fn, &fl6->saddr);
1120 		if (fn)
1121 			goto redo_rt6_select;
1122 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1123 			/* also consider unreachable route */
1124 			strict &= ~RT6_LOOKUP_F_REACHABLE;
1125 			fn = saved_fn;
1126 			goto redo_rt6_select;
1127 		}
1128 	}
1129 
1131 	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1132 		dst_use(&rt->dst, jiffies);
1133 		read_unlock_bh(&table->tb6_lock);
1134 
1135 		rt6_dst_from_metrics_check(rt);
1136 
1137 		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1138 		return rt;
1139 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1140 			    !(rt->rt6i_flags & RTF_GATEWAY))) {
1141 		/* Create a RTF_CACHE clone which will not be
1142 		 * owned by the fib6 tree.  It is for the special case where
1143 		 * the daddr in the skb during the neighbor look-up is different
1144 		 * from the fl6->daddr used to look up the route here.
1145 		 */
1146 
1147 		struct rt6_info *uncached_rt;
1148 
1149 		dst_use(&rt->dst, jiffies);
1150 		read_unlock_bh(&table->tb6_lock);
1151 
1152 		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1153 		dst_release(&rt->dst);
1154 
1155 		if (uncached_rt) {
1156 			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1157 			 * No need for another dst_hold()
1158 			 */
1159 			rt6_uncached_list_add(uncached_rt);
1160 		} else {
1161 			uncached_rt = net->ipv6.ip6_null_entry;
1162 			dst_hold(&uncached_rt->dst);
1163 		}
1164 
1165 		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
1166 		return uncached_rt;
1167 
1168 	} else {
1169 		/* Get a percpu copy */
1170 
1171 		struct rt6_info *pcpu_rt;
1172 
1173 		rt->dst.lastuse = jiffies;
1174 		rt->dst.__use++;
1175 		pcpu_rt = rt6_get_pcpu_route(rt);
1176 
1177 		if (pcpu_rt) {
1178 			read_unlock_bh(&table->tb6_lock);
1179 		} else {
1180 			/* We have to do the read_unlock first
1181 			 * because rt6_make_pcpu_route() may trigger
1182 			 * ip6_dst_gc() which will take the write_lock.
1183 			 */
1184 			dst_hold(&rt->dst);
1185 			read_unlock_bh(&table->tb6_lock);
1186 			pcpu_rt = rt6_make_pcpu_route(rt);
1187 			dst_release(&rt->dst);
1188 		}
1189 
1190 		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
1191 		return pcpu_rt;
1192 
1193 	}
1194 }
1195 EXPORT_SYMBOL_GPL(ip6_pol_route);
1196 
1197 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1198 					    struct flowi6 *fl6, int flags)
1199 {
1200 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1201 }
1202 
1203 struct dst_entry *ip6_route_input_lookup(struct net *net,
1204 					 struct net_device *dev,
1205 					 struct flowi6 *fl6, int flags)
1206 {
1207 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1208 		flags |= RT6_LOOKUP_F_IFACE;
1209 
1210 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1211 }
1212 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1213 
1214 static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1215 				  struct flow_keys *keys)
1216 {
1217 	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1218 	const struct ipv6hdr *key_iph = outer_iph;
1219 	const struct ipv6hdr *inner_iph;
1220 	const struct icmp6hdr *icmph;
1221 	struct ipv6hdr _inner_iph;
1222 
1223 	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1224 		goto out;
1225 
1226 	icmph = icmp6_hdr(skb);
1227 	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1228 	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1229 	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1230 	    icmph->icmp6_type != ICMPV6_PARAMPROB)
1231 		goto out;
1232 
1233 	inner_iph = skb_header_pointer(skb,
1234 				       skb_transport_offset(skb) + sizeof(*icmph),
1235 				       sizeof(_inner_iph), &_inner_iph);
1236 	if (!inner_iph)
1237 		goto out;
1238 
1239 	key_iph = inner_iph;
1240 out:
1241 	memset(keys, 0, sizeof(*keys));
1242 	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1243 	keys->addrs.v6addrs.src = key_iph->saddr;
1244 	keys->addrs.v6addrs.dst = key_iph->daddr;
1245 	keys->tags.flow_label = ip6_flowinfo(key_iph);
1246 	keys->basic.ip_proto = key_iph->nexthdr;
1247 }
1248 
1249 /* if skb is set it will be used and fl6 can be NULL */
1250 u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
1251 {
1252 	struct flow_keys hash_keys;
1253 
1254 	if (skb) {
1255 		ip6_multipath_l3_keys(skb, &hash_keys);
1256 		return flow_hash_from_keys(&hash_keys);
1257 	}
1258 
1259 	return get_hash_from_flowi6(fl6);
1260 }
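
/* Usage sketch: forward-path callers hash the flowi6 they already
 * built, while the ICMPv6 error path hands in the skb so the hash is
 * derived from the inner (offending) header and the error follows the
 * same sibling as the original flow:
 *
 *	fl6->mp_hash = rt6_multipath_hash(fl6, NULL);	normal path
 *	fl6.mp_hash  = rt6_multipath_hash(&fl6, skb);	ICMPv6 error path
 */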
1261 
1262 void ip6_route_input(struct sk_buff *skb)
1263 {
1264 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1265 	struct net *net = dev_net(skb->dev);
1266 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1267 	struct ip_tunnel_info *tun_info;
1268 	struct flowi6 fl6 = {
1269 		.flowi6_iif = skb->dev->ifindex,
1270 		.daddr = iph->daddr,
1271 		.saddr = iph->saddr,
1272 		.flowlabel = ip6_flowinfo(iph),
1273 		.flowi6_mark = skb->mark,
1274 		.flowi6_proto = iph->nexthdr,
1275 	};
1276 
1277 	tun_info = skb_tunnel_info(skb);
1278 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1279 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1280 	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
1281 		fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
1282 	skb_dst_drop(skb);
1283 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1284 }
1285 
1286 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1287 					     struct flowi6 *fl6, int flags)
1288 {
1289 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1290 }
1291 
1292 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1293 					 struct flowi6 *fl6, int flags)
1294 {
1295 	bool any_src;
1296 
1297 	if (rt6_need_strict(&fl6->daddr)) {
1298 		struct dst_entry *dst;
1299 
1300 		dst = l3mdev_link_scope_lookup(net, fl6);
1301 		if (dst)
1302 			return dst;
1303 	}
1304 
1305 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1306 
1307 	any_src = ipv6_addr_any(&fl6->saddr);
1308 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1309 	    (fl6->flowi6_oif && any_src))
1310 		flags |= RT6_LOOKUP_F_IFACE;
1311 
1312 	if (!any_src)
1313 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1314 	else if (sk)
1315 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1316 
1317 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1318 }
1319 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1320 
1321 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1322 {
1323 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1324 	struct net_device *loopback_dev = net->loopback_dev;
1325 	struct dst_entry *new = NULL;
1326 
1327 	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
1328 		       DST_OBSOLETE_NONE, 0);
1329 	if (rt) {
1330 		rt6_info_init(rt);
1331 
1332 		new = &rt->dst;
1333 		new->__use = 1;
1334 		new->input = dst_discard;
1335 		new->output = dst_discard_out;
1336 
1337 		dst_copy_metrics(new, &ort->dst);
1338 
1339 		rt->rt6i_idev = in6_dev_get(loopback_dev);
1340 		rt->rt6i_gateway = ort->rt6i_gateway;
1341 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1342 		rt->rt6i_metric = 0;
1343 
1344 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1345 #ifdef CONFIG_IPV6_SUBTREES
1346 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1347 #endif
1348 	}
1349 
1350 	dst_release(dst_orig);
1351 	return new ? new : ERR_PTR(-ENOMEM);
1352 }
1353 
1354 /*
1355  *	Destination cache support functions
1356  */
1357 
1358 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1359 {
1360 	if (rt->dst.from &&
1361 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1362 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1363 }
1364 
1365 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1366 {
1367 	u32 rt_cookie = 0;
1368 
1369 	if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
1370 		return NULL;
1371 
1372 	if (rt6_check_expired(rt))
1373 		return NULL;
1374 
1375 	return &rt->dst;
1376 }
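
/* Cookie sketch: a socket caching this dst stores the fib6 node's
 * sernum as its cookie.  Any later tree change bumps the sernum, so
 * dst->ops->check() (ultimately rt6_check()) returns NULL and the
 * caller must re-route, roughly:
 *
 *	dst = __sk_dst_get(sk);
 *	if (dst && !dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
 *		... do a fresh ip6_route_output() ...
 */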
1377 
1378 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1379 {
1380 	if (!__rt6_check_expired(rt) &&
1381 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1382 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1383 		return &rt->dst;
1384 	else
1385 		return NULL;
1386 }
1387 
1388 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1389 {
1390 	struct rt6_info *rt;
1391 
1392 	rt = (struct rt6_info *) dst;
1393 
1394 	/* All IPv6 dsts are created with ->obsolete set to the value
1395 	 * DST_OBSOLETE_FORCE_CHK, which forces validation calls down
1396 	 * into this function always.
1397 	 */
1398 
1399 	rt6_dst_from_metrics_check(rt);
1400 
1401 	if (rt->rt6i_flags & RTF_PCPU ||
1402 	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
1403 		return rt6_dst_from_check(rt, cookie);
1404 	else
1405 		return rt6_check(rt, cookie);
1406 }
1407 
1408 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1409 {
1410 	struct rt6_info *rt = (struct rt6_info *) dst;
1411 
1412 	if (rt) {
1413 		if (rt->rt6i_flags & RTF_CACHE) {
1414 			if (rt6_check_expired(rt)) {
1415 				ip6_del_rt(rt);
1416 				dst = NULL;
1417 			}
1418 		} else {
1419 			dst_release(dst);
1420 			dst = NULL;
1421 		}
1422 	}
1423 	return dst;
1424 }
1425 
1426 static void ip6_link_failure(struct sk_buff *skb)
1427 {
1428 	struct rt6_info *rt;
1429 
1430 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1431 
1432 	rt = (struct rt6_info *) skb_dst(skb);
1433 	if (rt) {
1434 		if (rt->rt6i_flags & RTF_CACHE) {
1435 			if (dst_hold_safe(&rt->dst))
1436 				ip6_del_rt(rt);
1437 		} else {
1438 			struct fib6_node *fn;
1439 
1440 			rcu_read_lock();
1441 			fn = rcu_dereference(rt->rt6i_node);
1442 			if (fn && (rt->rt6i_flags & RTF_DEFAULT))
1443 				fn->fn_sernum = -1;
1444 			rcu_read_unlock();
1445 		}
1446 	}
1447 }
1448 
1449 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1450 {
1451 	struct net *net = dev_net(rt->dst.dev);
1452 
1453 	rt->rt6i_flags |= RTF_MODIFIED;
1454 	rt->rt6i_pmtu = mtu;
1455 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1456 }
1457 
1458 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1459 {
1460 	return !(rt->rt6i_flags & RTF_CACHE) &&
1461 		(rt->rt6i_flags & RTF_PCPU ||
1462 		 rcu_access_pointer(rt->rt6i_node));
1463 }
1464 
1465 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1466 				 const struct ipv6hdr *iph, u32 mtu)
1467 {
1468 	const struct in6_addr *daddr, *saddr;
1469 	struct rt6_info *rt6 = (struct rt6_info *)dst;
1470 
1471 	if (rt6->rt6i_flags & RTF_LOCAL)
1472 		return;
1473 
1474 	if (dst_metric_locked(dst, RTAX_MTU))
1475 		return;
1476 
1477 	if (iph) {
1478 		daddr = &iph->daddr;
1479 		saddr = &iph->saddr;
1480 	} else if (sk) {
1481 		daddr = &sk->sk_v6_daddr;
1482 		saddr = &inet6_sk(sk)->saddr;
1483 	} else {
1484 		daddr = NULL;
1485 		saddr = NULL;
1486 	}
1487 	dst_confirm_neigh(dst, daddr);
1488 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1489 	if (mtu >= dst_mtu(dst))
1490 		return;
1491 
1492 	if (!rt6_cache_allowed_for_pmtu(rt6)) {
1493 		rt6_do_update_pmtu(rt6, mtu);
1494 	} else if (daddr) {
1495 		struct rt6_info *nrt6;
1496 
1497 		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1498 		if (nrt6) {
1499 			rt6_do_update_pmtu(nrt6, mtu);
1500 
1501 			/* ip6_ins_rt(nrt6) will bump the
1502 			 * rt6->rt6i_node->fn_sernum
1503 			 * which will fail the next rt6_check() and
1504 			 * invalidate the sk->sk_dst_cache.
1505 			 */
1506 			ip6_ins_rt(nrt6);
1507 			/* Release the reference taken in
1508 			 * ip6_rt_cache_alloc()
1509 			 */
1510 			dst_release(&nrt6->dst);
1511 		}
1512 	}
1513 }
1514 
1515 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1516 			       struct sk_buff *skb, u32 mtu)
1517 {
1518 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1519 }
1520 
1521 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1522 		     int oif, u32 mark, kuid_t uid)
1523 {
1524 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1525 	struct dst_entry *dst;
1526 	struct flowi6 fl6;
1527 
1528 	memset(&fl6, 0, sizeof(fl6));
1529 	fl6.flowi6_oif = oif;
1530 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1531 	fl6.daddr = iph->daddr;
1532 	fl6.saddr = iph->saddr;
1533 	fl6.flowlabel = ip6_flowinfo(iph);
1534 	fl6.flowi6_uid = uid;
1535 
1536 	dst = ip6_route_output(net, NULL, &fl6);
1537 	if (!dst->error)
1538 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1539 	dst_release(dst);
1540 }
1541 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
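
/* Hypothetical caller sketch: a tunnel driver that received an ICMPv6
 * Packet Too Big for an encapsulated packet could propagate the new
 * MTU to the inner route like this (mtu arrives in network byte order,
 * as the function ntohl()s it internally):
 *
 *	ip6_update_pmtu(skb, dev_net(skb->dev), htonl(1400), 0,
 *			skb->mark, sock_net_uid(dev_net(skb->dev), NULL));
 */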
1542 
1543 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1544 {
1545 	struct dst_entry *dst;
1546 
1547 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1548 			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
1549 
1550 	dst = __sk_dst_get(sk);
1551 	if (!dst || !dst->obsolete ||
1552 	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1553 		return;
1554 
1555 	bh_lock_sock(sk);
1556 	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1557 		ip6_datagram_dst_update(sk, false);
1558 	bh_unlock_sock(sk);
1559 }
1560 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1561 
1562 /* Handle redirects */
1563 struct ip6rd_flowi {
1564 	struct flowi6 fl6;
1565 	struct in6_addr gateway;
1566 };
1567 
1568 static struct rt6_info *__ip6_route_redirect(struct net *net,
1569 					     struct fib6_table *table,
1570 					     struct flowi6 *fl6,
1571 					     int flags)
1572 {
1573 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1574 	struct rt6_info *rt;
1575 	struct fib6_node *fn;
1576 
1577 	/* Get the "current" route for this destination and
1578 	 * check if the redirect has come from appropriate router.
1579 	 *
1580 	 * RFC 4861 specifies that redirects should only be
1581 	 * accepted if they come from the nexthop to the target.
1582 	 * Due to the way the routes are chosen, this notion
1583 	 * is a bit fuzzy and one might need to check all possible
1584 	 * routes.
1585 	 */
1586 
1587 	read_lock_bh(&table->tb6_lock);
1588 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1589 restart:
1590 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1591 		if (rt6_check_expired(rt))
1592 			continue;
1593 		if (rt->dst.error)
1594 			break;
1595 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1596 			continue;
1597 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1598 			continue;
1599 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1600 			continue;
1601 		break;
1602 	}
1603 
1604 	if (!rt)
1605 		rt = net->ipv6.ip6_null_entry;
1606 	else if (rt->dst.error) {
1607 		rt = net->ipv6.ip6_null_entry;
1608 		goto out;
1609 	}
1610 
1611 	if (rt == net->ipv6.ip6_null_entry) {
1612 		fn = fib6_backtrack(fn, &fl6->saddr);
1613 		if (fn)
1614 			goto restart;
1615 	}
1616 
1617 out:
1618 	dst_hold(&rt->dst);
1619 
1620 	read_unlock_bh(&table->tb6_lock);
1621 
1622 	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1623 	return rt;
1624 }
1625 
1626 static struct dst_entry *ip6_route_redirect(struct net *net,
1627 					const struct flowi6 *fl6,
1628 					const struct in6_addr *gateway)
1629 {
1630 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1631 	struct ip6rd_flowi rdfl;
1632 
1633 	rdfl.fl6 = *fl6;
1634 	rdfl.gateway = *gateway;
1635 
1636 	return fib6_rule_lookup(net, &rdfl.fl6,
1637 				flags, __ip6_route_redirect);
1638 }
1639 
1640 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1641 		  kuid_t uid)
1642 {
1643 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1644 	struct dst_entry *dst;
1645 	struct flowi6 fl6;
1646 
1647 	memset(&fl6, 0, sizeof(fl6));
1648 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1649 	fl6.flowi6_oif = oif;
1650 	fl6.flowi6_mark = mark;
1651 	fl6.daddr = iph->daddr;
1652 	fl6.saddr = iph->saddr;
1653 	fl6.flowlabel = ip6_flowinfo(iph);
1654 	fl6.flowi6_uid = uid;
1655 
1656 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1657 	rt6_do_redirect(dst, NULL, skb);
1658 	dst_release(dst);
1659 }
1660 EXPORT_SYMBOL_GPL(ip6_redirect);
1661 
1662 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1663 			    u32 mark)
1664 {
1665 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1666 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1667 	struct dst_entry *dst;
1668 	struct flowi6 fl6;
1669 
1670 	memset(&fl6, 0, sizeof(fl6));
1671 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1672 	fl6.flowi6_oif = oif;
1673 	fl6.flowi6_mark = mark;
1674 	fl6.daddr = msg->dest;
1675 	fl6.saddr = iph->daddr;
1676 	fl6.flowi6_uid = sock_net_uid(net, NULL);
1677 
1678 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1679 	rt6_do_redirect(dst, NULL, skb);
1680 	dst_release(dst);
1681 }
1682 
1683 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1684 {
1685 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1686 		     sk->sk_uid);
1687 }
1688 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1689 
1690 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1691 {
1692 	struct net_device *dev = dst->dev;
1693 	unsigned int mtu = dst_mtu(dst);
1694 	struct net *net = dev_net(dev);
1695 
1696 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1697 
1698 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1699 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1700 
1701 	/*
1702 	 * The maximal non-jumbo IPv6 payload is IPV6_MAXPLEN, and the
1703 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1704 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1705 	 * rely only on PMTU discovery".
1706 	 */
1707 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1708 		mtu = IPV6_MAXPLEN;
1709 	return mtu;
1710 }
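
/* Worked example: on a 1500-byte MTU link, advmss = 1500 -
 * sizeof(struct ipv6hdr) - sizeof(struct tcphdr) = 1500 - 40 - 20 =
 * 1440, unless the ip6_rt_min_advmss floor or the IPV6_MAXPLEN clamp
 * applies.
 */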
1711 
1712 static unsigned int ip6_mtu(const struct dst_entry *dst)
1713 {
1714 	const struct rt6_info *rt = (const struct rt6_info *)dst;
1715 	unsigned int mtu = rt->rt6i_pmtu;
1716 	struct inet6_dev *idev;
1717 
1718 	if (mtu)
1719 		goto out;
1720 
1721 	mtu = dst_metric_raw(dst, RTAX_MTU);
1722 	if (mtu)
1723 		goto out;
1724 
1725 	mtu = IPV6_MIN_MTU;
1726 
1727 	rcu_read_lock();
1728 	idev = __in6_dev_get(dst->dev);
1729 	if (idev)
1730 		mtu = idev->cnf.mtu6;
1731 	rcu_read_unlock();
1732 
1733 out:
1734 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1735 
1736 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
1737 }
1738 
1739 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1740 				  struct flowi6 *fl6)
1741 {
1742 	struct dst_entry *dst;
1743 	struct rt6_info *rt;
1744 	struct inet6_dev *idev = in6_dev_get(dev);
1745 	struct net *net = dev_net(dev);
1746 
1747 	if (unlikely(!idev))
1748 		return ERR_PTR(-ENODEV);
1749 
1750 	rt = ip6_dst_alloc(net, dev, 0);
1751 	if (unlikely(!rt)) {
1752 		in6_dev_put(idev);
1753 		dst = ERR_PTR(-ENOMEM);
1754 		goto out;
1755 	}
1756 
1757 	rt->dst.flags |= DST_HOST;
1758 	rt->dst.output  = ip6_output;
1759 	rt->rt6i_gateway  = fl6->daddr;
1760 	rt->rt6i_dst.addr = fl6->daddr;
1761 	rt->rt6i_dst.plen = 128;
1762 	rt->rt6i_idev     = idev;
1763 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1764 
1765 	/* Add this dst into uncached_list so that rt6_ifdown() can
1766 	 * do proper release of the net_device
1767 	 */
1768 	rt6_uncached_list_add(rt);
1769 
1770 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1771 
1772 out:
1773 	return dst;
1774 }
1775 
1776 static int ip6_dst_gc(struct dst_ops *ops)
1777 {
1778 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1779 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1780 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1781 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1782 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1783 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1784 	int entries;
1785 
1786 	entries = dst_entries_get_fast(ops);
1787 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1788 	    entries <= rt_max_size)
1789 		goto out;
1790 
1791 	net->ipv6.ip6_rt_gc_expire++;
1792 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1793 	entries = dst_entries_get_slow(ops);
1794 	if (entries < ops->gc_thresh)
1795 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1796 out:
1797 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1798 	return entries > rt_max_size;
1799 }
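
/* Adaptive-timeout sketch: each call lets ip6_rt_gc_expire decay by
 * expire >> elasticity (with the default ip6_rt_gc_elasticity of 9,
 * about 0.2% per call), so sustained pressure gradually lowers the age
 * threshold passed to fib6_run_gc() and GC becomes more aggressive;
 * once the table drops below gc_thresh the timeout is reset to half of
 * ip6_rt_gc_timeout (values depend on the sysctls above).
 */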
1800 
1801 static int ip6_convert_metrics(struct mx6_config *mxc,
1802 			       const struct fib6_config *cfg)
1803 {
1804 	bool ecn_ca = false;
1805 	struct nlattr *nla;
1806 	int remaining;
1807 	u32 *mp;
1808 
1809 	if (!cfg->fc_mx)
1810 		return 0;
1811 
1812 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1813 	if (unlikely(!mp))
1814 		return -ENOMEM;
1815 
1816 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1817 		int type = nla_type(nla);
1818 		u32 val;
1819 
1820 		if (!type)
1821 			continue;
1822 		if (unlikely(type > RTAX_MAX))
1823 			goto err;
1824 
1825 		if (type == RTAX_CC_ALGO) {
1826 			char tmp[TCP_CA_NAME_MAX];
1827 
1828 			nla_strlcpy(tmp, nla, sizeof(tmp));
1829 			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1830 			if (val == TCP_CA_UNSPEC)
1831 				goto err;
1832 		} else {
1833 			val = nla_get_u32(nla);
1834 		}
1835 		if (type == RTAX_HOPLIMIT && val > 255)
1836 			val = 255;
1837 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1838 			goto err;
1839 
1840 		mp[type - 1] = val;
1841 		__set_bit(type - 1, mxc->mx_valid);
1842 	}
1843 
1844 	if (ecn_ca) {
1845 		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1846 		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1847 	}
1848 
1849 	mxc->mx = mp;
1850 	return 0;
1851  err:
1852 	kfree(mp);
1853 	return -EINVAL;
1854 }
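
/* Example: a netlink request equivalent to
 *	ip -6 route add 2001:db8::/64 dev eth0 mtu 1280 hoplimit 32
 * carries RTAX_MTU and RTAX_HOPLIMIT attributes in cfg->fc_mx; this
 * helper turns them into mp[RTAX_MTU - 1] = 1280 and
 * mp[RTAX_HOPLIMIT - 1] = 32 with the matching mx_valid bits set.
 */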
1855 
1856 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1857 					    struct fib6_config *cfg,
1858 					    const struct in6_addr *gw_addr)
1859 {
1860 	struct flowi6 fl6 = {
1861 		.flowi6_oif = cfg->fc_ifindex,
1862 		.daddr = *gw_addr,
1863 		.saddr = cfg->fc_prefsrc,
1864 	};
1865 	struct fib6_table *table;
1866 	struct rt6_info *rt;
1867 	int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
1868 
1869 	table = fib6_get_table(net, cfg->fc_table);
1870 	if (!table)
1871 		return NULL;
1872 
1873 	if (!ipv6_addr_any(&cfg->fc_prefsrc))
1874 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1875 
1876 	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1877 
1878 	/* if table lookup failed, fall back to full lookup */
1879 	if (rt == net->ipv6.ip6_null_entry) {
1880 		ip6_rt_put(rt);
1881 		rt = NULL;
1882 	}
1883 
1884 	return rt;
1885 }
1886 
1887 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
1888 					      struct netlink_ext_ack *extack)
1889 {
1890 	struct net *net = cfg->fc_nlinfo.nl_net;
1891 	struct rt6_info *rt = NULL;
1892 	struct net_device *dev = NULL;
1893 	struct inet6_dev *idev = NULL;
1894 	struct fib6_table *table;
1895 	int addr_type;
1896 	int err = -EINVAL;
1897 
1898 	/* RTF_PCPU is an internal flag; cannot be set by userspace */
1899 	if (cfg->fc_flags & RTF_PCPU) {
1900 		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
1901 		goto out;
1902 	}
1903 
1904 	if (cfg->fc_dst_len > 128) {
1905 		NL_SET_ERR_MSG(extack, "Invalid prefix length");
1906 		goto out;
1907 	}
1908 	if (cfg->fc_src_len > 128) {
1909 		NL_SET_ERR_MSG(extack, "Invalid source address length");
1910 		goto out;
1911 	}
1912 #ifndef CONFIG_IPV6_SUBTREES
1913 	if (cfg->fc_src_len) {
1914 		NL_SET_ERR_MSG(extack,
1915 			       "Specifying source address requires IPV6_SUBTREES to be enabled");
1916 		goto out;
1917 	}
1918 #endif
1919 	if (cfg->fc_ifindex) {
1920 		err = -ENODEV;
1921 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1922 		if (!dev)
1923 			goto out;
1924 		idev = in6_dev_get(dev);
1925 		if (!idev)
1926 			goto out;
1927 	}
1928 
1929 	if (cfg->fc_metric == 0)
1930 		cfg->fc_metric = IP6_RT_PRIO_USER;
1931 
1932 	err = -ENOBUFS;
1933 	if (cfg->fc_nlinfo.nlh &&
1934 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1935 		table = fib6_get_table(net, cfg->fc_table);
1936 		if (!table) {
1937 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1938 			table = fib6_new_table(net, cfg->fc_table);
1939 		}
1940 	} else {
1941 		table = fib6_new_table(net, cfg->fc_table);
1942 	}
1943 
1944 	if (!table)
1945 		goto out;
1946 
1947 	rt = ip6_dst_alloc(net, NULL,
1948 			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1949 
1950 	if (!rt) {
1951 		err = -ENOMEM;
1952 		goto out;
1953 	}
1954 
1955 	if (cfg->fc_flags & RTF_EXPIRES)
1956 		rt6_set_expires(rt, jiffies +
1957 				clock_t_to_jiffies(cfg->fc_expires));
1958 	else
1959 		rt6_clean_expires(rt);
1960 
1961 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1962 		cfg->fc_protocol = RTPROT_BOOT;
1963 	rt->rt6i_protocol = cfg->fc_protocol;
1964 
1965 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1966 
1967 	if (addr_type & IPV6_ADDR_MULTICAST)
1968 		rt->dst.input = ip6_mc_input;
1969 	else if (cfg->fc_flags & RTF_LOCAL)
1970 		rt->dst.input = ip6_input;
1971 	else
1972 		rt->dst.input = ip6_forward;
1973 
1974 	rt->dst.output = ip6_output;
1975 
1976 	if (cfg->fc_encap) {
1977 		struct lwtunnel_state *lwtstate;
1978 
1979 		err = lwtunnel_build_state(cfg->fc_encap_type,
1980 					   cfg->fc_encap, AF_INET6, cfg,
1981 					   &lwtstate, extack);
1982 		if (err)
1983 			goto out;
1984 		rt->dst.lwtstate = lwtstate_get(lwtstate);
1985 		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1986 			rt->dst.lwtstate->orig_output = rt->dst.output;
1987 			rt->dst.output = lwtunnel_output;
1988 		}
1989 		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1990 			rt->dst.lwtstate->orig_input = rt->dst.input;
1991 			rt->dst.input = lwtunnel_input;
1992 		}
1993 	}
1994 
1995 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1996 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1997 	if (rt->rt6i_dst.plen == 128)
1998 		rt->dst.flags |= DST_HOST;
1999 
2000 #ifdef CONFIG_IPV6_SUBTREES
2001 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
2002 	rt->rt6i_src.plen = cfg->fc_src_len;
2003 #endif
2004 
2005 	rt->rt6i_metric = cfg->fc_metric;
2006 
2007 	/* We cannot add true routes via loopback here; they would
2008 	   result in kernel looping. Promote them to reject routes.
2009 	 */
2010 	if ((cfg->fc_flags & RTF_REJECT) ||
2011 	    (dev && (dev->flags & IFF_LOOPBACK) &&
2012 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
2013 	     !(cfg->fc_flags & RTF_LOCAL))) {
2014 		/* hold loopback dev/idev if we haven't done so. */
2015 		if (dev != net->loopback_dev) {
2016 			if (dev) {
2017 				dev_put(dev);
2018 				in6_dev_put(idev);
2019 			}
2020 			dev = net->loopback_dev;
2021 			dev_hold(dev);
2022 			idev = in6_dev_get(dev);
2023 			if (!idev) {
2024 				err = -ENODEV;
2025 				goto out;
2026 			}
2027 		}
2028 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
2029 		switch (cfg->fc_type) {
2030 		case RTN_BLACKHOLE:
2031 			rt->dst.error = -EINVAL;
2032 			rt->dst.output = dst_discard_out;
2033 			rt->dst.input = dst_discard;
2034 			break;
2035 		case RTN_PROHIBIT:
2036 			rt->dst.error = -EACCES;
2037 			rt->dst.output = ip6_pkt_prohibit_out;
2038 			rt->dst.input = ip6_pkt_prohibit;
2039 			break;
2040 		case RTN_THROW:
2041 		case RTN_UNREACHABLE:
2042 		default:
2043 			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
2044 					: (cfg->fc_type == RTN_UNREACHABLE)
2045 					? -EHOSTUNREACH : -ENETUNREACH;
2046 			rt->dst.output = ip6_pkt_discard_out;
2047 			rt->dst.input = ip6_pkt_discard;
2048 			break;
2049 		}
2050 		goto install_route;
2051 	}
2052 
2053 	if (cfg->fc_flags & RTF_GATEWAY) {
2054 		const struct in6_addr *gw_addr;
2055 		int gwa_type;
2056 
2057 		gw_addr = &cfg->fc_gateway;
2058 		gwa_type = ipv6_addr_type(gw_addr);
2059 
2060 		/* If gw_addr is local we may fail to detect that here while
2061 		 * the address is still TENTATIVE (DAD in progress):
2062 		 * rt6_lookup() will return the already-added prefix route
2063 		 * via the interface it was assigned to, possibly non-loopback.
2064 		 */
2065 		err = -EINVAL;
2066 		if (ipv6_chk_addr_and_flags(net, gw_addr,
2067 					    gwa_type & IPV6_ADDR_LINKLOCAL ?
2068 					    dev : NULL, 0, 0)) {
2069 			NL_SET_ERR_MSG(extack, "Invalid gateway address");
2070 			goto out;
2071 		}
2072 		rt->rt6i_gateway = *gw_addr;
2073 
2074 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
2075 			struct rt6_info *grt = NULL;
2076 
2077 			/* IPv6 strictly forbids using non-link-local
2078 			   addresses as the nexthop address; otherwise
2079 			   the router would not be able to send redirects.
2080 			   That is the right default, but in some (rare!)
2081 			   circumstances (SIT, PtP, NBMA NOARP links) it
2082 			   is handy to allow some exceptions. --ANK
2083 			   We allow IPv4-mapped nexthops to support
2084 			   RFC 4798 style addressing.
2085 			 */
2086 			if (!(gwa_type & (IPV6_ADDR_UNICAST |
2087 					  IPV6_ADDR_MAPPED))) {
2088 				NL_SET_ERR_MSG(extack,
2089 					       "Invalid gateway address");
2090 				goto out;
2091 			}
2092 
2093 			if (cfg->fc_table) {
2094 				grt = ip6_nh_lookup_table(net, cfg, gw_addr);
2095 
2096 				if (grt) {
2097 					if (grt->rt6i_flags & RTF_GATEWAY ||
2098 					    (dev && dev != grt->dst.dev)) {
2099 						ip6_rt_put(grt);
2100 						grt = NULL;
2101 					}
2102 				}
2103 			}
2104 
2105 			if (!grt)
2106 				grt = rt6_lookup(net, gw_addr, NULL,
2107 						 cfg->fc_ifindex, 1);
2108 
2109 			err = -EHOSTUNREACH;
2110 			if (!grt)
2111 				goto out;
2112 			if (dev) {
2113 				if (dev != grt->dst.dev) {
2114 					ip6_rt_put(grt);
2115 					goto out;
2116 				}
2117 			} else {
2118 				dev = grt->dst.dev;
2119 				idev = grt->rt6i_idev;
2120 				dev_hold(dev);
2121 				in6_dev_hold(grt->rt6i_idev);
2122 			}
2123 			if (!(grt->rt6i_flags & RTF_GATEWAY))
2124 				err = 0;
2125 			ip6_rt_put(grt);
2126 
2127 			if (err)
2128 				goto out;
2129 		}
2130 		err = -EINVAL;
2131 		if (!dev) {
2132 			NL_SET_ERR_MSG(extack, "Egress device not specified");
2133 			goto out;
2134 		} else if (dev->flags & IFF_LOOPBACK) {
2135 			NL_SET_ERR_MSG(extack,
2136 				       "Egress device can not be loopback device for this route");
2137 			goto out;
2138 		}
2139 	}
2140 
2141 	err = -ENODEV;
2142 	if (!dev)
2143 		goto out;
2144 
2145 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2146 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
2147 			NL_SET_ERR_MSG(extack, "Invalid source address");
2148 			err = -EINVAL;
2149 			goto out;
2150 		}
2151 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
2152 		rt->rt6i_prefsrc.plen = 128;
2153 	} else
2154 		rt->rt6i_prefsrc.plen = 0;
2155 
2156 	rt->rt6i_flags = cfg->fc_flags;
2157 
2158 install_route:
2159 	rt->dst.dev = dev;
2160 	rt->rt6i_idev = idev;
2161 	rt->rt6i_table = table;
2162 
2163 	cfg->fc_nlinfo.nl_net = dev_net(dev);
2164 
2165 	return rt;
2166 out:
2167 	if (dev)
2168 		dev_put(dev);
2169 	if (idev)
2170 		in6_dev_put(idev);
2171 	if (rt)
2172 		dst_release_immediate(&rt->dst);
2173 
2174 	return ERR_PTR(err);
2175 }
2176 
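/* ip6_route_add() is the single-nexthop insert path: create the
 * rt6_info, convert any RTA_METRICS into an mx6_config, insert, then
 * free the metrics array. On failure the half-built route is released
 * with dst_release_immediate(), since it was never visible to the FIB.
 */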
2177 int ip6_route_add(struct fib6_config *cfg,
2178 		  struct netlink_ext_ack *extack)
2179 {
2180 	struct mx6_config mxc = { .mx = NULL, };
2181 	struct rt6_info *rt;
2182 	int err;
2183 
2184 	rt = ip6_route_info_create(cfg, extack);
2185 	if (IS_ERR(rt)) {
2186 		err = PTR_ERR(rt);
2187 		rt = NULL;
2188 		goto out;
2189 	}
2190 
2191 	err = ip6_convert_metrics(&mxc, cfg);
2192 	if (err)
2193 		goto out;
2194 
2195 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
2196 
2197 	kfree(mxc.mx);
2198 
2199 	return err;
2200 out:
2201 	if (rt)
2202 		dst_release_immediate(&rt->dst);
2203 
2204 	return err;
2205 }
2206 
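/* Both deletion helpers below consume the caller's reference on rt
 * via ip6_rt_put(), even on error; callers such as ip6_route_del()
 * therefore take a dst_hold() before dropping the table lock and
 * handing the route in.
 */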
2207 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2208 {
2209 	int err;
2210 	struct fib6_table *table;
2211 	struct net *net = dev_net(rt->dst.dev);
2212 
2213 	if (rt == net->ipv6.ip6_null_entry) {
2214 		err = -ENOENT;
2215 		goto out;
2216 	}
2217 
2218 	table = rt->rt6i_table;
2219 	write_lock_bh(&table->tb6_lock);
2220 	err = fib6_del(rt, info);
2221 	write_unlock_bh(&table->tb6_lock);
2222 
2223 out:
2224 	ip6_rt_put(rt);
2225 	return err;
2226 }
2227 
2228 int ip6_del_rt(struct rt6_info *rt)
2229 {
2230 	struct nl_info info = {
2231 		.nl_net = dev_net(rt->dst.dev),
2232 	};
2233 	return __ip6_del_rt(rt, &info);
2234 }
2235 
2236 static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2237 {
2238 	struct nl_info *info = &cfg->fc_nlinfo;
2239 	struct net *net = info->nl_net;
2240 	struct sk_buff *skb = NULL;
2241 	struct fib6_table *table;
2242 	int err = -ENOENT;
2243 
2244 	if (rt == net->ipv6.ip6_null_entry)
2245 		goto out_put;
2246 	table = rt->rt6i_table;
2247 	write_lock_bh(&table->tb6_lock);
2248 
2249 	if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2250 		struct rt6_info *sibling, *next_sibling;
2251 
2252 		/* prefer to send a single notification with all hops */
2253 		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
2254 		if (skb) {
2255 			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2256 
2257 			if (rt6_fill_node(net, skb, rt,
2258 					  NULL, NULL, 0, RTM_DELROUTE,
2259 					  info->portid, seq, 0) < 0) {
2260 				kfree_skb(skb);
2261 				skb = NULL;
2262 			} else
2263 				info->skip_notify = 1;
2264 		}
2265 
2266 		list_for_each_entry_safe(sibling, next_sibling,
2267 					 &rt->rt6i_siblings,
2268 					 rt6i_siblings) {
2269 			err = fib6_del(sibling, info);
2270 			if (err)
2271 				goto out_unlock;
2272 		}
2273 	}
2274 
2275 	err = fib6_del(rt, info);
2276 out_unlock:
2277 	write_unlock_bh(&table->tb6_lock);
2278 out_put:
2279 	ip6_rt_put(rt);
2280 
2281 	if (skb) {
2282 		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2283 			    info->nlh, gfp_any());
2284 	}
2285 	return err;
2286 }
2287 
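/* ip6_route_del() locates the fib6_node for the requested prefix and
 * walks its leaf chain; a candidate must match every selector the
 * request supplied (RTF_CACHE, ifindex, gateway, metric, protocol).
 * With RTF_GATEWAY set only that one nexthop is removed; otherwise
 * __ip6_del_rt_siblings() also drops the siblings of a multipath
 * route when fc_delete_all_nh is set.
 */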
2288 static int ip6_route_del(struct fib6_config *cfg,
2289 			 struct netlink_ext_ack *extack)
2290 {
2291 	struct fib6_table *table;
2292 	struct fib6_node *fn;
2293 	struct rt6_info *rt;
2294 	int err = -ESRCH;
2295 
2296 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2297 	if (!table) {
2298 		NL_SET_ERR_MSG(extack, "FIB table does not exist");
2299 		return err;
2300 	}
2301 
2302 	read_lock_bh(&table->tb6_lock);
2303 
2304 	fn = fib6_locate(&table->tb6_root,
2305 			 &cfg->fc_dst, cfg->fc_dst_len,
2306 			 &cfg->fc_src, cfg->fc_src_len);
2307 
2308 	if (fn) {
2309 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2310 			if ((rt->rt6i_flags & RTF_CACHE) &&
2311 			    !(cfg->fc_flags & RTF_CACHE))
2312 				continue;
2313 			if (cfg->fc_ifindex &&
2314 			    (!rt->dst.dev ||
2315 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
2316 				continue;
2317 			if (cfg->fc_flags & RTF_GATEWAY &&
2318 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2319 				continue;
2320 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2321 				continue;
2322 			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2323 				continue;
2324 			dst_hold(&rt->dst);
2325 			read_unlock_bh(&table->tb6_lock);
2326 
2327 			/* if a gateway was specified, delete only that one hop */
2328 			if (cfg->fc_flags & RTF_GATEWAY)
2329 				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2330 
2331 			return __ip6_del_rt_siblings(rt, cfg);
2332 		}
2333 	}
2334 	read_unlock_bh(&table->tb6_lock);
2335 
2336 	return err;
2337 }
2338 
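/* rt6_do_redirect() is the receive side of an NDISC redirect
 * (RFC 2461/4861, section 8): validate the message (non-multicast
 * destination; link-local unicast target unless on-link), check that
 * we are a host that accepts redirects, parse the ND options, update
 * the neighbour cache, and finally install an RTF_CACHE clone that
 * points at the new first hop.
 */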
2339 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2340 {
2341 	struct netevent_redirect netevent;
2342 	struct rt6_info *rt, *nrt = NULL;
2343 	struct ndisc_options ndopts;
2344 	struct inet6_dev *in6_dev;
2345 	struct neighbour *neigh;
2346 	struct rd_msg *msg;
2347 	int optlen, on_link;
2348 	u8 *lladdr;
2349 
2350 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2351 	optlen -= sizeof(*msg);
2352 
2353 	if (optlen < 0) {
2354 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2355 		return;
2356 	}
2357 
2358 	msg = (struct rd_msg *)icmp6_hdr(skb);
2359 
2360 	if (ipv6_addr_is_multicast(&msg->dest)) {
2361 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2362 		return;
2363 	}
2364 
2365 	on_link = 0;
2366 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2367 		on_link = 1;
2368 	} else if (ipv6_addr_type(&msg->target) !=
2369 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2370 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2371 		return;
2372 	}
2373 
2374 	in6_dev = __in6_dev_get(skb->dev);
2375 	if (!in6_dev)
2376 		return;
2377 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2378 		return;
2379 
2380 	/* RFC2461 8.1:
2381 	 *	The IP source address of the Redirect MUST be the same as the current
2382 	 *	first-hop router for the specified ICMP Destination Address.
2383 	 */
2384 
2385 	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
2386 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2387 		return;
2388 	}
2389 
2390 	lladdr = NULL;
2391 	if (ndopts.nd_opts_tgt_lladdr) {
2392 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2393 					     skb->dev);
2394 		if (!lladdr) {
2395 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2396 			return;
2397 		}
2398 	}
2399 
2400 	rt = (struct rt6_info *) dst;
2401 	if (rt->rt6i_flags & RTF_REJECT) {
2402 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2403 		return;
2404 	}
2405 
2406 	/* Redirect received -> path was valid.
2407 	 * Look, redirects are sent only in response to data packets,
2408 	 * so this nexthop apparently is reachable. --ANK
2409 	 */
2410 	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
2411 
2412 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2413 	if (!neigh)
2414 		return;
2415 
2416 	/*
2417 	 *	We have finally decided to accept it.
2418 	 */
2419 
2420 	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
2421 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
2422 		     NEIGH_UPDATE_F_OVERRIDE|
2423 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2424 				     NEIGH_UPDATE_F_ISROUTER)),
2425 		     NDISC_REDIRECT, &ndopts);
2426 
2427 	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2428 	if (!nrt)
2429 		goto out;
2430 
2431 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2432 	if (on_link)
2433 		nrt->rt6i_flags &= ~RTF_GATEWAY;
2434 
2435 	nrt->rt6i_protocol = RTPROT_REDIRECT;
2436 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2437 
2438 	if (ip6_ins_rt(nrt))
2439 		goto out_release;
2440 
2441 	netevent.old = &rt->dst;
2442 	netevent.new = &nrt->dst;
2443 	netevent.daddr = &msg->dest;
2444 	netevent.neigh = neigh;
2445 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2446 
2447 	if (rt->rt6i_flags & RTF_CACHE) {
2448 		rt = (struct rt6_info *) dst_clone(&rt->dst);
2449 		ip6_del_rt(rt);
2450 	}
2451 
2452 out_release:
2453 	/* Release the reference taken in
2454 	 * ip6_rt_cache_alloc()
2455 	 */
2456 	dst_release(&nrt->dst);
2457 
2458 out:
2459 	neigh_release(neigh);
2460 }
2461 
2462 /*
2463  *	Misc support functions
2464  */
2465 
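/* rt6_set_from() records the origin of a clone and shares the
 * parent's metrics read-only; ip6_rt_copy_init() copies the remaining
 * routing state so the clone behaves like its parent until it gains
 * state of its own (e.g. a per-destination PMTU).
 */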
2466 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2467 {
2468 	BUG_ON(from->dst.from);
2469 
2470 	rt->rt6i_flags &= ~RTF_EXPIRES;
2471 	dst_hold(&from->dst);
2472 	rt->dst.from = &from->dst;
2473 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2474 }
2475 
2476 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2477 {
2478 	rt->dst.input = ort->dst.input;
2479 	rt->dst.output = ort->dst.output;
2480 	rt->rt6i_dst = ort->rt6i_dst;
2481 	rt->dst.error = ort->dst.error;
2482 	rt->rt6i_idev = ort->rt6i_idev;
2483 	if (rt->rt6i_idev)
2484 		in6_dev_hold(rt->rt6i_idev);
2485 	rt->dst.lastuse = jiffies;
2486 	rt->rt6i_gateway = ort->rt6i_gateway;
2487 	rt->rt6i_flags = ort->rt6i_flags;
2488 	rt6_set_from(rt, ort);
2489 	rt->rt6i_metric = ort->rt6i_metric;
2490 #ifdef CONFIG_IPV6_SUBTREES
2491 	rt->rt6i_src = ort->rt6i_src;
2492 #endif
2493 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2494 	rt->rt6i_table = ort->rt6i_table;
2495 	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2496 }
2497 
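/* Route Information option support (RFC 4191): specific routes
 * advertised in RAs are installed as RTF_ROUTEINFO|RTF_GATEWAY
 * entries in RT6_TABLE_INFO (or the device's l3mdev table).
 * rt6_get_route_info() finds an existing entry; rt6_add_route_info()
 * installs one and looks it up again to return a referenced rt6_info.
 */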
2498 #ifdef CONFIG_IPV6_ROUTE_INFO
2499 static struct rt6_info *rt6_get_route_info(struct net *net,
2500 					   const struct in6_addr *prefix, int prefixlen,
2501 					   const struct in6_addr *gwaddr,
2502 					   struct net_device *dev)
2503 {
2504 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
2505 	int ifindex = dev->ifindex;
2506 	struct fib6_node *fn;
2507 	struct rt6_info *rt = NULL;
2508 	struct fib6_table *table;
2509 
2510 	table = fib6_get_table(net, tb_id);
2511 	if (!table)
2512 		return NULL;
2513 
2514 	read_lock_bh(&table->tb6_lock);
2515 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2516 	if (!fn)
2517 		goto out;
2518 
2519 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2520 		if (rt->dst.dev->ifindex != ifindex)
2521 			continue;
2522 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2523 			continue;
2524 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2525 			continue;
2526 		dst_hold(&rt->dst);
2527 		break;
2528 	}
2529 out:
2530 	read_unlock_bh(&table->tb6_lock);
2531 	return rt;
2532 }
2533 
2534 static struct rt6_info *rt6_add_route_info(struct net *net,
2535 					   const struct in6_addr *prefix, int prefixlen,
2536 					   const struct in6_addr *gwaddr,
2537 					   struct net_device *dev,
2538 					   unsigned int pref)
2539 {
2540 	struct fib6_config cfg = {
2541 		.fc_metric	= IP6_RT_PRIO_USER,
2542 		.fc_ifindex	= dev->ifindex,
2543 		.fc_dst_len	= prefixlen,
2544 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2545 				  RTF_UP | RTF_PREF(pref),
2546 		.fc_protocol = RTPROT_RA,
2547 		.fc_nlinfo.portid = 0,
2548 		.fc_nlinfo.nlh = NULL,
2549 		.fc_nlinfo.nl_net = net,
2550 	};
2551 
2552 	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
2553 	cfg.fc_dst = *prefix;
2554 	cfg.fc_gateway = *gwaddr;
2555 
2556 	/* We should treat it as a default route if prefix length is 0. */
2557 	if (!prefixlen)
2558 		cfg.fc_flags |= RTF_DEFAULT;
2559 
2560 	ip6_route_add(&cfg, NULL);
2561 
2562 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
2563 }
2564 #endif
2565 
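/* Default routers learned from Router Advertisements carry
 * RTF_ADDRCONF|RTF_DEFAULT and live in RT6_TABLE_DFLT (or the
 * device's l3mdev table). The RT6_TABLE_HAS_DFLT_ROUTER table flag
 * lets rt6_purge_dflt_routers() skip tables with nothing to purge.
 */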
2566 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2567 {
2568 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
2569 	struct rt6_info *rt;
2570 	struct fib6_table *table;
2571 
2572 	table = fib6_get_table(dev_net(dev), tb_id);
2573 	if (!table)
2574 		return NULL;
2575 
2576 	read_lock_bh(&table->tb6_lock);
2577 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2578 		if (dev == rt->dst.dev &&
2579 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2580 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2581 			break;
2582 	}
2583 	if (rt)
2584 		dst_hold(&rt->dst);
2585 	read_unlock_bh(&table->tb6_lock);
2586 	return rt;
2587 }
2588 
2589 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2590 				     struct net_device *dev,
2591 				     unsigned int pref)
2592 {
2593 	struct fib6_config cfg = {
2594 		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2595 		.fc_metric	= IP6_RT_PRIO_USER,
2596 		.fc_ifindex	= dev->ifindex,
2597 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2598 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2599 		.fc_protocol = RTPROT_RA,
2600 		.fc_nlinfo.portid = 0,
2601 		.fc_nlinfo.nlh = NULL,
2602 		.fc_nlinfo.nl_net = dev_net(dev),
2603 	};
2604 
2605 	cfg.fc_gateway = *gwaddr;
2606 
2607 	if (!ip6_route_add(&cfg, NULL)) {
2608 		struct fib6_table *table;
2609 
2610 		table = fib6_get_table(dev_net(dev), cfg.fc_table);
2611 		if (table)
2612 			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
2613 	}
2614 
2615 	return rt6_get_dflt_router(gwaddr, dev);
2616 }
2617 
2618 static void __rt6_purge_dflt_routers(struct fib6_table *table)
2619 {
2620 	struct rt6_info *rt;
2621 
2622 restart:
2623 	read_lock_bh(&table->tb6_lock);
2624 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2625 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2626 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2627 			dst_hold(&rt->dst);
2628 			read_unlock_bh(&table->tb6_lock);
2629 			ip6_del_rt(rt);
2630 			goto restart;
2631 		}
2632 	}
2633 	read_unlock_bh(&table->tb6_lock);
2634 
2635 	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
2636 }
2637 
2638 void rt6_purge_dflt_routers(struct net *net)
2639 {
2640 	struct fib6_table *table;
2641 	struct hlist_head *head;
2642 	unsigned int h;
2643 
2644 	rcu_read_lock();
2645 
2646 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
2647 		head = &net->ipv6.fib_table_hash[h];
2648 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
2649 			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
2650 				__rt6_purge_dflt_routers(table);
2651 		}
2652 	}
2653 
2654 	rcu_read_unlock();
2655 }
2656 
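/* Legacy ioctl support: translate the old struct in6_rtmsg used by
 * SIOCADDRT/SIOCDELRT (e.g. from the route(8) tool) into a
 * fib6_config. Note that rtmsg_info carries the expiry and
 * rtmsg_metric the priority; the other fields map one-to-one.
 */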
2657 static void rtmsg_to_fib6_config(struct net *net,
2658 				 struct in6_rtmsg *rtmsg,
2659 				 struct fib6_config *cfg)
2660 {
2661 	memset(cfg, 0, sizeof(*cfg));
2662 
2663 	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2664 			 : RT6_TABLE_MAIN;
2665 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2666 	cfg->fc_metric = rtmsg->rtmsg_metric;
2667 	cfg->fc_expires = rtmsg->rtmsg_info;
2668 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2669 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2670 	cfg->fc_flags = rtmsg->rtmsg_flags;
2671 
2672 	cfg->fc_nlinfo.nl_net = net;
2673 
2674 	cfg->fc_dst = rtmsg->rtmsg_dst;
2675 	cfg->fc_src = rtmsg->rtmsg_src;
2676 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2677 }
2678 
2679 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2680 {
2681 	struct fib6_config cfg;
2682 	struct in6_rtmsg rtmsg;
2683 	int err;
2684 
2685 	switch (cmd) {
2686 	case SIOCADDRT:		/* Add a route */
2687 	case SIOCDELRT:		/* Delete a route */
2688 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2689 			return -EPERM;
2690 		err = copy_from_user(&rtmsg, arg,
2691 				     sizeof(struct in6_rtmsg));
2692 		if (err)
2693 			return -EFAULT;
2694 
2695 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2696 
2697 		rtnl_lock();
2698 		switch (cmd) {
2699 		case SIOCADDRT:
2700 			err = ip6_route_add(&cfg, NULL);
2701 			break;
2702 		case SIOCDELRT:
2703 			err = ip6_route_del(&cfg, NULL);
2704 			break;
2705 		default:
2706 			err = -EINVAL;
2707 		}
2708 		rtnl_unlock();
2709 
2710 		return err;
2711 	}
2712 
2713 	return -EINVAL;
2714 }
2715 
2716 /*
2717  *	Drop the packet on the floor
2718  */
2719 
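/* Reject routes funnel through ip6_pkt_drop(), which bumps the
 * appropriate SNMP counter and answers with an ICMPv6 Destination
 * Unreachable: code ICMPV6_NOROUTE for plain discards and
 * ICMPV6_ADM_PROHIBITED for prohibit routes. Blackhole routes use
 * dst_discard() instead and send nothing back.
 */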
2720 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2721 {
2722 	int type;
2723 	struct dst_entry *dst = skb_dst(skb);
2724 	switch (ipstats_mib_noroutes) {
2725 	case IPSTATS_MIB_INNOROUTES:
2726 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2727 		if (type == IPV6_ADDR_ANY) {
2728 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2729 				      IPSTATS_MIB_INADDRERRORS);
2730 			break;
2731 		}
2732 		/* FALLTHROUGH */
2733 	case IPSTATS_MIB_OUTNOROUTES:
2734 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2735 			      ipstats_mib_noroutes);
2736 		break;
2737 	}
2738 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2739 	kfree_skb(skb);
2740 	return 0;
2741 }
2742 
2743 static int ip6_pkt_discard(struct sk_buff *skb)
2744 {
2745 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2746 }
2747 
2748 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2749 {
2750 	skb->dev = skb_dst(skb)->dev;
2751 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2752 }
2753 
2754 static int ip6_pkt_prohibit(struct sk_buff *skb)
2755 {
2756 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2757 }
2758 
2759 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2760 {
2761 	skb->dev = skb_dst(skb)->dev;
2762 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2763 }
2764 
2765 /*
2766  *	Allocate a dst for local (unicast / anycast) address.
2767  */
2768 
2769 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2770 				    const struct in6_addr *addr,
2771 				    bool anycast)
2772 {
2773 	u32 tb_id;
2774 	struct net *net = dev_net(idev->dev);
2775 	struct net_device *dev = idev->dev;
2776 	struct rt6_info *rt;
2777 
2778 	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
2779 	if (!rt)
2780 		return ERR_PTR(-ENOMEM);
2781 
2782 	in6_dev_hold(idev);
2783 
2784 	rt->dst.flags |= DST_HOST;
2785 	rt->dst.input = ip6_input;
2786 	rt->dst.output = ip6_output;
2787 	rt->rt6i_idev = idev;
2788 
2789 	rt->rt6i_protocol = RTPROT_KERNEL;
2790 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2791 	if (anycast)
2792 		rt->rt6i_flags |= RTF_ANYCAST;
2793 	else
2794 		rt->rt6i_flags |= RTF_LOCAL;
2795 
2796 	rt->rt6i_gateway  = *addr;
2797 	rt->rt6i_dst.addr = *addr;
2798 	rt->rt6i_dst.plen = 128;
2799 	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2800 	rt->rt6i_table = fib6_get_table(net, tb_id);
2801 
2802 	return rt;
2803 }
2804 
2805 /* remove a deleted IP from prefsrc entries */
2806 struct arg_dev_net_ip {
2807 	struct net_device *dev;
2808 	struct net *net;
2809 	struct in6_addr *addr;
2810 };
2811 
2812 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2813 {
2814 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2815 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2816 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2817 
2818 	if (((void *)rt->dst.dev == dev || !dev) &&
2819 	    rt != net->ipv6.ip6_null_entry &&
2820 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2821 		/* remove prefsrc entry */
2822 		rt->rt6i_prefsrc.plen = 0;
2823 	}
2824 	return 0;
2825 }
2826 
2827 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2828 {
2829 	struct net *net = dev_net(ifp->idev->dev);
2830 	struct arg_dev_net_ip adni = {
2831 		.dev = ifp->idev->dev,
2832 		.net = net,
2833 		.addr = &ifp->addr,
2834 	};
2835 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2836 }
2837 
2838 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2839 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2840 
2841 /* Remove routers and update dst entries when a gateway turns into a host. */
2842 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2843 {
2844 	struct in6_addr *gateway = (struct in6_addr *)arg;
2845 
2846 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2847 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2848 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2849 		return -1;
2850 	}
2851 	return 0;
2852 }
2853 
2854 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2855 {
2856 	fib6_clean_all(net, fib6_clean_tohost, gateway);
2857 }
2858 
2859 struct arg_dev_net {
2860 	struct net_device *dev;
2861 	struct net *net;
2862 };
2863 
2864 /* called with write lock held for table with rt */
2865 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2866 {
2867 	const struct arg_dev_net *adn = arg;
2868 	const struct net_device *dev = adn->dev;
2869 
2870 	if ((rt->dst.dev == dev || !dev) &&
2871 	    rt != adn->net->ipv6.ip6_null_entry &&
2872 	    (rt->rt6i_nsiblings == 0 ||
2873 	     (dev && netdev_unregistering(dev)) ||
2874 	     !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
2875 		return -1;
2876 
2877 	return 0;
2878 }
2879 
2880 void rt6_ifdown(struct net *net, struct net_device *dev)
2881 {
2882 	struct arg_dev_net adn = {
2883 		.dev = dev,
2884 		.net = net,
2885 	};
2886 
2887 	fib6_clean_all(net, fib6_ifdown, &adn);
2888 	if (dev)
2889 		rt6_uncached_list_flush_dev(net, dev);
2890 }
2891 
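/* Device MTU changes are propagated to routes by the fib6_clean_all()
 * walk below; rt6_mtu_change_route() decides per route whether the
 * stored RTAX_MTU metric (or rt6i_pmtu for cached clones) should
 * follow the new device MTU.
 */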
2892 struct rt6_mtu_change_arg {
2893 	struct net_device *dev;
2894 	unsigned int mtu;
2895 };
2896 
2897 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2898 {
2899 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2900 	struct inet6_dev *idev;
2901 
2902 	/* In IPv6, PMTU discovery is not optional, so an RTAX_MTU
2903 	   lock cannot disable it.
2904 	   We still use the lock to block changes caused by
2905 	   addrconf/ndisc.
2906 	*/
2907 
2908 	idev = __in6_dev_get(arg->dev);
2909 	if (!idev)
2910 		return 0;
2911 
2912 	/* After an administrative MTU increase there is no way to
2913 	   discover the IPv6 PMTU increase, so the increase must be
2914 	   applied here. Since RFC 1981 does not cover administrative
2915 	   MTU increases (e.g. jumbo frames), updating the PMTU on
2916 	   increase is a MUST.
2917 	 */
2918 	/*
2919 	   If the new MTU is less than the route PMTU, the new MTU will
2920 	   be the lowest MTU in the path; update the route PMTU to
2921 	   reflect the decrease. If the new MTU is greater and the old
2922 	   MTU was the lowest in the path, update the route PMTU to
2923 	   reflect the increase; a PACKET TOO BIG message from a node
2924 	   with a smaller MTU will then drive PMTU discovery back down.
2925 	 */
2926 	if (rt->dst.dev == arg->dev &&
2927 	    dst_metric_raw(&rt->dst, RTAX_MTU) &&
2928 	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2929 		if (rt->rt6i_flags & RTF_CACHE) {
2930 			/* For RTF_CACHE with rt6i_pmtu == 0
2931 			 * (i.e. a redirected route),
2932 			 * the metrics of its rt->dst.from have
2933 			 * already been updated.
2934 			 */
2935 			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2936 				rt->rt6i_pmtu = arg->mtu;
2937 		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
2938 			   (dst_mtu(&rt->dst) < arg->mtu &&
2939 			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2940 			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2941 		}
2942 	}
2943 	return 0;
2944 }
2945 
2946 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2947 {
2948 	struct rt6_mtu_change_arg arg = {
2949 		.dev = dev,
2950 		.mtu = mtu,
2951 	};
2952 
2953 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2954 }
2955 
2956 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2957 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2958 	[RTA_OIF]               = { .type = NLA_U32 },
2959 	[RTA_IIF]		= { .type = NLA_U32 },
2960 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2961 	[RTA_METRICS]           = { .type = NLA_NESTED },
2962 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2963 	[RTA_PREF]              = { .type = NLA_U8 },
2964 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
2965 	[RTA_ENCAP]		= { .type = NLA_NESTED },
2966 	[RTA_EXPIRES]		= { .type = NLA_U32 },
2967 	[RTA_UID]		= { .type = NLA_U32 },
2968 	[RTA_MARK]		= { .type = NLA_U32 },
2969 };
2970 
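/* rtm_to_fib6_config() flattens an RTM_NEWROUTE/RTM_DELROUTE request
 * into a fib6_config. Attributes listed in rtm_ipv6_policy above are
 * validated by nlmsg_parse(); RTA_DST and RTA_SRC are not in the
 * policy, hence the explicit nla_len() checks against the prefix
 * length below.
 */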
2971 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2972 			      struct fib6_config *cfg,
2973 			      struct netlink_ext_ack *extack)
2974 {
2975 	struct rtmsg *rtm;
2976 	struct nlattr *tb[RTA_MAX+1];
2977 	unsigned int pref;
2978 	int err;
2979 
2980 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
2981 			  NULL);
2982 	if (err < 0)
2983 		goto errout;
2984 
2985 	err = -EINVAL;
2986 	rtm = nlmsg_data(nlh);
2987 	memset(cfg, 0, sizeof(*cfg));
2988 
2989 	cfg->fc_table = rtm->rtm_table;
2990 	cfg->fc_dst_len = rtm->rtm_dst_len;
2991 	cfg->fc_src_len = rtm->rtm_src_len;
2992 	cfg->fc_flags = RTF_UP;
2993 	cfg->fc_protocol = rtm->rtm_protocol;
2994 	cfg->fc_type = rtm->rtm_type;
2995 
2996 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2997 	    rtm->rtm_type == RTN_BLACKHOLE ||
2998 	    rtm->rtm_type == RTN_PROHIBIT ||
2999 	    rtm->rtm_type == RTN_THROW)
3000 		cfg->fc_flags |= RTF_REJECT;
3001 
3002 	if (rtm->rtm_type == RTN_LOCAL)
3003 		cfg->fc_flags |= RTF_LOCAL;
3004 
3005 	if (rtm->rtm_flags & RTM_F_CLONED)
3006 		cfg->fc_flags |= RTF_CACHE;
3007 
3008 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
3009 	cfg->fc_nlinfo.nlh = nlh;
3010 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
3011 
3012 	if (tb[RTA_GATEWAY]) {
3013 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
3014 		cfg->fc_flags |= RTF_GATEWAY;
3015 	}
3016 
3017 	if (tb[RTA_DST]) {
3018 		int plen = (rtm->rtm_dst_len + 7) >> 3;
3019 
3020 		if (nla_len(tb[RTA_DST]) < plen)
3021 			goto errout;
3022 
3023 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
3024 	}
3025 
3026 	if (tb[RTA_SRC]) {
3027 		int plen = (rtm->rtm_src_len + 7) >> 3;
3028 
3029 		if (nla_len(tb[RTA_SRC]) < plen)
3030 			goto errout;
3031 
3032 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
3033 	}
3034 
3035 	if (tb[RTA_PREFSRC])
3036 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
3037 
3038 	if (tb[RTA_OIF])
3039 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
3040 
3041 	if (tb[RTA_PRIORITY])
3042 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
3043 
3044 	if (tb[RTA_METRICS]) {
3045 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
3046 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
3047 	}
3048 
3049 	if (tb[RTA_TABLE])
3050 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
3051 
3052 	if (tb[RTA_MULTIPATH]) {
3053 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
3054 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
3055 
3056 		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
3057 						     cfg->fc_mp_len, extack);
3058 		if (err < 0)
3059 			goto errout;
3060 	}
3061 
3062 	if (tb[RTA_PREF]) {
3063 		pref = nla_get_u8(tb[RTA_PREF]);
3064 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
3065 		    pref != ICMPV6_ROUTER_PREF_HIGH)
3066 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
3067 		cfg->fc_flags |= RTF_PREF(pref);
3068 	}
3069 
3070 	if (tb[RTA_ENCAP])
3071 		cfg->fc_encap = tb[RTA_ENCAP];
3072 
3073 	if (tb[RTA_ENCAP_TYPE]) {
3074 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
3075 
3076 		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
3077 		if (err < 0)
3078 			goto errout;
3079 	}
3080 
3081 	if (tb[RTA_EXPIRES]) {
3082 		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
3083 
3084 		if (addrconf_finite_timeout(timeout)) {
3085 			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
3086 			cfg->fc_flags |= RTF_EXPIRES;
3087 		}
3088 	}
3089 
3090 	err = 0;
3091 errout:
3092 	return err;
3093 }
3094 
3095 struct rt6_nh {
3096 	struct rt6_info *rt6_info;
3097 	struct fib6_config r_cfg;
3098 	struct mx6_config mxc;
3099 	struct list_head next;
3100 };
3101 
3102 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3103 {
3104 	struct rt6_nh *nh;
3105 
3106 	list_for_each_entry(nh, rt6_nh_list, next) {
3107 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
3108 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
3109 		        nh->r_cfg.fc_ifindex);
3110 	}
3111 }
3112 
3113 static int ip6_route_info_append(struct list_head *rt6_nh_list,
3114 				 struct rt6_info *rt, struct fib6_config *r_cfg)
3115 {
3116 	struct rt6_nh *nh;
3117 	int err = -EEXIST;
3118 
3119 	list_for_each_entry(nh, rt6_nh_list, next) {
3120 		/* check if rt6_info already exists */
3121 		if (rt6_duplicate_nexthop(nh->rt6_info, rt))
3122 			return err;
3123 	}
3124 
3125 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
3126 	if (!nh)
3127 		return -ENOMEM;
3128 	nh->rt6_info = rt;
3129 	err = ip6_convert_metrics(&nh->mxc, r_cfg);
3130 	if (err) {
3131 		kfree(nh);
3132 		return err;
3133 	}
3134 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3135 	list_add_tail(&nh->next, rt6_nh_list);
3136 
3137 	return 0;
3138 }
3139 
3140 static void ip6_route_mpath_notify(struct rt6_info *rt,
3141 				   struct rt6_info *rt_last,
3142 				   struct nl_info *info,
3143 				   __u16 nlflags)
3144 {
3145 	/* If this is an APPEND route, rt points to the first route
3146 	 * inserted and rt_last to the last one. Userspace wants a
3147 	 * consistent dump of the route starting at the first nexthop.
3148 	 * Since sibling routes are always added at the end of the
3149 	 * list, find the first sibling of the last route appended.
3150 	 */
3151 	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
3152 		rt = list_first_entry(&rt_last->rt6i_siblings,
3153 				      struct rt6_info,
3154 				      rt6i_siblings);
3155 	}
3156 
3157 	if (rt)
3158 		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
3159 }
3160 
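/* Multipath add works in two phases: every rtnexthop in RTA_MULTIPATH
 * is first expanded into an rt6_nh (route plus metrics) on
 * rt6_nh_list, and only then is the list inserted route by route.
 * Keeping the parse phase separate means a malformed later nexthop
 * fails the request before anything has touched the FIB.
 */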
3161 static int ip6_route_multipath_add(struct fib6_config *cfg,
3162 				   struct netlink_ext_ack *extack)
3163 {
3164 	struct rt6_info *rt_notif = NULL, *rt_last = NULL;
3165 	struct nl_info *info = &cfg->fc_nlinfo;
3166 	struct fib6_config r_cfg;
3167 	struct rtnexthop *rtnh;
3168 	struct rt6_info *rt;
3169 	struct rt6_nh *err_nh;
3170 	struct rt6_nh *nh, *nh_safe;
3171 	__u16 nlflags;
3172 	int remaining;
3173 	int attrlen;
3174 	int err = 1;
3175 	int nhn = 0;
3176 	int replace = (cfg->fc_nlinfo.nlh &&
3177 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3178 	LIST_HEAD(rt6_nh_list);
3179 
3180 	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
3181 	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
3182 		nlflags |= NLM_F_APPEND;
3183 
3184 	remaining = cfg->fc_mp_len;
3185 	rtnh = (struct rtnexthop *)cfg->fc_mp;
3186 
3187 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
3188 	 * rt6_info structs per nexthop
3189 	 */
3190 	while (rtnh_ok(rtnh, remaining)) {
3191 		memcpy(&r_cfg, cfg, sizeof(*cfg));
3192 		if (rtnh->rtnh_ifindex)
3193 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3194 
3195 		attrlen = rtnh_attrlen(rtnh);
3196 		if (attrlen > 0) {
3197 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3198 
3199 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3200 			if (nla) {
3201 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
3202 				r_cfg.fc_flags |= RTF_GATEWAY;
3203 			}
3204 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3205 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3206 			if (nla)
3207 				r_cfg.fc_encap_type = nla_get_u16(nla);
3208 		}
3209 
3210 		rt = ip6_route_info_create(&r_cfg, extack);
3211 		if (IS_ERR(rt)) {
3212 			err = PTR_ERR(rt);
3213 			rt = NULL;
3214 			goto cleanup;
3215 		}
3216 
3217 		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
3218 		if (err) {
3219 			dst_release_immediate(&rt->dst);
3220 			goto cleanup;
3221 		}
3222 
3223 		rtnh = rtnh_next(rtnh, &remaining);
3224 	}
3225 
3226 	/* for add and replace send one notification with all nexthops.
3227 	 * Skip the notification in fib6_add_rt2node and send one with
3228 	 * the full route when done
3229 	 */
3230 	info->skip_notify = 1;
3231 
3232 	err_nh = NULL;
3233 	list_for_each_entry(nh, &rt6_nh_list, next) {
3234 		rt_last = nh->rt6_info;
3235 		err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
3236 		/* save reference to first route for notification */
3237 		if (!rt_notif && !err)
3238 			rt_notif = nh->rt6_info;
3239 
3240 		/* nh->rt6_info is used or freed at this point, reset to NULL */
3241 		nh->rt6_info = NULL;
3242 		if (err) {
3243 			if (replace && nhn)
3244 				ip6_print_replace_route_err(&rt6_nh_list);
3245 			err_nh = nh;
3246 			goto add_errout;
3247 		}
3248 
3249 		/* Because each route is added as a single route, we clear
3250 		 * these flags after the first nexthop: if there is a
3251 		 * collision we have already failed to add the first nexthop
3252 		 * (fib6_add_rt2node() rejected it); when replacing, the old
3253 		 * nexthops have been replaced by the first new one, and the
3254 		 * rest should be appended to it.
3255 		 */
3256 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3257 						     NLM_F_REPLACE);
3258 		nhn++;
3259 	}
3260 
3261 	/* success ... tell user about new route */
3262 	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3263 	goto cleanup;
3264 
3265 add_errout:
3266 	/* send notification for routes that were added so that
3267 	 * the delete notifications sent by ip6_route_del are
3268 	 * coherent
3269 	 */
3270 	if (rt_notif)
3271 		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3272 
3273 	/* Delete routes that were already added */
3274 	list_for_each_entry(nh, &rt6_nh_list, next) {
3275 		if (err_nh == nh)
3276 			break;
3277 		ip6_route_del(&nh->r_cfg, extack);
3278 	}
3279 
3280 cleanup:
3281 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
3282 		if (nh->rt6_info)
3283 			dst_release_immediate(&nh->rt6_info->dst);
3284 		kfree(nh->mxc.mx);
3285 		list_del(&nh->next);
3286 		kfree(nh);
3287 	}
3288 
3289 	return err;
3290 }
3291 
3292 static int ip6_route_multipath_del(struct fib6_config *cfg,
3293 				   struct netlink_ext_ack *extack)
3294 {
3295 	struct fib6_config r_cfg;
3296 	struct rtnexthop *rtnh;
3297 	int remaining;
3298 	int attrlen;
3299 	int err = 1, last_err = 0;
3300 
3301 	remaining = cfg->fc_mp_len;
3302 	rtnh = (struct rtnexthop *)cfg->fc_mp;
3303 
3304 	/* Parse a Multipath Entry */
3305 	while (rtnh_ok(rtnh, remaining)) {
3306 		memcpy(&r_cfg, cfg, sizeof(*cfg));
3307 		if (rtnh->rtnh_ifindex)
3308 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3309 
3310 		attrlen = rtnh_attrlen(rtnh);
3311 		if (attrlen > 0) {
3312 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3313 
3314 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3315 			if (nla) {
3316 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3317 				r_cfg.fc_flags |= RTF_GATEWAY;
3318 			}
3319 		}
3320 		err = ip6_route_del(&r_cfg, extack);
3321 		if (err)
3322 			last_err = err;
3323 
3324 		rtnh = rtnh_next(rtnh, &remaining);
3325 	}
3326 
3327 	return last_err;
3328 }
3329 
3330 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3331 			      struct netlink_ext_ack *extack)
3332 {
3333 	struct fib6_config cfg;
3334 	int err;
3335 
3336 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
3337 	if (err < 0)
3338 		return err;
3339 
3340 	if (cfg.fc_mp)
3341 		return ip6_route_multipath_del(&cfg, extack);
3342 	else {
3343 		cfg.fc_delete_all_nh = 1;
3344 		return ip6_route_del(&cfg, extack);
3345 	}
3346 }
3347 
3348 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3349 			      struct netlink_ext_ack *extack)
3350 {
3351 	struct fib6_config cfg;
3352 	int err;
3353 
3354 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
3355 	if (err < 0)
3356 		return err;
3357 
3358 	if (cfg.fc_mp)
3359 		return ip6_route_multipath_add(&cfg, extack);
3360 	else
3361 		return ip6_route_add(&cfg, extack);
3362 }
3363 
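/* rt6_nlmsg_size() computes a worst-case allocation for one route
 * notification: every optional attribute is counted whether or not it
 * will actually be emitted. Overestimating only wastes a little skb
 * space, while underestimating would make rt6_fill_node() fail with
 * -EMSGSIZE (see the WARN_ON in inet6_rt_notify()).
 */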
3364 static size_t rt6_nlmsg_size(struct rt6_info *rt)
3365 {
3366 	int nexthop_len = 0;
3367 
3368 	if (rt->rt6i_nsiblings) {
3369 		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
3370 			    + NLA_ALIGN(sizeof(struct rtnexthop))
3371 			    + nla_total_size(16) /* RTA_GATEWAY */
3372 			    + lwtunnel_get_encap_size(rt->dst.lwtstate);
3373 
3374 		nexthop_len *= rt->rt6i_nsiblings;
3375 	}
3376 
3377 	return NLMSG_ALIGN(sizeof(struct rtmsg))
3378 	       + nla_total_size(16) /* RTA_SRC */
3379 	       + nla_total_size(16) /* RTA_DST */
3380 	       + nla_total_size(16) /* RTA_GATEWAY */
3381 	       + nla_total_size(16) /* RTA_PREFSRC */
3382 	       + nla_total_size(4) /* RTA_TABLE */
3383 	       + nla_total_size(4) /* RTA_IIF */
3384 	       + nla_total_size(4) /* RTA_OIF */
3385 	       + nla_total_size(4) /* RTA_PRIORITY */
3386 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3387 	       + nla_total_size(sizeof(struct rta_cacheinfo))
3388 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3389 	       + nla_total_size(1) /* RTA_PREF */
3390 	       + lwtunnel_get_encap_size(rt->dst.lwtstate)
3391 	       + nexthop_len;
3392 }
3393 
3394 static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
3395 			    unsigned int *flags, bool skip_oif)
3396 {
3397 	if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
3398 		*flags |= RTNH_F_LINKDOWN;
3399 		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3400 			*flags |= RTNH_F_DEAD;
3401 	}
3402 
3403 	if (rt->rt6i_flags & RTF_GATEWAY) {
3404 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3405 			goto nla_put_failure;
3406 	}
3407 
3408 	if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
3409 		*flags |= RTNH_F_OFFLOAD;
3410 
3411 	/* not needed for multipath encoding b/c it has a rtnexthop struct */
3412 	if (!skip_oif && rt->dst.dev &&
3413 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3414 		goto nla_put_failure;
3415 
3416 	if (rt->dst.lwtstate &&
3417 	    lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3418 		goto nla_put_failure;
3419 
3420 	return 0;
3421 
3422 nla_put_failure:
3423 	return -EMSGSIZE;
3424 }
3425 
3426 /* add multipath next hop */
3427 static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
3428 {
3429 	struct rtnexthop *rtnh;
3430 	unsigned int flags = 0;
3431 
3432 	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
3433 	if (!rtnh)
3434 		goto nla_put_failure;
3435 
3436 	rtnh->rtnh_hops = 0;
3437 	rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
3438 
3439 	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
3440 		goto nla_put_failure;
3441 
3442 	rtnh->rtnh_flags = flags;
3443 
3444 	/* length of rtnetlink header + attributes */
3445 	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
3446 
3447 	return 0;
3448 
3449 nla_put_failure:
3450 	return -EMSGSIZE;
3451 }
3452 
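/* rt6_fill_node() serializes one rt6_info as an rtmsg plus RTA_*
 * attributes. The reject-route error code is mapped back to a route
 * type (-EINVAL -> blackhole, -EACCES -> prohibit, -EAGAIN -> throw,
 * anything else -> unreachable), mirroring the forward mapping in
 * ip6_route_info_create().
 */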
3453 static int rt6_fill_node(struct net *net,
3454 			 struct sk_buff *skb, struct rt6_info *rt,
3455 			 struct in6_addr *dst, struct in6_addr *src,
3456 			 int iif, int type, u32 portid, u32 seq,
3457 			 unsigned int flags)
3458 {
3459 	u32 metrics[RTAX_MAX];
3460 	struct rtmsg *rtm;
3461 	struct nlmsghdr *nlh;
3462 	long expires;
3463 	u32 table;
3464 
3465 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3466 	if (!nlh)
3467 		return -EMSGSIZE;
3468 
3469 	rtm = nlmsg_data(nlh);
3470 	rtm->rtm_family = AF_INET6;
3471 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
3472 	rtm->rtm_src_len = rt->rt6i_src.plen;
3473 	rtm->rtm_tos = 0;
3474 	if (rt->rt6i_table)
3475 		table = rt->rt6i_table->tb6_id;
3476 	else
3477 		table = RT6_TABLE_UNSPEC;
3478 	rtm->rtm_table = table;
3479 	if (nla_put_u32(skb, RTA_TABLE, table))
3480 		goto nla_put_failure;
3481 	if (rt->rt6i_flags & RTF_REJECT) {
3482 		switch (rt->dst.error) {
3483 		case -EINVAL:
3484 			rtm->rtm_type = RTN_BLACKHOLE;
3485 			break;
3486 		case -EACCES:
3487 			rtm->rtm_type = RTN_PROHIBIT;
3488 			break;
3489 		case -EAGAIN:
3490 			rtm->rtm_type = RTN_THROW;
3491 			break;
3492 		default:
3493 			rtm->rtm_type = RTN_UNREACHABLE;
3494 			break;
3495 		}
3496 	}
3497 	else if (rt->rt6i_flags & RTF_LOCAL)
3498 		rtm->rtm_type = RTN_LOCAL;
3499 	else if (rt->rt6i_flags & RTF_ANYCAST)
3500 		rtm->rtm_type = RTN_ANYCAST;
3501 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3502 		rtm->rtm_type = RTN_LOCAL;
3503 	else
3504 		rtm->rtm_type = RTN_UNICAST;
3505 	rtm->rtm_flags = 0;
3506 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3507 	rtm->rtm_protocol = rt->rt6i_protocol;
3508 
3509 	if (rt->rt6i_flags & RTF_CACHE)
3510 		rtm->rtm_flags |= RTM_F_CLONED;
3511 
3512 	if (dst) {
3513 		if (nla_put_in6_addr(skb, RTA_DST, dst))
3514 			goto nla_put_failure;
3515 		rtm->rtm_dst_len = 128;
3516 	} else if (rtm->rtm_dst_len)
3517 		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3518 			goto nla_put_failure;
3519 #ifdef CONFIG_IPV6_SUBTREES
3520 	if (src) {
3521 		if (nla_put_in6_addr(skb, RTA_SRC, src))
3522 			goto nla_put_failure;
3523 		rtm->rtm_src_len = 128;
3524 	} else if (rtm->rtm_src_len &&
3525 		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3526 		goto nla_put_failure;
3527 #endif
3528 	if (iif) {
3529 #ifdef CONFIG_IPV6_MROUTE
3530 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3531 			int err = ip6mr_get_route(net, skb, rtm, portid);
3532 
3533 			if (err == 0)
3534 				return 0;
3535 			if (err < 0)
3536 				goto nla_put_failure;
3537 		} else
3538 #endif
3539 			if (nla_put_u32(skb, RTA_IIF, iif))
3540 				goto nla_put_failure;
3541 	} else if (dst) {
3542 		struct in6_addr saddr_buf;
3543 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3544 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3545 			goto nla_put_failure;
3546 	}
3547 
3548 	if (rt->rt6i_prefsrc.plen) {
3549 		struct in6_addr saddr_buf;
3550 		saddr_buf = rt->rt6i_prefsrc.addr;
3551 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3552 			goto nla_put_failure;
3553 	}
3554 
3555 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3556 	if (rt->rt6i_pmtu)
3557 		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3558 	if (rtnetlink_put_metrics(skb, metrics) < 0)
3559 		goto nla_put_failure;
3560 
3561 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3562 		goto nla_put_failure;
3563 
3564 	/* For multipath routes, walk the siblings list and add
3565 	 * each as a nexthop within RTA_MULTIPATH.
3566 	 */
3567 	if (rt->rt6i_nsiblings) {
3568 		struct rt6_info *sibling, *next_sibling;
3569 		struct nlattr *mp;
3570 
3571 		mp = nla_nest_start(skb, RTA_MULTIPATH);
3572 		if (!mp)
3573 			goto nla_put_failure;
3574 
3575 		if (rt6_add_nexthop(skb, rt) < 0)
3576 			goto nla_put_failure;
3577 
3578 		list_for_each_entry_safe(sibling, next_sibling,
3579 					 &rt->rt6i_siblings, rt6i_siblings) {
3580 			if (rt6_add_nexthop(skb, sibling) < 0)
3581 				goto nla_put_failure;
3582 		}
3583 
3584 		nla_nest_end(skb, mp);
3585 	} else {
3586 		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
3587 			goto nla_put_failure;
3588 	}
3589 
3590 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3591 
3592 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3593 		goto nla_put_failure;
3594 
3595 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3596 		goto nla_put_failure;
3597 
3598 
3599 	nlmsg_end(skb, nlh);
3600 	return 0;
3601 
3602 nla_put_failure:
3603 	nlmsg_cancel(skb, nlh);
3604 	return -EMSGSIZE;
3605 }
3606 
3607 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3608 {
3609 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3610 	struct net *net = arg->net;
3611 
3612 	if (rt == net->ipv6.ip6_null_entry)
3613 		return 0;
3614 
3615 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3616 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3617 
3618 		/* user wants prefix routes only */
3619 		if (rtm->rtm_flags & RTM_F_PREFIX &&
3620 		    !(rt->rt6i_flags & RTF_PREFIX_RT)) {
3621 			/* success since this is not a prefix route */
3622 			return 1;
3623 		}
3624 	}
3625 
3626 	return rt6_fill_node(net,
3627 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3628 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3629 		     NLM_F_MULTI);
3630 }
3631 
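/* RTM_GETROUTE handler. With RTM_F_FIB_MATCH set, userspace gets the
 * matching FIB entry itself via ip6_route_lookup(); without it the
 * answer reflects an actual forwarding decision, including the
 * input-path lookup when RTA_IIF names the receiving device.
 */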
3632 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3633 			      struct netlink_ext_ack *extack)
3634 {
3635 	struct net *net = sock_net(in_skb->sk);
3636 	struct nlattr *tb[RTA_MAX+1];
3637 	int err, iif = 0, oif = 0;
3638 	struct dst_entry *dst;
3639 	struct rt6_info *rt;
3640 	struct sk_buff *skb;
3641 	struct rtmsg *rtm;
3642 	struct flowi6 fl6;
3643 	bool fibmatch;
3644 
3645 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
3646 			  extack);
3647 	if (err < 0)
3648 		goto errout;
3649 
3650 	err = -EINVAL;
3651 	memset(&fl6, 0, sizeof(fl6));
3652 	rtm = nlmsg_data(nlh);
3653 	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
3654 	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
3655 
3656 	if (tb[RTA_SRC]) {
3657 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3658 			goto errout;
3659 
3660 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3661 	}
3662 
3663 	if (tb[RTA_DST]) {
3664 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3665 			goto errout;
3666 
3667 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3668 	}
3669 
3670 	if (tb[RTA_IIF])
3671 		iif = nla_get_u32(tb[RTA_IIF]);
3672 
3673 	if (tb[RTA_OIF])
3674 		oif = nla_get_u32(tb[RTA_OIF]);
3675 
3676 	if (tb[RTA_MARK])
3677 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3678 
3679 	if (tb[RTA_UID])
3680 		fl6.flowi6_uid = make_kuid(current_user_ns(),
3681 					   nla_get_u32(tb[RTA_UID]));
3682 	else
3683 		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
3684 
3685 	if (iif) {
3686 		struct net_device *dev;
3687 		int flags = 0;
3688 
3689 		rcu_read_lock();
3690 
3691 		dev = dev_get_by_index_rcu(net, iif);
3692 		if (!dev) {
3693 			rcu_read_unlock();
3694 			err = -ENODEV;
3695 			goto errout;
3696 		}
3697 
3698 		fl6.flowi6_iif = iif;
3699 
3700 		if (!ipv6_addr_any(&fl6.saddr))
3701 			flags |= RT6_LOOKUP_F_HAS_SADDR;
3702 
3703 		if (!fibmatch)
3704 			dst = ip6_route_input_lookup(net, dev, &fl6, flags);
3705 		else
3706 			dst = ip6_route_lookup(net, &fl6, 0);
3707 
3708 		rcu_read_unlock();
3709 	} else {
3710 		fl6.flowi6_oif = oif;
3711 
3712 		if (!fibmatch)
3713 			dst = ip6_route_output(net, NULL, &fl6);
3714 		else
3715 			dst = ip6_route_lookup(net, &fl6, 0);
3716 	}
3717 
3718 
3719 	rt = container_of(dst, struct rt6_info, dst);
3720 	if (rt->dst.error) {
3721 		err = rt->dst.error;
3722 		ip6_rt_put(rt);
3723 		goto errout;
3724 	}
3725 
3726 	if (rt == net->ipv6.ip6_null_entry) {
3727 		err = rt->dst.error;
3728 		ip6_rt_put(rt);
3729 		goto errout;
3730 	}
3731 
3732 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3733 	if (!skb) {
3734 		ip6_rt_put(rt);
3735 		err = -ENOBUFS;
3736 		goto errout;
3737 	}
3738 
3739 	skb_dst_set(skb, &rt->dst);
3740 	if (fibmatch)
3741 		err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
3742 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3743 				    nlh->nlmsg_seq, 0);
3744 	else
3745 		err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3746 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3747 				    nlh->nlmsg_seq, 0);
3748 	if (err < 0) {
3749 		kfree_skb(skb);
3750 		goto errout;
3751 	}
3752 
3753 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3754 errout:
3755 	return err;
3756 }
3757 
3758 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3759 		     unsigned int nlm_flags)
3760 {
3761 	struct sk_buff *skb;
3762 	struct net *net = info->nl_net;
3763 	u32 seq;
3764 	int err;
3765 
3766 	err = -ENOBUFS;
3767 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3768 
3769 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3770 	if (!skb)
3771 		goto errout;
3772 
3773 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3774 				event, info->portid, seq, nlm_flags);
3775 	if (err < 0) {
3776 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3777 		WARN_ON(err == -EMSGSIZE);
3778 		kfree_skb(skb);
3779 		goto errout;
3780 	}
3781 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3782 		    info->nlh, gfp_any());
3783 	return;
3784 errout:
3785 	if (err < 0)
3786 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3787 }
3788 
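/* The null/prohibit/blackhole template entries have no natural
 * device, so they borrow the namespace's loopback device: attached on
 * NETDEV_REGISTER and detached exactly once on NETDEV_UNREGISTER
 * below.
 */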
3789 static int ip6_route_dev_notify(struct notifier_block *this,
3790 				unsigned long event, void *ptr)
3791 {
3792 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3793 	struct net *net = dev_net(dev);
3794 
3795 	if (!(dev->flags & IFF_LOOPBACK))
3796 		return NOTIFY_OK;
3797 
3798 	if (event == NETDEV_REGISTER) {
3799 		net->ipv6.ip6_null_entry->dst.dev = dev;
3800 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3801 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3802 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3803 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3804 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3805 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3806 #endif
3807 	} else if (event == NETDEV_UNREGISTER &&
3808 		    dev->reg_state != NETREG_UNREGISTERED) {
3809 		/* NETDEV_UNREGISTER could be fired multiple times by
3810 		 * netdev_wait_allrefs(). Make sure we only call this once.
3811 		 */
3812 		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
3813 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3814 		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
3815 		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
3816 #endif
3817 	}
3818 
3819 	return NOTIFY_OK;
3820 }
3821 
3822 /*
3823  *	/proc
3824  */
3825 
3826 #ifdef CONFIG_PROC_FS
3827 
3828 static const struct file_operations ipv6_route_proc_fops = {
3829 	.owner		= THIS_MODULE,
3830 	.open		= ipv6_route_open,
3831 	.read		= seq_read,
3832 	.llseek		= seq_lseek,
3833 	.release	= seq_release_net,
3834 };
3835 
3836 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3837 {
3838 	struct net *net = (struct net *)seq->private;
3839 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3840 		   net->ipv6.rt6_stats->fib_nodes,
3841 		   net->ipv6.rt6_stats->fib_route_nodes,
3842 		   net->ipv6.rt6_stats->fib_rt_alloc,
3843 		   net->ipv6.rt6_stats->fib_rt_entries,
3844 		   net->ipv6.rt6_stats->fib_rt_cache,
3845 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3846 		   net->ipv6.rt6_stats->fib_discarded_routes);
3847 
3848 	return 0;
3849 }
3850 
3851 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3852 {
3853 	return single_open_net(inode, file, rt6_stats_seq_show);
3854 }
3855 
3856 static const struct file_operations rt6_stats_seq_fops = {
3857 	.owner	 = THIS_MODULE,
3858 	.open	 = rt6_stats_seq_open,
3859 	.read	 = seq_read,
3860 	.llseek	 = seq_lseek,
3861 	.release = single_release_net,
3862 };
3863 #endif	/* CONFIG_PROC_FS */
3864 
3865 #ifdef CONFIG_SYSCTL
3866 
3867 static
3868 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3869 			      void __user *buffer, size_t *lenp, loff_t *ppos)
3870 {
3871 	struct net *net;
3872 	int delay;
3873 	if (!write)
3874 		return -EINVAL;
3875 
3876 	net = (struct net *)ctl->extra1;
3877 	delay = net->ipv6.sysctl.flush_delay;
3878 	proc_dointvec(ctl, write, buffer, lenp, ppos);
3879 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3880 	return 0;
3881 }
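
/* This handler is a write-only trigger.  Illustrative usage:
 *
 *	# echo 0 > /proc/sys/net/ipv6/route/flush
 *
 * Two subtleties are visible above: delay is sampled *before*
 * proc_dointvec() stores the newly written value, so the gc pass uses the
 * previous flush_delay, and proc_dointvec()'s return value is ignored.
 * A positive delay requests a forced gc with that timeout in jiffies;
 * delay <= 0 requests a non-forced pass with the default timeout.
 */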
3882 
3883 struct ctl_table ipv6_route_table_template[] = {
3884 	{
3885 		.procname	=	"flush",
3886 		.data		=	&init_net.ipv6.sysctl.flush_delay,
3887 		.maxlen		=	sizeof(int),
3888 		.mode		=	0200,
3889 		.proc_handler	=	ipv6_sysctl_rtcache_flush
3890 	},
3891 	{
3892 		.procname	=	"gc_thresh",
3893 		.data		=	&ip6_dst_ops_template.gc_thresh,
3894 		.maxlen		=	sizeof(int),
3895 		.mode		=	0644,
3896 		.proc_handler	=	proc_dointvec,
3897 	},
3898 	{
3899 		.procname	=	"max_size",
3900 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
3901 		.maxlen		=	sizeof(int),
3902 		.mode		=	0644,
3903 		.proc_handler	=	proc_dointvec,
3904 	},
3905 	{
3906 		.procname	=	"gc_min_interval",
3907 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3908 		.maxlen		=	sizeof(int),
3909 		.mode		=	0644,
3910 		.proc_handler	=	proc_dointvec_jiffies,
3911 	},
3912 	{
3913 		.procname	=	"gc_timeout",
3914 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3915 		.maxlen		=	sizeof(int),
3916 		.mode		=	0644,
3917 		.proc_handler	=	proc_dointvec_jiffies,
3918 	},
3919 	{
3920 		.procname	=	"gc_interval",
3921 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
3922 		.maxlen		=	sizeof(int),
3923 		.mode		=	0644,
3924 		.proc_handler	=	proc_dointvec_jiffies,
3925 	},
3926 	{
3927 		.procname	=	"gc_elasticity",
3928 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3929 		.maxlen		=	sizeof(int),
3930 		.mode		=	0644,
3931 		.proc_handler	=	proc_dointvec,
3932 	},
3933 	{
3934 		.procname	=	"mtu_expires",
3935 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3936 		.maxlen		=	sizeof(int),
3937 		.mode		=	0644,
3938 		.proc_handler	=	proc_dointvec_jiffies,
3939 	},
3940 	{
3941 		.procname	=	"min_adv_mss",
3942 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
3943 		.maxlen		=	sizeof(int),
3944 		.mode		=	0644,
3945 		.proc_handler	=	proc_dointvec,
3946 	},
3947 	{
3948 		.procname	=	"gc_min_interval_ms",
3949 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3950 		.maxlen		=	sizeof(int),
3951 		.mode		=	0644,
3952 		.proc_handler	=	proc_dointvec_ms_jiffies,
3953 	},
3954 	{ }
3955 };
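
/* Once registered, these entries appear under /proc/sys/net/ipv6/route/
 * (equivalently the net.ipv6.route.* sysctl namespace), e.g.
 * (illustrative output):
 *
 *	$ sysctl net.ipv6.route.gc_timeout
 *	net.ipv6.route.gc_timeout = 60
 *
 * "flush" is mode 0200, i.e. a write-only trigger; the *_jiffies and
 * *_ms_jiffies handlers convert between seconds or milliseconds in
 * userspace and jiffies in the kernel, which is why gc_min_interval and
 * gc_min_interval_ms can share the same backing field.
 */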
3956 
3957 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3958 {
3959 	struct ctl_table *table;
3960 
3961 	table = kmemdup(ipv6_route_table_template,
3962 			sizeof(ipv6_route_table_template),
3963 			GFP_KERNEL);
3964 
3965 	if (table) {
3966 		table[0].data = &net->ipv6.sysctl.flush_delay;
3967 		table[0].extra1 = net;
3968 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3969 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3970 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3971 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3972 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3973 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3974 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3975 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3976 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3977 
3978 		/* Don't export sysctls to unprivileged users */
3979 		if (net->user_ns != &init_user_ns)
3980 			table[0].procname = NULL;
3981 	}
3982 
3983 	return table;
3984 }
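
/* Per-netns pattern: each namespace receives its own kmemdup() copy of
 * the template with every .data pointer rebound to that namespace's
 * fields, so a write in one netns never affects another.  A sketch of how
 * the returned table is typically consumed (the actual caller lives
 * elsewhere in the IPv6 stack):
 *
 *	struct ctl_table *tbl = ipv6_route_sysctl_init(net);
 *	if (tbl)
 *		hdr = register_net_sysctl(net, "net/ipv6/route", tbl);
 *
 * Clearing table[0].procname makes the table look empty to the sysctl
 * core, so namespaces owned by non-init user namespaces see none of
 * these knobs.
 */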
3985 #endif
3986 
3987 static int __net_init ip6_route_net_init(struct net *net)
3988 {
3989 	int ret = -ENOMEM;
3990 
3991 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3992 	       sizeof(net->ipv6.ip6_dst_ops));
3993 
3994 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3995 		goto out_ip6_dst_ops;
3996 
3997 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3998 					   sizeof(*net->ipv6.ip6_null_entry),
3999 					   GFP_KERNEL);
4000 	if (!net->ipv6.ip6_null_entry)
4001 		goto out_ip6_dst_entries;
4002 	net->ipv6.ip6_null_entry->dst.path =
4003 		(struct dst_entry *)net->ipv6.ip6_null_entry;
4004 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
4005 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
4006 			 ip6_template_metrics, true);
4007 
4008 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4009 	net->ipv6.fib6_has_custom_rules = false;
4010 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
4011 					       sizeof(*net->ipv6.ip6_prohibit_entry),
4012 					       GFP_KERNEL);
4013 	if (!net->ipv6.ip6_prohibit_entry)
4014 		goto out_ip6_null_entry;
4015 	net->ipv6.ip6_prohibit_entry->dst.path =
4016 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
4017 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
4018 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
4019 			 ip6_template_metrics, true);
4020 
4021 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
4022 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
4023 					       GFP_KERNEL);
4024 	if (!net->ipv6.ip6_blk_hole_entry)
4025 		goto out_ip6_prohibit_entry;
4026 	net->ipv6.ip6_blk_hole_entry->dst.path =
4027 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
4028 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
4029 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
4030 			 ip6_template_metrics, true);
4031 #endif
4032 
4033 	net->ipv6.sysctl.flush_delay = 0;
4034 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
4035 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
4036 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
4037 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
4038 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
4039 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
4040 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
4041 
4042 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
4043 
4044 	ret = 0;
4045 out:
4046 	return ret;
4047 
4048 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4049 out_ip6_prohibit_entry:
4050 	kfree(net->ipv6.ip6_prohibit_entry);
4051 out_ip6_null_entry:
4052 	kfree(net->ipv6.ip6_null_entry);
4053 #endif
4054 out_ip6_dst_entries:
4055 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
4056 out_ip6_dst_ops:
4057 	goto out;
4058 }
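
/* The error handling above is the usual kernel unwind ladder: each
 * failure jumps to a label that releases only what was successfully set
 * up, in reverse order, while the success path returns through "out"
 * with ret == 0.  Schematically:
 *
 *	a = alloc_a();
 *	if (!a)
 *		goto out;
 *	b = alloc_b();
 *	if (!b)
 *		goto free_a;
 *	return 0;
 * free_a:
 *	kfree(a);
 * out:
 *	return ret;
 */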
4059 
4060 static void __net_exit ip6_route_net_exit(struct net *net)
4061 {
4062 	kfree(net->ipv6.ip6_null_entry);
4063 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4064 	kfree(net->ipv6.ip6_prohibit_entry);
4065 	kfree(net->ipv6.ip6_blk_hole_entry);
4066 #endif
4067 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
4068 }
4069 
4070 static int __net_init ip6_route_net_init_late(struct net *net)
4071 {
4072 #ifdef CONFIG_PROC_FS
4073 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
4074 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
4075 #endif
4076 	return 0;
4077 }
4078 
4079 static void __net_exit ip6_route_net_exit_late(struct net *net)
4080 {
4081 #ifdef CONFIG_PROC_FS
4082 	remove_proc_entry("ipv6_route", net->proc_net);
4083 	remove_proc_entry("rt6_stats", net->proc_net);
4084 #endif
4085 }
4086 
4087 static struct pernet_operations ip6_route_net_ops = {
4088 	.init = ip6_route_net_init,
4089 	.exit = ip6_route_net_exit,
4090 };
4091 
4092 static int __net_init ipv6_inetpeer_init(struct net *net)
4093 {
4094 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4095 
4096 	if (!bp)
4097 		return -ENOMEM;
4098 	inet_peer_base_init(bp);
4099 	net->ipv6.peers = bp;
4100 	return 0;
4101 }
4102 
4103 static void __net_exit ipv6_inetpeer_exit(struct net *net)
4104 {
4105 	struct inet_peer_base *bp = net->ipv6.peers;
4106 
4107 	net->ipv6.peers = NULL;
4108 	inetpeer_invalidate_tree(bp);
4109 	kfree(bp);
4110 }
4111 
4112 static struct pernet_operations ipv6_inetpeer_ops = {
4113 	.init	=	ipv6_inetpeer_init,
4114 	.exit	=	ipv6_inetpeer_exit,
4115 };
4116 
4117 static struct pernet_operations ip6_route_net_late_ops = {
4118 	.init = ip6_route_net_init_late,
4119 	.exit = ip6_route_net_exit_late,
4120 };
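
/* pernet_operations tie init/exit hooks to network-namespace lifetime:
 * .init runs for each namespace as it is created (including init_net at
 * registration time), .exit as it is dismantled.  Minimal shape, assuming
 * hypothetical my_net_init()/my_net_exit():
 *
 *	static struct pernet_operations my_ops = {
 *		.init = my_net_init,
 *		.exit = my_net_exit,
 *	};
 *	err = register_pernet_subsys(&my_ops);
 *
 * The separate "late" ops let the /proc entries above be created only
 * after the rest of the namespace's IPv6 routing state exists.
 */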
4121 
4122 static struct notifier_block ip6_route_dev_notifier = {
4123 	.notifier_call = ip6_route_dev_notify,
4124 	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
4125 };
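
/* Notifier priority note: callbacks with higher priority run first, so
 * ADDRCONF_NOTIFY_PRIORITY - 10 makes this handler run after addrconf's
 * own notifier has processed the same netdev event.
 */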
4126 
4127 void __init ip6_route_init_special_entries(void)
4128 {
4129 	/* The loopback device is registered before this code runs, so the
4130 	 * rt6_info loopback references were never taken; take them by
4131 	 * hand for init_net. */
4132 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
4133 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4134 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4135 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
4136 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4137 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
4138 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4139 #endif
4140 }
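
/* The special entries were kmemdup()ed in ip6_route_net_init() before any
 * NETDEV_REGISTER event could reach ip6_route_dev_notify(), so for
 * init_net the dev/idev wiring is done by hand here.  Each in6_dev_get()
 * takes a reference that ip6_route_dev_notify() later drops via
 * in6_dev_put_clear() when loopback is unregistered.
 */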
4141 
4142 int __init ip6_route_init(void)
4143 {
4144 	int ret;
4145 	int cpu;
4146 
4147 	ret = -ENOMEM;
4148 	ip6_dst_ops_template.kmem_cachep =
4149 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
4150 				  SLAB_HWCACHE_ALIGN, NULL);
4151 	if (!ip6_dst_ops_template.kmem_cachep)
4152 		goto out;
4153 
4154 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
4155 	if (ret)
4156 		goto out_kmem_cache;
4157 
4158 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
4159 	if (ret)
4160 		goto out_dst_entries;
4161 
4162 	ret = register_pernet_subsys(&ip6_route_net_ops);
4163 	if (ret)
4164 		goto out_register_inetpeer;
4165 
4166 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
4167 
4168 	ret = fib6_init();
4169 	if (ret)
4170 		goto out_register_subsys;
4171 
4172 	ret = xfrm6_init();
4173 	if (ret)
4174 		goto out_fib6_init;
4175 
4176 	ret = fib6_rules_init();
4177 	if (ret)
4178 		goto xfrm6_init;
4179 
4180 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
4181 	if (ret)
4182 		goto fib6_rules_init;
4183 
4184 	ret = -ENOBUFS;
4185 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
4186 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
4187 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
4188 			    RTNL_FLAG_DOIT_UNLOCKED))
4189 		goto out_register_late_subsys;
4190 
4191 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
4192 	if (ret)
4193 		goto out_register_late_subsys;
4194 
4195 	for_each_possible_cpu(cpu) {
4196 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
4197 
4198 		INIT_LIST_HEAD(&ul->head);
4199 		spin_lock_init(&ul->lock);
4200 	}
4201 
4202 out:
4203 	return ret;
4204 
4205 out_register_late_subsys:
4206 	unregister_pernet_subsys(&ip6_route_net_late_ops);
4207 fib6_rules_init:
4208 	fib6_rules_cleanup();
4209 xfrm6_init:
4210 	xfrm6_fini();
4211 out_fib6_init:
4212 	fib6_gc_cleanup();
4213 out_register_subsys:
4214 	unregister_pernet_subsys(&ip6_route_net_ops);
4215 out_register_inetpeer:
4216 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
4217 out_dst_entries:
4218 	dst_entries_destroy(&ip6_dst_blackhole_ops);
4219 out_kmem_cache:
4220 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
4221 	goto out;
4222 }
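
/* Teardown note: ip6_route_cleanup() below releases everything acquired
 * here in the reverse order of registration, from the netdevice notifier
 * back down to the dst kmem cache.
 */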
4223 
4224 void ip6_route_cleanup(void)
4225 {
4226 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
4227 	unregister_pernet_subsys(&ip6_route_net_late_ops);
4228 	fib6_rules_cleanup();
4229 	xfrm6_fini();
4230 	fib6_gc_cleanup();
4231 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
4232 	unregister_pernet_subsys(&ip6_route_net_ops);
4233 	dst_entries_destroy(&ip6_dst_blackhole_ops);
4234 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
4235 }
4236