xref: /openbmc/linux/net/ipv6/route.c (revision c819e2cf)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61 
62 #include <asm/uaccess.h>
63 
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67 
/*
 * Result codes of the neighbour reachability check, also passed through
 * route scoring.  Every failure code is negative; success is positive.
 */
enum rt6_nud_state {
	RT6_NUD_SUCCEED		=  1,
	RT6_NUD_FAIL_DO_RR	= -1,	/* scored as 0, requests round-robin */
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_HARD	= -3,	/* candidate is skipped entirely */
};
74 
75 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
76 				    const struct in6_addr *dest);
77 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
79 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void		ip6_dst_destroy(struct dst_entry *);
82 static void		ip6_dst_ifdown(struct dst_entry *,
83 				       struct net_device *dev, int how);
84 static int		 ip6_dst_gc(struct dst_ops *ops);
85 
86 static int		ip6_pkt_discard(struct sk_buff *skb);
87 static int		ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
88 static int		ip6_pkt_prohibit(struct sk_buff *skb);
89 static int		ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
90 static void		ip6_link_failure(struct sk_buff *skb);
91 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92 					   struct sk_buff *skb, u32 mtu);
93 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94 					struct sk_buff *skb);
95 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
96 
97 #ifdef CONFIG_IPV6_ROUTE_INFO
98 static struct rt6_info *rt6_add_route_info(struct net *net,
99 					   const struct in6_addr *prefix, int prefixlen,
100 					   const struct in6_addr *gwaddr, int ifindex,
101 					   unsigned int pref);
102 static struct rt6_info *rt6_get_route_info(struct net *net,
103 					   const struct in6_addr *prefix, int prefixlen,
104 					   const struct in6_addr *gwaddr, int ifindex);
105 #endif
106 
107 static void rt6_bind_peer(struct rt6_info *rt, int create)
108 {
109 	struct inet_peer_base *base;
110 	struct inet_peer *peer;
111 
112 	base = inetpeer_base_ptr(rt->_rt6i_peer);
113 	if (!base)
114 		return;
115 
116 	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
117 	if (peer) {
118 		if (!rt6_set_peer(rt, peer))
119 			inet_putpeer(peer);
120 	}
121 }
122 
123 static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
124 {
125 	if (rt6_has_peer(rt))
126 		return rt6_peer_ptr(rt);
127 
128 	rt6_bind_peer(rt, create);
129 	return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
130 }
131 
/* Same as __rt6_get_peer() with creation of a missing peer requested. */
static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
{
	const int create = 1;

	return __rt6_get_peer(rt, create);
}
136 
137 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
138 {
139 	struct rt6_info *rt = (struct rt6_info *) dst;
140 	struct inet_peer *peer;
141 	u32 *p = NULL;
142 
143 	if (!(rt->dst.flags & DST_HOST))
144 		return NULL;
145 
146 	peer = rt6_get_peer_create(rt);
147 	if (peer) {
148 		u32 *old_p = __DST_METRICS_PTR(old);
149 		unsigned long prev, new;
150 
151 		p = peer->metrics;
152 		if (inet_metrics_new(peer) ||
153 		    (old & DST_METRICS_FORCE_OVERWRITE))
154 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
155 
156 		new = (unsigned long) p;
157 		prev = cmpxchg(&dst->_metrics, old, new);
158 
159 		if (prev != old) {
160 			p = __DST_METRICS_PTR(prev);
161 			if (prev & DST_METRICS_READ_ONLY)
162 				p = NULL;
163 		}
164 	}
165 	return p;
166 }
167 
168 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
169 					     struct sk_buff *skb,
170 					     const void *daddr)
171 {
172 	struct in6_addr *p = &rt->rt6i_gateway;
173 
174 	if (!ipv6_addr_any(p))
175 		return (const void *) p;
176 	else if (skb)
177 		return &ipv6_hdr(skb)->daddr;
178 	return daddr;
179 }
180 
181 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
182 					  struct sk_buff *skb,
183 					  const void *daddr)
184 {
185 	struct rt6_info *rt = (struct rt6_info *) dst;
186 	struct neighbour *n;
187 
188 	daddr = choose_neigh_daddr(rt, skb, daddr);
189 	n = __ipv6_neigh_lookup(dst->dev, daddr);
190 	if (n)
191 		return n;
192 	return neigh_create(&nd_tbl, daddr, dst->dev);
193 }
194 
195 static struct dst_ops ip6_dst_ops_template = {
196 	.family			=	AF_INET6,
197 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
198 	.gc			=	ip6_dst_gc,
199 	.gc_thresh		=	1024,
200 	.check			=	ip6_dst_check,
201 	.default_advmss		=	ip6_default_advmss,
202 	.mtu			=	ip6_mtu,
203 	.cow_metrics		=	ipv6_cow_metrics,
204 	.destroy		=	ip6_dst_destroy,
205 	.ifdown			=	ip6_dst_ifdown,
206 	.negative_advice	=	ip6_negative_advice,
207 	.link_failure		=	ip6_link_failure,
208 	.update_pmtu		=	ip6_rt_update_pmtu,
209 	.redirect		=	rt6_do_redirect,
210 	.local_out		=	__ip6_local_out,
211 	.neigh_lookup		=	ip6_neigh_lookup,
212 };
213 
214 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
215 {
216 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
217 
218 	return mtu ? : dst->dev->mtu;
219 }
220 
221 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
222 					 struct sk_buff *skb, u32 mtu)
223 {
224 }
225 
/* dst_ops->redirect handler for blackhole dsts: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
230 
231 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
232 					 unsigned long old)
233 {
234 	return NULL;
235 }
236 
237 static struct dst_ops ip6_dst_blackhole_ops = {
238 	.family			=	AF_INET6,
239 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
240 	.destroy		=	ip6_dst_destroy,
241 	.check			=	ip6_dst_check,
242 	.mtu			=	ip6_blackhole_mtu,
243 	.default_advmss		=	ip6_default_advmss,
244 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
245 	.redirect		=	ip6_rt_blackhole_redirect,
246 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
247 	.neigh_lookup		=	ip6_neigh_lookup,
248 };
249 
250 static const u32 ip6_template_metrics[RTAX_MAX] = {
251 	[RTAX_HOPLIMIT - 1] = 0,
252 };
253 
254 static const struct rt6_info ip6_null_entry_template = {
255 	.dst = {
256 		.__refcnt	= ATOMIC_INIT(1),
257 		.__use		= 1,
258 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
259 		.error		= -ENETUNREACH,
260 		.input		= ip6_pkt_discard,
261 		.output		= ip6_pkt_discard_out,
262 	},
263 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
264 	.rt6i_protocol  = RTPROT_KERNEL,
265 	.rt6i_metric	= ~(u32) 0,
266 	.rt6i_ref	= ATOMIC_INIT(1),
267 };
268 
269 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
270 
271 static const struct rt6_info ip6_prohibit_entry_template = {
272 	.dst = {
273 		.__refcnt	= ATOMIC_INIT(1),
274 		.__use		= 1,
275 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
276 		.error		= -EACCES,
277 		.input		= ip6_pkt_prohibit,
278 		.output		= ip6_pkt_prohibit_out,
279 	},
280 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
281 	.rt6i_protocol  = RTPROT_KERNEL,
282 	.rt6i_metric	= ~(u32) 0,
283 	.rt6i_ref	= ATOMIC_INIT(1),
284 };
285 
286 static const struct rt6_info ip6_blk_hole_entry_template = {
287 	.dst = {
288 		.__refcnt	= ATOMIC_INIT(1),
289 		.__use		= 1,
290 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
291 		.error		= -EINVAL,
292 		.input		= dst_discard,
293 		.output		= dst_discard_sk,
294 	},
295 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
296 	.rt6i_protocol  = RTPROT_KERNEL,
297 	.rt6i_metric	= ~(u32) 0,
298 	.rt6i_ref	= ATOMIC_INIT(1),
299 };
300 
301 #endif
302 
303 /* allocate dst with ip6_dst_ops */
304 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
305 					     struct net_device *dev,
306 					     int flags,
307 					     struct fib6_table *table)
308 {
309 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
310 					0, DST_OBSOLETE_FORCE_CHK, flags);
311 
312 	if (rt) {
313 		struct dst_entry *dst = &rt->dst;
314 
315 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
316 		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
317 		INIT_LIST_HEAD(&rt->rt6i_siblings);
318 	}
319 	return rt;
320 }
321 
322 static void ip6_dst_destroy(struct dst_entry *dst)
323 {
324 	struct rt6_info *rt = (struct rt6_info *)dst;
325 	struct inet6_dev *idev = rt->rt6i_idev;
326 	struct dst_entry *from = dst->from;
327 
328 	if (!(rt->dst.flags & DST_HOST))
329 		dst_destroy_metrics_generic(dst);
330 
331 	if (idev) {
332 		rt->rt6i_idev = NULL;
333 		in6_dev_put(idev);
334 	}
335 
336 	dst->from = NULL;
337 	dst_release(from);
338 
339 	if (rt6_has_peer(rt)) {
340 		struct inet_peer *peer = rt6_peer_ptr(rt);
341 		inet_putpeer(peer);
342 	}
343 }
344 
345 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
346 			   int how)
347 {
348 	struct rt6_info *rt = (struct rt6_info *)dst;
349 	struct inet6_dev *idev = rt->rt6i_idev;
350 	struct net_device *loopback_dev =
351 		dev_net(dev)->loopback_dev;
352 
353 	if (dev != loopback_dev) {
354 		if (idev && idev->dev == dev) {
355 			struct inet6_dev *loopback_idev =
356 				in6_dev_get(loopback_dev);
357 			if (loopback_idev) {
358 				rt->rt6i_idev = loopback_idev;
359 				in6_dev_put(idev);
360 			}
361 		}
362 	}
363 }
364 
365 static bool rt6_check_expired(const struct rt6_info *rt)
366 {
367 	if (rt->rt6i_flags & RTF_EXPIRES) {
368 		if (time_after(jiffies, rt->dst.expires))
369 			return true;
370 	} else if (rt->dst.from) {
371 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
372 	}
373 	return false;
374 }
375 
376 /* Multipath route selection:
377  *   Hash based function using packet header and flowlabel.
378  * Adapted from fib_info_hashfn()
379  */
380 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
381 			       const struct flowi6 *fl6)
382 {
383 	unsigned int val = fl6->flowi6_proto;
384 
385 	val ^= ipv6_addr_hash(&fl6->daddr);
386 	val ^= ipv6_addr_hash(&fl6->saddr);
387 
388 	/* Work only if this not encapsulated */
389 	switch (fl6->flowi6_proto) {
390 	case IPPROTO_UDP:
391 	case IPPROTO_TCP:
392 	case IPPROTO_SCTP:
393 		val ^= (__force u16)fl6->fl6_sport;
394 		val ^= (__force u16)fl6->fl6_dport;
395 		break;
396 
397 	case IPPROTO_ICMPV6:
398 		val ^= (__force u16)fl6->fl6_icmp_type;
399 		val ^= (__force u16)fl6->fl6_icmp_code;
400 		break;
401 	}
402 	/* RFC6438 recommands to use flowlabel */
403 	val ^= (__force u32)fl6->flowlabel;
404 
405 	/* Perhaps, we need to tune, this function? */
406 	val = val ^ (val >> 7) ^ (val >> 12);
407 	return val % candidate_count;
408 }
409 
410 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
411 					     struct flowi6 *fl6, int oif,
412 					     int strict)
413 {
414 	struct rt6_info *sibling, *next_sibling;
415 	int route_choosen;
416 
417 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
418 	/* Don't change the route, if route_choosen == 0
419 	 * (siblings does not include ourself)
420 	 */
421 	if (route_choosen)
422 		list_for_each_entry_safe(sibling, next_sibling,
423 				&match->rt6i_siblings, rt6i_siblings) {
424 			route_choosen--;
425 			if (route_choosen == 0) {
426 				if (rt6_score_route(sibling, oif, strict) < 0)
427 					break;
428 				match = sibling;
429 				break;
430 			}
431 		}
432 	return match;
433 }
434 
435 /*
436  *	Route lookup. Any table->tb6_lock is implied.
437  */
438 
439 static inline struct rt6_info *rt6_device_match(struct net *net,
440 						    struct rt6_info *rt,
441 						    const struct in6_addr *saddr,
442 						    int oif,
443 						    int flags)
444 {
445 	struct rt6_info *local = NULL;
446 	struct rt6_info *sprt;
447 
448 	if (!oif && ipv6_addr_any(saddr))
449 		goto out;
450 
451 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
452 		struct net_device *dev = sprt->dst.dev;
453 
454 		if (oif) {
455 			if (dev->ifindex == oif)
456 				return sprt;
457 			if (dev->flags & IFF_LOOPBACK) {
458 				if (!sprt->rt6i_idev ||
459 				    sprt->rt6i_idev->dev->ifindex != oif) {
460 					if (flags & RT6_LOOKUP_F_IFACE && oif)
461 						continue;
462 					if (local && (!oif ||
463 						      local->rt6i_idev->dev->ifindex == oif))
464 						continue;
465 				}
466 				local = sprt;
467 			}
468 		} else {
469 			if (ipv6_chk_addr(net, saddr, dev,
470 					  flags & RT6_LOOKUP_F_IFACE))
471 				return sprt;
472 		}
473 	}
474 
475 	if (oif) {
476 		if (local)
477 			return local;
478 
479 		if (flags & RT6_LOOKUP_F_IFACE)
480 			return net->ipv6.ip6_null_entry;
481 	}
482 out:
483 	return rt;
484 }
485 
486 #ifdef CONFIG_IPV6_ROUTER_PREF
487 struct __rt6_probe_work {
488 	struct work_struct work;
489 	struct in6_addr target;
490 	struct net_device *dev;
491 };
492 
493 static void rt6_probe_deferred(struct work_struct *w)
494 {
495 	struct in6_addr mcaddr;
496 	struct __rt6_probe_work *work =
497 		container_of(w, struct __rt6_probe_work, work);
498 
499 	addrconf_addr_solict_mult(&work->target, &mcaddr);
500 	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
501 	dev_put(work->dev);
502 	kfree(w);
503 }
504 
505 static void rt6_probe(struct rt6_info *rt)
506 {
507 	struct neighbour *neigh;
508 	/*
509 	 * Okay, this does not seem to be appropriate
510 	 * for now, however, we need to check if it
511 	 * is really so; aka Router Reachability Probing.
512 	 *
513 	 * Router Reachability Probe MUST be rate-limited
514 	 * to no more than one per minute.
515 	 */
516 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
517 		return;
518 	rcu_read_lock_bh();
519 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
520 	if (neigh) {
521 		write_lock(&neigh->lock);
522 		if (neigh->nud_state & NUD_VALID)
523 			goto out;
524 	}
525 
526 	if (!neigh ||
527 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
528 		struct __rt6_probe_work *work;
529 
530 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
531 
532 		if (neigh && work)
533 			__neigh_set_probe_once(neigh);
534 
535 		if (neigh)
536 			write_unlock(&neigh->lock);
537 
538 		if (work) {
539 			INIT_WORK(&work->work, rt6_probe_deferred);
540 			work->target = rt->rt6i_gateway;
541 			dev_hold(rt->dst.dev);
542 			work->dev = rt->dst.dev;
543 			schedule_work(&work->work);
544 		}
545 	} else {
546 out:
547 		write_unlock(&neigh->lock);
548 	}
549 	rcu_read_unlock_bh();
550 }
551 #else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* nothing to do */
}
555 #endif
556 
557 /*
558  * Default Router Selection (RFC 2461 6.3.6)
559  */
560 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
561 {
562 	struct net_device *dev = rt->dst.dev;
563 	if (!oif || dev->ifindex == oif)
564 		return 2;
565 	if ((dev->flags & IFF_LOOPBACK) &&
566 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
567 		return 1;
568 	return 0;
569 }
570 
571 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
572 {
573 	struct neighbour *neigh;
574 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
575 
576 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
577 	    !(rt->rt6i_flags & RTF_GATEWAY))
578 		return RT6_NUD_SUCCEED;
579 
580 	rcu_read_lock_bh();
581 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
582 	if (neigh) {
583 		read_lock(&neigh->lock);
584 		if (neigh->nud_state & NUD_VALID)
585 			ret = RT6_NUD_SUCCEED;
586 #ifdef CONFIG_IPV6_ROUTER_PREF
587 		else if (!(neigh->nud_state & NUD_FAILED))
588 			ret = RT6_NUD_SUCCEED;
589 		else
590 			ret = RT6_NUD_FAIL_PROBE;
591 #endif
592 		read_unlock(&neigh->lock);
593 	} else {
594 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
595 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
596 	}
597 	rcu_read_unlock_bh();
598 
599 	return ret;
600 }
601 
602 static int rt6_score_route(struct rt6_info *rt, int oif,
603 			   int strict)
604 {
605 	int m;
606 
607 	m = rt6_check_dev(rt, oif);
608 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
609 		return RT6_NUD_FAIL_HARD;
610 #ifdef CONFIG_IPV6_ROUTER_PREF
611 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
612 #endif
613 	if (strict & RT6_LOOKUP_F_REACHABLE) {
614 		int n = rt6_check_neigh(rt);
615 		if (n < 0)
616 			return n;
617 	}
618 	return m;
619 }
620 
621 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
622 				   int *mpri, struct rt6_info *match,
623 				   bool *do_rr)
624 {
625 	int m;
626 	bool match_do_rr = false;
627 
628 	if (rt6_check_expired(rt))
629 		goto out;
630 
631 	m = rt6_score_route(rt, oif, strict);
632 	if (m == RT6_NUD_FAIL_DO_RR) {
633 		match_do_rr = true;
634 		m = 0; /* lowest valid score */
635 	} else if (m == RT6_NUD_FAIL_HARD) {
636 		goto out;
637 	}
638 
639 	if (strict & RT6_LOOKUP_F_REACHABLE)
640 		rt6_probe(rt);
641 
642 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
643 	if (m > *mpri) {
644 		*do_rr = match_do_rr;
645 		*mpri = m;
646 		match = rt;
647 	}
648 out:
649 	return match;
650 }
651 
652 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
653 				     struct rt6_info *rr_head,
654 				     u32 metric, int oif, int strict,
655 				     bool *do_rr)
656 {
657 	struct rt6_info *rt, *match;
658 	int mpri = -1;
659 
660 	match = NULL;
661 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
662 	     rt = rt->dst.rt6_next)
663 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
664 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
665 	     rt = rt->dst.rt6_next)
666 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
667 
668 	return match;
669 }
670 
671 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
672 {
673 	struct rt6_info *match, *rt0;
674 	struct net *net;
675 	bool do_rr = false;
676 
677 	rt0 = fn->rr_ptr;
678 	if (!rt0)
679 		fn->rr_ptr = rt0 = fn->leaf;
680 
681 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
682 			     &do_rr);
683 
684 	if (do_rr) {
685 		struct rt6_info *next = rt0->dst.rt6_next;
686 
687 		/* no entries matched; do round-robin */
688 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
689 			next = fn->leaf;
690 
691 		if (next != rt0)
692 			fn->rr_ptr = next;
693 	}
694 
695 	net = dev_net(rt0->dst.dev);
696 	return match ? match : net->ipv6.ip6_null_entry;
697 }
698 
699 #ifdef CONFIG_IPV6_ROUTE_INFO
700 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
701 		  const struct in6_addr *gwaddr)
702 {
703 	struct net *net = dev_net(dev);
704 	struct route_info *rinfo = (struct route_info *) opt;
705 	struct in6_addr prefix_buf, *prefix;
706 	unsigned int pref;
707 	unsigned long lifetime;
708 	struct rt6_info *rt;
709 
710 	if (len < sizeof(struct route_info)) {
711 		return -EINVAL;
712 	}
713 
714 	/* Sanity check for prefix_len and length */
715 	if (rinfo->length > 3) {
716 		return -EINVAL;
717 	} else if (rinfo->prefix_len > 128) {
718 		return -EINVAL;
719 	} else if (rinfo->prefix_len > 64) {
720 		if (rinfo->length < 2) {
721 			return -EINVAL;
722 		}
723 	} else if (rinfo->prefix_len > 0) {
724 		if (rinfo->length < 1) {
725 			return -EINVAL;
726 		}
727 	}
728 
729 	pref = rinfo->route_pref;
730 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
731 		return -EINVAL;
732 
733 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
734 
735 	if (rinfo->length == 3)
736 		prefix = (struct in6_addr *)rinfo->prefix;
737 	else {
738 		/* this function is safe */
739 		ipv6_addr_prefix(&prefix_buf,
740 				 (struct in6_addr *)rinfo->prefix,
741 				 rinfo->prefix_len);
742 		prefix = &prefix_buf;
743 	}
744 
745 	if (rinfo->prefix_len == 0)
746 		rt = rt6_get_dflt_router(gwaddr, dev);
747 	else
748 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
749 					gwaddr, dev->ifindex);
750 
751 	if (rt && !lifetime) {
752 		ip6_del_rt(rt);
753 		rt = NULL;
754 	}
755 
756 	if (!rt && lifetime)
757 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
758 					pref);
759 	else if (rt)
760 		rt->rt6i_flags = RTF_ROUTEINFO |
761 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
762 
763 	if (rt) {
764 		if (!addrconf_finite_timeout(lifetime))
765 			rt6_clean_expires(rt);
766 		else
767 			rt6_set_expires(rt, jiffies + HZ * lifetime);
768 
769 		ip6_rt_put(rt);
770 	}
771 	return 0;
772 }
773 #endif
774 
775 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
776 					struct in6_addr *saddr)
777 {
778 	struct fib6_node *pn;
779 	while (1) {
780 		if (fn->fn_flags & RTN_TL_ROOT)
781 			return NULL;
782 		pn = fn->parent;
783 		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
784 			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
785 		else
786 			fn = pn;
787 		if (fn->fn_flags & RTN_RTINFO)
788 			return fn;
789 	}
790 }
791 
792 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
793 					     struct fib6_table *table,
794 					     struct flowi6 *fl6, int flags)
795 {
796 	struct fib6_node *fn;
797 	struct rt6_info *rt;
798 
799 	read_lock_bh(&table->tb6_lock);
800 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
801 restart:
802 	rt = fn->leaf;
803 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
804 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
805 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
806 	if (rt == net->ipv6.ip6_null_entry) {
807 		fn = fib6_backtrack(fn, &fl6->saddr);
808 		if (fn)
809 			goto restart;
810 	}
811 	dst_use(&rt->dst, jiffies);
812 	read_unlock_bh(&table->tb6_lock);
813 	return rt;
814 
815 }
816 
817 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
818 				    int flags)
819 {
820 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
821 }
822 EXPORT_SYMBOL_GPL(ip6_route_lookup);
823 
824 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
825 			    const struct in6_addr *saddr, int oif, int strict)
826 {
827 	struct flowi6 fl6 = {
828 		.flowi6_oif = oif,
829 		.daddr = *daddr,
830 	};
831 	struct dst_entry *dst;
832 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
833 
834 	if (saddr) {
835 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
836 		flags |= RT6_LOOKUP_F_HAS_SADDR;
837 	}
838 
839 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
840 	if (dst->error == 0)
841 		return (struct rt6_info *) dst;
842 
843 	dst_release(dst);
844 
845 	return NULL;
846 }
847 EXPORT_SYMBOL(rt6_lookup);
848 
849 /* ip6_ins_rt is called with FREE table->tb6_lock.
850    It takes new route entry, the addition fails by any reason the
851    route is freed. In any case, if caller does not hold it, it may
852    be destroyed.
853  */
854 
855 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
856 			struct nlattr *mx, int mx_len)
857 {
858 	int err;
859 	struct fib6_table *table;
860 
861 	table = rt->rt6i_table;
862 	write_lock_bh(&table->tb6_lock);
863 	err = fib6_add(&table->tb6_root, rt, info, mx, mx_len);
864 	write_unlock_bh(&table->tb6_lock);
865 
866 	return err;
867 }
868 
869 int ip6_ins_rt(struct rt6_info *rt)
870 {
871 	struct nl_info info = {
872 		.nl_net = dev_net(rt->dst.dev),
873 	};
874 	return __ip6_ins_rt(rt, &info, NULL, 0);
875 }
876 
877 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
878 				      const struct in6_addr *daddr,
879 				      const struct in6_addr *saddr)
880 {
881 	struct rt6_info *rt;
882 
883 	/*
884 	 *	Clone the route.
885 	 */
886 
887 	rt = ip6_rt_copy(ort, daddr);
888 
889 	if (rt) {
890 		if (ort->rt6i_dst.plen != 128 &&
891 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
892 			rt->rt6i_flags |= RTF_ANYCAST;
893 
894 		rt->rt6i_flags |= RTF_CACHE;
895 
896 #ifdef CONFIG_IPV6_SUBTREES
897 		if (rt->rt6i_src.plen && saddr) {
898 			rt->rt6i_src.addr = *saddr;
899 			rt->rt6i_src.plen = 128;
900 		}
901 #endif
902 	}
903 
904 	return rt;
905 }
906 
907 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
908 					const struct in6_addr *daddr)
909 {
910 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
911 
912 	if (rt)
913 		rt->rt6i_flags |= RTF_CACHE;
914 	return rt;
915 }
916 
917 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
918 				      struct flowi6 *fl6, int flags)
919 {
920 	struct fib6_node *fn, *saved_fn;
921 	struct rt6_info *rt, *nrt;
922 	int strict = 0;
923 	int attempts = 3;
924 	int err;
925 
926 	strict |= flags & RT6_LOOKUP_F_IFACE;
927 	if (net->ipv6.devconf_all->forwarding == 0)
928 		strict |= RT6_LOOKUP_F_REACHABLE;
929 
930 redo_fib6_lookup_lock:
931 	read_lock_bh(&table->tb6_lock);
932 
933 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
934 	saved_fn = fn;
935 
936 redo_rt6_select:
937 	rt = rt6_select(fn, oif, strict);
938 	if (rt->rt6i_nsiblings)
939 		rt = rt6_multipath_select(rt, fl6, oif, strict);
940 	if (rt == net->ipv6.ip6_null_entry) {
941 		fn = fib6_backtrack(fn, &fl6->saddr);
942 		if (fn)
943 			goto redo_rt6_select;
944 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
945 			/* also consider unreachable route */
946 			strict &= ~RT6_LOOKUP_F_REACHABLE;
947 			fn = saved_fn;
948 			goto redo_rt6_select;
949 		} else {
950 			dst_hold(&rt->dst);
951 			read_unlock_bh(&table->tb6_lock);
952 			goto out2;
953 		}
954 	}
955 
956 	dst_hold(&rt->dst);
957 	read_unlock_bh(&table->tb6_lock);
958 
959 	if (rt->rt6i_flags & RTF_CACHE)
960 		goto out2;
961 
962 	if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
963 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
964 	else if (!(rt->dst.flags & DST_HOST))
965 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
966 	else
967 		goto out2;
968 
969 	ip6_rt_put(rt);
970 	rt = nrt ? : net->ipv6.ip6_null_entry;
971 
972 	dst_hold(&rt->dst);
973 	if (nrt) {
974 		err = ip6_ins_rt(nrt);
975 		if (!err)
976 			goto out2;
977 	}
978 
979 	if (--attempts <= 0)
980 		goto out2;
981 
982 	/*
983 	 * Race condition! In the gap, when table->tb6_lock was
984 	 * released someone could insert this route.  Relookup.
985 	 */
986 	ip6_rt_put(rt);
987 	goto redo_fib6_lookup_lock;
988 
989 out2:
990 	rt->dst.lastuse = jiffies;
991 	rt->dst.__use++;
992 
993 	return rt;
994 }
995 
996 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
997 					    struct flowi6 *fl6, int flags)
998 {
999 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1000 }
1001 
1002 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1003 						struct net_device *dev,
1004 						struct flowi6 *fl6, int flags)
1005 {
1006 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1007 		flags |= RT6_LOOKUP_F_IFACE;
1008 
1009 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1010 }
1011 
1012 void ip6_route_input(struct sk_buff *skb)
1013 {
1014 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1015 	struct net *net = dev_net(skb->dev);
1016 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1017 	struct flowi6 fl6 = {
1018 		.flowi6_iif = skb->dev->ifindex,
1019 		.daddr = iph->daddr,
1020 		.saddr = iph->saddr,
1021 		.flowlabel = ip6_flowinfo(iph),
1022 		.flowi6_mark = skb->mark,
1023 		.flowi6_proto = iph->nexthdr,
1024 	};
1025 
1026 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1027 }
1028 
1029 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1030 					     struct flowi6 *fl6, int flags)
1031 {
1032 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1033 }
1034 
1035 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1036 				    struct flowi6 *fl6)
1037 {
1038 	int flags = 0;
1039 
1040 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1041 
1042 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1043 		flags |= RT6_LOOKUP_F_IFACE;
1044 
1045 	if (!ipv6_addr_any(&fl6->saddr))
1046 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1047 	else if (sk)
1048 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1049 
1050 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1051 }
1052 EXPORT_SYMBOL(ip6_route_output);
1053 
1054 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1055 {
1056 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1057 	struct dst_entry *new = NULL;
1058 
1059 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1060 	if (rt) {
1061 		new = &rt->dst;
1062 
1063 		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1064 		rt6_init_peer(rt, net->ipv6.peers);
1065 
1066 		new->__use = 1;
1067 		new->input = dst_discard;
1068 		new->output = dst_discard_sk;
1069 
1070 		if (dst_metrics_read_only(&ort->dst))
1071 			new->_metrics = ort->dst._metrics;
1072 		else
1073 			dst_copy_metrics(new, &ort->dst);
1074 		rt->rt6i_idev = ort->rt6i_idev;
1075 		if (rt->rt6i_idev)
1076 			in6_dev_hold(rt->rt6i_idev);
1077 
1078 		rt->rt6i_gateway = ort->rt6i_gateway;
1079 		rt->rt6i_flags = ort->rt6i_flags;
1080 		rt->rt6i_metric = 0;
1081 
1082 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1083 #ifdef CONFIG_IPV6_SUBTREES
1084 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1085 #endif
1086 
1087 		dst_free(new);
1088 	}
1089 
1090 	dst_release(dst_orig);
1091 	return new ? new : ERR_PTR(-ENOMEM);
1092 }
1093 
1094 /*
1095  *	Destination cache support functions
1096  */
1097 
1098 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1099 {
1100 	struct rt6_info *rt;
1101 
1102 	rt = (struct rt6_info *) dst;
1103 
1104 	/* All IPV6 dsts are created with ->obsolete set to the value
1105 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1106 	 * into this function always.
1107 	 */
1108 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1109 		return NULL;
1110 
1111 	if (rt6_check_expired(rt))
1112 		return NULL;
1113 
1114 	return dst;
1115 }
1116 
1117 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1118 {
1119 	struct rt6_info *rt = (struct rt6_info *) dst;
1120 
1121 	if (rt) {
1122 		if (rt->rt6i_flags & RTF_CACHE) {
1123 			if (rt6_check_expired(rt)) {
1124 				ip6_del_rt(rt);
1125 				dst = NULL;
1126 			}
1127 		} else {
1128 			dst_release(dst);
1129 			dst = NULL;
1130 		}
1131 	}
1132 	return dst;
1133 }
1134 
1135 static void ip6_link_failure(struct sk_buff *skb)
1136 {
1137 	struct rt6_info *rt;
1138 
1139 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1140 
1141 	rt = (struct rt6_info *) skb_dst(skb);
1142 	if (rt) {
1143 		if (rt->rt6i_flags & RTF_CACHE) {
1144 			dst_hold(&rt->dst);
1145 			if (ip6_del_rt(rt))
1146 				dst_free(&rt->dst);
1147 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1148 			rt->rt6i_node->fn_sernum = -1;
1149 		}
1150 	}
1151 }
1152 
1153 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1154 			       struct sk_buff *skb, u32 mtu)
1155 {
1156 	struct rt6_info *rt6 = (struct rt6_info *)dst;
1157 
1158 	dst_confirm(dst);
1159 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1160 		struct net *net = dev_net(dst->dev);
1161 
1162 		rt6->rt6i_flags |= RTF_MODIFIED;
1163 		if (mtu < IPV6_MIN_MTU)
1164 			mtu = IPV6_MIN_MTU;
1165 
1166 		dst_metric_set(dst, RTAX_MTU, mtu);
1167 		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1168 	}
1169 }
1170 
1171 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1172 		     int oif, u32 mark)
1173 {
1174 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1175 	struct dst_entry *dst;
1176 	struct flowi6 fl6;
1177 
1178 	memset(&fl6, 0, sizeof(fl6));
1179 	fl6.flowi6_oif = oif;
1180 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1181 	fl6.daddr = iph->daddr;
1182 	fl6.saddr = iph->saddr;
1183 	fl6.flowlabel = ip6_flowinfo(iph);
1184 
1185 	dst = ip6_route_output(net, NULL, &fl6);
1186 	if (!dst->error)
1187 		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1188 	dst_release(dst);
1189 }
1190 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1191 
1192 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1193 {
1194 	ip6_update_pmtu(skb, sock_net(sk), mtu,
1195 			sk->sk_bound_dev_if, sk->sk_mark);
1196 }
1197 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1198 
/* Handle redirects */

/*
 * Flow key used for redirect validation.  ip6_route_redirect() fills it in
 * and passes &fl6 to the table lookup; __ip6_route_redirect() casts that
 * pointer back to this structure, so @fl6 has to remain the first member.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;		/* ordinary flow key; must stay first */
	struct in6_addr gateway;	/* compared with rt6i_gateway of candidate routes */
};
1204 
1205 static struct rt6_info *__ip6_route_redirect(struct net *net,
1206 					     struct fib6_table *table,
1207 					     struct flowi6 *fl6,
1208 					     int flags)
1209 {
1210 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1211 	struct rt6_info *rt;
1212 	struct fib6_node *fn;
1213 
1214 	/* Get the "current" route for this destination and
1215 	 * check if the redirect has come from approriate router.
1216 	 *
1217 	 * RFC 4861 specifies that redirects should only be
1218 	 * accepted if they come from the nexthop to the target.
1219 	 * Due to the way the routes are chosen, this notion
1220 	 * is a bit fuzzy and one might need to check all possible
1221 	 * routes.
1222 	 */
1223 
1224 	read_lock_bh(&table->tb6_lock);
1225 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1226 restart:
1227 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1228 		if (rt6_check_expired(rt))
1229 			continue;
1230 		if (rt->dst.error)
1231 			break;
1232 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1233 			continue;
1234 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1235 			continue;
1236 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1237 			continue;
1238 		break;
1239 	}
1240 
1241 	if (!rt)
1242 		rt = net->ipv6.ip6_null_entry;
1243 	else if (rt->dst.error) {
1244 		rt = net->ipv6.ip6_null_entry;
1245 		goto out;
1246 	}
1247 
1248 	if (rt == net->ipv6.ip6_null_entry) {
1249 		fn = fib6_backtrack(fn, &fl6->saddr);
1250 		if (fn)
1251 			goto restart;
1252 	}
1253 
1254 out:
1255 	dst_hold(&rt->dst);
1256 
1257 	read_unlock_bh(&table->tb6_lock);
1258 
1259 	return rt;
1260 };
1261 
1262 static struct dst_entry *ip6_route_redirect(struct net *net,
1263 					const struct flowi6 *fl6,
1264 					const struct in6_addr *gateway)
1265 {
1266 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1267 	struct ip6rd_flowi rdfl;
1268 
1269 	rdfl.fl6 = *fl6;
1270 	rdfl.gateway = *gateway;
1271 
1272 	return fib6_rule_lookup(net, &rdfl.fl6,
1273 				flags, __ip6_route_redirect);
1274 }
1275 
1276 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1277 {
1278 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1279 	struct dst_entry *dst;
1280 	struct flowi6 fl6;
1281 
1282 	memset(&fl6, 0, sizeof(fl6));
1283 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1284 	fl6.flowi6_oif = oif;
1285 	fl6.flowi6_mark = mark;
1286 	fl6.daddr = iph->daddr;
1287 	fl6.saddr = iph->saddr;
1288 	fl6.flowlabel = ip6_flowinfo(iph);
1289 
1290 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1291 	rt6_do_redirect(dst, NULL, skb);
1292 	dst_release(dst);
1293 }
1294 EXPORT_SYMBOL_GPL(ip6_redirect);
1295 
1296 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1297 			    u32 mark)
1298 {
1299 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1300 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1301 	struct dst_entry *dst;
1302 	struct flowi6 fl6;
1303 
1304 	memset(&fl6, 0, sizeof(fl6));
1305 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1306 	fl6.flowi6_oif = oif;
1307 	fl6.flowi6_mark = mark;
1308 	fl6.daddr = msg->dest;
1309 	fl6.saddr = iph->daddr;
1310 
1311 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1312 	rt6_do_redirect(dst, NULL, skb);
1313 	dst_release(dst);
1314 }
1315 
1316 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1317 {
1318 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1319 }
1320 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1321 
1322 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1323 {
1324 	struct net_device *dev = dst->dev;
1325 	unsigned int mtu = dst_mtu(dst);
1326 	struct net *net = dev_net(dev);
1327 
1328 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1329 
1330 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1331 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1332 
1333 	/*
1334 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1335 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1336 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1337 	 * rely only on pmtu discovery"
1338 	 */
1339 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1340 		mtu = IPV6_MAXPLEN;
1341 	return mtu;
1342 }
1343 
1344 static unsigned int ip6_mtu(const struct dst_entry *dst)
1345 {
1346 	struct inet6_dev *idev;
1347 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1348 
1349 	if (mtu)
1350 		goto out;
1351 
1352 	mtu = IPV6_MIN_MTU;
1353 
1354 	rcu_read_lock();
1355 	idev = __in6_dev_get(dst->dev);
1356 	if (idev)
1357 		mtu = idev->cnf.mtu6;
1358 	rcu_read_unlock();
1359 
1360 out:
1361 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1362 }
1363 
/* Head of the list (linked through dst->next) of dsts made by icmp6_dst_alloc(). */
static struct dst_entry *icmp6_dst_gc_list;
/* Taken with _bh around every access to icmp6_dst_gc_list in this file. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
1366 
1367 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1368 				  struct flowi6 *fl6)
1369 {
1370 	struct dst_entry *dst;
1371 	struct rt6_info *rt;
1372 	struct inet6_dev *idev = in6_dev_get(dev);
1373 	struct net *net = dev_net(dev);
1374 
1375 	if (unlikely(!idev))
1376 		return ERR_PTR(-ENODEV);
1377 
1378 	rt = ip6_dst_alloc(net, dev, 0, NULL);
1379 	if (unlikely(!rt)) {
1380 		in6_dev_put(idev);
1381 		dst = ERR_PTR(-ENOMEM);
1382 		goto out;
1383 	}
1384 
1385 	rt->dst.flags |= DST_HOST;
1386 	rt->dst.output  = ip6_output;
1387 	atomic_set(&rt->dst.__refcnt, 1);
1388 	rt->rt6i_gateway  = fl6->daddr;
1389 	rt->rt6i_dst.addr = fl6->daddr;
1390 	rt->rt6i_dst.plen = 128;
1391 	rt->rt6i_idev     = idev;
1392 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1393 
1394 	spin_lock_bh(&icmp6_dst_lock);
1395 	rt->dst.next = icmp6_dst_gc_list;
1396 	icmp6_dst_gc_list = &rt->dst;
1397 	spin_unlock_bh(&icmp6_dst_lock);
1398 
1399 	fib6_force_start_gc(net);
1400 
1401 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1402 
1403 out:
1404 	return dst;
1405 }
1406 
1407 int icmp6_dst_gc(void)
1408 {
1409 	struct dst_entry *dst, **pprev;
1410 	int more = 0;
1411 
1412 	spin_lock_bh(&icmp6_dst_lock);
1413 	pprev = &icmp6_dst_gc_list;
1414 
1415 	while ((dst = *pprev) != NULL) {
1416 		if (!atomic_read(&dst->__refcnt)) {
1417 			*pprev = dst->next;
1418 			dst_free(dst);
1419 		} else {
1420 			pprev = &dst->next;
1421 			++more;
1422 		}
1423 	}
1424 
1425 	spin_unlock_bh(&icmp6_dst_lock);
1426 
1427 	return more;
1428 }
1429 
1430 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1431 			    void *arg)
1432 {
1433 	struct dst_entry *dst, **pprev;
1434 
1435 	spin_lock_bh(&icmp6_dst_lock);
1436 	pprev = &icmp6_dst_gc_list;
1437 	while ((dst = *pprev) != NULL) {
1438 		struct rt6_info *rt = (struct rt6_info *) dst;
1439 		if (func(rt, arg)) {
1440 			*pprev = dst->next;
1441 			dst_free(dst);
1442 		} else {
1443 			pprev = &dst->next;
1444 		}
1445 	}
1446 	spin_unlock_bh(&icmp6_dst_lock);
1447 }
1448 
1449 static int ip6_dst_gc(struct dst_ops *ops)
1450 {
1451 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1452 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1453 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1454 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1455 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1456 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1457 	int entries;
1458 
1459 	entries = dst_entries_get_fast(ops);
1460 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1461 	    entries <= rt_max_size)
1462 		goto out;
1463 
1464 	net->ipv6.ip6_rt_gc_expire++;
1465 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1466 	entries = dst_entries_get_slow(ops);
1467 	if (entries < ops->gc_thresh)
1468 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1469 out:
1470 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1471 	return entries > rt_max_size;
1472 }
1473 
1474 /*
1475  *
1476  */
1477 
1478 int ip6_route_add(struct fib6_config *cfg)
1479 {
1480 	int err;
1481 	struct net *net = cfg->fc_nlinfo.nl_net;
1482 	struct rt6_info *rt = NULL;
1483 	struct net_device *dev = NULL;
1484 	struct inet6_dev *idev = NULL;
1485 	struct fib6_table *table;
1486 	int addr_type;
1487 
1488 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1489 		return -EINVAL;
1490 #ifndef CONFIG_IPV6_SUBTREES
1491 	if (cfg->fc_src_len)
1492 		return -EINVAL;
1493 #endif
1494 	if (cfg->fc_ifindex) {
1495 		err = -ENODEV;
1496 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1497 		if (!dev)
1498 			goto out;
1499 		idev = in6_dev_get(dev);
1500 		if (!idev)
1501 			goto out;
1502 	}
1503 
1504 	if (cfg->fc_metric == 0)
1505 		cfg->fc_metric = IP6_RT_PRIO_USER;
1506 
1507 	err = -ENOBUFS;
1508 	if (cfg->fc_nlinfo.nlh &&
1509 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1510 		table = fib6_get_table(net, cfg->fc_table);
1511 		if (!table) {
1512 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1513 			table = fib6_new_table(net, cfg->fc_table);
1514 		}
1515 	} else {
1516 		table = fib6_new_table(net, cfg->fc_table);
1517 	}
1518 
1519 	if (!table)
1520 		goto out;
1521 
1522 	rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1523 
1524 	if (!rt) {
1525 		err = -ENOMEM;
1526 		goto out;
1527 	}
1528 
1529 	if (cfg->fc_flags & RTF_EXPIRES)
1530 		rt6_set_expires(rt, jiffies +
1531 				clock_t_to_jiffies(cfg->fc_expires));
1532 	else
1533 		rt6_clean_expires(rt);
1534 
1535 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1536 		cfg->fc_protocol = RTPROT_BOOT;
1537 	rt->rt6i_protocol = cfg->fc_protocol;
1538 
1539 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1540 
1541 	if (addr_type & IPV6_ADDR_MULTICAST)
1542 		rt->dst.input = ip6_mc_input;
1543 	else if (cfg->fc_flags & RTF_LOCAL)
1544 		rt->dst.input = ip6_input;
1545 	else
1546 		rt->dst.input = ip6_forward;
1547 
1548 	rt->dst.output = ip6_output;
1549 
1550 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1551 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1552 	if (rt->rt6i_dst.plen == 128) {
1553 		rt->dst.flags |= DST_HOST;
1554 		dst_metrics_set_force_overwrite(&rt->dst);
1555 	}
1556 
1557 #ifdef CONFIG_IPV6_SUBTREES
1558 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1559 	rt->rt6i_src.plen = cfg->fc_src_len;
1560 #endif
1561 
1562 	rt->rt6i_metric = cfg->fc_metric;
1563 
1564 	/* We cannot add true routes via loopback here,
1565 	   they would result in kernel looping; promote them to reject routes
1566 	 */
1567 	if ((cfg->fc_flags & RTF_REJECT) ||
1568 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1569 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1570 	     !(cfg->fc_flags & RTF_LOCAL))) {
1571 		/* hold loopback dev/idev if we haven't done so. */
1572 		if (dev != net->loopback_dev) {
1573 			if (dev) {
1574 				dev_put(dev);
1575 				in6_dev_put(idev);
1576 			}
1577 			dev = net->loopback_dev;
1578 			dev_hold(dev);
1579 			idev = in6_dev_get(dev);
1580 			if (!idev) {
1581 				err = -ENODEV;
1582 				goto out;
1583 			}
1584 		}
1585 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1586 		switch (cfg->fc_type) {
1587 		case RTN_BLACKHOLE:
1588 			rt->dst.error = -EINVAL;
1589 			rt->dst.output = dst_discard_sk;
1590 			rt->dst.input = dst_discard;
1591 			break;
1592 		case RTN_PROHIBIT:
1593 			rt->dst.error = -EACCES;
1594 			rt->dst.output = ip6_pkt_prohibit_out;
1595 			rt->dst.input = ip6_pkt_prohibit;
1596 			break;
1597 		case RTN_THROW:
1598 		default:
1599 			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1600 					: -ENETUNREACH;
1601 			rt->dst.output = ip6_pkt_discard_out;
1602 			rt->dst.input = ip6_pkt_discard;
1603 			break;
1604 		}
1605 		goto install_route;
1606 	}
1607 
1608 	if (cfg->fc_flags & RTF_GATEWAY) {
1609 		const struct in6_addr *gw_addr;
1610 		int gwa_type;
1611 
1612 		gw_addr = &cfg->fc_gateway;
1613 		rt->rt6i_gateway = *gw_addr;
1614 		gwa_type = ipv6_addr_type(gw_addr);
1615 
1616 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1617 			struct rt6_info *grt;
1618 
1619 			/* IPv6 strictly inhibits using not link-local
1620 			   addresses as nexthop address.
1621 			   Otherwise, router will not able to send redirects.
1622 			   It is very good, but in some (rare!) circumstances
1623 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1624 			   some exceptions. --ANK
1625 			 */
1626 			err = -EINVAL;
1627 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1628 				goto out;
1629 
1630 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1631 
1632 			err = -EHOSTUNREACH;
1633 			if (!grt)
1634 				goto out;
1635 			if (dev) {
1636 				if (dev != grt->dst.dev) {
1637 					ip6_rt_put(grt);
1638 					goto out;
1639 				}
1640 			} else {
1641 				dev = grt->dst.dev;
1642 				idev = grt->rt6i_idev;
1643 				dev_hold(dev);
1644 				in6_dev_hold(grt->rt6i_idev);
1645 			}
1646 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1647 				err = 0;
1648 			ip6_rt_put(grt);
1649 
1650 			if (err)
1651 				goto out;
1652 		}
1653 		err = -EINVAL;
1654 		if (!dev || (dev->flags & IFF_LOOPBACK))
1655 			goto out;
1656 	}
1657 
1658 	err = -ENODEV;
1659 	if (!dev)
1660 		goto out;
1661 
1662 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1663 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1664 			err = -EINVAL;
1665 			goto out;
1666 		}
1667 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1668 		rt->rt6i_prefsrc.plen = 128;
1669 	} else
1670 		rt->rt6i_prefsrc.plen = 0;
1671 
1672 	rt->rt6i_flags = cfg->fc_flags;
1673 
1674 install_route:
1675 	rt->dst.dev = dev;
1676 	rt->rt6i_idev = idev;
1677 	rt->rt6i_table = table;
1678 
1679 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1680 
1681 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len);
1682 
1683 out:
1684 	if (dev)
1685 		dev_put(dev);
1686 	if (idev)
1687 		in6_dev_put(idev);
1688 	if (rt)
1689 		dst_free(&rt->dst);
1690 	return err;
1691 }
1692 
1693 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1694 {
1695 	int err;
1696 	struct fib6_table *table;
1697 	struct net *net = dev_net(rt->dst.dev);
1698 
1699 	if (rt == net->ipv6.ip6_null_entry) {
1700 		err = -ENOENT;
1701 		goto out;
1702 	}
1703 
1704 	table = rt->rt6i_table;
1705 	write_lock_bh(&table->tb6_lock);
1706 	err = fib6_del(rt, info);
1707 	write_unlock_bh(&table->tb6_lock);
1708 
1709 out:
1710 	ip6_rt_put(rt);
1711 	return err;
1712 }
1713 
1714 int ip6_del_rt(struct rt6_info *rt)
1715 {
1716 	struct nl_info info = {
1717 		.nl_net = dev_net(rt->dst.dev),
1718 	};
1719 	return __ip6_del_rt(rt, &info);
1720 }
1721 
1722 static int ip6_route_del(struct fib6_config *cfg)
1723 {
1724 	struct fib6_table *table;
1725 	struct fib6_node *fn;
1726 	struct rt6_info *rt;
1727 	int err = -ESRCH;
1728 
1729 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1730 	if (!table)
1731 		return err;
1732 
1733 	read_lock_bh(&table->tb6_lock);
1734 
1735 	fn = fib6_locate(&table->tb6_root,
1736 			 &cfg->fc_dst, cfg->fc_dst_len,
1737 			 &cfg->fc_src, cfg->fc_src_len);
1738 
1739 	if (fn) {
1740 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1741 			if (cfg->fc_ifindex &&
1742 			    (!rt->dst.dev ||
1743 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
1744 				continue;
1745 			if (cfg->fc_flags & RTF_GATEWAY &&
1746 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1747 				continue;
1748 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1749 				continue;
1750 			dst_hold(&rt->dst);
1751 			read_unlock_bh(&table->tb6_lock);
1752 
1753 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1754 		}
1755 	}
1756 	read_unlock_bh(&table->tb6_lock);
1757 
1758 	return err;
1759 }
1760 
1761 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1762 {
1763 	struct net *net = dev_net(skb->dev);
1764 	struct netevent_redirect netevent;
1765 	struct rt6_info *rt, *nrt = NULL;
1766 	struct ndisc_options ndopts;
1767 	struct inet6_dev *in6_dev;
1768 	struct neighbour *neigh;
1769 	struct rd_msg *msg;
1770 	int optlen, on_link;
1771 	u8 *lladdr;
1772 
1773 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1774 	optlen -= sizeof(*msg);
1775 
1776 	if (optlen < 0) {
1777 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1778 		return;
1779 	}
1780 
1781 	msg = (struct rd_msg *)icmp6_hdr(skb);
1782 
1783 	if (ipv6_addr_is_multicast(&msg->dest)) {
1784 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1785 		return;
1786 	}
1787 
1788 	on_link = 0;
1789 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1790 		on_link = 1;
1791 	} else if (ipv6_addr_type(&msg->target) !=
1792 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1793 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1794 		return;
1795 	}
1796 
1797 	in6_dev = __in6_dev_get(skb->dev);
1798 	if (!in6_dev)
1799 		return;
1800 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1801 		return;
1802 
1803 	/* RFC2461 8.1:
1804 	 *	The IP source address of the Redirect MUST be the same as the current
1805 	 *	first-hop router for the specified ICMP Destination Address.
1806 	 */
1807 
1808 	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1809 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1810 		return;
1811 	}
1812 
1813 	lladdr = NULL;
1814 	if (ndopts.nd_opts_tgt_lladdr) {
1815 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1816 					     skb->dev);
1817 		if (!lladdr) {
1818 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1819 			return;
1820 		}
1821 	}
1822 
1823 	rt = (struct rt6_info *) dst;
1824 	if (rt == net->ipv6.ip6_null_entry) {
1825 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1826 		return;
1827 	}
1828 
1829 	/* Redirect received -> path was valid.
1830 	 * Look, redirects are sent only in response to data packets,
1831 	 * so that this nexthop apparently is reachable. --ANK
1832 	 */
1833 	dst_confirm(&rt->dst);
1834 
1835 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1836 	if (!neigh)
1837 		return;
1838 
1839 	/*
1840 	 *	We have finally decided to accept it.
1841 	 */
1842 
1843 	neigh_update(neigh, lladdr, NUD_STALE,
1844 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1845 		     NEIGH_UPDATE_F_OVERRIDE|
1846 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1847 				     NEIGH_UPDATE_F_ISROUTER))
1848 		     );
1849 
1850 	nrt = ip6_rt_copy(rt, &msg->dest);
1851 	if (!nrt)
1852 		goto out;
1853 
1854 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1855 	if (on_link)
1856 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1857 
1858 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1859 
1860 	if (ip6_ins_rt(nrt))
1861 		goto out;
1862 
1863 	netevent.old = &rt->dst;
1864 	netevent.new = &nrt->dst;
1865 	netevent.daddr = &msg->dest;
1866 	netevent.neigh = neigh;
1867 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1868 
1869 	if (rt->rt6i_flags & RTF_CACHE) {
1870 		rt = (struct rt6_info *) dst_clone(&rt->dst);
1871 		ip6_del_rt(rt);
1872 	}
1873 
1874 out:
1875 	neigh_release(neigh);
1876 }
1877 
1878 /*
1879  *	Misc support functions
1880  */
1881 
1882 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1883 				    const struct in6_addr *dest)
1884 {
1885 	struct net *net = dev_net(ort->dst.dev);
1886 	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1887 					    ort->rt6i_table);
1888 
1889 	if (rt) {
1890 		rt->dst.input = ort->dst.input;
1891 		rt->dst.output = ort->dst.output;
1892 		rt->dst.flags |= DST_HOST;
1893 
1894 		rt->rt6i_dst.addr = *dest;
1895 		rt->rt6i_dst.plen = 128;
1896 		dst_copy_metrics(&rt->dst, &ort->dst);
1897 		rt->dst.error = ort->dst.error;
1898 		rt->rt6i_idev = ort->rt6i_idev;
1899 		if (rt->rt6i_idev)
1900 			in6_dev_hold(rt->rt6i_idev);
1901 		rt->dst.lastuse = jiffies;
1902 
1903 		if (ort->rt6i_flags & RTF_GATEWAY)
1904 			rt->rt6i_gateway = ort->rt6i_gateway;
1905 		else
1906 			rt->rt6i_gateway = *dest;
1907 		rt->rt6i_flags = ort->rt6i_flags;
1908 		rt6_set_from(rt, ort);
1909 		rt->rt6i_metric = 0;
1910 
1911 #ifdef CONFIG_IPV6_SUBTREES
1912 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1913 #endif
1914 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1915 		rt->rt6i_table = ort->rt6i_table;
1916 	}
1917 	return rt;
1918 }
1919 
1920 #ifdef CONFIG_IPV6_ROUTE_INFO
1921 static struct rt6_info *rt6_get_route_info(struct net *net,
1922 					   const struct in6_addr *prefix, int prefixlen,
1923 					   const struct in6_addr *gwaddr, int ifindex)
1924 {
1925 	struct fib6_node *fn;
1926 	struct rt6_info *rt = NULL;
1927 	struct fib6_table *table;
1928 
1929 	table = fib6_get_table(net, RT6_TABLE_INFO);
1930 	if (!table)
1931 		return NULL;
1932 
1933 	read_lock_bh(&table->tb6_lock);
1934 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1935 	if (!fn)
1936 		goto out;
1937 
1938 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1939 		if (rt->dst.dev->ifindex != ifindex)
1940 			continue;
1941 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1942 			continue;
1943 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1944 			continue;
1945 		dst_hold(&rt->dst);
1946 		break;
1947 	}
1948 out:
1949 	read_unlock_bh(&table->tb6_lock);
1950 	return rt;
1951 }
1952 
1953 static struct rt6_info *rt6_add_route_info(struct net *net,
1954 					   const struct in6_addr *prefix, int prefixlen,
1955 					   const struct in6_addr *gwaddr, int ifindex,
1956 					   unsigned int pref)
1957 {
1958 	struct fib6_config cfg = {
1959 		.fc_table	= RT6_TABLE_INFO,
1960 		.fc_metric	= IP6_RT_PRIO_USER,
1961 		.fc_ifindex	= ifindex,
1962 		.fc_dst_len	= prefixlen,
1963 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1964 				  RTF_UP | RTF_PREF(pref),
1965 		.fc_nlinfo.portid = 0,
1966 		.fc_nlinfo.nlh = NULL,
1967 		.fc_nlinfo.nl_net = net,
1968 	};
1969 
1970 	cfg.fc_dst = *prefix;
1971 	cfg.fc_gateway = *gwaddr;
1972 
1973 	/* We should treat it as a default route if prefix length is 0. */
1974 	if (!prefixlen)
1975 		cfg.fc_flags |= RTF_DEFAULT;
1976 
1977 	ip6_route_add(&cfg);
1978 
1979 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1980 }
1981 #endif
1982 
1983 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1984 {
1985 	struct rt6_info *rt;
1986 	struct fib6_table *table;
1987 
1988 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1989 	if (!table)
1990 		return NULL;
1991 
1992 	read_lock_bh(&table->tb6_lock);
1993 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1994 		if (dev == rt->dst.dev &&
1995 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1996 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1997 			break;
1998 	}
1999 	if (rt)
2000 		dst_hold(&rt->dst);
2001 	read_unlock_bh(&table->tb6_lock);
2002 	return rt;
2003 }
2004 
2005 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2006 				     struct net_device *dev,
2007 				     unsigned int pref)
2008 {
2009 	struct fib6_config cfg = {
2010 		.fc_table	= RT6_TABLE_DFLT,
2011 		.fc_metric	= IP6_RT_PRIO_USER,
2012 		.fc_ifindex	= dev->ifindex,
2013 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2014 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2015 		.fc_nlinfo.portid = 0,
2016 		.fc_nlinfo.nlh = NULL,
2017 		.fc_nlinfo.nl_net = dev_net(dev),
2018 	};
2019 
2020 	cfg.fc_gateway = *gwaddr;
2021 
2022 	ip6_route_add(&cfg);
2023 
2024 	return rt6_get_dflt_router(gwaddr, dev);
2025 }
2026 
2027 void rt6_purge_dflt_routers(struct net *net)
2028 {
2029 	struct rt6_info *rt;
2030 	struct fib6_table *table;
2031 
2032 	/* NOTE: Keep consistent with rt6_get_dflt_router */
2033 	table = fib6_get_table(net, RT6_TABLE_DFLT);
2034 	if (!table)
2035 		return;
2036 
2037 restart:
2038 	read_lock_bh(&table->tb6_lock);
2039 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2040 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2041 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2042 			dst_hold(&rt->dst);
2043 			read_unlock_bh(&table->tb6_lock);
2044 			ip6_del_rt(rt);
2045 			goto restart;
2046 		}
2047 	}
2048 	read_unlock_bh(&table->tb6_lock);
2049 }
2050 
2051 static void rtmsg_to_fib6_config(struct net *net,
2052 				 struct in6_rtmsg *rtmsg,
2053 				 struct fib6_config *cfg)
2054 {
2055 	memset(cfg, 0, sizeof(*cfg));
2056 
2057 	cfg->fc_table = RT6_TABLE_MAIN;
2058 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2059 	cfg->fc_metric = rtmsg->rtmsg_metric;
2060 	cfg->fc_expires = rtmsg->rtmsg_info;
2061 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2062 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2063 	cfg->fc_flags = rtmsg->rtmsg_flags;
2064 
2065 	cfg->fc_nlinfo.nl_net = net;
2066 
2067 	cfg->fc_dst = rtmsg->rtmsg_dst;
2068 	cfg->fc_src = rtmsg->rtmsg_src;
2069 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2070 }
2071 
2072 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2073 {
2074 	struct fib6_config cfg;
2075 	struct in6_rtmsg rtmsg;
2076 	int err;
2077 
2078 	switch (cmd) {
2079 	case SIOCADDRT:		/* Add a route */
2080 	case SIOCDELRT:		/* Delete a route */
2081 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2082 			return -EPERM;
2083 		err = copy_from_user(&rtmsg, arg,
2084 				     sizeof(struct in6_rtmsg));
2085 		if (err)
2086 			return -EFAULT;
2087 
2088 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2089 
2090 		rtnl_lock();
2091 		switch (cmd) {
2092 		case SIOCADDRT:
2093 			err = ip6_route_add(&cfg);
2094 			break;
2095 		case SIOCDELRT:
2096 			err = ip6_route_del(&cfg);
2097 			break;
2098 		default:
2099 			err = -EINVAL;
2100 		}
2101 		rtnl_unlock();
2102 
2103 		return err;
2104 	}
2105 
2106 	return -EINVAL;
2107 }
2108 
2109 /*
2110  *	Drop the packet on the floor
2111  */
2112 
2113 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2114 {
2115 	int type;
2116 	struct dst_entry *dst = skb_dst(skb);
2117 	switch (ipstats_mib_noroutes) {
2118 	case IPSTATS_MIB_INNOROUTES:
2119 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2120 		if (type == IPV6_ADDR_ANY) {
2121 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2122 				      IPSTATS_MIB_INADDRERRORS);
2123 			break;
2124 		}
2125 		/* FALLTHROUGH */
2126 	case IPSTATS_MIB_OUTNOROUTES:
2127 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2128 			      ipstats_mib_noroutes);
2129 		break;
2130 	}
2131 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2132 	kfree_skb(skb);
2133 	return 0;
2134 }
2135 
/* Input handler: drop @skb with ICMPV6_NOROUTE, counted as IPSTATS_MIB_INNOROUTES. */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2140 
/*
 * Output handler: drop @skb with ICMPV6_NOROUTE, counted as
 * IPSTATS_MIB_OUTNOROUTES.  @sk is unused.
 */
static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
{
	/* Point skb->dev at the route's device before the drop path runs. */
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
2146 
/*
 * Input handler: drop @skb with ICMPV6_ADM_PROHIBITED, counted as
 * IPSTATS_MIB_INNOROUTES.
 */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2151 
/*
 * Output handler: drop @skb with ICMPV6_ADM_PROHIBITED, counted as
 * IPSTATS_MIB_OUTNOROUTES.  @sk is unused.
 */
static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
{
	/* Point skb->dev at the route's device before the drop path runs. */
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
2157 
2158 /*
2159  *	Allocate a dst for local (unicast / anycast) address.
2160  */
2161 
2162 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2163 				    const struct in6_addr *addr,
2164 				    bool anycast)
2165 {
2166 	struct net *net = dev_net(idev->dev);
2167 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2168 					    DST_NOCOUNT, NULL);
2169 	if (!rt)
2170 		return ERR_PTR(-ENOMEM);
2171 
2172 	in6_dev_hold(idev);
2173 
2174 	rt->dst.flags |= DST_HOST;
2175 	rt->dst.input = ip6_input;
2176 	rt->dst.output = ip6_output;
2177 	rt->rt6i_idev = idev;
2178 
2179 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2180 	if (anycast)
2181 		rt->rt6i_flags |= RTF_ANYCAST;
2182 	else
2183 		rt->rt6i_flags |= RTF_LOCAL;
2184 
2185 	rt->rt6i_gateway  = *addr;
2186 	rt->rt6i_dst.addr = *addr;
2187 	rt->rt6i_dst.plen = 128;
2188 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2189 
2190 	atomic_set(&rt->dst.__refcnt, 1);
2191 
2192 	return rt;
2193 }
2194 
2195 int ip6_route_get_saddr(struct net *net,
2196 			struct rt6_info *rt,
2197 			const struct in6_addr *daddr,
2198 			unsigned int prefs,
2199 			struct in6_addr *saddr)
2200 {
2201 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
2202 	int err = 0;
2203 	if (rt->rt6i_prefsrc.plen)
2204 		*saddr = rt->rt6i_prefsrc.addr;
2205 	else
2206 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2207 					 daddr, prefs, saddr);
2208 	return err;
2209 }
2210 
/* remove deleted ip from prefsrc entries */

/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* only routes on this device match; NULL matches any */
	struct net *net;	/* namespace whose null entry is skipped */
	struct in6_addr *addr;	/* address to drop as preferred source */
};
2217 
2218 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2219 {
2220 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2221 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2222 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2223 
2224 	if (((void *)rt->dst.dev == dev || !dev) &&
2225 	    rt != net->ipv6.ip6_null_entry &&
2226 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2227 		/* remove prefsrc entry */
2228 		rt->rt6i_prefsrc.plen = 0;
2229 	}
2230 	return 0;
2231 }
2232 
2233 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2234 {
2235 	struct net *net = dev_net(ifp->idev->dev);
2236 	struct arg_dev_net_ip adni = {
2237 		.dev = ifp->idev->dev,
2238 		.net = net,
2239 		.addr = &ifp->addr,
2240 	};
2241 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2242 }
2243 
/* Flag sets tested by fib6_clean_tohost(). */
/* All three of these are among the flags rt6_add_dflt_router() sets. */
#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
/* Both of these are set on the cache routes rt6_do_redirect() creates for off-link targets. */
#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2246 
2247 /* Remove routers and update dst entries when gateway turn into host. */
2248 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2249 {
2250 	struct in6_addr *gateway = (struct in6_addr *)arg;
2251 
2252 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2253 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2254 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2255 		return -1;
2256 	}
2257 	return 0;
2258 }
2259 
/*
 * Run fib6_clean_tohost() over every route in @net, matching routes
 * whose gateway is @gateway.
 */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
2264 
/* Argument bundle passed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device to match; NULL matches any device */
	struct net *net;	/* namespace whose ip6_null_entry is skipped */
};
2269 
2270 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2271 {
2272 	const struct arg_dev_net *adn = arg;
2273 	const struct net_device *dev = adn->dev;
2274 
2275 	if ((rt->dst.dev == dev || !dev) &&
2276 	    rt != adn->net->ipv6.ip6_null_entry)
2277 		return -1;
2278 
2279 	return 0;
2280 }
2281 
2282 void rt6_ifdown(struct net *net, struct net_device *dev)
2283 {
2284 	struct arg_dev_net adn = {
2285 		.dev = dev,
2286 		.net = net,
2287 	};
2288 
2289 	fib6_clean_all(net, fib6_ifdown, &adn);
2290 	icmp6_clean_all(fib6_ifdown, &adn);
2291 }
2292 
/* Argument bundle passed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* new MTU written to matching routes */
};
2297 
2298 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2299 {
2300 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2301 	struct inet6_dev *idev;
2302 
2303 	/* In IPv6 pmtu discovery is not optional,
2304 	   so that RTAX_MTU lock cannot disable it.
2305 	   We still use this lock to block changes
2306 	   caused by addrconf/ndisc.
2307 	*/
2308 
2309 	idev = __in6_dev_get(arg->dev);
2310 	if (!idev)
2311 		return 0;
2312 
2313 	/* For administrative MTU increase, there is no way to discover
2314 	   IPv6 PMTU increase, so PMTU increase should be updated here.
2315 	   Since RFC 1981 doesn't include administrative MTU increase
2316 	   update PMTU increase is a MUST. (i.e. jumbo frame)
2317 	 */
2318 	/*
2319 	   If new MTU is less than route PMTU, this new MTU will be the
2320 	   lowest MTU in the path, update the route PMTU to reflect PMTU
2321 	   decreases; if new MTU is greater than route PMTU, and the
2322 	   old MTU is the lowest MTU in the path, update the route PMTU
2323 	   to reflect the increase. In this case if the other nodes' MTU
2324 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2325 	   PMTU discouvery.
2326 	 */
2327 	if (rt->dst.dev == arg->dev &&
2328 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2329 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2330 	     (dst_mtu(&rt->dst) < arg->mtu &&
2331 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2332 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2333 	}
2334 	return 0;
2335 }
2336 
2337 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2338 {
2339 	struct rt6_mtu_change_arg arg = {
2340 		.dev = dev,
2341 		.mtu = mtu,
2342 	};
2343 
2344 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2345 }
2346 
2347 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2348 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2349 	[RTA_OIF]               = { .type = NLA_U32 },
2350 	[RTA_IIF]		= { .type = NLA_U32 },
2351 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2352 	[RTA_METRICS]           = { .type = NLA_NESTED },
2353 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2354 };
2355 
2356 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2357 			      struct fib6_config *cfg)
2358 {
2359 	struct rtmsg *rtm;
2360 	struct nlattr *tb[RTA_MAX+1];
2361 	int err;
2362 
2363 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2364 	if (err < 0)
2365 		goto errout;
2366 
2367 	err = -EINVAL;
2368 	rtm = nlmsg_data(nlh);
2369 	memset(cfg, 0, sizeof(*cfg));
2370 
2371 	cfg->fc_table = rtm->rtm_table;
2372 	cfg->fc_dst_len = rtm->rtm_dst_len;
2373 	cfg->fc_src_len = rtm->rtm_src_len;
2374 	cfg->fc_flags = RTF_UP;
2375 	cfg->fc_protocol = rtm->rtm_protocol;
2376 	cfg->fc_type = rtm->rtm_type;
2377 
2378 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2379 	    rtm->rtm_type == RTN_BLACKHOLE ||
2380 	    rtm->rtm_type == RTN_PROHIBIT ||
2381 	    rtm->rtm_type == RTN_THROW)
2382 		cfg->fc_flags |= RTF_REJECT;
2383 
2384 	if (rtm->rtm_type == RTN_LOCAL)
2385 		cfg->fc_flags |= RTF_LOCAL;
2386 
2387 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2388 	cfg->fc_nlinfo.nlh = nlh;
2389 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2390 
2391 	if (tb[RTA_GATEWAY]) {
2392 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2393 		cfg->fc_flags |= RTF_GATEWAY;
2394 	}
2395 
2396 	if (tb[RTA_DST]) {
2397 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2398 
2399 		if (nla_len(tb[RTA_DST]) < plen)
2400 			goto errout;
2401 
2402 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2403 	}
2404 
2405 	if (tb[RTA_SRC]) {
2406 		int plen = (rtm->rtm_src_len + 7) >> 3;
2407 
2408 		if (nla_len(tb[RTA_SRC]) < plen)
2409 			goto errout;
2410 
2411 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2412 	}
2413 
2414 	if (tb[RTA_PREFSRC])
2415 		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2416 
2417 	if (tb[RTA_OIF])
2418 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2419 
2420 	if (tb[RTA_PRIORITY])
2421 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2422 
2423 	if (tb[RTA_METRICS]) {
2424 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2425 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2426 	}
2427 
2428 	if (tb[RTA_TABLE])
2429 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2430 
2431 	if (tb[RTA_MULTIPATH]) {
2432 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2433 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2434 	}
2435 
2436 	err = 0;
2437 errout:
2438 	return err;
2439 }
2440 
/*
 * Add (@add != 0) or delete (@add == 0) every nexthop listed in the
 * RTA_MULTIPATH payload referenced by cfg->fc_mp / cfg->fc_mp_len.
 *
 * Each nexthop is handled as an independent route: @cfg is copied,
 * the copy's ifindex and gateway are overridden from the nexthop, and
 * ip6_route_add() or ip6_route_del() is called on the copy.
 *
 * Returns 0 if every operation succeeded, otherwise the last error seen.
 * NOTE(review): after a failed add the loop restarts in delete mode, and
 * errors from that delete pass also overwrite last_err, so the value
 * returned may be a delete error rather than the original add error.
 */
static int ip6_route_multipath(struct fib6_config *cfg, int add)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	int remaining;
	int attrlen;
	int err = 0, last_err = 0;

beginning:
	/* (Re)start from the first nexthop of the attribute payload. */
	rtnh = (struct rtnexthop *)cfg->fc_mp;
	remaining = cfg->fc_mp_len;

	/* Parse a Multipath Entry */
	while (rtnh_ok(rtnh, remaining)) {
		/* Start from the common config, then apply per-hop overrides. */
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
		}
		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
		if (err) {
			last_err = err;
			/* If we are trying to remove a route, do not stop the
			 * loop when ip6_route_del() fails (because next hop is
			 * already gone), we should try to remove all next hops.
			 */
			if (add) {
				/* If add fails, we should try to delete all
				 * next hops that have been already added.
				 */
				add = 0;
				goto beginning;
			}
		}
		/* Because each route is added like a single route we remove
		 * this flag after the first nexthop (if there is a collision,
		 * we have already failed to add the first nexthop:
		 * fib6_add_rt2node() has rejected it).
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
		rtnh = rtnh_next(rtnh, &remaining);
	}

	return last_err;
}
2495 
2496 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2497 {
2498 	struct fib6_config cfg;
2499 	int err;
2500 
2501 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2502 	if (err < 0)
2503 		return err;
2504 
2505 	if (cfg.fc_mp)
2506 		return ip6_route_multipath(&cfg, 0);
2507 	else
2508 		return ip6_route_del(&cfg);
2509 }
2510 
2511 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2512 {
2513 	struct fib6_config cfg;
2514 	int err;
2515 
2516 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2517 	if (err < 0)
2518 		return err;
2519 
2520 	if (cfg.fc_mp)
2521 		return ip6_route_multipath(&cfg, 1);
2522 	else
2523 		return ip6_route_add(&cfg);
2524 }
2525 
2526 static inline size_t rt6_nlmsg_size(void)
2527 {
2528 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2529 	       + nla_total_size(16) /* RTA_SRC */
2530 	       + nla_total_size(16) /* RTA_DST */
2531 	       + nla_total_size(16) /* RTA_GATEWAY */
2532 	       + nla_total_size(16) /* RTA_PREFSRC */
2533 	       + nla_total_size(4) /* RTA_TABLE */
2534 	       + nla_total_size(4) /* RTA_IIF */
2535 	       + nla_total_size(4) /* RTA_OIF */
2536 	       + nla_total_size(4) /* RTA_PRIORITY */
2537 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2538 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2539 }
2540 
/*
 * Serialise route @rt into @skb as a netlink route message.
 *
 * @net:    namespace, passed on to ip6mr_get_route()/ip6_route_get_saddr()
 * @skb:    message buffer being filled
 * @rt:     route to describe
 * @dst:    if non-NULL, reported as a /128 RTA_DST instead of rt's prefix
 * @src:    if non-NULL, reported as a /128 RTA_SRC (CONFIG_IPV6_SUBTREES)
 * @iif:    input interface index, 0 if none
 * @type:   netlink message type
 * @portid: netlink port id placed in the header
 * @seq:    netlink sequence number placed in the header
 * @prefix: non-zero to emit only RTF_PREFIX_RT routes
 * @nowait: passed through to ip6mr_get_route()
 * @flags:  netlink message flags
 *
 * Returns 1 without emitting anything when @prefix is set and @rt is not
 * a prefix route, -EMSGSIZE when the message does not fit (the partial
 * message is cancelled), 0 from one multicast path, and otherwise the
 * value of nlmsg_end().
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	/* The table id goes both in the header field and in RTA_TABLE. */
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* Derive the route type; reject routes map dst.error to a type. */
	if (rt->rt6i_flags & RTF_REJECT) {
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	/* Start from the stored protocol, then override from route flags. */
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* An explicit @dst is reported as a host (/128) destination. */
	if (dst) {
		if (nla_put(skb, RTA_DST, 16, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put(skb, RTA_SRC, 16, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		/* Multicast destinations are described by the mroute code. */
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		/* No input interface: report the source address that would
		 * be selected for @dst, if one can be determined.
		 */
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	/* NOTE(review): with @dst set, iif == 0 and a route prefsrc, this
	 * emits a second RTA_PREFSRC after the one above - confirm intended.
	 */
	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	/* Remaining lifetime in jiffies; 0 when the route does not expire. */
	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	/* Drop the partially built message from @skb. */
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2685 
2686 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2687 {
2688 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2689 	int prefix;
2690 
2691 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2692 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2693 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2694 	} else
2695 		prefix = 0;
2696 
2697 	return rt6_fill_node(arg->net,
2698 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2699 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2700 		     prefix, 0, NLM_F_MULTI);
2701 }
2702 
2703 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
2704 {
2705 	struct net *net = sock_net(in_skb->sk);
2706 	struct nlattr *tb[RTA_MAX+1];
2707 	struct rt6_info *rt;
2708 	struct sk_buff *skb;
2709 	struct rtmsg *rtm;
2710 	struct flowi6 fl6;
2711 	int err, iif = 0, oif = 0;
2712 
2713 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2714 	if (err < 0)
2715 		goto errout;
2716 
2717 	err = -EINVAL;
2718 	memset(&fl6, 0, sizeof(fl6));
2719 
2720 	if (tb[RTA_SRC]) {
2721 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2722 			goto errout;
2723 
2724 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2725 	}
2726 
2727 	if (tb[RTA_DST]) {
2728 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2729 			goto errout;
2730 
2731 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2732 	}
2733 
2734 	if (tb[RTA_IIF])
2735 		iif = nla_get_u32(tb[RTA_IIF]);
2736 
2737 	if (tb[RTA_OIF])
2738 		oif = nla_get_u32(tb[RTA_OIF]);
2739 
2740 	if (tb[RTA_MARK])
2741 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
2742 
2743 	if (iif) {
2744 		struct net_device *dev;
2745 		int flags = 0;
2746 
2747 		dev = __dev_get_by_index(net, iif);
2748 		if (!dev) {
2749 			err = -ENODEV;
2750 			goto errout;
2751 		}
2752 
2753 		fl6.flowi6_iif = iif;
2754 
2755 		if (!ipv6_addr_any(&fl6.saddr))
2756 			flags |= RT6_LOOKUP_F_HAS_SADDR;
2757 
2758 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2759 							       flags);
2760 	} else {
2761 		fl6.flowi6_oif = oif;
2762 
2763 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2764 	}
2765 
2766 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2767 	if (!skb) {
2768 		ip6_rt_put(rt);
2769 		err = -ENOBUFS;
2770 		goto errout;
2771 	}
2772 
2773 	/* Reserve room for dummy headers, this skb can pass
2774 	   through good chunk of routing engine.
2775 	 */
2776 	skb_reset_mac_header(skb);
2777 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2778 
2779 	skb_dst_set(skb, &rt->dst);
2780 
2781 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2782 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2783 			    nlh->nlmsg_seq, 0, 0, 0);
2784 	if (err < 0) {
2785 		kfree_skb(skb);
2786 		goto errout;
2787 	}
2788 
2789 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2790 errout:
2791 	return err;
2792 }
2793 
2794 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2795 {
2796 	struct sk_buff *skb;
2797 	struct net *net = info->nl_net;
2798 	u32 seq;
2799 	int err;
2800 
2801 	err = -ENOBUFS;
2802 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2803 
2804 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2805 	if (!skb)
2806 		goto errout;
2807 
2808 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2809 				event, info->portid, seq, 0, 0, 0);
2810 	if (err < 0) {
2811 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2812 		WARN_ON(err == -EMSGSIZE);
2813 		kfree_skb(skb);
2814 		goto errout;
2815 	}
2816 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2817 		    info->nlh, gfp_any());
2818 	return;
2819 errout:
2820 	if (err < 0)
2821 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2822 }
2823 
2824 static int ip6_route_dev_notify(struct notifier_block *this,
2825 				unsigned long event, void *ptr)
2826 {
2827 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2828 	struct net *net = dev_net(dev);
2829 
2830 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2831 		net->ipv6.ip6_null_entry->dst.dev = dev;
2832 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2833 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2834 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2835 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2836 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2837 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2838 #endif
2839 	}
2840 
2841 	return NOTIFY_OK;
2842 }
2843 
2844 /*
2845  *	/proc
2846  */
2847 
2848 #ifdef CONFIG_PROC_FS
2849 
/* File operations for the per-namespace "ipv6_route" proc entry. */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
2857 
2858 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2859 {
2860 	struct net *net = (struct net *)seq->private;
2861 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2862 		   net->ipv6.rt6_stats->fib_nodes,
2863 		   net->ipv6.rt6_stats->fib_route_nodes,
2864 		   net->ipv6.rt6_stats->fib_rt_alloc,
2865 		   net->ipv6.rt6_stats->fib_rt_entries,
2866 		   net->ipv6.rt6_stats->fib_rt_cache,
2867 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2868 		   net->ipv6.rt6_stats->fib_discarded_routes);
2869 
2870 	return 0;
2871 }
2872 
/* open() for "rt6_stats": single-record seq_file bound to rt6_stats_seq_show(). */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
2877 
/* File operations for the per-namespace "rt6_stats" proc entry. */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
2885 #endif	/* CONFIG_PROC_FS */
2886 
2887 #ifdef CONFIG_SYSCTL
2888 
2889 static
2890 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2891 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2892 {
2893 	struct net *net;
2894 	int delay;
2895 	if (!write)
2896 		return -EINVAL;
2897 
2898 	net = (struct net *)ctl->extra1;
2899 	delay = net->ipv6.sysctl.flush_delay;
2900 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2901 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2902 	return 0;
2903 }
2904 
/*
 * Template for the per-namespace IPv6 route sysctl table.
 *
 * ipv6_route_sysctl_init() duplicates this array and re-points each
 * .data member by array index, so the order of the entries here must
 * stay in sync with the table[N] assignments in that function.
 */
struct ctl_table ipv6_route_table_template[] = {
	{	/* [0] write-only; triggers a GC run */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{	/* [1] */
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{	/* [2] */
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{	/* [3] same variable as [9], handled by proc_dointvec_jiffies */
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{	/* [4] */
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{	/* [5] */
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{	/* [6] */
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{	/* [7] */
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{	/* [8] */
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{	/* [9] same variable as [3], handled by proc_dointvec_ms_jiffies */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }	/* terminating empty entry */
};
2978 
2979 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2980 {
2981 	struct ctl_table *table;
2982 
2983 	table = kmemdup(ipv6_route_table_template,
2984 			sizeof(ipv6_route_table_template),
2985 			GFP_KERNEL);
2986 
2987 	if (table) {
2988 		table[0].data = &net->ipv6.sysctl.flush_delay;
2989 		table[0].extra1 = net;
2990 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2991 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2992 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2993 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2994 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2995 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2996 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2997 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2998 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2999 
3000 		/* Don't export sysctls to unprivileged users */
3001 		if (net->user_ns != &init_user_ns)
3002 			table[0].procname = NULL;
3003 	}
3004 
3005 	return table;
3006 }
3007 #endif
3008 
3009 static int __net_init ip6_route_net_init(struct net *net)
3010 {
3011 	int ret = -ENOMEM;
3012 
3013 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3014 	       sizeof(net->ipv6.ip6_dst_ops));
3015 
3016 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3017 		goto out_ip6_dst_ops;
3018 
3019 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3020 					   sizeof(*net->ipv6.ip6_null_entry),
3021 					   GFP_KERNEL);
3022 	if (!net->ipv6.ip6_null_entry)
3023 		goto out_ip6_dst_entries;
3024 	net->ipv6.ip6_null_entry->dst.path =
3025 		(struct dst_entry *)net->ipv6.ip6_null_entry;
3026 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3027 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3028 			 ip6_template_metrics, true);
3029 
3030 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3031 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3032 					       sizeof(*net->ipv6.ip6_prohibit_entry),
3033 					       GFP_KERNEL);
3034 	if (!net->ipv6.ip6_prohibit_entry)
3035 		goto out_ip6_null_entry;
3036 	net->ipv6.ip6_prohibit_entry->dst.path =
3037 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3038 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3039 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3040 			 ip6_template_metrics, true);
3041 
3042 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3043 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3044 					       GFP_KERNEL);
3045 	if (!net->ipv6.ip6_blk_hole_entry)
3046 		goto out_ip6_prohibit_entry;
3047 	net->ipv6.ip6_blk_hole_entry->dst.path =
3048 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3049 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3050 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3051 			 ip6_template_metrics, true);
3052 #endif
3053 
3054 	net->ipv6.sysctl.flush_delay = 0;
3055 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3056 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3057 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3058 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3059 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3060 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3061 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3062 
3063 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3064 
3065 	ret = 0;
3066 out:
3067 	return ret;
3068 
3069 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3070 out_ip6_prohibit_entry:
3071 	kfree(net->ipv6.ip6_prohibit_entry);
3072 out_ip6_null_entry:
3073 	kfree(net->ipv6.ip6_null_entry);
3074 #endif
3075 out_ip6_dst_entries:
3076 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3077 out_ip6_dst_ops:
3078 	goto out;
3079 }
3080 
/*
 * Per-namespace teardown: free the special route entries allocated by
 * ip6_route_net_init() and destroy the dst entry counter.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
3090 
/*
 * Late per-namespace init: create the "ipv6_route" and "rt6_stats" proc
 * entries when CONFIG_PROC_FS is enabled.  Always returns 0; the
 * proc_create() results are not checked.
 */
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
#endif
	return 0;
}
3099 
/* Late per-namespace exit: remove the proc entries created at init. */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
3107 
/* Per-namespace hooks for the core routing state (special entries, sysctl defaults). */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
3112 
3113 static int __net_init ipv6_inetpeer_init(struct net *net)
3114 {
3115 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3116 
3117 	if (!bp)
3118 		return -ENOMEM;
3119 	inet_peer_base_init(bp);
3120 	net->ipv6.peers = bp;
3121 	return 0;
3122 }
3123 
/*
 * Tear down the namespace's IPv6 inet_peer base: detach it from @net,
 * invalidate its tree and free the base itself.
 */
static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv6.peers;

	/* Clear the namespace pointer before the base is freed. */
	net->ipv6.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}
3132 
/* Per-namespace hooks for the IPv6 inet_peer base. */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
3137 
/* Per-namespace hooks that create and remove the route proc entries. */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
3142 
/* Netdevice notifier that binds the special route entries to loopback. */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
3147 
/*
 * Module-level initialisation of the IPv6 routing subsystem.
 *
 * Sets up, in order: the rt6_info slab cache, the blackhole dst counter,
 * the inetpeer and core per-namespace operations, init_net's special
 * entries, the FIB, xfrm6, the FIB rules, the late per-namespace
 * operations (proc entries), the rtnetlink route handlers and the
 * netdevice notifier.
 *
 * Returns 0 on success or a negative errno; on failure the steps already
 * completed are undone in reverse order through the labels at the end.
 * NOTE(review): the unwind path does not unregister the rtnetlink
 * handlers if register_netdevice_notifier() fails - confirm whether
 * that matters.
 */
int __init ip6_route_init(void)
{
	int ret;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	/* Blackhole dsts are allocated from the same slab cache. */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	/* Any handler registration failure is reported as -ENOBUFS. */
	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

out:
	return ret;

	/* Error unwinding: each label undoes the step named in it. */
out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
3231 
/*
 * Undo ip6_route_init(): unregister the notifier and per-namespace
 * operations, shut down FIB rules, xfrm6 and the FIB, then release the
 * blackhole dst counter and the rt6_info slab cache.
 *
 * NOTE(review): ipv6_inetpeer_ops is unregistered before
 * ip6_route_net_ops here, whereas the error path of ip6_route_init()
 * unregisters them in the opposite order - confirm this is intended.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
3244