xref: /openbmc/linux/net/ipv6/route.c (revision 87c2ce3b)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *	This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15 
16 /*	Changes:
17  *
18  *	YOSHIFUJI Hideaki @USAGI
19  *		reworked default router selection.
20  *		- respect outgoing interface
21  *		- select from (probably) reachable routers (i.e.
22  *		routers in REACHABLE, STALE, DELAY or PROBE states).
23  *		- always select the same router if it is (probably)
24  *		reachable.  otherwise, round-robin the list.
25  */
26 
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
40 
41 #ifdef 	CONFIG_PROC_FS
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
44 #endif
45 
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56 
57 #include <asm/uaccess.h>
58 
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62 
/*
 * Debug tracing level for this file.  Set to 3 to get tracing: at level 3
 * or above RDBG() and RT6_TRACE() expand to printk() calls, below that they
 * expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
/* RDBG() pastes its argument directly after "printk", so the argument must
 * itself be a parenthesised printk() argument list.
 */
#define RDBG(x) printk x
/* RT6_TRACE() prefixes the format string with KERN_DEBUG. */
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif
73 
74 
/*
 * Tunables for the IPv6 routing cache.  The time-valued ones are
 * initialised as multiples of HZ, i.e. they are expressed in jiffies.
 */
/* ip6_dst_gc() always runs a GC pass once the entry count exceeds this. */
static int ip6_rt_max_size = 4096;
/* Minimum spacing between GC passes while the entry count is within bounds. */
static int ip6_rt_gc_min_interval = HZ / 2;
/* ip6_dst_gc() resets its expiry estimate to half of this value. */
static int ip6_rt_gc_timeout = 60*HZ;
/* Not static: referenced from outside this file (use not visible here). */
int ip6_rt_gc_interval = 30*HZ;
/* Shift count used by ip6_dst_gc() to decay its expiry estimate. */
static int ip6_rt_gc_elasticity = 9;
/* Lifetime given to routes whose MTU was lowered by PMTU discovery. */
static int ip6_rt_mtu_expires = 10*60*HZ;
/* Lower bound applied by ipv6_advmss(). */
static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
82 
/*
 * Forward declarations.  Most of these are referenced by the static
 * initialisers of ip6_dst_ops and ip6_null_entry below, before their
 * definitions appear in the file.
 */
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(void);

/* Packet and link event handlers installed in dst entries / dst_ops. */
static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
95 
/*
 * Destination-cache operations for IPv6 routes.  Every rt6_info allocated
 * through ip6_dst_alloc() is bound to this table, which wires the generic
 * dst hooks to the ip6_* implementations in this file.
 */
static struct dst_ops ip6_dst_ops = {
	.family			=	AF_INET6,
	.protocol		=	__constant_htons(ETH_P_IPV6),
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	/* each dst allocated with these ops is a full struct rt6_info */
	.entry_size		=	sizeof(struct rt6_info),
};
109 
/*
 * Statically allocated "no route" entry.  Lookups in this file fall back to
 * it when nothing usable is found.  It is a reject route bound to the
 * loopback device: it carries error -ENETUNREACH, its input/output handlers
 * discard packets, and it has the largest possible metric.
 */
struct rt6_info ip6_null_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.dev		= &loopback_dev,
			.obsolete	= -1,
			.error		= -ENETUNREACH,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
			.ops		= &ip6_dst_ops,
			/* the entry is its own path */
			.path		= (struct dst_entry*)&ip6_null_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
129 
/*
 * Root node of the IPv6 FIB tree.  It starts out with ip6_null_entry as
 * its only leaf and is flagged as root, top-level root and as carrying
 * route information.
 */
struct fib6_node ip6_routing_table = {
	.leaf		= &ip6_null_entry,
	.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
};
134 
/*
 * Reader/writer lock protecting the whole IPv6 FIB.  In this file lookups
 * take it for reading, while the fib6_add()/fib6_del() wrappers
 * (ip6_ins_rt(), ip6_del_rt()) take it for writing.
 */

DEFINE_RWLOCK(rt6_lock);
138 
139 
140 /* allocate dst with ip6_dst_ops */
141 static __inline__ struct rt6_info *ip6_dst_alloc(void)
142 {
143 	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
144 }
145 
146 static void ip6_dst_destroy(struct dst_entry *dst)
147 {
148 	struct rt6_info *rt = (struct rt6_info *)dst;
149 	struct inet6_dev *idev = rt->rt6i_idev;
150 
151 	if (idev != NULL) {
152 		rt->rt6i_idev = NULL;
153 		in6_dev_put(idev);
154 	}
155 }
156 
157 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
158 			   int how)
159 {
160 	struct rt6_info *rt = (struct rt6_info *)dst;
161 	struct inet6_dev *idev = rt->rt6i_idev;
162 
163 	if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
164 		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
165 		if (loopback_idev != NULL) {
166 			rt->rt6i_idev = loopback_idev;
167 			in6_dev_put(idev);
168 		}
169 	}
170 }
171 
172 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
173 {
174 	return (rt->rt6i_flags & RTF_EXPIRES &&
175 		time_after(jiffies, rt->rt6i_expires));
176 }
177 
/*
 *	Route lookup. The caller is assumed to hold rt6_lock (in any mode).
 */
181 
182 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
183 						    int oif,
184 						    int strict)
185 {
186 	struct rt6_info *local = NULL;
187 	struct rt6_info *sprt;
188 
189 	if (oif) {
190 		for (sprt = rt; sprt; sprt = sprt->u.next) {
191 			struct net_device *dev = sprt->rt6i_dev;
192 			if (dev->ifindex == oif)
193 				return sprt;
194 			if (dev->flags & IFF_LOOPBACK) {
195 				if (sprt->rt6i_idev == NULL ||
196 				    sprt->rt6i_idev->dev->ifindex != oif) {
197 					if (strict && oif)
198 						continue;
199 					if (local && (!oif ||
200 						      local->rt6i_idev->dev->ifindex == oif))
201 						continue;
202 				}
203 				local = sprt;
204 			}
205 		}
206 
207 		if (local)
208 			return local;
209 
210 		if (strict)
211 			return &ip6_null_entry;
212 	}
213 	return rt;
214 }
215 
/*
 *	Pointer to the last default router chosen by rt6_best_dflt(), or NULL
 *	when none is recorded.  Writes are serialised by rt6_dflt_lock.
 *	rt6_best_dflt() takes the lock with plain spin_lock() (original
 *	note: BH is disabled locally), while rt6_reset_dflt_pointer() uses
 *	spin_lock_bh().
 */
static struct rt6_info *rt6_dflt_pointer;
static DEFINE_SPINLOCK(rt6_dflt_lock);
221 
222 void rt6_reset_dflt_pointer(struct rt6_info *rt)
223 {
224 	spin_lock_bh(&rt6_dflt_lock);
225 	if (rt == NULL || rt == rt6_dflt_pointer) {
226 		RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
227 		rt6_dflt_pointer = NULL;
228 	}
229 	spin_unlock_bh(&rt6_dflt_lock);
230 }
231 
/*
 * Default Router Selection (RFC 2461 6.3.6)
 *
 * Walks the sibling list starting at @rt and scores every unexpired entry
 * that has a neighbour entry in a usable state:
 *
 *	+8  no @oif requested, or the route's device matches @oif
 *	+4  the route is the one remembered in rt6_dflt_pointer
 *	+3  neighbour is NUD_REACHABLE
 *	+2  neighbour is NUD_STALE, NUD_DELAY or NUD_PROBE
 *	+1  neighbour is NUD_NOARP or NUD_PERMANENT
 *
 * Entries with no neighbour, or with a neighbour in any other state, are
 * skipped.  A score of 12 or more (interface matches AND it is the
 * remembered router) ends the search immediately.
 *
 * If nothing scored, the list is round-robined starting after the
 * remembered router.  If that also fails, the first unexpired RTF_DEFAULT
 * route of the FIB root that fits @oif is used, and as a last resort
 * ip6_null_entry.
 *
 * Returns the chosen route.  No reference is taken in this function.
 * The only visible caller (ip6_route_output()) holds rt6_lock for reading
 * with BH disabled, which is what the plain spin_lock() below relies on.
 */
static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
{
	struct rt6_info *match = NULL;
	struct rt6_info *sprt;
	int mpri = 0;

	for (sprt = rt; sprt; sprt = sprt->u.next) {
		struct neighbour *neigh;
		int m = 0;

		if (!oif ||
		    (sprt->rt6i_dev &&
		     sprt->rt6i_dev->ifindex == oif))
			m += 8;

		if (rt6_check_expired(sprt))
			continue;

		/* read without rt6_dflt_lock; only used as a scoring hint */
		if (sprt == rt6_dflt_pointer)
			m += 4;

		if ((neigh = sprt->rt6i_nexthop) != NULL) {
			read_lock_bh(&neigh->lock);
			switch (neigh->nud_state) {
			case NUD_REACHABLE:
				m += 3;
				break;

			case NUD_STALE:
			case NUD_DELAY:
			case NUD_PROBE:
				m += 2;
				break;

			case NUD_NOARP:
			case NUD_PERMANENT:
				m += 1;
				break;

			case NUD_INCOMPLETE:
			default:
				/* unusable neighbour: skip this router */
				read_unlock_bh(&neigh->lock);
				continue;
			}
			read_unlock_bh(&neigh->lock);
		} else {
			continue;
		}

		if (m > mpri || m >= 12) {
			match = sprt;
			mpri = m;
			if (m >= 12) {
				/* we choose the last default router if it
				 * is in (probably) reachable state.
				 * If route changed, we should do pmtu
				 * discovery. --yoshfuji
				 */
				break;
			}
		}
	}

	spin_lock(&rt6_dflt_lock);
	if (!match) {
		/*
		 *	No default routers are known to be reachable.
		 *	SHOULD round robin
		 */
		if (rt6_dflt_pointer) {
			/* first pass: entries after the remembered router */
			for (sprt = rt6_dflt_pointer->u.next;
			     sprt; sprt = sprt->u.next) {
				if (sprt->u.dst.obsolete <= 0 &&
				    sprt->u.dst.error == 0 &&
				    !rt6_check_expired(sprt)) {
					match = sprt;
					break;
				}
			}
			/* second pass: wrap around, from the list head up to
			 * and including the remembered router
			 */
			for (sprt = rt;
			     !match && sprt;
			     sprt = sprt->u.next) {
				if (sprt->u.dst.obsolete <= 0 &&
				    sprt->u.dst.error == 0 &&
				    !rt6_check_expired(sprt)) {
					match = sprt;
					break;
				}
				if (sprt == rt6_dflt_pointer)
					break;
			}
		}
	}

	if (match) {
		/* remember the choice for the next selection */
		if (rt6_dflt_pointer != match)
			RT6_TRACE("changed default router: %p->%p\n",
				  rt6_dflt_pointer, match);
		rt6_dflt_pointer = match;
	}
	spin_unlock(&rt6_dflt_lock);

	if (!match) {
		/*
		 * Last Resort: if no default routers found,
		 * use addrconf default route.
		 * We don't record this route.
		 */
		for (sprt = ip6_routing_table.leaf;
		     sprt; sprt = sprt->u.next) {
			if (!rt6_check_expired(sprt) &&
			    (sprt->rt6i_flags & RTF_DEFAULT) &&
			    (!oif ||
			     (sprt->rt6i_dev &&
			      sprt->rt6i_dev->ifindex == oif))) {
				match = sprt;
				break;
			}
		}
		if (!match) {
			/* no default route.  give up. */
			match = &ip6_null_entry;
		}
	}

	return match;
}
360 
361 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
362 			    int oif, int strict)
363 {
364 	struct fib6_node *fn;
365 	struct rt6_info *rt;
366 
367 	read_lock_bh(&rt6_lock);
368 	fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
369 	rt = rt6_device_match(fn->leaf, oif, strict);
370 	dst_hold(&rt->u.dst);
371 	rt->u.dst.__use++;
372 	read_unlock_bh(&rt6_lock);
373 
374 	rt->u.dst.lastuse = jiffies;
375 	if (rt->u.dst.error == 0)
376 		return rt;
377 	dst_release(&rt->u.dst);
378 	return NULL;
379 }
380 
381 /* ip6_ins_rt is called with FREE rt6_lock.
382    It takes new route entry, the addition fails by any reason the
383    route is freed. In any case, if caller does not hold it, it may
384    be destroyed.
385  */
386 
387 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
388 		void *_rtattr, struct netlink_skb_parms *req)
389 {
390 	int err;
391 
392 	write_lock_bh(&rt6_lock);
393 	err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
394 	write_unlock_bh(&rt6_lock);
395 
396 	return err;
397 }
398 
399 /* No rt6_lock! If COW failed, the function returns dead route entry
400    with dst->error set to errno value.
401  */
402 
403 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
404 				struct in6_addr *saddr, struct netlink_skb_parms *req)
405 {
406 	int err;
407 	struct rt6_info *rt;
408 
409 	/*
410 	 *	Clone the route.
411 	 */
412 
413 	rt = ip6_rt_copy(ort);
414 
415 	if (rt) {
416 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
417 			if (rt->rt6i_dst.plen != 128 &&
418 			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
419 				rt->rt6i_flags |= RTF_ANYCAST;
420 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
421 		}
422 
423 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
424 		rt->rt6i_dst.plen = 128;
425 		rt->rt6i_flags |= RTF_CACHE;
426 		rt->u.dst.flags |= DST_HOST;
427 
428 #ifdef CONFIG_IPV6_SUBTREES
429 		if (rt->rt6i_src.plen && saddr) {
430 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
431 			rt->rt6i_src.plen = 128;
432 		}
433 #endif
434 
435 		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
436 
437 		dst_hold(&rt->u.dst);
438 
439 		err = ip6_ins_rt(rt, NULL, NULL, req);
440 		if (err == 0)
441 			return rt;
442 
443 		rt->u.dst.error = err;
444 
445 		return rt;
446 	}
447 	dst_hold(&ip6_null_entry.u.dst);
448 	return &ip6_null_entry;
449 }
450 
/*
 * BACKTRACK() - climb the FIB tree after a failed strict device match.
 *
 * Not self-contained: it expands in place and uses the caller's local
 * variables `rt`, `fn` and `strict` and the caller's labels `out` and
 * `restart`.
 *
 * When the device match produced ip6_null_entry under strict matching,
 * walk up through fn->parent:
 *  - on reaching a root node, take a reference on the null entry and
 *    jump to `out`;
 *  - on reaching a node that carries route info, jump to `restart` to
 *    retry the match there.
 * If the parent chain ends without either, execution falls through.
 */
#define BACKTRACK() \
if (rt == &ip6_null_entry && strict) { \
       while ((fn = fn->parent) != NULL) { \
		if (fn->fn_flags & RTN_ROOT) { \
			dst_hold(&rt->u.dst); \
			goto out; \
		} \
		if (fn->fn_flags & RTN_RTINFO) \
			goto restart; \
	} \
}
462 
463 
/*
 * Route an incoming packet: find the route for the packet's destination
 * and source addresses, constrained to the receiving interface, and
 * attach it (with a reference held) to skb->dst.
 *
 * Matching is strict (see rt6_device_match()) when the destination is a
 * multicast or link-local address.  A route that has no next hop and is
 * not flagged RTF_NONEXTHOP is cloned into a host cache entry via
 * rt6_cow(); if the insert loses a race (-EEXIST) the lookup is retried,
 * at most 3 attempts in total.
 */
void ip6_route_input(struct sk_buff *skb)
{
	struct fib6_node *fn;
	struct rt6_info *rt;
	int strict;
	int attempts = 3;

	strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);

relookup:
	read_lock_bh(&rt6_lock);

	fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
			 &skb->nh.ipv6h->saddr);

restart:
	/* BACKTRACK() jumps back here after moving fn to a parent node */
	rt = fn->leaf;

	if ((rt->rt6i_flags & RTF_CACHE)) {
		/* cached entry: use it as is, no cloning needed */
		rt = rt6_device_match(rt, skb->dev->ifindex, strict);
		BACKTRACK();
		dst_hold(&rt->u.dst);
		goto out;
	}

	rt = rt6_device_match(rt, skb->dev->ifindex, strict);
	BACKTRACK();

	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
		struct rt6_info *nrt;
		/* pin the template while rt6_lock is dropped for the insert */
		dst_hold(&rt->u.dst);
		read_unlock_bh(&rt6_lock);

		nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
			      &skb->nh.ipv6h->saddr,
			      &NETLINK_CB(skb));

		dst_release(&rt->u.dst);
		rt = nrt;

		/* out2: rt6_lock is no longer held on this path */
		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
			goto out2;

		/* Race condition! In the gap, when rt6_lock was
		   released someone could insert this route.  Relookup.
		*/
		dst_release(&rt->u.dst);
		goto relookup;
	}
	dst_hold(&rt->u.dst);

out:
	read_unlock_bh(&rt6_lock);
out2:
	rt->u.dst.lastuse = jiffies;
	rt->u.dst.__use++;
	skb->dst = (struct dst_entry *) rt;
}
522 
/*
 * Find the route for an outgoing flow @fl (destination fl6_dst, source
 * fl6_src, optional output interface fl->oif) and return its dst entry
 * with a reference held.  @sk is not used in this function.
 *
 * Matching is strict when the destination is multicast or link-local.
 * For RTF_DEFAULT routes with a metric of at least IP6_RT_PRIO_ADDRCONF
 * the router is chosen by rt6_best_dflt(); other RTF_DEFAULT routes are
 * used as found, and all remaining routes go through rt6_device_match().
 * A route that has no next hop and is not flagged RTF_NONEXTHOP is cloned
 * into a host cache entry via rt6_cow(), retrying the lookup on -EEXIST
 * (at most 3 attempts in total).
 *
 * The result is never NULL; callers have to check dst->error.
 */
struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
{
	struct fib6_node *fn;
	struct rt6_info *rt;
	int strict;
	int attempts = 3;

	strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);

relookup:
	read_lock_bh(&rt6_lock);

	fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);

restart:
	/* BACKTRACK() jumps back here after moving fn to a parent node */
	rt = fn->leaf;

	if ((rt->rt6i_flags & RTF_CACHE)) {
		/* cached entry: use it as is, no cloning needed */
		rt = rt6_device_match(rt, fl->oif, strict);
		BACKTRACK();
		dst_hold(&rt->u.dst);
		goto out;
	}
	if (rt->rt6i_flags & RTF_DEFAULT) {
		if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
			rt = rt6_best_dflt(rt, fl->oif);
	} else {
		rt = rt6_device_match(rt, fl->oif, strict);
		BACKTRACK();
	}

	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
		struct rt6_info *nrt;
		/* pin the template while rt6_lock is dropped for the insert */
		dst_hold(&rt->u.dst);
		read_unlock_bh(&rt6_lock);

		nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);

		dst_release(&rt->u.dst);
		rt = nrt;

		/* out2: rt6_lock is no longer held on this path */
		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
			goto out2;

		/* Race condition! In the gap, when rt6_lock was
		   released someone could insert this route.  Relookup.
		*/
		dst_release(&rt->u.dst);
		goto relookup;
	}
	dst_hold(&rt->u.dst);

out:
	read_unlock_bh(&rt6_lock);
out2:
	rt->u.dst.lastuse = jiffies;
	rt->u.dst.__use++;
	return &rt->u.dst;
}
582 
583 
584 /*
585  *	Destination cache support functions
586  */
587 
588 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
589 {
590 	struct rt6_info *rt;
591 
592 	rt = (struct rt6_info *) dst;
593 
594 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
595 		return dst;
596 
597 	return NULL;
598 }
599 
600 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
601 {
602 	struct rt6_info *rt = (struct rt6_info *) dst;
603 
604 	if (rt) {
605 		if (rt->rt6i_flags & RTF_CACHE)
606 			ip6_del_rt(rt, NULL, NULL, NULL);
607 		else
608 			dst_release(dst);
609 	}
610 	return NULL;
611 }
612 
613 static void ip6_link_failure(struct sk_buff *skb)
614 {
615 	struct rt6_info *rt;
616 
617 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
618 
619 	rt = (struct rt6_info *) skb->dst;
620 	if (rt) {
621 		if (rt->rt6i_flags&RTF_CACHE) {
622 			dst_set_expires(&rt->u.dst, 0);
623 			rt->rt6i_flags |= RTF_EXPIRES;
624 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
625 			rt->rt6i_node->fn_sernum = -1;
626 	}
627 }
628 
629 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
630 {
631 	struct rt6_info *rt6 = (struct rt6_info*)dst;
632 
633 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
634 		rt6->rt6i_flags |= RTF_MODIFIED;
635 		if (mtu < IPV6_MIN_MTU) {
636 			mtu = IPV6_MIN_MTU;
637 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
638 		}
639 		dst->metrics[RTAX_MTU-1] = mtu;
640 	}
641 }
642 
/*
 * Singly linked list (through dst->next) of the dst entries created by
 * ndisc_dst_alloc(); ndisc_dst_gc() frees the unreferenced ones.
 * Protected by rt6_lock.
 */
static struct dst_entry *ndisc_dst_gc_list;
/* Forward declaration; defined further down in this file. */
static int ipv6_get_mtu(struct net_device *dev);
646 
647 static inline unsigned int ipv6_advmss(unsigned int mtu)
648 {
649 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
650 
651 	if (mtu < ip6_rt_min_advmss)
652 		mtu = ip6_rt_min_advmss;
653 
654 	/*
655 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
656 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
657 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
658 	 * rely only on pmtu discovery"
659 	 */
660 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
661 		mtu = IPV6_MAXPLEN;
662 	return mtu;
663 }
664 
/*
 * Allocate a standalone dst entry for neighbour discovery traffic.
 *
 * @dev:    outgoing device; a device reference and an inet6_dev reference
 *          are taken and stored in the new entry.
 * @neigh:  neighbour to use as next hop.  If non-NULL an extra reference
 *          is taken; if NULL one is obtained via ndisc_get_neigh(@dev,
 *          @addr).
 * @addr:   address used for the neighbour lookup when @neigh is NULL.
 * @output: output handler installed in the entry.
 *
 * The entry starts with a reference count of 1, is not inserted into the
 * FIB, and is linked onto ndisc_dst_gc_list so that ndisc_dst_gc() can
 * free it once it is unreferenced.
 *
 * Returns the new dst entry, or NULL if @dev has no inet6_dev or the
 * allocation fails.
 */
struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
				  struct neighbour *neigh,
				  struct in6_addr *addr,
				  int (*output)(struct sk_buff *))
{
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);

	if (unlikely(idev == NULL))
		return NULL;

	rt = ip6_dst_alloc();
	if (unlikely(rt == NULL)) {
		/* rt is NULL here, so the "out" path returns NULL */
		in6_dev_put(idev);
		goto out;
	}

	dev_hold(dev);
	if (neigh)
		neigh_hold(neigh);
	else
		neigh = ndisc_get_neigh(dev, addr);

	rt->rt6i_dev	  = dev;
	rt->rt6i_idev     = idev;
	rt->rt6i_nexthop  = neigh;
	atomic_set(&rt->u.dst.__refcnt, 1);
	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	rt->u.dst.output  = output;

#if 0	/* there's no chance to use these for ndisc */
	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
				? DST_HOST
				: 0;
	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;
#endif

	/* push onto the head of the ndisc GC list */
	write_lock_bh(&rt6_lock);
	rt->u.dst.next = ndisc_dst_gc_list;
	ndisc_dst_gc_list = &rt->u.dst;
	write_unlock_bh(&rt6_lock);

	fib6_force_start_gc();

out:
	return (struct dst_entry *)rt;
}
715 
716 int ndisc_dst_gc(int *more)
717 {
718 	struct dst_entry *dst, *next, **pprev;
719 	int freed;
720 
721 	next = NULL;
722 	pprev = &ndisc_dst_gc_list;
723 	freed = 0;
724 	while ((dst = *pprev) != NULL) {
725 		if (!atomic_read(&dst->__refcnt)) {
726 			*pprev = dst->next;
727 			dst_free(dst);
728 			freed++;
729 		} else {
730 			pprev = &dst->next;
731 			(*more)++;
732 		}
733 	}
734 
735 	return freed;
736 }
737 
738 static int ip6_dst_gc(void)
739 {
740 	static unsigned expire = 30*HZ;
741 	static unsigned long last_gc;
742 	unsigned long now = jiffies;
743 
744 	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
745 	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
746 		goto out;
747 
748 	expire++;
749 	fib6_run_gc(expire);
750 	last_gc = now;
751 	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
752 		expire = ip6_rt_gc_timeout>>1;
753 
754 out:
755 	expire -= expire>>ip6_rt_gc_elasticity;
756 	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
757 }
758 
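/* Clean host part of a prefix. Not necessary in radix tree,
   but results in cleaner routing tables.

   Remove it only once everything works!

   NOTE(review): this comment does not describe ipv6_get_mtu() below; it
   looks like a leftover from code that is no longer here - confirm.
 */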
764 
765 static int ipv6_get_mtu(struct net_device *dev)
766 {
767 	int mtu = IPV6_MIN_MTU;
768 	struct inet6_dev *idev;
769 
770 	idev = in6_dev_get(dev);
771 	if (idev) {
772 		mtu = idev->cnf.mtu6;
773 		in6_dev_put(idev);
774 	}
775 	return mtu;
776 }
777 
778 int ipv6_get_hoplimit(struct net_device *dev)
779 {
780 	int hoplimit = ipv6_devconf.hop_limit;
781 	struct inet6_dev *idev;
782 
783 	idev = in6_dev_get(dev);
784 	if (idev) {
785 		hoplimit = idev->cnf.hop_limit;
786 		in6_dev_put(idev);
787 	}
788 	return hoplimit;
789 }
790 
791 /*
792  *
793  */
794 
/*
 * Build a route from a user request and insert it into the FIB.
 *
 * @rtmsg:   route description: destination/source prefix and length,
 *           gateway, flags, metric, interface index and lifetime
 *           (rtmsg_info, in clock_t units).  NOTE: a zero rtmsg_metric is
 *           overwritten in place with IP6_RT_PRIO_USER.
 * @nlh:     optional netlink header; when present its rtmsg payload
 *           supplies the route protocol, otherwise RTPROT_BOOT is used.
 * @_rtattr: optional array of rtattr pointers (struct rtattr **); only
 *           RTA_METRICS is read here.
 * @req:     passed through to ip6_ins_rt().
 *
 * The device and inet6_dev references collected on the way are handed
 * over to the new route on success and dropped at "out" on failure.
 *
 * Returns the result of ip6_ins_rt() on success, or a negative errno:
 * -EINVAL (bad prefix length, source prefix without subtree support,
 * bad gateway, bad metric type), -ENODEV (no usable device), -ENOMEM,
 * -EHOSTUNREACH (gateway not reachable on-link), or a neighbour lookup
 * error.
 */
int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
		void *_rtattr, struct netlink_skb_parms *req)
{
	int err;
	struct rtmsg *r;
	struct rtattr **rta;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	int addr_type;

	rta = (struct rtattr **) _rtattr;

	if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	if (rtmsg->rtmsg_src_len)
		return -EINVAL;
#endif
	if (rtmsg->rtmsg_ifindex) {
		/* explicit interface: take dev and idev references */
		err = -ENODEV;
		dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (rtmsg->rtmsg_metric == 0)
		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;

	rt = ip6_dst_alloc();

	if (rt == NULL) {
		err = -ENOMEM;
		goto out;
	}

	rt->u.dst.obsolete = -1;
	rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
	if (nlh && (r = NLMSG_DATA(nlh))) {
		rt->rt6i_protocol = r->rtm_protocol;
	} else {
		rt->rt6i_protocol = RTPROT_BOOT;
	}

	addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);

	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->u.dst.input = ip6_mc_input;
	else
		rt->u.dst.input = ip6_forward;

	rt->u.dst.output = ip6_output;

	/* store only the prefix bits of the destination */
	ipv6_addr_prefix(&rt->rt6i_dst.addr,
			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
	rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
	if (rt->rt6i_dst.plen == 128)
	       rt->u.dst.flags = DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr,
			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
	rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
#endif

	rt->rt6i_metric = rtmsg->rtmsg_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != &loopback_dev) {
			if (dev) {
				/* a non-NULL dev here always came with an idev */
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = &loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->u.dst.output = ip6_pkt_discard_out;
		rt->u.dst.input = ip6_pkt_discard;
		rt->u.dst.error = -ENETUNREACH;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
		struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &rtmsg->rtmsg_gateway;
		ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type&IPV6_ADDR_UNICAST))
				goto out;

			/* the gateway itself must be reachable via an
			 * existing route
			 */
			grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);

			err = -EHOSTUNREACH;
			if (grt == NULL)
				goto out;
			if (dev) {
				if (dev != grt->rt6i_dev) {
					dst_release(&grt->u.dst);
					goto out;
				}
			} else {
				/* no interface given: inherit it from the
				 * route to the gateway
				 */
				dev = grt->rt6i_dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* accepted only if the gateway is reached directly,
			 * not through another gateway
			 */
			if (!(grt->rt6i_flags&RTF_GATEWAY))
				err = 0;
			dst_release(&grt->u.dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (dev == NULL)
		goto out;

	if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(rt->rt6i_nexthop)) {
			err = PTR_ERR(rt->rt6i_nexthop);
			rt->rt6i_nexthop = NULL;
			goto out;
		}
	}

	rt->rt6i_flags = rtmsg->rtmsg_flags;

install_route:
	if (rta && rta[RTA_METRICS-1]) {
		int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
		struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);

		/* copy the user-supplied metrics; type 0 entries are ignored */
		while (RTA_OK(attr, attrlen)) {
			unsigned flavor = attr->rta_type;
			if (flavor) {
				if (flavor > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}
				rt->u.dst.metrics[flavor-1] =
					*(u32 *)RTA_DATA(attr);
			}
			attr = RTA_NEXT(attr, attrlen);
		}
	}

	/* fill in defaults for the metrics that were not supplied */
	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
	if (!rt->u.dst.metrics[RTAX_MTU-1])
		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	/* the dev/idev references now belong to the route */
	rt->u.dst.dev = dev;
	rt->rt6i_idev = idev;
	return ip6_ins_rt(rt, nlh, _rtattr, req);

out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free((struct dst_entry *) rt);
	return err;
}
994 
995 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
996 {
997 	int err;
998 
999 	write_lock_bh(&rt6_lock);
1000 
1001 	rt6_reset_dflt_pointer(NULL);
1002 
1003 	err = fib6_del(rt, nlh, _rtattr, req);
1004 	dst_release(&rt->u.dst);
1005 
1006 	write_unlock_bh(&rt6_lock);
1007 
1008 	return err;
1009 }
1010 
1011 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1012 {
1013 	struct fib6_node *fn;
1014 	struct rt6_info *rt;
1015 	int err = -ESRCH;
1016 
1017 	read_lock_bh(&rt6_lock);
1018 
1019 	fn = fib6_locate(&ip6_routing_table,
1020 			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1021 			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1022 
1023 	if (fn) {
1024 		for (rt = fn->leaf; rt; rt = rt->u.next) {
1025 			if (rtmsg->rtmsg_ifindex &&
1026 			    (rt->rt6i_dev == NULL ||
1027 			     rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1028 				continue;
1029 			if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1030 			    !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1031 				continue;
1032 			if (rtmsg->rtmsg_metric &&
1033 			    rtmsg->rtmsg_metric != rt->rt6i_metric)
1034 				continue;
1035 			dst_hold(&rt->u.dst);
1036 			read_unlock_bh(&rt6_lock);
1037 
1038 			return ip6_del_rt(rt, nlh, _rtattr, req);
1039 		}
1040 	}
1041 	read_unlock_bh(&rt6_lock);
1042 
1043 	return err;
1044 }
1045 
1046 /*
1047  *	Handle redirects
1048  */
/*
 * Process a validated ICMPv6 redirect.
 *
 * @dest:    destination address the redirect applies to.
 * @saddr:   source address of the redirect (the router that sent it).
 * @neigh:   neighbour entry for the new next hop.
 * @lladdr:  link-layer address to record for @neigh (passed to
 *           neigh_update()).
 * @on_link: non-zero when the destination itself is the new next hop.
 *
 * The redirect is accepted only when the current route to @dest uses the
 * device of @neigh, is a gateway route, and @saddr is its gateway (or,
 * for default routes, the gateway of some route in the FIB root's leaf
 * list).  On acceptance the neighbour is updated and a host cache route
 * to @dest via @neigh is inserted; an old cache route is deleted.
 */
void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
		  struct neighbour *neigh, u8 *lladdr, int on_link)
{
	struct rt6_info *rt, *nrt;

	/* Locate old route to this destination. */
	rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);

	if (rt == NULL)
		return;

	if (neigh->dev != rt->rt6i_dev)
		goto out;

	/*
	 * Current route is on-link; redirect is always invalid.
	 *
	 * Seems, previous statement is not true. It could
	 * be node, which looks for us as on-link (f.e. proxy ndisc)
	 * But then router serving it might decide, that we should
	 * know truth 8)8) --ANK (980726).
	 */
	if (!(rt->rt6i_flags&RTF_GATEWAY))
		goto out;

	/*
	 *	RFC 2461 specifies that redirects should only be
	 *	accepted if they come from the nexthop to the target.
	 *	Due to the way default routers are chosen, this notion
	 *	is a bit fuzzy and one might need to check all default
	 *	routers.
	 */
	if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
		if (rt->rt6i_flags & RTF_DEFAULT) {
			struct rt6_info *rt1;

			read_lock(&rt6_lock);
			for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
				if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
					/* swap our reference over to the
					 * route that really points at the
					 * sender
					 */
					dst_hold(&rt1->u.dst);
					dst_release(&rt->u.dst);
					read_unlock(&rt6_lock);
					rt = rt1;
					goto source_ok;
				}
			}
			read_unlock(&rt6_lock);
		}
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
			       "for redirect target\n");
		goto out;
	}

source_ok:

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/*
	 * Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->u.dst);

	/* Duplicate redirect: silently ignore. */
	if (neigh == rt->u.dst.neighbour)
		goto out;

	nrt = ip6_rt_copy(rt);
	if (nrt == NULL)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	/* the new entry is a host route to the redirected destination */
	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
	nrt->rt6i_dst.plen = 128;
	nrt->u.dst.flags |= DST_HOST;

	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
	nrt->rt6i_nexthop = neigh_clone(neigh);
	/* Reset pmtu, it may be better */
	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));

	if (ip6_ins_rt(nrt, NULL, NULL, NULL))
		goto out;

	if (rt->rt6i_flags&RTF_CACHE) {
		/* ip6_del_rt() releases our reference on rt */
		ip6_del_rt(rt, NULL, NULL, NULL);
		return;
	}

out:
        dst_release(&rt->u.dst);
	return;
}
1157 
1158 /*
1159  *	Handle ICMP "packet too big" messages
1160  *	i.e. Path MTU discovery
1161  */
1162 
1163 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1164 			struct net_device *dev, u32 pmtu)
1165 {
1166 	struct rt6_info *rt, *nrt;
1167 	int allfrag = 0;
1168 
1169 	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1170 	if (rt == NULL)
1171 		return;
1172 
1173 	if (pmtu >= dst_mtu(&rt->u.dst))
1174 		goto out;
1175 
1176 	if (pmtu < IPV6_MIN_MTU) {
1177 		/*
1178 		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1179 		 * MTU (1280) and a fragment header should always be included
1180 		 * after a node receiving Too Big message reporting PMTU is
1181 		 * less than the IPv6 Minimum Link MTU.
1182 		 */
1183 		pmtu = IPV6_MIN_MTU;
1184 		allfrag = 1;
1185 	}
1186 
1187 	/* New mtu received -> path was valid.
1188 	   They are sent only in response to data packets,
1189 	   so that this nexthop apparently is reachable. --ANK
1190 	 */
1191 	dst_confirm(&rt->u.dst);
1192 
1193 	/* Host route. If it is static, it would be better
1194 	   not to override it, but add new one, so that
1195 	   when cache entry will expire old pmtu
1196 	   would return automatically.
1197 	 */
1198 	if (rt->rt6i_flags & RTF_CACHE) {
1199 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1200 		if (allfrag)
1201 			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1202 		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1203 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1204 		goto out;
1205 	}
1206 
1207 	/* Network route.
1208 	   Two cases are possible:
1209 	   1. It is connected route. Action: COW
1210 	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1211 	 */
1212 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1213 		nrt = rt6_cow(rt, daddr, saddr, NULL);
1214 		if (!nrt->u.dst.error) {
1215 			nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1216 			if (allfrag)
1217 				nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1218 			/* According to RFC 1981, detecting PMTU increase shouldn't be
1219 			   happened within 5 mins, the recommended timer is 10 mins.
1220 			   Here this route expiration time is set to ip6_rt_mtu_expires
1221 			   which is 10 mins. After 10 mins the decreased pmtu is expired
1222 			   and detecting PMTU increase will be automatically happened.
1223 			 */
1224 			dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1225 			nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1226 		}
1227 		dst_release(&nrt->u.dst);
1228 	} else {
1229 		nrt = ip6_rt_copy(rt);
1230 		if (nrt == NULL)
1231 			goto out;
1232 		ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1233 		nrt->rt6i_dst.plen = 128;
1234 		nrt->u.dst.flags |= DST_HOST;
1235 		nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1236 		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1237 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1238 		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1239 		if (allfrag)
1240 			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1241 		ip6_ins_rt(nrt, NULL, NULL, NULL);
1242 	}
1243 
1244 out:
1245 	dst_release(&rt->u.dst);
1246 }
1247 
1248 /*
1249  *	Misc support functions
1250  */
1251 
1252 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1253 {
1254 	struct rt6_info *rt = ip6_dst_alloc();
1255 
1256 	if (rt) {
1257 		rt->u.dst.input = ort->u.dst.input;
1258 		rt->u.dst.output = ort->u.dst.output;
1259 
1260 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1261 		rt->u.dst.dev = ort->u.dst.dev;
1262 		if (rt->u.dst.dev)
1263 			dev_hold(rt->u.dst.dev);
1264 		rt->rt6i_idev = ort->rt6i_idev;
1265 		if (rt->rt6i_idev)
1266 			in6_dev_hold(rt->rt6i_idev);
1267 		rt->u.dst.lastuse = jiffies;
1268 		rt->rt6i_expires = 0;
1269 
1270 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1271 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1272 		rt->rt6i_metric = 0;
1273 
1274 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1275 #ifdef CONFIG_IPV6_SUBTREES
1276 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1277 #endif
1278 	}
1279 	return rt;
1280 }
1281 
1282 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1283 {
1284 	struct rt6_info *rt;
1285 	struct fib6_node *fn;
1286 
1287 	fn = &ip6_routing_table;
1288 
1289 	write_lock_bh(&rt6_lock);
1290 	for (rt = fn->leaf; rt; rt=rt->u.next) {
1291 		if (dev == rt->rt6i_dev &&
1292 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1293 			break;
1294 	}
1295 	if (rt)
1296 		dst_hold(&rt->u.dst);
1297 	write_unlock_bh(&rt6_lock);
1298 	return rt;
1299 }
1300 
1301 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1302 				     struct net_device *dev)
1303 {
1304 	struct in6_rtmsg rtmsg;
1305 
1306 	memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1307 	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1308 	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1309 	rtmsg.rtmsg_metric = 1024;
1310 	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1311 
1312 	rtmsg.rtmsg_ifindex = dev->ifindex;
1313 
1314 	ip6_route_add(&rtmsg, NULL, NULL, NULL);
1315 	return rt6_get_dflt_router(gwaddr, dev);
1316 }
1317 
1318 void rt6_purge_dflt_routers(void)
1319 {
1320 	struct rt6_info *rt;
1321 
1322 restart:
1323 	read_lock_bh(&rt6_lock);
1324 	for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1325 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1326 			dst_hold(&rt->u.dst);
1327 
1328 			rt6_reset_dflt_pointer(NULL);
1329 
1330 			read_unlock_bh(&rt6_lock);
1331 
1332 			ip6_del_rt(rt, NULL, NULL, NULL);
1333 
1334 			goto restart;
1335 		}
1336 	}
1337 	read_unlock_bh(&rt6_lock);
1338 }
1339 
1340 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1341 {
1342 	struct in6_rtmsg rtmsg;
1343 	int err;
1344 
1345 	switch(cmd) {
1346 	case SIOCADDRT:		/* Add a route */
1347 	case SIOCDELRT:		/* Delete a route */
1348 		if (!capable(CAP_NET_ADMIN))
1349 			return -EPERM;
1350 		err = copy_from_user(&rtmsg, arg,
1351 				     sizeof(struct in6_rtmsg));
1352 		if (err)
1353 			return -EFAULT;
1354 
1355 		rtnl_lock();
1356 		switch (cmd) {
1357 		case SIOCADDRT:
1358 			err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1359 			break;
1360 		case SIOCDELRT:
1361 			err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1362 			break;
1363 		default:
1364 			err = -EINVAL;
1365 		}
1366 		rtnl_unlock();
1367 
1368 		return err;
1369 	};
1370 
1371 	return -EINVAL;
1372 }
1373 
1374 /*
1375  *	Drop the packet on the floor
1376  */
1377 
1378 static int ip6_pkt_discard(struct sk_buff *skb)
1379 {
1380 	IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1381 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1382 	kfree_skb(skb);
1383 	return 0;
1384 }
1385 
1386 static int ip6_pkt_discard_out(struct sk_buff *skb)
1387 {
1388 	skb->dev = skb->dst->dev;
1389 	return ip6_pkt_discard(skb);
1390 }
1391 
1392 /*
1393  *	Allocate a dst for local (unicast / anycast) address.
1394  */
1395 
1396 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1397 				    const struct in6_addr *addr,
1398 				    int anycast)
1399 {
1400 	struct rt6_info *rt = ip6_dst_alloc();
1401 
1402 	if (rt == NULL)
1403 		return ERR_PTR(-ENOMEM);
1404 
1405 	dev_hold(&loopback_dev);
1406 	in6_dev_hold(idev);
1407 
1408 	rt->u.dst.flags = DST_HOST;
1409 	rt->u.dst.input = ip6_input;
1410 	rt->u.dst.output = ip6_output;
1411 	rt->rt6i_dev = &loopback_dev;
1412 	rt->rt6i_idev = idev;
1413 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1414 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1415 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1416 	rt->u.dst.obsolete = -1;
1417 
1418 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1419 	if (anycast)
1420 		rt->rt6i_flags |= RTF_ANYCAST;
1421 	else
1422 		rt->rt6i_flags |= RTF_LOCAL;
1423 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1424 	if (rt->rt6i_nexthop == NULL) {
1425 		dst_free((struct dst_entry *) rt);
1426 		return ERR_PTR(-ENOMEM);
1427 	}
1428 
1429 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1430 	rt->rt6i_dst.plen = 128;
1431 
1432 	atomic_set(&rt->u.dst.__refcnt, 1);
1433 
1434 	return rt;
1435 }
1436 
1437 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1438 {
1439 	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1440 	    rt != &ip6_null_entry) {
1441 		RT6_TRACE("deleted by ifdown %p\n", rt);
1442 		return -1;
1443 	}
1444 	return 0;
1445 }
1446 
1447 void rt6_ifdown(struct net_device *dev)
1448 {
1449 	write_lock_bh(&rt6_lock);
1450 	fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1451 	write_unlock_bh(&rt6_lock);
1452 }
1453 
/* Argument bundle passed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU value */
};
1459 
1460 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1461 {
1462 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1463 	struct inet6_dev *idev;
1464 
1465 	/* In IPv6 pmtu discovery is not optional,
1466 	   so that RTAX_MTU lock cannot disable it.
1467 	   We still use this lock to block changes
1468 	   caused by addrconf/ndisc.
1469 	*/
1470 
1471 	idev = __in6_dev_get(arg->dev);
1472 	if (idev == NULL)
1473 		return 0;
1474 
1475 	/* For administrative MTU increase, there is no way to discover
1476 	   IPv6 PMTU increase, so PMTU increase should be updated here.
1477 	   Since RFC 1981 doesn't include administrative MTU increase
1478 	   update PMTU increase is a MUST. (i.e. jumbo frame)
1479 	 */
1480 	/*
1481 	   If new MTU is less than route PMTU, this new MTU will be the
1482 	   lowest MTU in the path, update the route PMTU to reflect PMTU
1483 	   decreases; if new MTU is greater than route PMTU, and the
1484 	   old MTU is the lowest MTU in the path, update the route PMTU
1485 	   to reflect the increase. In this case if the other nodes' MTU
1486 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
1487 	   PMTU discouvery.
1488 	 */
1489 	if (rt->rt6i_dev == arg->dev &&
1490 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1491             (dst_mtu(&rt->u.dst) > arg->mtu ||
1492              (dst_mtu(&rt->u.dst) < arg->mtu &&
1493 	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1494 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1495 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1496 	return 0;
1497 }
1498 
1499 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1500 {
1501 	struct rt6_mtu_change_arg arg;
1502 
1503 	arg.dev = dev;
1504 	arg.mtu = mtu;
1505 	read_lock_bh(&rt6_lock);
1506 	fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1507 	read_unlock_bh(&rt6_lock);
1508 }
1509 
1510 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1511 			      struct in6_rtmsg *rtmsg)
1512 {
1513 	memset(rtmsg, 0, sizeof(*rtmsg));
1514 
1515 	rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1516 	rtmsg->rtmsg_src_len = r->rtm_src_len;
1517 	rtmsg->rtmsg_flags = RTF_UP;
1518 	if (r->rtm_type == RTN_UNREACHABLE)
1519 		rtmsg->rtmsg_flags |= RTF_REJECT;
1520 
1521 	if (rta[RTA_GATEWAY-1]) {
1522 		if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1523 			return -EINVAL;
1524 		memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1525 		rtmsg->rtmsg_flags |= RTF_GATEWAY;
1526 	}
1527 	if (rta[RTA_DST-1]) {
1528 		if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1529 			return -EINVAL;
1530 		memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1531 	}
1532 	if (rta[RTA_SRC-1]) {
1533 		if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1534 			return -EINVAL;
1535 		memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1536 	}
1537 	if (rta[RTA_OIF-1]) {
1538 		if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1539 			return -EINVAL;
1540 		memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1541 	}
1542 	if (rta[RTA_PRIORITY-1]) {
1543 		if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1544 			return -EINVAL;
1545 		memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1546 	}
1547 	return 0;
1548 }
1549 
1550 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1551 {
1552 	struct rtmsg *r = NLMSG_DATA(nlh);
1553 	struct in6_rtmsg rtmsg;
1554 
1555 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1556 		return -EINVAL;
1557 	return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1558 }
1559 
1560 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1561 {
1562 	struct rtmsg *r = NLMSG_DATA(nlh);
1563 	struct in6_rtmsg rtmsg;
1564 
1565 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1566 		return -EINVAL;
1567 	return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1568 }
1569 
/* Context handed to rt6_dump_route() while dumping the table. */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* buffer the dump is written into */
	struct netlink_callback *cb;	/* netlink callback state of the dump */
};
1575 
1576 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1577 			 struct in6_addr *dst, struct in6_addr *src,
1578 			 int iif, int type, u32 pid, u32 seq,
1579 			 int prefix, unsigned int flags)
1580 {
1581 	struct rtmsg *rtm;
1582 	struct nlmsghdr  *nlh;
1583 	unsigned char	 *b = skb->tail;
1584 	struct rta_cacheinfo ci;
1585 
1586 	if (prefix) {	/* user wants prefix routes only */
1587 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1588 			/* success since this is not a prefix route */
1589 			return 1;
1590 		}
1591 	}
1592 
1593 	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1594 	rtm = NLMSG_DATA(nlh);
1595 	rtm->rtm_family = AF_INET6;
1596 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
1597 	rtm->rtm_src_len = rt->rt6i_src.plen;
1598 	rtm->rtm_tos = 0;
1599 	rtm->rtm_table = RT_TABLE_MAIN;
1600 	if (rt->rt6i_flags&RTF_REJECT)
1601 		rtm->rtm_type = RTN_UNREACHABLE;
1602 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1603 		rtm->rtm_type = RTN_LOCAL;
1604 	else
1605 		rtm->rtm_type = RTN_UNICAST;
1606 	rtm->rtm_flags = 0;
1607 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1608 	rtm->rtm_protocol = rt->rt6i_protocol;
1609 	if (rt->rt6i_flags&RTF_DYNAMIC)
1610 		rtm->rtm_protocol = RTPROT_REDIRECT;
1611 	else if (rt->rt6i_flags & RTF_ADDRCONF)
1612 		rtm->rtm_protocol = RTPROT_KERNEL;
1613 	else if (rt->rt6i_flags&RTF_DEFAULT)
1614 		rtm->rtm_protocol = RTPROT_RA;
1615 
1616 	if (rt->rt6i_flags&RTF_CACHE)
1617 		rtm->rtm_flags |= RTM_F_CLONED;
1618 
1619 	if (dst) {
1620 		RTA_PUT(skb, RTA_DST, 16, dst);
1621 	        rtm->rtm_dst_len = 128;
1622 	} else if (rtm->rtm_dst_len)
1623 		RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1624 #ifdef CONFIG_IPV6_SUBTREES
1625 	if (src) {
1626 		RTA_PUT(skb, RTA_SRC, 16, src);
1627 	        rtm->rtm_src_len = 128;
1628 	} else if (rtm->rtm_src_len)
1629 		RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1630 #endif
1631 	if (iif)
1632 		RTA_PUT(skb, RTA_IIF, 4, &iif);
1633 	else if (dst) {
1634 		struct in6_addr saddr_buf;
1635 		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1636 			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1637 	}
1638 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1639 		goto rtattr_failure;
1640 	if (rt->u.dst.neighbour)
1641 		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1642 	if (rt->u.dst.dev)
1643 		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1644 	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1645 	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1646 	if (rt->rt6i_expires)
1647 		ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1648 	else
1649 		ci.rta_expires = 0;
1650 	ci.rta_used = rt->u.dst.__use;
1651 	ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1652 	ci.rta_error = rt->u.dst.error;
1653 	ci.rta_id = 0;
1654 	ci.rta_ts = 0;
1655 	ci.rta_tsage = 0;
1656 	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1657 	nlh->nlmsg_len = skb->tail - b;
1658 	return skb->len;
1659 
1660 nlmsg_failure:
1661 rtattr_failure:
1662 	skb_trim(skb, b - skb->data);
1663 	return -1;
1664 }
1665 
1666 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1667 {
1668 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1669 	int prefix;
1670 
1671 	if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1672 		struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1673 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1674 	} else
1675 		prefix = 0;
1676 
1677 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1678 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1679 		     prefix, NLM_F_MULTI);
1680 }
1681 
1682 static int fib6_dump_node(struct fib6_walker_t *w)
1683 {
1684 	int res;
1685 	struct rt6_info *rt;
1686 
1687 	for (rt = w->leaf; rt; rt = rt->u.next) {
1688 		res = rt6_dump_route(rt, w->args);
1689 		if (res < 0) {
1690 			/* Frame is full, suspend walking */
1691 			w->leaf = rt;
1692 			return 1;
1693 		}
1694 		BUG_TRAP(res!=0);
1695 	}
1696 	w->leaf = NULL;
1697 	return 0;
1698 }
1699 
1700 static void fib6_dump_end(struct netlink_callback *cb)
1701 {
1702 	struct fib6_walker_t *w = (void*)cb->args[0];
1703 
1704 	if (w) {
1705 		cb->args[0] = 0;
1706 		fib6_walker_unlink(w);
1707 		kfree(w);
1708 	}
1709 	cb->done = (void*)cb->args[1];
1710 	cb->args[1] = 0;
1711 }
1712 
1713 static int fib6_dump_done(struct netlink_callback *cb)
1714 {
1715 	fib6_dump_end(cb);
1716 	return cb->done ? cb->done(cb) : 0;
1717 }
1718 
1719 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1720 {
1721 	struct rt6_rtnl_dump_arg arg;
1722 	struct fib6_walker_t *w;
1723 	int res;
1724 
1725 	arg.skb = skb;
1726 	arg.cb = cb;
1727 
1728 	w = (void*)cb->args[0];
1729 	if (w == NULL) {
1730 		/* New dump:
1731 		 *
1732 		 * 1. hook callback destructor.
1733 		 */
1734 		cb->args[1] = (long)cb->done;
1735 		cb->done = fib6_dump_done;
1736 
1737 		/*
1738 		 * 2. allocate and initialize walker.
1739 		 */
1740 		w = kmalloc(sizeof(*w), GFP_ATOMIC);
1741 		if (w == NULL)
1742 			return -ENOMEM;
1743 		RT6_TRACE("dump<%p", w);
1744 		memset(w, 0, sizeof(*w));
1745 		w->root = &ip6_routing_table;
1746 		w->func = fib6_dump_node;
1747 		w->args = &arg;
1748 		cb->args[0] = (long)w;
1749 		read_lock_bh(&rt6_lock);
1750 		res = fib6_walk(w);
1751 		read_unlock_bh(&rt6_lock);
1752 	} else {
1753 		w->args = &arg;
1754 		read_lock_bh(&rt6_lock);
1755 		res = fib6_walk_continue(w);
1756 		read_unlock_bh(&rt6_lock);
1757 	}
1758 #if RT6_DEBUG >= 3
1759 	if (res <= 0 && skb->len == 0)
1760 		RT6_TRACE("%p>dump end\n", w);
1761 #endif
1762 	res = res < 0 ? res : skb->len;
1763 	/* res < 0 is an error. (really, impossible)
1764 	   res == 0 means that dump is complete, but skb still can contain data.
1765 	   res > 0 dump is not complete, but frame is full.
1766 	 */
1767 	/* Destroy walker, if dump of this table is complete. */
1768 	if (res <= 0)
1769 		fib6_dump_end(cb);
1770 	return res;
1771 }
1772 
1773 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1774 {
1775 	struct rtattr **rta = arg;
1776 	int iif = 0;
1777 	int err = -ENOBUFS;
1778 	struct sk_buff *skb;
1779 	struct flowi fl;
1780 	struct rt6_info *rt;
1781 
1782 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1783 	if (skb == NULL)
1784 		goto out;
1785 
1786 	/* Reserve room for dummy headers, this skb can pass
1787 	   through good chunk of routing engine.
1788 	 */
1789 	skb->mac.raw = skb->data;
1790 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1791 
1792 	memset(&fl, 0, sizeof(fl));
1793 	if (rta[RTA_SRC-1])
1794 		ipv6_addr_copy(&fl.fl6_src,
1795 			       (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1796 	if (rta[RTA_DST-1])
1797 		ipv6_addr_copy(&fl.fl6_dst,
1798 			       (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1799 
1800 	if (rta[RTA_IIF-1])
1801 		memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1802 
1803 	if (iif) {
1804 		struct net_device *dev;
1805 		dev = __dev_get_by_index(iif);
1806 		if (!dev) {
1807 			err = -ENODEV;
1808 			goto out_free;
1809 		}
1810 	}
1811 
1812 	fl.oif = 0;
1813 	if (rta[RTA_OIF-1])
1814 		memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1815 
1816 	rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1817 
1818 	skb->dst = &rt->u.dst;
1819 
1820 	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1821 	err = rt6_fill_node(skb, rt,
1822 			    &fl.fl6_dst, &fl.fl6_src,
1823 			    iif,
1824 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1825 			    nlh->nlmsg_seq, 0, 0);
1826 	if (err < 0) {
1827 		err = -EMSGSIZE;
1828 		goto out_free;
1829 	}
1830 
1831 	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1832 	if (err > 0)
1833 		err = 0;
1834 out:
1835 	return err;
1836 out_free:
1837 	kfree_skb(skb);
1838 	goto out;
1839 }
1840 
1841 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1842 			struct netlink_skb_parms *req)
1843 {
1844 	struct sk_buff *skb;
1845 	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1846 	u32 pid = current->pid;
1847 	u32 seq = 0;
1848 
1849 	if (req)
1850 		pid = req->pid;
1851 	if (nlh)
1852 		seq = nlh->nlmsg_seq;
1853 
1854 	skb = alloc_skb(size, gfp_any());
1855 	if (!skb) {
1856 		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1857 		return;
1858 	}
1859 	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1860 		kfree_skb(skb);
1861 		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1862 		return;
1863 	}
1864 	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1865 	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1866 }
1867 
1868 /*
1869  *	/proc
1870  */
1871 
1872 #ifdef CONFIG_PROC_FS
1873 
/* Record length assumed by the offset arithmetic of the ipv6_route
 * /proc reader.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested */
	int skip;	/* records skipped so far */
	int len;	/* bytes written to buffer so far */
};
1884 
1885 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1886 {
1887 	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1888 	int i;
1889 
1890 	if (arg->skip < arg->offset / RT6_INFO_LEN) {
1891 		arg->skip++;
1892 		return 0;
1893 	}
1894 
1895 	if (arg->len >= arg->length)
1896 		return 0;
1897 
1898 	for (i=0; i<16; i++) {
1899 		sprintf(arg->buffer + arg->len, "%02x",
1900 			rt->rt6i_dst.addr.s6_addr[i]);
1901 		arg->len += 2;
1902 	}
1903 	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1904 			    rt->rt6i_dst.plen);
1905 
1906 #ifdef CONFIG_IPV6_SUBTREES
1907 	for (i=0; i<16; i++) {
1908 		sprintf(arg->buffer + arg->len, "%02x",
1909 			rt->rt6i_src.addr.s6_addr[i]);
1910 		arg->len += 2;
1911 	}
1912 	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1913 			    rt->rt6i_src.plen);
1914 #else
1915 	sprintf(arg->buffer + arg->len,
1916 		"00000000000000000000000000000000 00 ");
1917 	arg->len += 36;
1918 #endif
1919 
1920 	if (rt->rt6i_nexthop) {
1921 		for (i=0; i<16; i++) {
1922 			sprintf(arg->buffer + arg->len, "%02x",
1923 				rt->rt6i_nexthop->primary_key[i]);
1924 			arg->len += 2;
1925 		}
1926 	} else {
1927 		sprintf(arg->buffer + arg->len,
1928 			"00000000000000000000000000000000");
1929 		arg->len += 32;
1930 	}
1931 	arg->len += sprintf(arg->buffer + arg->len,
1932 			    " %08x %08x %08x %08x %8s\n",
1933 			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1934 			    rt->u.dst.__use, rt->rt6i_flags,
1935 			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
1936 	return 0;
1937 }
1938 
1939 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1940 {
1941 	struct rt6_proc_arg arg;
1942 	arg.buffer = buffer;
1943 	arg.offset = offset;
1944 	arg.length = length;
1945 	arg.skip = 0;
1946 	arg.len = 0;
1947 
1948 	read_lock_bh(&rt6_lock);
1949 	fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1950 	read_unlock_bh(&rt6_lock);
1951 
1952 	*start = buffer;
1953 	if (offset)
1954 		*start += offset % RT6_INFO_LEN;
1955 
1956 	arg.len -= offset % RT6_INFO_LEN;
1957 
1958 	if (arg.len > length)
1959 		arg.len = length;
1960 	if (arg.len < 0)
1961 		arg.len = 0;
1962 
1963 	return arg.len;
1964 }
1965 
1966 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1967 {
1968 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1969 		      rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1970 		      rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1971 		      rt6_stats.fib_rt_cache,
1972 		      atomic_read(&ip6_dst_ops.entries),
1973 		      rt6_stats.fib_discarded_routes);
1974 
1975 	return 0;
1976 }
1977 
1978 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1979 {
1980 	return single_open(file, rt6_stats_seq_show, NULL);
1981 }
1982 
1983 static struct file_operations rt6_stats_seq_fops = {
1984 	.owner	 = THIS_MODULE,
1985 	.open	 = rt6_stats_seq_open,
1986 	.read	 = seq_read,
1987 	.llseek	 = seq_lseek,
1988 	.release = single_release,
1989 };
1990 #endif	/* CONFIG_PROC_FS */
1991 
1992 #ifdef CONFIG_SYSCTL
1993 
1994 static int flush_delay;
1995 
1996 static
1997 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1998 			      void __user *buffer, size_t *lenp, loff_t *ppos)
1999 {
2000 	if (write) {
2001 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2002 		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2003 		return 0;
2004 	} else
2005 		return -EINVAL;
2006 }
2007 
2008 ctl_table ipv6_route_table[] = {
2009         {
2010 		.ctl_name	=	NET_IPV6_ROUTE_FLUSH,
2011 		.procname	=	"flush",
2012          	.data		=	&flush_delay,
2013 		.maxlen		=	sizeof(int),
2014 		.mode		=	0200,
2015          	.proc_handler	=	&ipv6_sysctl_rtcache_flush
2016 	},
2017 	{
2018 		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
2019 		.procname	=	"gc_thresh",
2020          	.data		=	&ip6_dst_ops.gc_thresh,
2021 		.maxlen		=	sizeof(int),
2022 		.mode		=	0644,
2023          	.proc_handler	=	&proc_dointvec,
2024 	},
2025 	{
2026 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
2027 		.procname	=	"max_size",
2028          	.data		=	&ip6_rt_max_size,
2029 		.maxlen		=	sizeof(int),
2030 		.mode		=	0644,
2031          	.proc_handler	=	&proc_dointvec,
2032 	},
2033 	{
2034 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2035 		.procname	=	"gc_min_interval",
2036          	.data		=	&ip6_rt_gc_min_interval,
2037 		.maxlen		=	sizeof(int),
2038 		.mode		=	0644,
2039          	.proc_handler	=	&proc_dointvec_jiffies,
2040 		.strategy	=	&sysctl_jiffies,
2041 	},
2042 	{
2043 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
2044 		.procname	=	"gc_timeout",
2045          	.data		=	&ip6_rt_gc_timeout,
2046 		.maxlen		=	sizeof(int),
2047 		.mode		=	0644,
2048          	.proc_handler	=	&proc_dointvec_jiffies,
2049 		.strategy	=	&sysctl_jiffies,
2050 	},
2051 	{
2052 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
2053 		.procname	=	"gc_interval",
2054          	.data		=	&ip6_rt_gc_interval,
2055 		.maxlen		=	sizeof(int),
2056 		.mode		=	0644,
2057          	.proc_handler	=	&proc_dointvec_jiffies,
2058 		.strategy	=	&sysctl_jiffies,
2059 	},
2060 	{
2061 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
2062 		.procname	=	"gc_elasticity",
2063          	.data		=	&ip6_rt_gc_elasticity,
2064 		.maxlen		=	sizeof(int),
2065 		.mode		=	0644,
2066          	.proc_handler	=	&proc_dointvec_jiffies,
2067 		.strategy	=	&sysctl_jiffies,
2068 	},
2069 	{
2070 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
2071 		.procname	=	"mtu_expires",
2072          	.data		=	&ip6_rt_mtu_expires,
2073 		.maxlen		=	sizeof(int),
2074 		.mode		=	0644,
2075          	.proc_handler	=	&proc_dointvec_jiffies,
2076 		.strategy	=	&sysctl_jiffies,
2077 	},
2078 	{
2079 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
2080 		.procname	=	"min_adv_mss",
2081          	.data		=	&ip6_rt_min_advmss,
2082 		.maxlen		=	sizeof(int),
2083 		.mode		=	0644,
2084          	.proc_handler	=	&proc_dointvec_jiffies,
2085 		.strategy	=	&sysctl_jiffies,
2086 	},
2087 	{
2088 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2089 		.procname	=	"gc_min_interval_ms",
2090          	.data		=	&ip6_rt_gc_min_interval,
2091 		.maxlen		=	sizeof(int),
2092 		.mode		=	0644,
2093          	.proc_handler	=	&proc_dointvec_ms_jiffies,
2094 		.strategy	=	&sysctl_ms_jiffies,
2095 	},
2096 	{ .ctl_name = 0 }
2097 };
2098 
2099 #endif
2100 
2101 void __init ip6_route_init(void)
2102 {
2103 	struct proc_dir_entry *p;
2104 
2105 	ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2106 						     sizeof(struct rt6_info),
2107 						     0, SLAB_HWCACHE_ALIGN,
2108 						     NULL, NULL);
2109 	if (!ip6_dst_ops.kmem_cachep)
2110 		panic("cannot create ip6_dst_cache");
2111 
2112 	fib6_init();
2113 #ifdef 	CONFIG_PROC_FS
2114 	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2115 	if (p)
2116 		p->owner = THIS_MODULE;
2117 
2118 	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2119 #endif
2120 #ifdef CONFIG_XFRM
2121 	xfrm6_init();
2122 #endif
2123 }
2124 
2125 void ip6_route_cleanup(void)
2126 {
2127 #ifdef CONFIG_PROC_FS
2128 	proc_net_remove("ipv6_route");
2129 	proc_net_remove("rt6_stats");
2130 #endif
2131 #ifdef CONFIG_XFRM
2132 	xfrm6_fini();
2133 #endif
2134 	rt6_ifdown(NULL);
2135 	fib6_gc_cleanup();
2136 	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2137 }
2138