xref: /openbmc/linux/net/ipv6/route.c (revision b68e31d0)
1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *	This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15 
16 /*	Changes:
17  *
18  *	YOSHIFUJI Hideaki @USAGI
19  *		reworked default router selection.
20  *		- respect outgoing interface
21  *		- select from (probably) reachable routers (i.e.
22  *		routers in REACHABLE, STALE, DELAY or PROBE states).
23  *		- always select the same router if it is (probably)
24  *		reachable.  otherwise, round-robin the list.
25  *	Ville Nuorvala
26  *		Fixed routing subtrees.
27  */
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 
42 #ifdef 	CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46 
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57 #include <net/netevent.h>
58 #include <net/netlink.h>
59 
60 #include <asm/uaccess.h>
61 
62 #ifdef CONFIG_SYSCTL
63 #include <linux/sysctl.h>
64 #endif
65 
/*
 * Debug level for this file.  With RT6_DEBUG >= 3 both RDBG() and
 * RT6_TRACE() expand to printk() calls; at lower levels they expand
 * to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/* Non-zero compiles in the rt6_alloc_clone() branch of the policy lookups. */
#define CLONE_OFFLINK_ROUTE 0
78 
79 static int ip6_rt_max_size = 4096;
80 static int ip6_rt_gc_min_interval = HZ / 2;
81 static int ip6_rt_gc_timeout = 60*HZ;
82 int ip6_rt_gc_interval = 30*HZ;
83 static int ip6_rt_gc_elasticity = 9;
84 static int ip6_rt_mtu_expires = 10*60*HZ;
85 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86 
87 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
89 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90 static void		ip6_dst_destroy(struct dst_entry *);
91 static void		ip6_dst_ifdown(struct dst_entry *,
92 				       struct net_device *dev, int how);
93 static int		 ip6_dst_gc(void);
94 
95 static int		ip6_pkt_discard(struct sk_buff *skb);
96 static int		ip6_pkt_discard_out(struct sk_buff *skb);
97 static void		ip6_link_failure(struct sk_buff *skb);
98 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
99 
100 #ifdef CONFIG_IPV6_ROUTE_INFO
101 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102 					   struct in6_addr *gwaddr, int ifindex,
103 					   unsigned pref);
104 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105 					   struct in6_addr *gwaddr, int ifindex);
106 #endif
107 
108 static struct dst_ops ip6_dst_ops = {
109 	.family			=	AF_INET6,
110 	.protocol		=	__constant_htons(ETH_P_IPV6),
111 	.gc			=	ip6_dst_gc,
112 	.gc_thresh		=	1024,
113 	.check			=	ip6_dst_check,
114 	.destroy		=	ip6_dst_destroy,
115 	.ifdown			=	ip6_dst_ifdown,
116 	.negative_advice	=	ip6_negative_advice,
117 	.link_failure		=	ip6_link_failure,
118 	.update_pmtu		=	ip6_rt_update_pmtu,
119 	.entry_size		=	sizeof(struct rt6_info),
120 };
121 
122 struct rt6_info ip6_null_entry = {
123 	.u = {
124 		.dst = {
125 			.__refcnt	= ATOMIC_INIT(1),
126 			.__use		= 1,
127 			.dev		= &loopback_dev,
128 			.obsolete	= -1,
129 			.error		= -ENETUNREACH,
130 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
131 			.input		= ip6_pkt_discard,
132 			.output		= ip6_pkt_discard_out,
133 			.ops		= &ip6_dst_ops,
134 			.path		= (struct dst_entry*)&ip6_null_entry,
135 		}
136 	},
137 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
138 	.rt6i_metric	= ~(u32) 0,
139 	.rt6i_ref	= ATOMIC_INIT(1),
140 };
141 
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Static reject entry carrying -EACCES.  Apart from the error code and
 * the self-referencing path pointer it is initialised exactly like
 * ip6_null_entry.
 */
struct rt6_info ip6_prohibit_entry = {
	.u = {
		.dst = {
			.ops		= &ip6_dst_ops,
			.dev		= &loopback_dev,
			.path		= (struct dst_entry*)&ip6_prohibit_entry,
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
			.error		= -EACCES,
			.obsolete	= -1,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Static reject entry carrying -EINVAL; otherwise initialised like
 * ip6_null_entry.
 */
struct rt6_info ip6_blk_hole_entry = {
	.u = {
		.dst = {
			.ops		= &ip6_dst_ops,
			.dev		= &loopback_dev,
			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
			.error		= -EINVAL,
			.obsolete	= -1,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
185 
186 /* allocate dst with ip6_dst_ops */
187 static __inline__ struct rt6_info *ip6_dst_alloc(void)
188 {
189 	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
190 }
191 
192 static void ip6_dst_destroy(struct dst_entry *dst)
193 {
194 	struct rt6_info *rt = (struct rt6_info *)dst;
195 	struct inet6_dev *idev = rt->rt6i_idev;
196 
197 	if (idev != NULL) {
198 		rt->rt6i_idev = NULL;
199 		in6_dev_put(idev);
200 	}
201 }
202 
203 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
204 			   int how)
205 {
206 	struct rt6_info *rt = (struct rt6_info *)dst;
207 	struct inet6_dev *idev = rt->rt6i_idev;
208 
209 	if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
210 		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
211 		if (loopback_idev != NULL) {
212 			rt->rt6i_idev = loopback_idev;
213 			in6_dev_put(idev);
214 		}
215 	}
216 }
217 
218 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
219 {
220 	return (rt->rt6i_flags & RTF_EXPIRES &&
221 		time_after(jiffies, rt->rt6i_expires));
222 }
223 
224 static inline int rt6_need_strict(struct in6_addr *daddr)
225 {
226 	return (ipv6_addr_type(daddr) &
227 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
228 }
229 
230 /*
231  *	Route lookup. Any table->tb6_lock is implied.
232  */
233 
234 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
235 						    int oif,
236 						    int strict)
237 {
238 	struct rt6_info *local = NULL;
239 	struct rt6_info *sprt;
240 
241 	if (oif) {
242 		for (sprt = rt; sprt; sprt = sprt->u.next) {
243 			struct net_device *dev = sprt->rt6i_dev;
244 			if (dev->ifindex == oif)
245 				return sprt;
246 			if (dev->flags & IFF_LOOPBACK) {
247 				if (sprt->rt6i_idev == NULL ||
248 				    sprt->rt6i_idev->dev->ifindex != oif) {
249 					if (strict && oif)
250 						continue;
251 					if (local && (!oif ||
252 						      local->rt6i_idev->dev->ifindex == oif))
253 						continue;
254 				}
255 				local = sprt;
256 			}
257 		}
258 
259 		if (local)
260 			return local;
261 
262 		if (strict)
263 			return &ip6_null_entry;
264 	}
265 	return rt;
266 }
267 
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If @rt has a next-hop neighbour that is not in a NUD_VALID state and
 * more than the device's rtr_probe_interval has passed since the
 * neighbour was last updated, stamp the neighbour with the current
 * time and send a Neighbour Solicitation to its solicited-node
 * multicast address.  A NULL @rt, a missing neighbour or a valid
 * neighbour is a no-op.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute.
 *
 * NOTE(review): neigh->updated is written while only the read side of
 * neigh->lock is held -- confirm this is intended.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/* re-check the state and the rate limit under the lock */
	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing does nothing. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
303 
304 /*
305  * Default Router Selection (RFC 2461 6.3.6)
306  */
307 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
308 {
309 	struct net_device *dev = rt->rt6i_dev;
310 	if (!oif || dev->ifindex == oif)
311 		return 2;
312 	if ((dev->flags & IFF_LOOPBACK) &&
313 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
314 		return 1;
315 	return 0;
316 }
317 
318 static int inline rt6_check_neigh(struct rt6_info *rt)
319 {
320 	struct neighbour *neigh = rt->rt6i_nexthop;
321 	int m = 0;
322 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
323 	    !(rt->rt6i_flags & RTF_GATEWAY))
324 		m = 1;
325 	else if (neigh) {
326 		read_lock_bh(&neigh->lock);
327 		if (neigh->nud_state & NUD_VALID)
328 			m = 2;
329 		read_unlock_bh(&neigh->lock);
330 	}
331 	return m;
332 }
333 
334 static int rt6_score_route(struct rt6_info *rt, int oif,
335 			   int strict)
336 {
337 	int m, n;
338 
339 	m = rt6_check_dev(rt, oif);
340 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
341 		return -1;
342 #ifdef CONFIG_IPV6_ROUTER_PREF
343 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
344 #endif
345 	n = rt6_check_neigh(rt);
346 	if (n > 1)
347 		m |= 16;
348 	else if (!n && strict & RT6_LOOKUP_F_REACHABLE)
349 		return -1;
350 	return m;
351 }
352 
353 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
354 				   int strict)
355 {
356 	struct rt6_info *match = NULL, *last = NULL;
357 	struct rt6_info *rt, *rt0 = *head;
358 	u32 metric;
359 	int mpri = -1;
360 
361 	RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
362 		  __FUNCTION__, head, head ? *head : NULL, oif);
363 
364 	for (rt = rt0, metric = rt0->rt6i_metric;
365 	     rt && rt->rt6i_metric == metric && (!last || rt != rt0);
366 	     rt = rt->u.next) {
367 		int m;
368 
369 		if (rt6_check_expired(rt))
370 			continue;
371 
372 		last = rt;
373 
374 		m = rt6_score_route(rt, oif, strict);
375 		if (m < 0)
376 			continue;
377 
378 		if (m > mpri) {
379 			rt6_probe(match);
380 			match = rt;
381 			mpri = m;
382 		} else {
383 			rt6_probe(rt);
384 		}
385 	}
386 
387 	if (!match &&
388 	    (strict & RT6_LOOKUP_F_REACHABLE) &&
389 	    last && last != rt0) {
390 		/* no entries matched; do round-robin */
391 		static DEFINE_SPINLOCK(lock);
392 		spin_lock(&lock);
393 		*head = rt0->u.next;
394 		rt0->u.next = last->u.next;
395 		last->u.next = rt0;
396 		spin_unlock(&lock);
397 	}
398 
399 	RT6_TRACE("%s() => %p, score=%d\n",
400 		  __FUNCTION__, match, mpri);
401 
402 	return (match ? match : &ip6_null_entry);
403 }
404 
405 #ifdef CONFIG_IPV6_ROUTE_INFO
406 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
407 		  struct in6_addr *gwaddr)
408 {
409 	struct route_info *rinfo = (struct route_info *) opt;
410 	struct in6_addr prefix_buf, *prefix;
411 	unsigned int pref;
412 	u32 lifetime;
413 	struct rt6_info *rt;
414 
415 	if (len < sizeof(struct route_info)) {
416 		return -EINVAL;
417 	}
418 
419 	/* Sanity check for prefix_len and length */
420 	if (rinfo->length > 3) {
421 		return -EINVAL;
422 	} else if (rinfo->prefix_len > 128) {
423 		return -EINVAL;
424 	} else if (rinfo->prefix_len > 64) {
425 		if (rinfo->length < 2) {
426 			return -EINVAL;
427 		}
428 	} else if (rinfo->prefix_len > 0) {
429 		if (rinfo->length < 1) {
430 			return -EINVAL;
431 		}
432 	}
433 
434 	pref = rinfo->route_pref;
435 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
436 		pref = ICMPV6_ROUTER_PREF_MEDIUM;
437 
438 	lifetime = htonl(rinfo->lifetime);
439 	if (lifetime == 0xffffffff) {
440 		/* infinity */
441 	} else if (lifetime > 0x7fffffff/HZ) {
442 		/* Avoid arithmetic overflow */
443 		lifetime = 0x7fffffff/HZ - 1;
444 	}
445 
446 	if (rinfo->length == 3)
447 		prefix = (struct in6_addr *)rinfo->prefix;
448 	else {
449 		/* this function is safe */
450 		ipv6_addr_prefix(&prefix_buf,
451 				 (struct in6_addr *)rinfo->prefix,
452 				 rinfo->prefix_len);
453 		prefix = &prefix_buf;
454 	}
455 
456 	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
457 
458 	if (rt && !lifetime) {
459 		ip6_del_rt(rt);
460 		rt = NULL;
461 	}
462 
463 	if (!rt && lifetime)
464 		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
465 					pref);
466 	else if (rt)
467 		rt->rt6i_flags = RTF_ROUTEINFO |
468 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
469 
470 	if (rt) {
471 		if (lifetime == 0xffffffff) {
472 			rt->rt6i_flags &= ~RTF_EXPIRES;
473 		} else {
474 			rt->rt6i_expires = jiffies + HZ * lifetime;
475 			rt->rt6i_flags |= RTF_EXPIRES;
476 		}
477 		dst_release(&rt->u.dst);
478 	}
479 	return 0;
480 }
481 #endif
482 
/*
 * BACKTRACK(saddr) - step back up the FIB tree after a failed match.
 *
 * Must be expanded inside a function that has the locals `rt` (the
 * current lookup result) and `fn` (the current FIB node) and the
 * labels `restart` and `out`.
 *
 * When rt is &ip6_null_entry the macro climbs towards the root:
 * reaching a node flagged RTN_TL_ROOT jumps to `out`; otherwise it
 * moves to the parent, or -- when the parent has a subtree that is not
 * the node we came from -- looks @saddr up in that subtree.  As soon
 * as the new node carries RTN_RTINFO it jumps to `restart` so the
 * caller re-runs its route selection there.
 *
 * The subtree is accessed through FIB6_SUBTREE() throughout, matching
 * the test that guards the lookup.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		while (fn) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
500 
501 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
502 					     struct flowi *fl, int flags)
503 {
504 	struct fib6_node *fn;
505 	struct rt6_info *rt;
506 
507 	read_lock_bh(&table->tb6_lock);
508 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
509 restart:
510 	rt = fn->leaf;
511 	rt = rt6_device_match(rt, fl->oif, flags);
512 	BACKTRACK(&fl->fl6_src);
513 out:
514 	dst_hold(&rt->u.dst);
515 	read_unlock_bh(&table->tb6_lock);
516 
517 	rt->u.dst.lastuse = jiffies;
518 	rt->u.dst.__use++;
519 
520 	return rt;
521 
522 }
523 
524 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
525 			    int oif, int strict)
526 {
527 	struct flowi fl = {
528 		.oif = oif,
529 		.nl_u = {
530 			.ip6_u = {
531 				.daddr = *daddr,
532 				/* TODO: saddr */
533 			},
534 		},
535 	};
536 	struct dst_entry *dst;
537 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
538 
539 	dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
540 	if (dst->error == 0)
541 		return (struct rt6_info *) dst;
542 
543 	dst_release(dst);
544 
545 	return NULL;
546 }
547 
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold a
   reference to it, it may be destroyed.
 */
553 
554 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
555 {
556 	int err;
557 	struct fib6_table *table;
558 
559 	table = rt->rt6i_table;
560 	write_lock_bh(&table->tb6_lock);
561 	err = fib6_add(&table->tb6_root, rt, info);
562 	write_unlock_bh(&table->tb6_lock);
563 
564 	return err;
565 }
566 
567 int ip6_ins_rt(struct rt6_info *rt)
568 {
569 	return __ip6_ins_rt(rt, NULL);
570 }
571 
572 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
573 				      struct in6_addr *saddr)
574 {
575 	struct rt6_info *rt;
576 
577 	/*
578 	 *	Clone the route.
579 	 */
580 
581 	rt = ip6_rt_copy(ort);
582 
583 	if (rt) {
584 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
585 			if (rt->rt6i_dst.plen != 128 &&
586 			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
587 				rt->rt6i_flags |= RTF_ANYCAST;
588 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
589 		}
590 
591 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
592 		rt->rt6i_dst.plen = 128;
593 		rt->rt6i_flags |= RTF_CACHE;
594 		rt->u.dst.flags |= DST_HOST;
595 
596 #ifdef CONFIG_IPV6_SUBTREES
597 		if (rt->rt6i_src.plen && saddr) {
598 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
599 			rt->rt6i_src.plen = 128;
600 		}
601 #endif
602 
603 		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
604 
605 	}
606 
607 	return rt;
608 }
609 
610 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
611 {
612 	struct rt6_info *rt = ip6_rt_copy(ort);
613 	if (rt) {
614 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
615 		rt->rt6i_dst.plen = 128;
616 		rt->rt6i_flags |= RTF_CACHE;
617 		if (rt->rt6i_flags & RTF_REJECT)
618 			rt->u.dst.error = ort->u.dst.error;
619 		rt->u.dst.flags |= DST_HOST;
620 		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
621 	}
622 	return rt;
623 }
624 
625 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
626 					    struct flowi *fl, int flags)
627 {
628 	struct fib6_node *fn;
629 	struct rt6_info *rt, *nrt;
630 	int strict = 0;
631 	int attempts = 3;
632 	int err;
633 	int reachable = RT6_LOOKUP_F_REACHABLE;
634 
635 	strict |= flags & RT6_LOOKUP_F_IFACE;
636 
637 relookup:
638 	read_lock_bh(&table->tb6_lock);
639 
640 restart_2:
641 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
642 
643 restart:
644 	rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
645 	BACKTRACK(&fl->fl6_src);
646 	if (rt == &ip6_null_entry ||
647 	    rt->rt6i_flags & RTF_CACHE)
648 		goto out;
649 
650 	dst_hold(&rt->u.dst);
651 	read_unlock_bh(&table->tb6_lock);
652 
653 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
654 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
655 	else {
656 #if CLONE_OFFLINK_ROUTE
657 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
658 #else
659 		goto out2;
660 #endif
661 	}
662 
663 	dst_release(&rt->u.dst);
664 	rt = nrt ? : &ip6_null_entry;
665 
666 	dst_hold(&rt->u.dst);
667 	if (nrt) {
668 		err = ip6_ins_rt(nrt);
669 		if (!err)
670 			goto out2;
671 	}
672 
673 	if (--attempts <= 0)
674 		goto out2;
675 
676 	/*
677 	 * Race condition! In the gap, when table->tb6_lock was
678 	 * released someone could insert this route.  Relookup.
679 	 */
680 	dst_release(&rt->u.dst);
681 	goto relookup;
682 
683 out:
684 	if (reachable) {
685 		reachable = 0;
686 		goto restart_2;
687 	}
688 	dst_hold(&rt->u.dst);
689 	read_unlock_bh(&table->tb6_lock);
690 out2:
691 	rt->u.dst.lastuse = jiffies;
692 	rt->u.dst.__use++;
693 
694 	return rt;
695 }
696 
697 void ip6_route_input(struct sk_buff *skb)
698 {
699 	struct ipv6hdr *iph = skb->nh.ipv6h;
700 	struct flowi fl = {
701 		.iif = skb->dev->ifindex,
702 		.nl_u = {
703 			.ip6_u = {
704 				.daddr = iph->daddr,
705 				.saddr = iph->saddr,
706 #ifdef CONFIG_IPV6_ROUTE_FWMARK
707 				.fwmark = skb->nfmark,
708 #endif
709 				.flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
710 			},
711 		},
712 		.proto = iph->nexthdr,
713 	};
714 	int flags = rt6_need_strict(&iph->daddr) ? RT6_LOOKUP_F_IFACE : 0;
715 
716 	skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
717 }
718 
719 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
720 					     struct flowi *fl, int flags)
721 {
722 	struct fib6_node *fn;
723 	struct rt6_info *rt, *nrt;
724 	int strict = 0;
725 	int attempts = 3;
726 	int err;
727 	int reachable = RT6_LOOKUP_F_REACHABLE;
728 
729 	strict |= flags & RT6_LOOKUP_F_IFACE;
730 
731 relookup:
732 	read_lock_bh(&table->tb6_lock);
733 
734 restart_2:
735 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
736 
737 restart:
738 	rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
739 	BACKTRACK(&fl->fl6_src);
740 	if (rt == &ip6_null_entry ||
741 	    rt->rt6i_flags & RTF_CACHE)
742 		goto out;
743 
744 	dst_hold(&rt->u.dst);
745 	read_unlock_bh(&table->tb6_lock);
746 
747 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
748 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
749 	else {
750 #if CLONE_OFFLINK_ROUTE
751 		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
752 #else
753 		goto out2;
754 #endif
755 	}
756 
757 	dst_release(&rt->u.dst);
758 	rt = nrt ? : &ip6_null_entry;
759 
760 	dst_hold(&rt->u.dst);
761 	if (nrt) {
762 		err = ip6_ins_rt(nrt);
763 		if (!err)
764 			goto out2;
765 	}
766 
767 	if (--attempts <= 0)
768 		goto out2;
769 
770 	/*
771 	 * Race condition! In the gap, when table->tb6_lock was
772 	 * released someone could insert this route.  Relookup.
773 	 */
774 	dst_release(&rt->u.dst);
775 	goto relookup;
776 
777 out:
778 	if (reachable) {
779 		reachable = 0;
780 		goto restart_2;
781 	}
782 	dst_hold(&rt->u.dst);
783 	read_unlock_bh(&table->tb6_lock);
784 out2:
785 	rt->u.dst.lastuse = jiffies;
786 	rt->u.dst.__use++;
787 	return rt;
788 }
789 
790 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
791 {
792 	int flags = 0;
793 
794 	if (rt6_need_strict(&fl->fl6_dst))
795 		flags |= RT6_LOOKUP_F_IFACE;
796 
797 	return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
798 }
799 
800 
801 /*
802  *	Destination cache support functions
803  */
804 
805 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
806 {
807 	struct rt6_info *rt;
808 
809 	rt = (struct rt6_info *) dst;
810 
811 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
812 		return dst;
813 
814 	return NULL;
815 }
816 
817 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
818 {
819 	struct rt6_info *rt = (struct rt6_info *) dst;
820 
821 	if (rt) {
822 		if (rt->rt6i_flags & RTF_CACHE)
823 			ip6_del_rt(rt);
824 		else
825 			dst_release(dst);
826 	}
827 	return NULL;
828 }
829 
830 static void ip6_link_failure(struct sk_buff *skb)
831 {
832 	struct rt6_info *rt;
833 
834 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
835 
836 	rt = (struct rt6_info *) skb->dst;
837 	if (rt) {
838 		if (rt->rt6i_flags&RTF_CACHE) {
839 			dst_set_expires(&rt->u.dst, 0);
840 			rt->rt6i_flags |= RTF_EXPIRES;
841 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
842 			rt->rt6i_node->fn_sernum = -1;
843 	}
844 }
845 
846 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
847 {
848 	struct rt6_info *rt6 = (struct rt6_info*)dst;
849 
850 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
851 		rt6->rt6i_flags |= RTF_MODIFIED;
852 		if (mtu < IPV6_MIN_MTU) {
853 			mtu = IPV6_MIN_MTU;
854 			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
855 		}
856 		dst->metrics[RTAX_MTU-1] = mtu;
857 		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
858 	}
859 }
860 
861 static int ipv6_get_mtu(struct net_device *dev);
862 
863 static inline unsigned int ipv6_advmss(unsigned int mtu)
864 {
865 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
866 
867 	if (mtu < ip6_rt_min_advmss)
868 		mtu = ip6_rt_min_advmss;
869 
870 	/*
871 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
872 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
873 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
874 	 * rely only on pmtu discovery"
875 	 */
876 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
877 		mtu = IPV6_MAXPLEN;
878 	return mtu;
879 }
880 
881 static struct dst_entry *ndisc_dst_gc_list;
882 static DEFINE_SPINLOCK(ndisc_lock);
883 
884 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
885 				  struct neighbour *neigh,
886 				  struct in6_addr *addr,
887 				  int (*output)(struct sk_buff *))
888 {
889 	struct rt6_info *rt;
890 	struct inet6_dev *idev = in6_dev_get(dev);
891 
892 	if (unlikely(idev == NULL))
893 		return NULL;
894 
895 	rt = ip6_dst_alloc();
896 	if (unlikely(rt == NULL)) {
897 		in6_dev_put(idev);
898 		goto out;
899 	}
900 
901 	dev_hold(dev);
902 	if (neigh)
903 		neigh_hold(neigh);
904 	else
905 		neigh = ndisc_get_neigh(dev, addr);
906 
907 	rt->rt6i_dev	  = dev;
908 	rt->rt6i_idev     = idev;
909 	rt->rt6i_nexthop  = neigh;
910 	atomic_set(&rt->u.dst.__refcnt, 1);
911 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
912 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
913 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
914 	rt->u.dst.output  = output;
915 
916 #if 0	/* there's no chance to use these for ndisc */
917 	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
918 				? DST_HOST
919 				: 0;
920 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
921 	rt->rt6i_dst.plen = 128;
922 #endif
923 
924 	spin_lock_bh(&ndisc_lock);
925 	rt->u.dst.next = ndisc_dst_gc_list;
926 	ndisc_dst_gc_list = &rt->u.dst;
927 	spin_unlock_bh(&ndisc_lock);
928 
929 	fib6_force_start_gc();
930 
931 out:
932 	return (struct dst_entry *)rt;
933 }
934 
935 int ndisc_dst_gc(int *more)
936 {
937 	struct dst_entry *dst, *next, **pprev;
938 	int freed;
939 
940 	next = NULL;
941  	freed = 0;
942 
943 	spin_lock_bh(&ndisc_lock);
944 	pprev = &ndisc_dst_gc_list;
945 
946 	while ((dst = *pprev) != NULL) {
947 		if (!atomic_read(&dst->__refcnt)) {
948 			*pprev = dst->next;
949 			dst_free(dst);
950 			freed++;
951 		} else {
952 			pprev = &dst->next;
953 			(*more)++;
954 		}
955 	}
956 
957 	spin_unlock_bh(&ndisc_lock);
958 
959 	return freed;
960 }
961 
962 static int ip6_dst_gc(void)
963 {
964 	static unsigned expire = 30*HZ;
965 	static unsigned long last_gc;
966 	unsigned long now = jiffies;
967 
968 	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
969 	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
970 		goto out;
971 
972 	expire++;
973 	fib6_run_gc(expire);
974 	last_gc = now;
975 	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
976 		expire = ip6_rt_gc_timeout>>1;
977 
978 out:
979 	expire -= expire>>ip6_rt_gc_elasticity;
980 	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
981 }
982 
983 /* Clean host part of a prefix. Not necessary in radix tree,
984    but results in cleaner routing tables.
985 
986    Remove it only when all the things will work!
987  */
988 
989 static int ipv6_get_mtu(struct net_device *dev)
990 {
991 	int mtu = IPV6_MIN_MTU;
992 	struct inet6_dev *idev;
993 
994 	idev = in6_dev_get(dev);
995 	if (idev) {
996 		mtu = idev->cnf.mtu6;
997 		in6_dev_put(idev);
998 	}
999 	return mtu;
1000 }
1001 
1002 int ipv6_get_hoplimit(struct net_device *dev)
1003 {
1004 	int hoplimit = ipv6_devconf.hop_limit;
1005 	struct inet6_dev *idev;
1006 
1007 	idev = in6_dev_get(dev);
1008 	if (idev) {
1009 		hoplimit = idev->cnf.hop_limit;
1010 		in6_dev_put(idev);
1011 	}
1012 	return hoplimit;
1013 }
1014 
1015 /*
1016  *
1017  */
1018 
/*
 * ip6_route_add - build a route from @cfg and insert it into its table.
 *
 * Validates the prefix lengths, resolves the optional output device
 * (cfg->fc_ifindex), allocates a route entry and fills in protocol,
 * input/output handlers, destination (and, with CONFIG_IPV6_SUBTREES,
 * source) prefix, metric, gateway/next hop and the metrics array,
 * then hands the entry to __ip6_ins_rt().
 *
 * Note that @cfg is modified: fc_metric is set to IP6_RT_PRIO_USER
 * when it is 0, and fc_protocol to RTPROT_BOOT when it is
 * RTPROT_UNSPEC.
 *
 * Returns the result of __ip6_ins_rt() once the route has been fully
 * built.  On earlier failures a negative errno is returned (-EINVAL,
 * -ENODEV, -ENOBUFS, -ENOMEM, -EHOSTUNREACH or the neighbour lookup
 * error) after the device / inet6_dev references have been dropped
 * and the partially built route freed.
 */
1019 int ip6_route_add(struct fib6_config *cfg)
1020 {
1021 	int err;
1022 	struct rt6_info *rt = NULL;
1023 	struct net_device *dev = NULL;
1024 	struct inet6_dev *idev = NULL;
1025 	struct fib6_table *table;
1026 	int addr_type;
1027 
1028 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1029 		return -EINVAL;
1030 #ifndef CONFIG_IPV6_SUBTREES
	/* source prefixes are only accepted with subtree support */
1031 	if (cfg->fc_src_len)
1032 		return -EINVAL;
1033 #endif
	/* an explicit interface must exist and have an inet6_dev */
1034 	if (cfg->fc_ifindex) {
1035 		err = -ENODEV;
1036 		dev = dev_get_by_index(cfg->fc_ifindex);
1037 		if (!dev)
1038 			goto out;
1039 		idev = in6_dev_get(dev);
1040 		if (!idev)
1041 			goto out;
1042 	}
1043 
1044 	if (cfg->fc_metric == 0)
1045 		cfg->fc_metric = IP6_RT_PRIO_USER;
1046 
1047 	table = fib6_new_table(cfg->fc_table);
1048 	if (table == NULL) {
1049 		err = -ENOBUFS;
1050 		goto out;
1051 	}
1052 
1053 	rt = ip6_dst_alloc();
1054 
1055 	if (rt == NULL) {
1056 		err = -ENOMEM;
1057 		goto out;
1058 	}
1059 
1060 	rt->u.dst.obsolete = -1;
	/* set unconditionally; rt6_check_expired() consults it only when RTF_EXPIRES is set */
1061 	rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1062 
1063 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1064 		cfg->fc_protocol = RTPROT_BOOT;
1065 	rt->rt6i_protocol = cfg->fc_protocol;
1066 
1067 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1068 
	/* input handler depends on whether the destination is multicast */
1069 	if (addr_type & IPV6_ADDR_MULTICAST)
1070 		rt->u.dst.input = ip6_mc_input;
1071 	else
1072 		rt->u.dst.input = ip6_forward;
1073 
1074 	rt->u.dst.output = ip6_output;
1075 
1076 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1077 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1078 	if (rt->rt6i_dst.plen == 128)
1079 	       rt->u.dst.flags = DST_HOST;
1080 
1081 #ifdef CONFIG_IPV6_SUBTREES
1082 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1083 	rt->rt6i_src.plen = cfg->fc_src_len;
1084 #endif
1085 
1086 	rt->rt6i_metric = cfg->fc_metric;
1087 
1088 	/* We cannot add true routes via loopback here,
1089 	   they would result in kernel looping; promote them to reject routes
1090 	 */
1091 	if ((cfg->fc_flags & RTF_REJECT) ||
1092 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1093 		/* hold loopback dev/idev if we haven't done so. */
1094 		if (dev != &loopback_dev) {
1095 			if (dev) {
1096 				dev_put(dev);
1097 				in6_dev_put(idev);
1098 			}
1099 			dev = &loopback_dev;
1100 			dev_hold(dev);
1101 			idev = in6_dev_get(dev);
1102 			if (!idev) {
1103 				err = -ENODEV;
1104 				goto out;
1105 			}
1106 		}
		/* reject routes discard in both directions; the caller's flags are replaced */
1107 		rt->u.dst.output = ip6_pkt_discard_out;
1108 		rt->u.dst.input = ip6_pkt_discard;
1109 		rt->u.dst.error = -ENETUNREACH;
1110 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1111 		goto install_route;
1112 	}
1113 
1114 	if (cfg->fc_flags & RTF_GATEWAY) {
1115 		struct in6_addr *gw_addr;
1116 		int gwa_type;
1117 
1118 		gw_addr = &cfg->fc_gateway;
1119 		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1120 		gwa_type = ipv6_addr_type(gw_addr);
1121 
1122 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1123 			struct rt6_info *grt;
1124 
1125 			/* IPv6 strictly inhibits using not link-local
1126 			   addresses as nexthop address.
1127 			   Otherwise, router will not able to send redirects.
1128 			   It is very good, but in some (rare!) circumstances
1129 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1130 			   some exceptions. --ANK
1131 			 */
1132 			err = -EINVAL;
1133 			if (!(gwa_type&IPV6_ADDR_UNICAST))
1134 				goto out;
1135 
			/* the gateway itself must be reachable via an existing route */
1136 			grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1137 
1138 			err = -EHOSTUNREACH;
1139 			if (grt == NULL)
1140 				goto out;
1141 			if (dev) {
1142 				if (dev != grt->rt6i_dev) {
1143 					dst_release(&grt->u.dst);
1144 					goto out;
1145 				}
1146 			} else {
				/* no device given: adopt the one of the route to the gateway */
1147 				dev = grt->rt6i_dev;
1148 				idev = grt->rt6i_idev;
1149 				dev_hold(dev);
1150 				in6_dev_hold(grt->rt6i_idev);
1151 			}
			/* err stays -EHOSTUNREACH if the gateway is only reachable via another gateway */
1152 			if (!(grt->rt6i_flags&RTF_GATEWAY))
1153 				err = 0;
1154 			dst_release(&grt->u.dst);
1155 
1156 			if (err)
1157 				goto out;
1158 		}
1159 		err = -EINVAL;
1160 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1161 			goto out;
1162 	}
1163 
1164 	err = -ENODEV;
1165 	if (dev == NULL)
1166 		goto out;
1167 
1168 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1169 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1170 		if (IS_ERR(rt->rt6i_nexthop)) {
1171 			err = PTR_ERR(rt->rt6i_nexthop);
			/* do not leave an error pointer in the route that is about to be freed */
1172 			rt->rt6i_nexthop = NULL;
1173 			goto out;
1174 		}
1175 	}
1176 
1177 	rt->rt6i_flags = cfg->fc_flags;
1178 
1179 install_route:
	/* copy user-supplied metrics; attribute type N lands in metrics[N - 1] */
1180 	if (cfg->fc_mx) {
1181 		struct nlattr *nla;
1182 		int remaining;
1183 
1184 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1185 			int type = nla->nla_type;
1186 
1187 			if (type) {
1188 				if (type > RTAX_MAX) {
1189 					err = -EINVAL;
1190 					goto out;
1191 				}
1192 
1193 				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1194 			}
1195 		}
1196 	}
1197 
	/* fill in defaults for metrics the caller left at zero */
1198 	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1199 		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1200 	if (!rt->u.dst.metrics[RTAX_MTU-1])
1201 		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1202 	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1203 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	/* the dev/idev references taken above are handed over to the route */
1204 	rt->u.dst.dev = dev;
1205 	rt->rt6i_idev = idev;
1206 	rt->rt6i_table = table;
1207 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1208 
1209 out:
	/* error exit: drop whatever was acquired so far */
1210 	if (dev)
1211 		dev_put(dev);
1212 	if (idev)
1213 		in6_dev_put(idev);
1214 	if (rt)
1215 		dst_free((struct dst_entry *) rt);
1216 	return err;
1217 }
1218 
1219 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1220 {
1221 	int err;
1222 	struct fib6_table *table;
1223 
1224 	if (rt == &ip6_null_entry)
1225 		return -ENOENT;
1226 
1227 	table = rt->rt6i_table;
1228 	write_lock_bh(&table->tb6_lock);
1229 
1230 	err = fib6_del(rt, info);
1231 	dst_release(&rt->u.dst);
1232 
1233 	write_unlock_bh(&table->tb6_lock);
1234 
1235 	return err;
1236 }
1237 
1238 int ip6_del_rt(struct rt6_info *rt)
1239 {
1240 	return __ip6_del_rt(rt, NULL);
1241 }
1242 
1243 static int ip6_route_del(struct fib6_config *cfg)
1244 {
1245 	struct fib6_table *table;
1246 	struct fib6_node *fn;
1247 	struct rt6_info *rt;
1248 	int err = -ESRCH;
1249 
1250 	table = fib6_get_table(cfg->fc_table);
1251 	if (table == NULL)
1252 		return err;
1253 
1254 	read_lock_bh(&table->tb6_lock);
1255 
1256 	fn = fib6_locate(&table->tb6_root,
1257 			 &cfg->fc_dst, cfg->fc_dst_len,
1258 			 &cfg->fc_src, cfg->fc_src_len);
1259 
1260 	if (fn) {
1261 		for (rt = fn->leaf; rt; rt = rt->u.next) {
1262 			if (cfg->fc_ifindex &&
1263 			    (rt->rt6i_dev == NULL ||
1264 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1265 				continue;
1266 			if (cfg->fc_flags & RTF_GATEWAY &&
1267 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1268 				continue;
1269 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1270 				continue;
1271 			dst_hold(&rt->u.dst);
1272 			read_unlock_bh(&table->tb6_lock);
1273 
1274 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1275 		}
1276 	}
1277 	read_unlock_bh(&table->tb6_lock);
1278 
1279 	return err;
1280 }
1281 
1282 /*
1283  *	Handle redirects
1284  */
1285 struct ip6rd_flowi {
1286 	struct flowi fl;
1287 	struct in6_addr gateway;
1288 };
1289 
1290 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1291 					     struct flowi *fl,
1292 					     int flags)
1293 {
1294 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1295 	struct rt6_info *rt;
1296 	struct fib6_node *fn;
1297 
1298 	/*
1299 	 * Get the "current" route for this destination and
1300 	 * check if the redirect has come from approriate router.
1301 	 *
1302 	 * RFC 2461 specifies that redirects should only be
1303 	 * accepted if they come from the nexthop to the target.
1304 	 * Due to the way the routes are chosen, this notion
1305 	 * is a bit fuzzy and one might need to check all possible
1306 	 * routes.
1307 	 */
1308 
1309 	read_lock_bh(&table->tb6_lock);
1310 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1311 restart:
1312 	for (rt = fn->leaf; rt; rt = rt->u.next) {
1313 		/*
1314 		 * Current route is on-link; redirect is always invalid.
1315 		 *
1316 		 * Seems, previous statement is not true. It could
1317 		 * be node, which looks for us as on-link (f.e. proxy ndisc)
1318 		 * But then router serving it might decide, that we should
1319 		 * know truth 8)8) --ANK (980726).
1320 		 */
1321 		if (rt6_check_expired(rt))
1322 			continue;
1323 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1324 			continue;
1325 		if (fl->oif != rt->rt6i_dev->ifindex)
1326 			continue;
1327 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1328 			continue;
1329 		break;
1330 	}
1331 
1332 	if (!rt)
1333 		rt = &ip6_null_entry;
1334 	BACKTRACK(&fl->fl6_src);
1335 out:
1336 	dst_hold(&rt->u.dst);
1337 
1338 	read_unlock_bh(&table->tb6_lock);
1339 
1340 	return rt;
1341 };
1342 
1343 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1344 					   struct in6_addr *src,
1345 					   struct in6_addr *gateway,
1346 					   struct net_device *dev)
1347 {
1348 	struct ip6rd_flowi rdfl = {
1349 		.fl = {
1350 			.oif = dev->ifindex,
1351 			.nl_u = {
1352 				.ip6_u = {
1353 					.daddr = *dest,
1354 					.saddr = *src,
1355 				},
1356 			},
1357 		},
1358 		.gateway = *gateway,
1359 	};
1360 	int flags = rt6_need_strict(dest) ? RT6_LOOKUP_F_IFACE : 0;
1361 
1362 	return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1363 }
1364 
1365 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1366 		  struct in6_addr *saddr,
1367 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1368 {
1369 	struct rt6_info *rt, *nrt = NULL;
1370 	struct netevent_redirect netevent;
1371 
1372 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1373 
1374 	if (rt == &ip6_null_entry) {
1375 		if (net_ratelimit())
1376 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1377 			       "for redirect target\n");
1378 		goto out;
1379 	}
1380 
1381 	/*
1382 	 *	We have finally decided to accept it.
1383 	 */
1384 
1385 	neigh_update(neigh, lladdr, NUD_STALE,
1386 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1387 		     NEIGH_UPDATE_F_OVERRIDE|
1388 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1389 				     NEIGH_UPDATE_F_ISROUTER))
1390 		     );
1391 
1392 	/*
1393 	 * Redirect received -> path was valid.
1394 	 * Look, redirects are sent only in response to data packets,
1395 	 * so that this nexthop apparently is reachable. --ANK
1396 	 */
1397 	dst_confirm(&rt->u.dst);
1398 
1399 	/* Duplicate redirect: silently ignore. */
1400 	if (neigh == rt->u.dst.neighbour)
1401 		goto out;
1402 
1403 	nrt = ip6_rt_copy(rt);
1404 	if (nrt == NULL)
1405 		goto out;
1406 
1407 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1408 	if (on_link)
1409 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1410 
1411 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1412 	nrt->rt6i_dst.plen = 128;
1413 	nrt->u.dst.flags |= DST_HOST;
1414 
1415 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1416 	nrt->rt6i_nexthop = neigh_clone(neigh);
1417 	/* Reset pmtu, it may be better */
1418 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1419 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1420 
1421 	if (ip6_ins_rt(nrt))
1422 		goto out;
1423 
1424 	netevent.old = &rt->u.dst;
1425 	netevent.new = &nrt->u.dst;
1426 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1427 
1428 	if (rt->rt6i_flags&RTF_CACHE) {
1429 		ip6_del_rt(rt);
1430 		return;
1431 	}
1432 
1433 out:
1434         dst_release(&rt->u.dst);
1435 	return;
1436 }
1437 
1438 /*
1439  *	Handle ICMP "packet too big" messages
1440  *	i.e. Path MTU discovery
1441  */
1442 
1443 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1444 			struct net_device *dev, u32 pmtu)
1445 {
1446 	struct rt6_info *rt, *nrt;
1447 	int allfrag = 0;
1448 
1449 	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1450 	if (rt == NULL)
1451 		return;
1452 
1453 	if (pmtu >= dst_mtu(&rt->u.dst))
1454 		goto out;
1455 
1456 	if (pmtu < IPV6_MIN_MTU) {
1457 		/*
1458 		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1459 		 * MTU (1280) and a fragment header should always be included
1460 		 * after a node receiving Too Big message reporting PMTU is
1461 		 * less than the IPv6 Minimum Link MTU.
1462 		 */
1463 		pmtu = IPV6_MIN_MTU;
1464 		allfrag = 1;
1465 	}
1466 
1467 	/* New mtu received -> path was valid.
1468 	   They are sent only in response to data packets,
1469 	   so that this nexthop apparently is reachable. --ANK
1470 	 */
1471 	dst_confirm(&rt->u.dst);
1472 
1473 	/* Host route. If it is static, it would be better
1474 	   not to override it, but add new one, so that
1475 	   when cache entry will expire old pmtu
1476 	   would return automatically.
1477 	 */
1478 	if (rt->rt6i_flags & RTF_CACHE) {
1479 		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1480 		if (allfrag)
1481 			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1482 		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1483 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1484 		goto out;
1485 	}
1486 
1487 	/* Network route.
1488 	   Two cases are possible:
1489 	   1. It is connected route. Action: COW
1490 	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1491 	 */
1492 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1493 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1494 	else
1495 		nrt = rt6_alloc_clone(rt, daddr);
1496 
1497 	if (nrt) {
1498 		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1499 		if (allfrag)
1500 			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1501 
1502 		/* According to RFC 1981, detecting PMTU increase shouldn't be
1503 		 * happened within 5 mins, the recommended timer is 10 mins.
1504 		 * Here this route expiration time is set to ip6_rt_mtu_expires
1505 		 * which is 10 mins. After 10 mins the decreased pmtu is expired
1506 		 * and detecting PMTU increase will be automatically happened.
1507 		 */
1508 		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1509 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1510 
1511 		ip6_ins_rt(nrt);
1512 	}
1513 out:
1514 	dst_release(&rt->u.dst);
1515 }
1516 
1517 /*
1518  *	Misc support functions
1519  */
1520 
1521 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1522 {
1523 	struct rt6_info *rt = ip6_dst_alloc();
1524 
1525 	if (rt) {
1526 		rt->u.dst.input = ort->u.dst.input;
1527 		rt->u.dst.output = ort->u.dst.output;
1528 
1529 		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1530 		rt->u.dst.dev = ort->u.dst.dev;
1531 		if (rt->u.dst.dev)
1532 			dev_hold(rt->u.dst.dev);
1533 		rt->rt6i_idev = ort->rt6i_idev;
1534 		if (rt->rt6i_idev)
1535 			in6_dev_hold(rt->rt6i_idev);
1536 		rt->u.dst.lastuse = jiffies;
1537 		rt->rt6i_expires = 0;
1538 
1539 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1540 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1541 		rt->rt6i_metric = 0;
1542 
1543 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1544 #ifdef CONFIG_IPV6_SUBTREES
1545 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1546 #endif
1547 		rt->rt6i_table = ort->rt6i_table;
1548 	}
1549 	return rt;
1550 }
1551 
1552 #ifdef CONFIG_IPV6_ROUTE_INFO
1553 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1554 					   struct in6_addr *gwaddr, int ifindex)
1555 {
1556 	struct fib6_node *fn;
1557 	struct rt6_info *rt = NULL;
1558 	struct fib6_table *table;
1559 
1560 	table = fib6_get_table(RT6_TABLE_INFO);
1561 	if (table == NULL)
1562 		return NULL;
1563 
1564 	write_lock_bh(&table->tb6_lock);
1565 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1566 	if (!fn)
1567 		goto out;
1568 
1569 	for (rt = fn->leaf; rt; rt = rt->u.next) {
1570 		if (rt->rt6i_dev->ifindex != ifindex)
1571 			continue;
1572 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1573 			continue;
1574 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1575 			continue;
1576 		dst_hold(&rt->u.dst);
1577 		break;
1578 	}
1579 out:
1580 	write_unlock_bh(&table->tb6_lock);
1581 	return rt;
1582 }
1583 
1584 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1585 					   struct in6_addr *gwaddr, int ifindex,
1586 					   unsigned pref)
1587 {
1588 	struct fib6_config cfg = {
1589 		.fc_table	= RT6_TABLE_INFO,
1590 		.fc_metric	= 1024,
1591 		.fc_ifindex	= ifindex,
1592 		.fc_dst_len	= prefixlen,
1593 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1594 				  RTF_UP | RTF_PREF(pref),
1595 	};
1596 
1597 	ipv6_addr_copy(&cfg.fc_dst, prefix);
1598 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1599 
1600 	/* We should treat it as a default route if prefix length is 0. */
1601 	if (!prefixlen)
1602 		cfg.fc_flags |= RTF_DEFAULT;
1603 
1604 	ip6_route_add(&cfg);
1605 
1606 	return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1607 }
1608 #endif
1609 
1610 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1611 {
1612 	struct rt6_info *rt;
1613 	struct fib6_table *table;
1614 
1615 	table = fib6_get_table(RT6_TABLE_DFLT);
1616 	if (table == NULL)
1617 		return NULL;
1618 
1619 	write_lock_bh(&table->tb6_lock);
1620 	for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
1621 		if (dev == rt->rt6i_dev &&
1622 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1623 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1624 			break;
1625 	}
1626 	if (rt)
1627 		dst_hold(&rt->u.dst);
1628 	write_unlock_bh(&table->tb6_lock);
1629 	return rt;
1630 }
1631 
1632 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1633 				     struct net_device *dev,
1634 				     unsigned int pref)
1635 {
1636 	struct fib6_config cfg = {
1637 		.fc_table	= RT6_TABLE_DFLT,
1638 		.fc_metric	= 1024,
1639 		.fc_ifindex	= dev->ifindex,
1640 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1641 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1642 	};
1643 
1644 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1645 
1646 	ip6_route_add(&cfg);
1647 
1648 	return rt6_get_dflt_router(gwaddr, dev);
1649 }
1650 
1651 void rt6_purge_dflt_routers(void)
1652 {
1653 	struct rt6_info *rt;
1654 	struct fib6_table *table;
1655 
1656 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1657 	table = fib6_get_table(RT6_TABLE_DFLT);
1658 	if (table == NULL)
1659 		return;
1660 
1661 restart:
1662 	read_lock_bh(&table->tb6_lock);
1663 	for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
1664 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1665 			dst_hold(&rt->u.dst);
1666 			read_unlock_bh(&table->tb6_lock);
1667 			ip6_del_rt(rt);
1668 			goto restart;
1669 		}
1670 	}
1671 	read_unlock_bh(&table->tb6_lock);
1672 }
1673 
1674 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1675 				 struct fib6_config *cfg)
1676 {
1677 	memset(cfg, 0, sizeof(*cfg));
1678 
1679 	cfg->fc_table = RT6_TABLE_MAIN;
1680 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1681 	cfg->fc_metric = rtmsg->rtmsg_metric;
1682 	cfg->fc_expires = rtmsg->rtmsg_info;
1683 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1684 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1685 	cfg->fc_flags = rtmsg->rtmsg_flags;
1686 
1687 	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1688 	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1689 	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1690 }
1691 
1692 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1693 {
1694 	struct fib6_config cfg;
1695 	struct in6_rtmsg rtmsg;
1696 	int err;
1697 
1698 	switch(cmd) {
1699 	case SIOCADDRT:		/* Add a route */
1700 	case SIOCDELRT:		/* Delete a route */
1701 		if (!capable(CAP_NET_ADMIN))
1702 			return -EPERM;
1703 		err = copy_from_user(&rtmsg, arg,
1704 				     sizeof(struct in6_rtmsg));
1705 		if (err)
1706 			return -EFAULT;
1707 
1708 		rtmsg_to_fib6_config(&rtmsg, &cfg);
1709 
1710 		rtnl_lock();
1711 		switch (cmd) {
1712 		case SIOCADDRT:
1713 			err = ip6_route_add(&cfg);
1714 			break;
1715 		case SIOCDELRT:
1716 			err = ip6_route_del(&cfg);
1717 			break;
1718 		default:
1719 			err = -EINVAL;
1720 		}
1721 		rtnl_unlock();
1722 
1723 		return err;
1724 	};
1725 
1726 	return -EINVAL;
1727 }
1728 
1729 /*
1730  *	Drop the packet on the floor
1731  */
1732 
1733 static int ip6_pkt_discard(struct sk_buff *skb)
1734 {
1735 	int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1736 	if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1737 		IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1738 
1739 	IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1740 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1741 	kfree_skb(skb);
1742 	return 0;
1743 }
1744 
1745 static int ip6_pkt_discard_out(struct sk_buff *skb)
1746 {
1747 	skb->dev = skb->dst->dev;
1748 	return ip6_pkt_discard(skb);
1749 }
1750 
1751 /*
1752  *	Allocate a dst for local (unicast / anycast) address.
1753  */
1754 
1755 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1756 				    const struct in6_addr *addr,
1757 				    int anycast)
1758 {
1759 	struct rt6_info *rt = ip6_dst_alloc();
1760 
1761 	if (rt == NULL)
1762 		return ERR_PTR(-ENOMEM);
1763 
1764 	dev_hold(&loopback_dev);
1765 	in6_dev_hold(idev);
1766 
1767 	rt->u.dst.flags = DST_HOST;
1768 	rt->u.dst.input = ip6_input;
1769 	rt->u.dst.output = ip6_output;
1770 	rt->rt6i_dev = &loopback_dev;
1771 	rt->rt6i_idev = idev;
1772 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1773 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1774 	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1775 	rt->u.dst.obsolete = -1;
1776 
1777 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1778 	if (anycast)
1779 		rt->rt6i_flags |= RTF_ANYCAST;
1780 	else
1781 		rt->rt6i_flags |= RTF_LOCAL;
1782 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1783 	if (rt->rt6i_nexthop == NULL) {
1784 		dst_free((struct dst_entry *) rt);
1785 		return ERR_PTR(-ENOMEM);
1786 	}
1787 
1788 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1789 	rt->rt6i_dst.plen = 128;
1790 	rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1791 
1792 	atomic_set(&rt->u.dst.__refcnt, 1);
1793 
1794 	return rt;
1795 }
1796 
1797 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1798 {
1799 	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1800 	    rt != &ip6_null_entry) {
1801 		RT6_TRACE("deleted by ifdown %p\n", rt);
1802 		return -1;
1803 	}
1804 	return 0;
1805 }
1806 
1807 void rt6_ifdown(struct net_device *dev)
1808 {
1809 	fib6_clean_all(fib6_ifdown, 0, dev);
1810 }
1811 
/* Argument handed to rt6_mtu_change_route() through fib6_clean_all(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* its new MTU */
};
1817 
1818 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1819 {
1820 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1821 	struct inet6_dev *idev;
1822 
1823 	/* In IPv6 pmtu discovery is not optional,
1824 	   so that RTAX_MTU lock cannot disable it.
1825 	   We still use this lock to block changes
1826 	   caused by addrconf/ndisc.
1827 	*/
1828 
1829 	idev = __in6_dev_get(arg->dev);
1830 	if (idev == NULL)
1831 		return 0;
1832 
1833 	/* For administrative MTU increase, there is no way to discover
1834 	   IPv6 PMTU increase, so PMTU increase should be updated here.
1835 	   Since RFC 1981 doesn't include administrative MTU increase
1836 	   update PMTU increase is a MUST. (i.e. jumbo frame)
1837 	 */
1838 	/*
1839 	   If new MTU is less than route PMTU, this new MTU will be the
1840 	   lowest MTU in the path, update the route PMTU to reflect PMTU
1841 	   decreases; if new MTU is greater than route PMTU, and the
1842 	   old MTU is the lowest MTU in the path, update the route PMTU
1843 	   to reflect the increase. In this case if the other nodes' MTU
1844 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
1845 	   PMTU discouvery.
1846 	 */
1847 	if (rt->rt6i_dev == arg->dev &&
1848 	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1849             (dst_mtu(&rt->u.dst) > arg->mtu ||
1850              (dst_mtu(&rt->u.dst) < arg->mtu &&
1851 	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1852 		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1853 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1854 	return 0;
1855 }
1856 
1857 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1858 {
1859 	struct rt6_mtu_change_arg arg = {
1860 		.dev = dev,
1861 		.mtu = mtu,
1862 	};
1863 
1864 	fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1865 }
1866 
1867 static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
1868 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
1869 	[RTA_OIF]               = { .type = NLA_U32 },
1870 	[RTA_IIF]		= { .type = NLA_U32 },
1871 	[RTA_PRIORITY]          = { .type = NLA_U32 },
1872 	[RTA_METRICS]           = { .type = NLA_NESTED },
1873 };
1874 
1875 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1876 			      struct fib6_config *cfg)
1877 {
1878 	struct rtmsg *rtm;
1879 	struct nlattr *tb[RTA_MAX+1];
1880 	int err;
1881 
1882 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1883 	if (err < 0)
1884 		goto errout;
1885 
1886 	err = -EINVAL;
1887 	rtm = nlmsg_data(nlh);
1888 	memset(cfg, 0, sizeof(*cfg));
1889 
1890 	cfg->fc_table = rtm->rtm_table;
1891 	cfg->fc_dst_len = rtm->rtm_dst_len;
1892 	cfg->fc_src_len = rtm->rtm_src_len;
1893 	cfg->fc_flags = RTF_UP;
1894 	cfg->fc_protocol = rtm->rtm_protocol;
1895 
1896 	if (rtm->rtm_type == RTN_UNREACHABLE)
1897 		cfg->fc_flags |= RTF_REJECT;
1898 
1899 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1900 	cfg->fc_nlinfo.nlh = nlh;
1901 
1902 	if (tb[RTA_GATEWAY]) {
1903 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1904 		cfg->fc_flags |= RTF_GATEWAY;
1905 	}
1906 
1907 	if (tb[RTA_DST]) {
1908 		int plen = (rtm->rtm_dst_len + 7) >> 3;
1909 
1910 		if (nla_len(tb[RTA_DST]) < plen)
1911 			goto errout;
1912 
1913 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1914 	}
1915 
1916 	if (tb[RTA_SRC]) {
1917 		int plen = (rtm->rtm_src_len + 7) >> 3;
1918 
1919 		if (nla_len(tb[RTA_SRC]) < plen)
1920 			goto errout;
1921 
1922 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1923 	}
1924 
1925 	if (tb[RTA_OIF])
1926 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1927 
1928 	if (tb[RTA_PRIORITY])
1929 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1930 
1931 	if (tb[RTA_METRICS]) {
1932 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1933 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1934 	}
1935 
1936 	if (tb[RTA_TABLE])
1937 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1938 
1939 	err = 0;
1940 errout:
1941 	return err;
1942 }
1943 
1944 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1945 {
1946 	struct fib6_config cfg;
1947 	int err;
1948 
1949 	err = rtm_to_fib6_config(skb, nlh, &cfg);
1950 	if (err < 0)
1951 		return err;
1952 
1953 	return ip6_route_del(&cfg);
1954 }
1955 
1956 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1957 {
1958 	struct fib6_config cfg;
1959 	int err;
1960 
1961 	err = rtm_to_fib6_config(skb, nlh, &cfg);
1962 	if (err < 0)
1963 		return err;
1964 
1965 	return ip6_route_add(&cfg);
1966 }
1967 
1968 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1969 			 struct in6_addr *dst, struct in6_addr *src,
1970 			 int iif, int type, u32 pid, u32 seq,
1971 			 int prefix, unsigned int flags)
1972 {
1973 	struct rtmsg *rtm;
1974 	struct nlmsghdr *nlh;
1975 	struct rta_cacheinfo ci;
1976 	u32 table;
1977 
1978 	if (prefix) {	/* user wants prefix routes only */
1979 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1980 			/* success since this is not a prefix route */
1981 			return 1;
1982 		}
1983 	}
1984 
1985 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
1986 	if (nlh == NULL)
1987 		return -ENOBUFS;
1988 
1989 	rtm = nlmsg_data(nlh);
1990 	rtm->rtm_family = AF_INET6;
1991 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
1992 	rtm->rtm_src_len = rt->rt6i_src.plen;
1993 	rtm->rtm_tos = 0;
1994 	if (rt->rt6i_table)
1995 		table = rt->rt6i_table->tb6_id;
1996 	else
1997 		table = RT6_TABLE_UNSPEC;
1998 	rtm->rtm_table = table;
1999 	NLA_PUT_U32(skb, RTA_TABLE, table);
2000 	if (rt->rt6i_flags&RTF_REJECT)
2001 		rtm->rtm_type = RTN_UNREACHABLE;
2002 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2003 		rtm->rtm_type = RTN_LOCAL;
2004 	else
2005 		rtm->rtm_type = RTN_UNICAST;
2006 	rtm->rtm_flags = 0;
2007 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2008 	rtm->rtm_protocol = rt->rt6i_protocol;
2009 	if (rt->rt6i_flags&RTF_DYNAMIC)
2010 		rtm->rtm_protocol = RTPROT_REDIRECT;
2011 	else if (rt->rt6i_flags & RTF_ADDRCONF)
2012 		rtm->rtm_protocol = RTPROT_KERNEL;
2013 	else if (rt->rt6i_flags&RTF_DEFAULT)
2014 		rtm->rtm_protocol = RTPROT_RA;
2015 
2016 	if (rt->rt6i_flags&RTF_CACHE)
2017 		rtm->rtm_flags |= RTM_F_CLONED;
2018 
2019 	if (dst) {
2020 		NLA_PUT(skb, RTA_DST, 16, dst);
2021 	        rtm->rtm_dst_len = 128;
2022 	} else if (rtm->rtm_dst_len)
2023 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2024 #ifdef CONFIG_IPV6_SUBTREES
2025 	if (src) {
2026 		NLA_PUT(skb, RTA_SRC, 16, src);
2027 	        rtm->rtm_src_len = 128;
2028 	} else if (rtm->rtm_src_len)
2029 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2030 #endif
2031 	if (iif)
2032 		NLA_PUT_U32(skb, RTA_IIF, iif);
2033 	else if (dst) {
2034 		struct in6_addr saddr_buf;
2035 		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2036 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2037 	}
2038 
2039 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2040 		goto nla_put_failure;
2041 
2042 	if (rt->u.dst.neighbour)
2043 		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2044 
2045 	if (rt->u.dst.dev)
2046 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2047 
2048 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2049 	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2050 	if (rt->rt6i_expires)
2051 		ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
2052 	else
2053 		ci.rta_expires = 0;
2054 	ci.rta_used = rt->u.dst.__use;
2055 	ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
2056 	ci.rta_error = rt->u.dst.error;
2057 	ci.rta_id = 0;
2058 	ci.rta_ts = 0;
2059 	ci.rta_tsage = 0;
2060 	NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
2061 
2062 	return nlmsg_end(skb, nlh);
2063 
2064 nla_put_failure:
2065 	return nlmsg_cancel(skb, nlh);
2066 }
2067 
2068 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2069 {
2070 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2071 	int prefix;
2072 
2073 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2074 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2075 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2076 	} else
2077 		prefix = 0;
2078 
2079 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2080 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2081 		     prefix, NLM_F_MULTI);
2082 }
2083 
2084 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2085 {
2086 	struct nlattr *tb[RTA_MAX+1];
2087 	struct rt6_info *rt;
2088 	struct sk_buff *skb;
2089 	struct rtmsg *rtm;
2090 	struct flowi fl;
2091 	int err, iif = 0;
2092 
2093 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2094 	if (err < 0)
2095 		goto errout;
2096 
2097 	err = -EINVAL;
2098 	memset(&fl, 0, sizeof(fl));
2099 
2100 	if (tb[RTA_SRC]) {
2101 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2102 			goto errout;
2103 
2104 		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2105 	}
2106 
2107 	if (tb[RTA_DST]) {
2108 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2109 			goto errout;
2110 
2111 		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2112 	}
2113 
2114 	if (tb[RTA_IIF])
2115 		iif = nla_get_u32(tb[RTA_IIF]);
2116 
2117 	if (tb[RTA_OIF])
2118 		fl.oif = nla_get_u32(tb[RTA_OIF]);
2119 
2120 	if (iif) {
2121 		struct net_device *dev;
2122 		dev = __dev_get_by_index(iif);
2123 		if (!dev) {
2124 			err = -ENODEV;
2125 			goto errout;
2126 		}
2127 	}
2128 
2129 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2130 	if (skb == NULL) {
2131 		err = -ENOBUFS;
2132 		goto errout;
2133 	}
2134 
2135 	/* Reserve room for dummy headers, this skb can pass
2136 	   through good chunk of routing engine.
2137 	 */
2138 	skb->mac.raw = skb->data;
2139 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2140 
2141 	rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2142 	skb->dst = &rt->u.dst;
2143 
2144 	err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2145 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2146 			    nlh->nlmsg_seq, 0, 0);
2147 	if (err < 0) {
2148 		kfree_skb(skb);
2149 		goto errout;
2150 	}
2151 
2152 	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2153 errout:
2154 	return err;
2155 }
2156 
2157 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2158 {
2159 	struct sk_buff *skb;
2160 	u32 pid = 0, seq = 0;
2161 	struct nlmsghdr *nlh = NULL;
2162 	int payload = sizeof(struct rtmsg) + 256;
2163 	int err = -ENOBUFS;
2164 
2165 	if (info) {
2166 		pid = info->pid;
2167 		nlh = info->nlh;
2168 		if (nlh)
2169 			seq = nlh->nlmsg_seq;
2170 	}
2171 
2172 	skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
2173 	if (skb == NULL)
2174 		goto errout;
2175 
2176 	err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2177 	if (err < 0) {
2178 		kfree_skb(skb);
2179 		goto errout;
2180 	}
2181 
2182 	err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2183 errout:
2184 	if (err < 0)
2185 		rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2186 }
2187 
2188 /*
2189  *	/proc
2190  */
2191 
2192 #ifdef CONFIG_PROC_FS
2193 
/*
 * Size of one record written by rt6_info_route(); used to convert a
 * read offset into a number of records to skip.
 * NOTE(review): the sum matches the format strings only while the
 * device name is at most 8 characters long -- confirm.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char	*buffer;	/* output buffer */
	int	offset;		/* read offset requested by the caller */
	int	length;		/* number of bytes requested */
	int	skip;		/* records skipped so far */
	int	len;		/* bytes written to buffer so far */
};
2204 
2205 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2206 {
2207 	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2208 	int i;
2209 
2210 	if (arg->skip < arg->offset / RT6_INFO_LEN) {
2211 		arg->skip++;
2212 		return 0;
2213 	}
2214 
2215 	if (arg->len >= arg->length)
2216 		return 0;
2217 
2218 	for (i=0; i<16; i++) {
2219 		sprintf(arg->buffer + arg->len, "%02x",
2220 			rt->rt6i_dst.addr.s6_addr[i]);
2221 		arg->len += 2;
2222 	}
2223 	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2224 			    rt->rt6i_dst.plen);
2225 
2226 #ifdef CONFIG_IPV6_SUBTREES
2227 	for (i=0; i<16; i++) {
2228 		sprintf(arg->buffer + arg->len, "%02x",
2229 			rt->rt6i_src.addr.s6_addr[i]);
2230 		arg->len += 2;
2231 	}
2232 	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2233 			    rt->rt6i_src.plen);
2234 #else
2235 	sprintf(arg->buffer + arg->len,
2236 		"00000000000000000000000000000000 00 ");
2237 	arg->len += 36;
2238 #endif
2239 
2240 	if (rt->rt6i_nexthop) {
2241 		for (i=0; i<16; i++) {
2242 			sprintf(arg->buffer + arg->len, "%02x",
2243 				rt->rt6i_nexthop->primary_key[i]);
2244 			arg->len += 2;
2245 		}
2246 	} else {
2247 		sprintf(arg->buffer + arg->len,
2248 			"00000000000000000000000000000000");
2249 		arg->len += 32;
2250 	}
2251 	arg->len += sprintf(arg->buffer + arg->len,
2252 			    " %08x %08x %08x %08x %8s\n",
2253 			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2254 			    rt->u.dst.__use, rt->rt6i_flags,
2255 			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
2256 	return 0;
2257 }
2258 
2259 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2260 {
2261 	struct rt6_proc_arg arg = {
2262 		.buffer = buffer,
2263 		.offset = offset,
2264 		.length = length,
2265 	};
2266 
2267 	fib6_clean_all(rt6_info_route, 0, &arg);
2268 
2269 	*start = buffer;
2270 	if (offset)
2271 		*start += offset % RT6_INFO_LEN;
2272 
2273 	arg.len -= offset % RT6_INFO_LEN;
2274 
2275 	if (arg.len > length)
2276 		arg.len = length;
2277 	if (arg.len < 0)
2278 		arg.len = 0;
2279 
2280 	return arg.len;
2281 }
2282 
2283 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2284 {
2285 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2286 		      rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2287 		      rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2288 		      rt6_stats.fib_rt_cache,
2289 		      atomic_read(&ip6_dst_ops.entries),
2290 		      rt6_stats.fib_discarded_routes);
2291 
2292 	return 0;
2293 }
2294 
2295 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2296 {
2297 	return single_open(file, rt6_stats_seq_show, NULL);
2298 }
2299 
2300 static struct file_operations rt6_stats_seq_fops = {
2301 	.owner	 = THIS_MODULE,
2302 	.open	 = rt6_stats_seq_open,
2303 	.read	 = seq_read,
2304 	.llseek	 = seq_lseek,
2305 	.release = single_release,
2306 };
2307 #endif	/* CONFIG_PROC_FS */
2308 
2309 #ifdef CONFIG_SYSCTL
2310 
2311 static int flush_delay;
2312 
2313 static
2314 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2315 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2316 {
2317 	if (write) {
2318 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2319 		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2320 		return 0;
2321 	} else
2322 		return -EINVAL;
2323 }
2324 
2325 ctl_table ipv6_route_table[] = {
2326         {
2327 		.ctl_name	=	NET_IPV6_ROUTE_FLUSH,
2328 		.procname	=	"flush",
2329          	.data		=	&flush_delay,
2330 		.maxlen		=	sizeof(int),
2331 		.mode		=	0200,
2332          	.proc_handler	=	&ipv6_sysctl_rtcache_flush
2333 	},
2334 	{
2335 		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
2336 		.procname	=	"gc_thresh",
2337          	.data		=	&ip6_dst_ops.gc_thresh,
2338 		.maxlen		=	sizeof(int),
2339 		.mode		=	0644,
2340          	.proc_handler	=	&proc_dointvec,
2341 	},
2342 	{
2343 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
2344 		.procname	=	"max_size",
2345          	.data		=	&ip6_rt_max_size,
2346 		.maxlen		=	sizeof(int),
2347 		.mode		=	0644,
2348          	.proc_handler	=	&proc_dointvec,
2349 	},
2350 	{
2351 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2352 		.procname	=	"gc_min_interval",
2353          	.data		=	&ip6_rt_gc_min_interval,
2354 		.maxlen		=	sizeof(int),
2355 		.mode		=	0644,
2356          	.proc_handler	=	&proc_dointvec_jiffies,
2357 		.strategy	=	&sysctl_jiffies,
2358 	},
2359 	{
2360 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
2361 		.procname	=	"gc_timeout",
2362          	.data		=	&ip6_rt_gc_timeout,
2363 		.maxlen		=	sizeof(int),
2364 		.mode		=	0644,
2365          	.proc_handler	=	&proc_dointvec_jiffies,
2366 		.strategy	=	&sysctl_jiffies,
2367 	},
2368 	{
2369 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
2370 		.procname	=	"gc_interval",
2371          	.data		=	&ip6_rt_gc_interval,
2372 		.maxlen		=	sizeof(int),
2373 		.mode		=	0644,
2374          	.proc_handler	=	&proc_dointvec_jiffies,
2375 		.strategy	=	&sysctl_jiffies,
2376 	},
2377 	{
2378 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
2379 		.procname	=	"gc_elasticity",
2380          	.data		=	&ip6_rt_gc_elasticity,
2381 		.maxlen		=	sizeof(int),
2382 		.mode		=	0644,
2383          	.proc_handler	=	&proc_dointvec_jiffies,
2384 		.strategy	=	&sysctl_jiffies,
2385 	},
2386 	{
2387 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
2388 		.procname	=	"mtu_expires",
2389          	.data		=	&ip6_rt_mtu_expires,
2390 		.maxlen		=	sizeof(int),
2391 		.mode		=	0644,
2392          	.proc_handler	=	&proc_dointvec_jiffies,
2393 		.strategy	=	&sysctl_jiffies,
2394 	},
2395 	{
2396 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
2397 		.procname	=	"min_adv_mss",
2398          	.data		=	&ip6_rt_min_advmss,
2399 		.maxlen		=	sizeof(int),
2400 		.mode		=	0644,
2401          	.proc_handler	=	&proc_dointvec_jiffies,
2402 		.strategy	=	&sysctl_jiffies,
2403 	},
2404 	{
2405 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2406 		.procname	=	"gc_min_interval_ms",
2407          	.data		=	&ip6_rt_gc_min_interval,
2408 		.maxlen		=	sizeof(int),
2409 		.mode		=	0644,
2410          	.proc_handler	=	&proc_dointvec_ms_jiffies,
2411 		.strategy	=	&sysctl_ms_jiffies,
2412 	},
2413 	{ .ctl_name = 0 }
2414 };
2415 
2416 #endif
2417 
2418 void __init ip6_route_init(void)
2419 {
2420 	struct proc_dir_entry *p;
2421 
2422 	ip6_dst_ops.kmem_cachep =
2423 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2424 				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
2425 	fib6_init();
2426 #ifdef 	CONFIG_PROC_FS
2427 	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2428 	if (p)
2429 		p->owner = THIS_MODULE;
2430 
2431 	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2432 #endif
2433 #ifdef CONFIG_XFRM
2434 	xfrm6_init();
2435 #endif
2436 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2437 	fib6_rules_init();
2438 #endif
2439 }
2440 
2441 void ip6_route_cleanup(void)
2442 {
2443 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2444 	fib6_rules_cleanup();
2445 #endif
2446 #ifdef CONFIG_PROC_FS
2447 	proc_net_remove("ipv6_route");
2448 	proc_net_remove("rt6_stats");
2449 #endif
2450 #ifdef CONFIG_XFRM
2451 	xfrm6_fini();
2452 #endif
2453 	rt6_ifdown(NULL);
2454 	fib6_gc_cleanup();
2455 	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2456 }
2457