xref: /openbmc/linux/net/ipv4/fib_semantics.c (revision 13abf813)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		IPv4 Forwarding Information Base: semantics.
7  *
8  * Version:	$Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9  *
10  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *		This program is free software; you can redistribute it and/or
13  *		modify it under the terms of the GNU General Public License
14  *		as published by the Free Software Foundation; either version
15  *		2 of the License, or (at your option) any later version.
16  */
17 
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <linux/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/jiffies.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
38 
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/ip_fib.h>
45 #include <net/ip_mp_alg.h>
46 
47 #include "fib_lookup.h"
48 
49 #define FSprintk(a...)
50 
51 static DEFINE_RWLOCK(fib_info_lock);
52 static struct hlist_head *fib_info_hash;
53 static struct hlist_head *fib_info_laddrhash;
54 static unsigned int fib_hash_size;
55 static unsigned int fib_info_cnt;
56 
57 #define DEVINDEX_HASHBITS 8
58 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
59 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
60 
61 #ifdef CONFIG_IP_ROUTE_MULTIPATH
62 
63 static DEFINE_SPINLOCK(fib_multipath_lock);
64 
65 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
66 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
67 
68 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
69 for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
70 
71 #else /* CONFIG_IP_ROUTE_MULTIPATH */
72 
73 /* Hope, that gcc will optimize it to get rid of dummy loop */
74 
75 #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
76 for (nhsel=0; nhsel < 1; nhsel++)
77 
78 #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
79 for (nhsel=0; nhsel < 1; nhsel++)
80 
81 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
82 
83 #define endfor_nexthops(fi) }
84 
85 
86 static struct
87 {
88 	int	error;
89 	u8	scope;
90 } fib_props[RTA_MAX + 1] = {
91         {
92 		.error	= 0,
93 		.scope	= RT_SCOPE_NOWHERE,
94 	},	/* RTN_UNSPEC */
95 	{
96 		.error	= 0,
97 		.scope	= RT_SCOPE_UNIVERSE,
98 	},	/* RTN_UNICAST */
99 	{
100 		.error	= 0,
101 		.scope	= RT_SCOPE_HOST,
102 	},	/* RTN_LOCAL */
103 	{
104 		.error	= 0,
105 		.scope	= RT_SCOPE_LINK,
106 	},	/* RTN_BROADCAST */
107 	{
108 		.error	= 0,
109 		.scope	= RT_SCOPE_LINK,
110 	},	/* RTN_ANYCAST */
111 	{
112 		.error	= 0,
113 		.scope	= RT_SCOPE_UNIVERSE,
114 	},	/* RTN_MULTICAST */
115 	{
116 		.error	= -EINVAL,
117 		.scope	= RT_SCOPE_UNIVERSE,
118 	},	/* RTN_BLACKHOLE */
119 	{
120 		.error	= -EHOSTUNREACH,
121 		.scope	= RT_SCOPE_UNIVERSE,
122 	},	/* RTN_UNREACHABLE */
123 	{
124 		.error	= -EACCES,
125 		.scope	= RT_SCOPE_UNIVERSE,
126 	},	/* RTN_PROHIBIT */
127 	{
128 		.error	= -EAGAIN,
129 		.scope	= RT_SCOPE_UNIVERSE,
130 	},	/* RTN_THROW */
131 	{
132 		.error	= -EINVAL,
133 		.scope	= RT_SCOPE_NOWHERE,
134 	},	/* RTN_NAT */
135 	{
136 		.error	= -EINVAL,
137 		.scope	= RT_SCOPE_NOWHERE,
138 	},	/* RTN_XRESOLVE */
139 };
140 
141 
142 /* Release a nexthop info record */
143 
144 void free_fib_info(struct fib_info *fi)
145 {
146 	if (fi->fib_dead == 0) {
147 		printk("Freeing alive fib_info %p\n", fi);
148 		return;
149 	}
150 	change_nexthops(fi) {
151 		if (nh->nh_dev)
152 			dev_put(nh->nh_dev);
153 		nh->nh_dev = NULL;
154 	} endfor_nexthops(fi);
155 	fib_info_cnt--;
156 	kfree(fi);
157 }
158 
159 void fib_release_info(struct fib_info *fi)
160 {
161 	write_lock(&fib_info_lock);
162 	if (fi && --fi->fib_treeref == 0) {
163 		hlist_del(&fi->fib_hash);
164 		if (fi->fib_prefsrc)
165 			hlist_del(&fi->fib_lhash);
166 		change_nexthops(fi) {
167 			if (!nh->nh_dev)
168 				continue;
169 			hlist_del(&nh->nh_hash);
170 		} endfor_nexthops(fi)
171 		fi->fib_dead = 1;
172 		fib_info_put(fi);
173 	}
174 	write_unlock(&fib_info_lock);
175 }
176 
177 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
178 {
179 	const struct fib_nh *onh = ofi->fib_nh;
180 
181 	for_nexthops(fi) {
182 		if (nh->nh_oif != onh->nh_oif ||
183 		    nh->nh_gw  != onh->nh_gw ||
184 		    nh->nh_scope != onh->nh_scope ||
185 #ifdef CONFIG_IP_ROUTE_MULTIPATH
186 		    nh->nh_weight != onh->nh_weight ||
187 #endif
188 #ifdef CONFIG_NET_CLS_ROUTE
189 		    nh->nh_tclassid != onh->nh_tclassid ||
190 #endif
191 		    ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
192 			return -1;
193 		onh++;
194 	} endfor_nexthops(fi);
195 	return 0;
196 }
197 
198 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
199 {
200 	unsigned int mask = (fib_hash_size - 1);
201 	unsigned int val = fi->fib_nhs;
202 
203 	val ^= fi->fib_protocol;
204 	val ^= fi->fib_prefsrc;
205 	val ^= fi->fib_priority;
206 
207 	return (val ^ (val >> 7) ^ (val >> 12)) & mask;
208 }
209 
210 static struct fib_info *fib_find_info(const struct fib_info *nfi)
211 {
212 	struct hlist_head *head;
213 	struct hlist_node *node;
214 	struct fib_info *fi;
215 	unsigned int hash;
216 
217 	hash = fib_info_hashfn(nfi);
218 	head = &fib_info_hash[hash];
219 
220 	hlist_for_each_entry(fi, node, head, fib_hash) {
221 		if (fi->fib_nhs != nfi->fib_nhs)
222 			continue;
223 		if (nfi->fib_protocol == fi->fib_protocol &&
224 		    nfi->fib_prefsrc == fi->fib_prefsrc &&
225 		    nfi->fib_priority == fi->fib_priority &&
226 		    memcmp(nfi->fib_metrics, fi->fib_metrics,
227 			   sizeof(fi->fib_metrics)) == 0 &&
228 		    ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
229 		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
230 			return fi;
231 	}
232 
233 	return NULL;
234 }
235 
236 static inline unsigned int fib_devindex_hashfn(unsigned int val)
237 {
238 	unsigned int mask = DEVINDEX_HASHSIZE - 1;
239 
240 	return (val ^
241 		(val >> DEVINDEX_HASHBITS) ^
242 		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
243 }
244 
245 /* Check, that the gateway is already configured.
246    Used only by redirect accept routine.
247  */
248 
249 int ip_fib_check_default(u32 gw, struct net_device *dev)
250 {
251 	struct hlist_head *head;
252 	struct hlist_node *node;
253 	struct fib_nh *nh;
254 	unsigned int hash;
255 
256 	read_lock(&fib_info_lock);
257 
258 	hash = fib_devindex_hashfn(dev->ifindex);
259 	head = &fib_info_devhash[hash];
260 	hlist_for_each_entry(nh, node, head, nh_hash) {
261 		if (nh->nh_dev == dev &&
262 		    nh->nh_gw == gw &&
263 		    !(nh->nh_flags&RTNH_F_DEAD)) {
264 			read_unlock(&fib_info_lock);
265 			return 0;
266 		}
267 	}
268 
269 	read_unlock(&fib_info_lock);
270 
271 	return -1;
272 }
273 
274 void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
275 	       int z, int tb_id,
276 	       struct nlmsghdr *n, struct netlink_skb_parms *req)
277 {
278 	struct sk_buff *skb;
279 	u32 pid = req ? req->pid : n->nlmsg_pid;
280 	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
281 
282 	skb = alloc_skb(size, GFP_KERNEL);
283 	if (!skb)
284 		return;
285 
286 	if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
287 			  fa->fa_type, fa->fa_scope, &key, z,
288 			  fa->fa_tos,
289 			  fa->fa_info, 0) < 0) {
290 		kfree_skb(skb);
291 		return;
292 	}
293 	NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE;
294 	if (n->nlmsg_flags&NLM_F_ECHO)
295 		atomic_inc(&skb->users);
296 	netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL);
297 	if (n->nlmsg_flags&NLM_F_ECHO)
298 		netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
299 }
300 
301 /* Return the first fib alias matching TOS with
302  * priority less than or equal to PRIO.
303  */
304 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
305 {
306 	if (fah) {
307 		struct fib_alias *fa;
308 		list_for_each_entry(fa, fah, fa_list) {
309 			if (fa->fa_tos > tos)
310 				continue;
311 			if (fa->fa_info->fib_priority >= prio ||
312 			    fa->fa_tos < tos)
313 				return fa;
314 		}
315 	}
316 	return NULL;
317 }
318 
319 int fib_detect_death(struct fib_info *fi, int order,
320 		     struct fib_info **last_resort, int *last_idx, int *dflt)
321 {
322 	struct neighbour *n;
323 	int state = NUD_NONE;
324 
325 	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
326 	if (n) {
327 		state = n->nud_state;
328 		neigh_release(n);
329 	}
330 	if (state==NUD_REACHABLE)
331 		return 0;
332 	if ((state&NUD_VALID) && order != *dflt)
333 		return 0;
334 	if ((state&NUD_VALID) ||
335 	    (*last_idx<0 && order > *dflt)) {
336 		*last_resort = fi;
337 		*last_idx = order;
338 	}
339 	return 1;
340 }
341 
342 #ifdef CONFIG_IP_ROUTE_MULTIPATH
343 
344 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
345 {
346 	while (RTA_OK(attr,attrlen)) {
347 		if (attr->rta_type == type)
348 			return *(u32*)RTA_DATA(attr);
349 		attr = RTA_NEXT(attr, attrlen);
350 	}
351 	return 0;
352 }
353 
354 static int
355 fib_count_nexthops(struct rtattr *rta)
356 {
357 	int nhs = 0;
358 	struct rtnexthop *nhp = RTA_DATA(rta);
359 	int nhlen = RTA_PAYLOAD(rta);
360 
361 	while (nhlen >= (int)sizeof(struct rtnexthop)) {
362 		if ((nhlen -= nhp->rtnh_len) < 0)
363 			return 0;
364 		nhs++;
365 		nhp = RTNH_NEXT(nhp);
366 	};
367 	return nhs;
368 }
369 
370 static int
371 fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
372 {
373 	struct rtnexthop *nhp = RTA_DATA(rta);
374 	int nhlen = RTA_PAYLOAD(rta);
375 
376 	change_nexthops(fi) {
377 		int attrlen = nhlen - sizeof(struct rtnexthop);
378 		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
379 			return -EINVAL;
380 		nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
381 		nh->nh_oif = nhp->rtnh_ifindex;
382 		nh->nh_weight = nhp->rtnh_hops + 1;
383 		if (attrlen) {
384 			nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
385 #ifdef CONFIG_NET_CLS_ROUTE
386 			nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
387 #endif
388 		}
389 		nhp = RTNH_NEXT(nhp);
390 	} endfor_nexthops(fi);
391 	return 0;
392 }
393 
394 #endif
395 
396 int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
397 		 struct fib_info *fi)
398 {
399 #ifdef CONFIG_IP_ROUTE_MULTIPATH
400 	struct rtnexthop *nhp;
401 	int nhlen;
402 #endif
403 
404 	if (rta->rta_priority &&
405 	    *rta->rta_priority != fi->fib_priority)
406 		return 1;
407 
408 	if (rta->rta_oif || rta->rta_gw) {
409 		if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
410 		    (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
411 			return 0;
412 		return 1;
413 	}
414 
415 #ifdef CONFIG_IP_ROUTE_MULTIPATH
416 	if (rta->rta_mp == NULL)
417 		return 0;
418 	nhp = RTA_DATA(rta->rta_mp);
419 	nhlen = RTA_PAYLOAD(rta->rta_mp);
420 
421 	for_nexthops(fi) {
422 		int attrlen = nhlen - sizeof(struct rtnexthop);
423 		u32 gw;
424 
425 		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
426 			return -EINVAL;
427 		if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
428 			return 1;
429 		if (attrlen) {
430 			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
431 			if (gw && gw != nh->nh_gw)
432 				return 1;
433 #ifdef CONFIG_NET_CLS_ROUTE
434 			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
435 			if (gw && gw != nh->nh_tclassid)
436 				return 1;
437 #endif
438 		}
439 		nhp = RTNH_NEXT(nhp);
440 	} endfor_nexthops(fi);
441 #endif
442 	return 0;
443 }
444 
445 
446 /*
447    Picture
448    -------
449 
450    Semantics of nexthop is very messy by historical reasons.
451    We have to take into account, that:
452    a) gateway can be actually local interface address,
453       so that gatewayed route is direct.
454    b) gateway must be on-link address, possibly
455       described not by an ifaddr, but also by a direct route.
456    c) If both gateway and interface are specified, they should not
457       contradict.
458    d) If we use tunnel routes, gateway could be not on-link.
459 
460    Attempt to reconcile all of these (alas, self-contradictory) conditions
461    results in pretty ugly and hairy code with obscure logic.
462 
463    I chose to generalized it instead, so that the size
464    of code does not increase practically, but it becomes
465    much more general.
466    Every prefix is assigned a "scope" value: "host" is local address,
467    "link" is direct route,
468    [ ... "site" ... "interior" ... ]
469    and "universe" is true gateway route with global meaning.
470 
471    Every prefix refers to a set of "nexthop"s (gw, oif),
472    where gw must have narrower scope. This recursion stops
473    when gw has LOCAL scope or if "nexthop" is declared ONLINK,
474    which means that gw is forced to be on link.
475 
476    Code is still hairy, but now it is apparently logically
477    consistent and very flexible. F.e. as by-product it allows
478    to co-exists in peace independent exterior and interior
479    routing processes.
480 
481    Normally it looks as following.
482 
483    {universe prefix}  -> (gw, oif) [scope link]
484                           |
485 			  |-> {link prefix} -> (gw, oif) [scope local]
486 			                        |
487 						|-> {local prefix} (terminal node)
488  */
489 
490 static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
491 {
492 	int err;
493 
494 	if (nh->nh_gw) {
495 		struct fib_result res;
496 
497 #ifdef CONFIG_IP_ROUTE_PERVASIVE
498 		if (nh->nh_flags&RTNH_F_PERVASIVE)
499 			return 0;
500 #endif
501 		if (nh->nh_flags&RTNH_F_ONLINK) {
502 			struct net_device *dev;
503 
504 			if (r->rtm_scope >= RT_SCOPE_LINK)
505 				return -EINVAL;
506 			if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
507 				return -EINVAL;
508 			if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
509 				return -ENODEV;
510 			if (!(dev->flags&IFF_UP))
511 				return -ENETDOWN;
512 			nh->nh_dev = dev;
513 			dev_hold(dev);
514 			nh->nh_scope = RT_SCOPE_LINK;
515 			return 0;
516 		}
517 		{
518 			struct flowi fl = { .nl_u = { .ip4_u =
519 						      { .daddr = nh->nh_gw,
520 							.scope = r->rtm_scope + 1 } },
521 					    .oif = nh->nh_oif };
522 
523 			/* It is not necessary, but requires a bit of thinking */
524 			if (fl.fl4_scope < RT_SCOPE_LINK)
525 				fl.fl4_scope = RT_SCOPE_LINK;
526 			if ((err = fib_lookup(&fl, &res)) != 0)
527 				return err;
528 		}
529 		err = -EINVAL;
530 		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
531 			goto out;
532 		nh->nh_scope = res.scope;
533 		nh->nh_oif = FIB_RES_OIF(res);
534 		if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
535 			goto out;
536 		dev_hold(nh->nh_dev);
537 		err = -ENETDOWN;
538 		if (!(nh->nh_dev->flags & IFF_UP))
539 			goto out;
540 		err = 0;
541 out:
542 		fib_res_put(&res);
543 		return err;
544 	} else {
545 		struct in_device *in_dev;
546 
547 		if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
548 			return -EINVAL;
549 
550 		in_dev = inetdev_by_index(nh->nh_oif);
551 		if (in_dev == NULL)
552 			return -ENODEV;
553 		if (!(in_dev->dev->flags&IFF_UP)) {
554 			in_dev_put(in_dev);
555 			return -ENETDOWN;
556 		}
557 		nh->nh_dev = in_dev->dev;
558 		dev_hold(nh->nh_dev);
559 		nh->nh_scope = RT_SCOPE_HOST;
560 		in_dev_put(in_dev);
561 	}
562 	return 0;
563 }
564 
565 static inline unsigned int fib_laddr_hashfn(u32 val)
566 {
567 	unsigned int mask = (fib_hash_size - 1);
568 
569 	return (val ^ (val >> 7) ^ (val >> 14)) & mask;
570 }
571 
572 static struct hlist_head *fib_hash_alloc(int bytes)
573 {
574 	if (bytes <= PAGE_SIZE)
575 		return kmalloc(bytes, GFP_KERNEL);
576 	else
577 		return (struct hlist_head *)
578 			__get_free_pages(GFP_KERNEL, get_order(bytes));
579 }
580 
581 static void fib_hash_free(struct hlist_head *hash, int bytes)
582 {
583 	if (!hash)
584 		return;
585 
586 	if (bytes <= PAGE_SIZE)
587 		kfree(hash);
588 	else
589 		free_pages((unsigned long) hash, get_order(bytes));
590 }
591 
592 static void fib_hash_move(struct hlist_head *new_info_hash,
593 			  struct hlist_head *new_laddrhash,
594 			  unsigned int new_size)
595 {
596 	struct hlist_head *old_info_hash, *old_laddrhash;
597 	unsigned int old_size = fib_hash_size;
598 	unsigned int i, bytes;
599 
600 	write_lock(&fib_info_lock);
601 	old_info_hash = fib_info_hash;
602 	old_laddrhash = fib_info_laddrhash;
603 	fib_hash_size = new_size;
604 
605 	for (i = 0; i < old_size; i++) {
606 		struct hlist_head *head = &fib_info_hash[i];
607 		struct hlist_node *node, *n;
608 		struct fib_info *fi;
609 
610 		hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
611 			struct hlist_head *dest;
612 			unsigned int new_hash;
613 
614 			hlist_del(&fi->fib_hash);
615 
616 			new_hash = fib_info_hashfn(fi);
617 			dest = &new_info_hash[new_hash];
618 			hlist_add_head(&fi->fib_hash, dest);
619 		}
620 	}
621 	fib_info_hash = new_info_hash;
622 
623 	for (i = 0; i < old_size; i++) {
624 		struct hlist_head *lhead = &fib_info_laddrhash[i];
625 		struct hlist_node *node, *n;
626 		struct fib_info *fi;
627 
628 		hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
629 			struct hlist_head *ldest;
630 			unsigned int new_hash;
631 
632 			hlist_del(&fi->fib_lhash);
633 
634 			new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
635 			ldest = &new_laddrhash[new_hash];
636 			hlist_add_head(&fi->fib_lhash, ldest);
637 		}
638 	}
639 	fib_info_laddrhash = new_laddrhash;
640 
641 	write_unlock(&fib_info_lock);
642 
643 	bytes = old_size * sizeof(struct hlist_head *);
644 	fib_hash_free(old_info_hash, bytes);
645 	fib_hash_free(old_laddrhash, bytes);
646 }
647 
648 struct fib_info *
649 fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
650 		const struct nlmsghdr *nlh, int *errp)
651 {
652 	int err;
653 	struct fib_info *fi = NULL;
654 	struct fib_info *ofi;
655 #ifdef CONFIG_IP_ROUTE_MULTIPATH
656 	int nhs = 1;
657 #else
658 	const int nhs = 1;
659 #endif
660 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
661 	u32 mp_alg = IP_MP_ALG_NONE;
662 #endif
663 
664 	/* Fast check to catch the most weird cases */
665 	if (fib_props[r->rtm_type].scope > r->rtm_scope)
666 		goto err_inval;
667 
668 #ifdef CONFIG_IP_ROUTE_MULTIPATH
669 	if (rta->rta_mp) {
670 		nhs = fib_count_nexthops(rta->rta_mp);
671 		if (nhs == 0)
672 			goto err_inval;
673 	}
674 #endif
675 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
676 	if (rta->rta_mp_alg) {
677 		mp_alg = *rta->rta_mp_alg;
678 
679 		if (mp_alg < IP_MP_ALG_NONE ||
680 		    mp_alg > IP_MP_ALG_MAX)
681 			goto err_inval;
682 	}
683 #endif
684 
685 	err = -ENOBUFS;
686 	if (fib_info_cnt >= fib_hash_size) {
687 		unsigned int new_size = fib_hash_size << 1;
688 		struct hlist_head *new_info_hash;
689 		struct hlist_head *new_laddrhash;
690 		unsigned int bytes;
691 
692 		if (!new_size)
693 			new_size = 1;
694 		bytes = new_size * sizeof(struct hlist_head *);
695 		new_info_hash = fib_hash_alloc(bytes);
696 		new_laddrhash = fib_hash_alloc(bytes);
697 		if (!new_info_hash || !new_laddrhash) {
698 			fib_hash_free(new_info_hash, bytes);
699 			fib_hash_free(new_laddrhash, bytes);
700 		} else {
701 			memset(new_info_hash, 0, bytes);
702 			memset(new_laddrhash, 0, bytes);
703 
704 			fib_hash_move(new_info_hash, new_laddrhash, new_size);
705 		}
706 
707 		if (!fib_hash_size)
708 			goto failure;
709 	}
710 
711 	fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
712 	if (fi == NULL)
713 		goto failure;
714 	fib_info_cnt++;
715 	memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
716 
717 	fi->fib_protocol = r->rtm_protocol;
718 
719 	fi->fib_nhs = nhs;
720 	change_nexthops(fi) {
721 		nh->nh_parent = fi;
722 	} endfor_nexthops(fi)
723 
724 	fi->fib_flags = r->rtm_flags;
725 	if (rta->rta_priority)
726 		fi->fib_priority = *rta->rta_priority;
727 	if (rta->rta_mx) {
728 		int attrlen = RTA_PAYLOAD(rta->rta_mx);
729 		struct rtattr *attr = RTA_DATA(rta->rta_mx);
730 
731 		while (RTA_OK(attr, attrlen)) {
732 			unsigned flavor = attr->rta_type;
733 			if (flavor) {
734 				if (flavor > RTAX_MAX)
735 					goto err_inval;
736 				fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
737 			}
738 			attr = RTA_NEXT(attr, attrlen);
739 		}
740 	}
741 	if (rta->rta_prefsrc)
742 		memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
743 
744 	if (rta->rta_mp) {
745 #ifdef CONFIG_IP_ROUTE_MULTIPATH
746 		if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
747 			goto failure;
748 		if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
749 			goto err_inval;
750 		if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
751 			goto err_inval;
752 #ifdef CONFIG_NET_CLS_ROUTE
753 		if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
754 			goto err_inval;
755 #endif
756 #else
757 		goto err_inval;
758 #endif
759 	} else {
760 		struct fib_nh *nh = fi->fib_nh;
761 		if (rta->rta_oif)
762 			nh->nh_oif = *rta->rta_oif;
763 		if (rta->rta_gw)
764 			memcpy(&nh->nh_gw, rta->rta_gw, 4);
765 #ifdef CONFIG_NET_CLS_ROUTE
766 		if (rta->rta_flow)
767 			memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
768 #endif
769 		nh->nh_flags = r->rtm_flags;
770 #ifdef CONFIG_IP_ROUTE_MULTIPATH
771 		nh->nh_weight = 1;
772 #endif
773 	}
774 
775 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
776 	fi->fib_mp_alg = mp_alg;
777 #endif
778 
779 	if (fib_props[r->rtm_type].error) {
780 		if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
781 			goto err_inval;
782 		goto link_it;
783 	}
784 
785 	if (r->rtm_scope > RT_SCOPE_HOST)
786 		goto err_inval;
787 
788 	if (r->rtm_scope == RT_SCOPE_HOST) {
789 		struct fib_nh *nh = fi->fib_nh;
790 
791 		/* Local address is added. */
792 		if (nhs != 1 || nh->nh_gw)
793 			goto err_inval;
794 		nh->nh_scope = RT_SCOPE_NOWHERE;
795 		nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
796 		err = -ENODEV;
797 		if (nh->nh_dev == NULL)
798 			goto failure;
799 	} else {
800 		change_nexthops(fi) {
801 			if ((err = fib_check_nh(r, fi, nh)) != 0)
802 				goto failure;
803 		} endfor_nexthops(fi)
804 	}
805 
806 	if (fi->fib_prefsrc) {
807 		if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
808 		    memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
809 			if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
810 				goto err_inval;
811 	}
812 
813 link_it:
814 	if ((ofi = fib_find_info(fi)) != NULL) {
815 		fi->fib_dead = 1;
816 		free_fib_info(fi);
817 		ofi->fib_treeref++;
818 		return ofi;
819 	}
820 
821 	fi->fib_treeref++;
822 	atomic_inc(&fi->fib_clntref);
823 	write_lock(&fib_info_lock);
824 	hlist_add_head(&fi->fib_hash,
825 		       &fib_info_hash[fib_info_hashfn(fi)]);
826 	if (fi->fib_prefsrc) {
827 		struct hlist_head *head;
828 
829 		head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
830 		hlist_add_head(&fi->fib_lhash, head);
831 	}
832 	change_nexthops(fi) {
833 		struct hlist_head *head;
834 		unsigned int hash;
835 
836 		if (!nh->nh_dev)
837 			continue;
838 		hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
839 		head = &fib_info_devhash[hash];
840 		hlist_add_head(&nh->nh_hash, head);
841 	} endfor_nexthops(fi)
842 	write_unlock(&fib_info_lock);
843 	return fi;
844 
845 err_inval:
846 	err = -EINVAL;
847 
848 failure:
849         *errp = err;
850         if (fi) {
851 		fi->fib_dead = 1;
852 		free_fib_info(fi);
853 	}
854 	return NULL;
855 }
856 
857 /* Note! fib_semantic_match intentionally uses  RCU list functions. */
858 int fib_semantic_match(struct list_head *head, const struct flowi *flp,
859 		       struct fib_result *res, __u32 zone, __u32 mask,
860 			int prefixlen)
861 {
862 	struct fib_alias *fa;
863 	int nh_sel = 0;
864 
865 	list_for_each_entry_rcu(fa, head, fa_list) {
866 		int err;
867 
868 		if (fa->fa_tos &&
869 		    fa->fa_tos != flp->fl4_tos)
870 			continue;
871 
872 		if (fa->fa_scope < flp->fl4_scope)
873 			continue;
874 
875 		fa->fa_state |= FA_S_ACCESSED;
876 
877 		err = fib_props[fa->fa_type].error;
878 		if (err == 0) {
879 			struct fib_info *fi = fa->fa_info;
880 
881 			if (fi->fib_flags & RTNH_F_DEAD)
882 				continue;
883 
884 			switch (fa->fa_type) {
885 			case RTN_UNICAST:
886 			case RTN_LOCAL:
887 			case RTN_BROADCAST:
888 			case RTN_ANYCAST:
889 			case RTN_MULTICAST:
890 				for_nexthops(fi) {
891 					if (nh->nh_flags&RTNH_F_DEAD)
892 						continue;
893 					if (!flp->oif || flp->oif == nh->nh_oif)
894 						break;
895 				}
896 #ifdef CONFIG_IP_ROUTE_MULTIPATH
897 				if (nhsel < fi->fib_nhs) {
898 					nh_sel = nhsel;
899 					goto out_fill_res;
900 				}
901 #else
902 				if (nhsel < 1) {
903 					goto out_fill_res;
904 				}
905 #endif
906 				endfor_nexthops(fi);
907 				continue;
908 
909 			default:
910 				printk(KERN_DEBUG "impossible 102\n");
911 				return -EINVAL;
912 			};
913 		}
914 		return err;
915 	}
916 	return 1;
917 
918 out_fill_res:
919 	res->prefixlen = prefixlen;
920 	res->nh_sel = nh_sel;
921 	res->type = fa->fa_type;
922 	res->scope = fa->fa_scope;
923 	res->fi = fa->fa_info;
924 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
925 	res->netmask = mask;
926 	res->network = zone &
927 		(0xFFFFFFFF >> (32 - prefixlen));
928 #endif
929 	atomic_inc(&res->fi->fib_clntref);
930 	return 0;
931 }
932 
933 /* Find appropriate source address to this destination */
934 
935 u32 __fib_res_prefsrc(struct fib_result *res)
936 {
937 	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
938 }
939 
940 int
941 fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
942 	      u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
943 	      struct fib_info *fi, unsigned int flags)
944 {
945 	struct rtmsg *rtm;
946 	struct nlmsghdr  *nlh;
947 	unsigned char	 *b = skb->tail;
948 
949 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
950 	rtm = NLMSG_DATA(nlh);
951 	rtm->rtm_family = AF_INET;
952 	rtm->rtm_dst_len = dst_len;
953 	rtm->rtm_src_len = 0;
954 	rtm->rtm_tos = tos;
955 	rtm->rtm_table = tb_id;
956 	rtm->rtm_type = type;
957 	rtm->rtm_flags = fi->fib_flags;
958 	rtm->rtm_scope = scope;
959 	if (rtm->rtm_dst_len)
960 		RTA_PUT(skb, RTA_DST, 4, dst);
961 	rtm->rtm_protocol = fi->fib_protocol;
962 	if (fi->fib_priority)
963 		RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
964 #ifdef CONFIG_NET_CLS_ROUTE
965 	if (fi->fib_nh[0].nh_tclassid)
966 		RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
967 #endif
968 	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
969 		goto rtattr_failure;
970 	if (fi->fib_prefsrc)
971 		RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
972 	if (fi->fib_nhs == 1) {
973 		if (fi->fib_nh->nh_gw)
974 			RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
975 		if (fi->fib_nh->nh_oif)
976 			RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
977 	}
978 #ifdef CONFIG_IP_ROUTE_MULTIPATH
979 	if (fi->fib_nhs > 1) {
980 		struct rtnexthop *nhp;
981 		struct rtattr *mp_head;
982 		if (skb_tailroom(skb) <= RTA_SPACE(0))
983 			goto rtattr_failure;
984 		mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
985 
986 		for_nexthops(fi) {
987 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
988 				goto rtattr_failure;
989 			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
990 			nhp->rtnh_flags = nh->nh_flags & 0xFF;
991 			nhp->rtnh_hops = nh->nh_weight-1;
992 			nhp->rtnh_ifindex = nh->nh_oif;
993 			if (nh->nh_gw)
994 				RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
995 			nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
996 		} endfor_nexthops(fi);
997 		mp_head->rta_type = RTA_MULTIPATH;
998 		mp_head->rta_len = skb->tail - (u8*)mp_head;
999 	}
1000 #endif
1001 	nlh->nlmsg_len = skb->tail - b;
1002 	return skb->len;
1003 
1004 nlmsg_failure:
1005 rtattr_failure:
1006 	skb_trim(skb, b - skb->data);
1007 	return -1;
1008 }
1009 
1010 #ifndef CONFIG_IP_NOSIOCRT
1011 
1012 int
1013 fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
1014 		    struct kern_rta *rta, struct rtentry *r)
1015 {
1016 	int    plen;
1017 	u32    *ptr;
1018 
1019 	memset(rtm, 0, sizeof(*rtm));
1020 	memset(rta, 0, sizeof(*rta));
1021 
1022 	if (r->rt_dst.sa_family != AF_INET)
1023 		return -EAFNOSUPPORT;
1024 
1025 	/* Check mask for validity:
1026 	   a) it must be contiguous.
1027 	   b) destination must have all host bits clear.
1028 	   c) if application forgot to set correct family (AF_INET),
1029 	      reject request unless it is absolutely clear i.e.
1030 	      both family and mask are zero.
1031 	 */
1032 	plen = 32;
1033 	ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
1034 	if (!(r->rt_flags&RTF_HOST)) {
1035 		u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
1036 		if (r->rt_genmask.sa_family != AF_INET) {
1037 			if (mask || r->rt_genmask.sa_family)
1038 				return -EAFNOSUPPORT;
1039 		}
1040 		if (bad_mask(mask, *ptr))
1041 			return -EINVAL;
1042 		plen = inet_mask_len(mask);
1043 	}
1044 
1045 	nl->nlmsg_flags = NLM_F_REQUEST;
1046 	nl->nlmsg_pid = current->pid;
1047 	nl->nlmsg_seq = 0;
1048 	nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
1049 	if (cmd == SIOCDELRT) {
1050 		nl->nlmsg_type = RTM_DELROUTE;
1051 		nl->nlmsg_flags = 0;
1052 	} else {
1053 		nl->nlmsg_type = RTM_NEWROUTE;
1054 		nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
1055 		rtm->rtm_protocol = RTPROT_BOOT;
1056 	}
1057 
1058 	rtm->rtm_dst_len = plen;
1059 	rta->rta_dst = ptr;
1060 
1061 	if (r->rt_metric) {
1062 		*(u32*)&r->rt_pad3 = r->rt_metric - 1;
1063 		rta->rta_priority = (u32*)&r->rt_pad3;
1064 	}
1065 	if (r->rt_flags&RTF_REJECT) {
1066 		rtm->rtm_scope = RT_SCOPE_HOST;
1067 		rtm->rtm_type = RTN_UNREACHABLE;
1068 		return 0;
1069 	}
1070 	rtm->rtm_scope = RT_SCOPE_NOWHERE;
1071 	rtm->rtm_type = RTN_UNICAST;
1072 
1073 	if (r->rt_dev) {
1074 		char *colon;
1075 		struct net_device *dev;
1076 		char   devname[IFNAMSIZ];
1077 
1078 		if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
1079 			return -EFAULT;
1080 		devname[IFNAMSIZ-1] = 0;
1081 		colon = strchr(devname, ':');
1082 		if (colon)
1083 			*colon = 0;
1084 		dev = __dev_get_by_name(devname);
1085 		if (!dev)
1086 			return -ENODEV;
1087 		rta->rta_oif = &dev->ifindex;
1088 		if (colon) {
1089 			struct in_ifaddr *ifa;
1090 			struct in_device *in_dev = __in_dev_get(dev);
1091 			if (!in_dev)
1092 				return -ENODEV;
1093 			*colon = ':';
1094 			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
1095 				if (strcmp(ifa->ifa_label, devname) == 0)
1096 					break;
1097 			if (ifa == NULL)
1098 				return -ENODEV;
1099 			rta->rta_prefsrc = &ifa->ifa_local;
1100 		}
1101 	}
1102 
1103 	ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
1104 	if (r->rt_gateway.sa_family == AF_INET && *ptr) {
1105 		rta->rta_gw = ptr;
1106 		if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
1107 			rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1108 	}
1109 
1110 	if (cmd == SIOCDELRT)
1111 		return 0;
1112 
1113 	if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
1114 		return -EINVAL;
1115 
1116 	if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
1117 		rtm->rtm_scope = RT_SCOPE_LINK;
1118 
1119 	if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
1120 		struct rtattr *rec;
1121 		struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
1122 		if (mx == NULL)
1123 			return -ENOMEM;
1124 		rta->rta_mx = mx;
1125 		mx->rta_type = RTA_METRICS;
1126 		mx->rta_len  = RTA_LENGTH(0);
1127 		if (r->rt_flags&RTF_MTU) {
1128 			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1129 			rec->rta_type = RTAX_ADVMSS;
1130 			rec->rta_len = RTA_LENGTH(4);
1131 			mx->rta_len += RTA_LENGTH(4);
1132 			*(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
1133 		}
1134 		if (r->rt_flags&RTF_WINDOW) {
1135 			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1136 			rec->rta_type = RTAX_WINDOW;
1137 			rec->rta_len = RTA_LENGTH(4);
1138 			mx->rta_len += RTA_LENGTH(4);
1139 			*(u32*)RTA_DATA(rec) = r->rt_window;
1140 		}
1141 		if (r->rt_flags&RTF_IRTT) {
1142 			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1143 			rec->rta_type = RTAX_RTT;
1144 			rec->rta_len = RTA_LENGTH(4);
1145 			mx->rta_len += RTA_LENGTH(4);
1146 			*(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
1147 		}
1148 	}
1149 	return 0;
1150 }
1151 
1152 #endif
1153 
1154 /*
1155    Update FIB if:
1156    - local address disappeared -> we must delete all the entries
1157      referring to it.
1158    - device went down -> we must shutdown all nexthops going via it.
1159  */
1160 
1161 int fib_sync_down(u32 local, struct net_device *dev, int force)
1162 {
1163 	int ret = 0;
1164 	int scope = RT_SCOPE_NOWHERE;
1165 
1166 	if (force)
1167 		scope = -1;
1168 
1169 	if (local && fib_info_laddrhash) {
1170 		unsigned int hash = fib_laddr_hashfn(local);
1171 		struct hlist_head *head = &fib_info_laddrhash[hash];
1172 		struct hlist_node *node;
1173 		struct fib_info *fi;
1174 
1175 		hlist_for_each_entry(fi, node, head, fib_lhash) {
1176 			if (fi->fib_prefsrc == local) {
1177 				fi->fib_flags |= RTNH_F_DEAD;
1178 				ret++;
1179 			}
1180 		}
1181 	}
1182 
1183 	if (dev) {
1184 		struct fib_info *prev_fi = NULL;
1185 		unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1186 		struct hlist_head *head = &fib_info_devhash[hash];
1187 		struct hlist_node *node;
1188 		struct fib_nh *nh;
1189 
1190 		hlist_for_each_entry(nh, node, head, nh_hash) {
1191 			struct fib_info *fi = nh->nh_parent;
1192 			int dead;
1193 
1194 			BUG_ON(!fi->fib_nhs);
1195 			if (nh->nh_dev != dev || fi == prev_fi)
1196 				continue;
1197 			prev_fi = fi;
1198 			dead = 0;
1199 			change_nexthops(fi) {
1200 				if (nh->nh_flags&RTNH_F_DEAD)
1201 					dead++;
1202 				else if (nh->nh_dev == dev &&
1203 					 nh->nh_scope != scope) {
1204 					nh->nh_flags |= RTNH_F_DEAD;
1205 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1206 					spin_lock_bh(&fib_multipath_lock);
1207 					fi->fib_power -= nh->nh_power;
1208 					nh->nh_power = 0;
1209 					spin_unlock_bh(&fib_multipath_lock);
1210 #endif
1211 					dead++;
1212 				}
1213 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1214 				if (force > 1 && nh->nh_dev == dev) {
1215 					dead = fi->fib_nhs;
1216 					break;
1217 				}
1218 #endif
1219 			} endfor_nexthops(fi)
1220 			if (dead == fi->fib_nhs) {
1221 				fi->fib_flags |= RTNH_F_DEAD;
1222 				ret++;
1223 			}
1224 		}
1225 	}
1226 
1227 	return ret;
1228 }
1229 
1230 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1231 
1232 /*
1233    Dead device goes up. We wake up dead nexthops.
1234    It takes sense only on multipath routes.
1235  */
1236 
1237 int fib_sync_up(struct net_device *dev)
1238 {
1239 	struct fib_info *prev_fi;
1240 	unsigned int hash;
1241 	struct hlist_head *head;
1242 	struct hlist_node *node;
1243 	struct fib_nh *nh;
1244 	int ret;
1245 
1246 	if (!(dev->flags&IFF_UP))
1247 		return 0;
1248 
1249 	prev_fi = NULL;
1250 	hash = fib_devindex_hashfn(dev->ifindex);
1251 	head = &fib_info_devhash[hash];
1252 	ret = 0;
1253 
1254 	hlist_for_each_entry(nh, node, head, nh_hash) {
1255 		struct fib_info *fi = nh->nh_parent;
1256 		int alive;
1257 
1258 		BUG_ON(!fi->fib_nhs);
1259 		if (nh->nh_dev != dev || fi == prev_fi)
1260 			continue;
1261 
1262 		prev_fi = fi;
1263 		alive = 0;
1264 		change_nexthops(fi) {
1265 			if (!(nh->nh_flags&RTNH_F_DEAD)) {
1266 				alive++;
1267 				continue;
1268 			}
1269 			if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1270 				continue;
1271 			if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
1272 				continue;
1273 			alive++;
1274 			spin_lock_bh(&fib_multipath_lock);
1275 			nh->nh_power = 0;
1276 			nh->nh_flags &= ~RTNH_F_DEAD;
1277 			spin_unlock_bh(&fib_multipath_lock);
1278 		} endfor_nexthops(fi)
1279 
1280 		if (alive > 0) {
1281 			fi->fib_flags &= ~RTNH_F_DEAD;
1282 			ret++;
1283 		}
1284 	}
1285 
1286 	return ret;
1287 }
1288 
1289 /*
1290    The algorithm is suboptimal, but it provides really
1291    fair weighted route distribution.
1292  */
1293 
1294 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1295 {
1296 	struct fib_info *fi = res->fi;
1297 	int w;
1298 
1299 	spin_lock_bh(&fib_multipath_lock);
1300 	if (fi->fib_power <= 0) {
1301 		int power = 0;
1302 		change_nexthops(fi) {
1303 			if (!(nh->nh_flags&RTNH_F_DEAD)) {
1304 				power += nh->nh_weight;
1305 				nh->nh_power = nh->nh_weight;
1306 			}
1307 		} endfor_nexthops(fi);
1308 		fi->fib_power = power;
1309 		if (power <= 0) {
1310 			spin_unlock_bh(&fib_multipath_lock);
1311 			/* Race condition: route has just become dead. */
1312 			res->nh_sel = 0;
1313 			return;
1314 		}
1315 	}
1316 
1317 
1318 	/* w should be random number [0..fi->fib_power-1],
1319 	   it is pretty bad approximation.
1320 	 */
1321 
1322 	w = jiffies % fi->fib_power;
1323 
1324 	change_nexthops(fi) {
1325 		if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1326 			if ((w -= nh->nh_power) <= 0) {
1327 				nh->nh_power--;
1328 				fi->fib_power--;
1329 				res->nh_sel = nhsel;
1330 				spin_unlock_bh(&fib_multipath_lock);
1331 				return;
1332 			}
1333 		}
1334 	} endfor_nexthops(fi);
1335 
1336 	/* Race condition: route has just become dead. */
1337 	res->nh_sel = 0;
1338 	spin_unlock_bh(&fib_multipath_lock);
1339 }
1340 #endif
1341