xref: /openbmc/linux/net/ipv4/fib_frontend.c (revision 39fe5434cb9de5da40510028b17b96bc4eb312b3)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		IPv4 Forwarding Information Base: FIB frontend.
7  *
8  * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9  *
10  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *		This program is free software; you can redistribute it and/or
13  *		modify it under the terms of the GNU General Public License
14  *		as published by the Free Software Foundation; either version
15  *		2 of the License, or (at your option) any later version.
16  */
17 
18 #include <linux/module.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <linux/bitops.h>
22 #include <linux/capability.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/inetdevice.h>
33 #include <linux/netdevice.h>
34 #include <linux/if_addr.h>
35 #include <linux/if_arp.h>
36 #include <linux/skbuff.h>
37 #include <linux/init.h>
38 #include <linux/list.h>
39 
40 #include <net/ip.h>
41 #include <net/protocol.h>
42 #include <net/route.h>
43 #include <net/tcp.h>
44 #include <net/sock.h>
45 #include <net/icmp.h>
46 #include <net/arp.h>
47 #include <net/ip_fib.h>
48 #include <net/rtnetlink.h>
49 
50 #define FFprint(a...) printk(KERN_DEBUG a)
51 
52 #ifndef CONFIG_IP_MULTIPLE_TABLES
53 
54 struct fib_table *ip_fib_local_table;
55 struct fib_table *ip_fib_main_table;
56 
57 #define FIB_TABLE_HASHSZ 1
58 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
59 
60 #else
61 
62 #define FIB_TABLE_HASHSZ 256
63 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
64 
65 struct fib_table *fib_new_table(u32 id)
66 {
67 	struct fib_table *tb;
68 	unsigned int h;
69 
70 	if (id == 0)
71 		id = RT_TABLE_MAIN;
72 	tb = fib_get_table(id);
73 	if (tb)
74 		return tb;
75 	tb = fib_hash_init(id);
76 	if (!tb)
77 		return NULL;
78 	h = id & (FIB_TABLE_HASHSZ - 1);
79 	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
80 	return tb;
81 }
82 
83 struct fib_table *fib_get_table(u32 id)
84 {
85 	struct fib_table *tb;
86 	struct hlist_node *node;
87 	unsigned int h;
88 
89 	if (id == 0)
90 		id = RT_TABLE_MAIN;
91 	h = id & (FIB_TABLE_HASHSZ - 1);
92 	rcu_read_lock();
93 	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
94 		if (tb->tb_id == id) {
95 			rcu_read_unlock();
96 			return tb;
97 		}
98 	}
99 	rcu_read_unlock();
100 	return NULL;
101 }
102 #endif /* CONFIG_IP_MULTIPLE_TABLES */
103 
104 static void fib_flush(void)
105 {
106 	int flushed = 0;
107 	struct fib_table *tb;
108 	struct hlist_node *node;
109 	unsigned int h;
110 
111 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
112 		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
113 			flushed += tb->tb_flush(tb);
114 	}
115 
116 	if (flushed)
117 		rt_cache_flush(-1);
118 }
119 
120 /*
121  *	Find the first device with a given source address.
122  */
123 
124 struct net_device * ip_dev_find(__be32 addr)
125 {
126 	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
127 	struct fib_result res;
128 	struct net_device *dev = NULL;
129 
130 #ifdef CONFIG_IP_MULTIPLE_TABLES
131 	res.r = NULL;
132 #endif
133 
134 	if (!ip_fib_local_table ||
135 	    ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
136 		return NULL;
137 	if (res.type != RTN_LOCAL)
138 		goto out;
139 	dev = FIB_RES_DEV(res);
140 
141 	if (dev)
142 		dev_hold(dev);
143 out:
144 	fib_res_put(&res);
145 	return dev;
146 }
147 
148 unsigned inet_addr_type(__be32 addr)
149 {
150 	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
151 	struct fib_result	res;
152 	unsigned ret = RTN_BROADCAST;
153 
154 	if (ZERONET(addr) || BADCLASS(addr))
155 		return RTN_BROADCAST;
156 	if (MULTICAST(addr))
157 		return RTN_MULTICAST;
158 
159 #ifdef CONFIG_IP_MULTIPLE_TABLES
160 	res.r = NULL;
161 #endif
162 
163 	if (ip_fib_local_table) {
164 		ret = RTN_UNICAST;
165 		if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
166 						   &fl, &res)) {
167 			ret = res.type;
168 			fib_res_put(&res);
169 		}
170 	}
171 	return ret;
172 }
173 
174 /* Given (packet source, input interface) and optional (dst, oif, tos):
175    - (main) check, that source is valid i.e. not broadcast or our local
176      address.
177    - figure out what "logical" interface this packet arrived
178      and calculate "specific destination" address.
179    - check, that packet arrived from expected physical interface.
180  */
181 
182 int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
183 			struct net_device *dev, __be32 *spec_dst, u32 *itag)
184 {
185 	struct in_device *in_dev;
186 	struct flowi fl = { .nl_u = { .ip4_u =
187 				      { .daddr = src,
188 					.saddr = dst,
189 					.tos = tos } },
190 			    .iif = oif };
191 	struct fib_result res;
192 	int no_addr, rpf;
193 	int ret;
194 
195 	no_addr = rpf = 0;
196 	rcu_read_lock();
197 	in_dev = __in_dev_get_rcu(dev);
198 	if (in_dev) {
199 		no_addr = in_dev->ifa_list == NULL;
200 		rpf = IN_DEV_RPFILTER(in_dev);
201 	}
202 	rcu_read_unlock();
203 
204 	if (in_dev == NULL)
205 		goto e_inval;
206 
207 	if (fib_lookup(&fl, &res))
208 		goto last_resort;
209 	if (res.type != RTN_UNICAST)
210 		goto e_inval_res;
211 	*spec_dst = FIB_RES_PREFSRC(res);
212 	fib_combine_itag(itag, &res);
213 #ifdef CONFIG_IP_ROUTE_MULTIPATH
214 	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
215 #else
216 	if (FIB_RES_DEV(res) == dev)
217 #endif
218 	{
219 		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
220 		fib_res_put(&res);
221 		return ret;
222 	}
223 	fib_res_put(&res);
224 	if (no_addr)
225 		goto last_resort;
226 	if (rpf)
227 		goto e_inval;
228 	fl.oif = dev->ifindex;
229 
230 	ret = 0;
231 	if (fib_lookup(&fl, &res) == 0) {
232 		if (res.type == RTN_UNICAST) {
233 			*spec_dst = FIB_RES_PREFSRC(res);
234 			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
235 		}
236 		fib_res_put(&res);
237 	}
238 	return ret;
239 
240 last_resort:
241 	if (rpf)
242 		goto e_inval;
243 	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
244 	*itag = 0;
245 	return 0;
246 
247 e_inval_res:
248 	fib_res_put(&res);
249 e_inval:
250 	return -EINVAL;
251 }
252 
253 static inline __be32 sk_extract_addr(struct sockaddr *addr)
254 {
255 	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
256 }
257 
258 static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
259 {
260 	struct nlattr *nla;
261 
262 	nla = (struct nlattr *) ((char *) mx + len);
263 	nla->nla_type = type;
264 	nla->nla_len = nla_attr_size(4);
265 	*(u32 *) nla_data(nla) = value;
266 
267 	return len + nla_total_size(4);
268 }
269 
270 static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
271 				 struct fib_config *cfg)
272 {
273 	__be32 addr;
274 	int plen;
275 
276 	memset(cfg, 0, sizeof(*cfg));
277 
278 	if (rt->rt_dst.sa_family != AF_INET)
279 		return -EAFNOSUPPORT;
280 
281 	/*
282 	 * Check mask for validity:
283 	 * a) it must be contiguous.
284 	 * b) destination must have all host bits clear.
285 	 * c) if application forgot to set correct family (AF_INET),
286 	 *    reject request unless it is absolutely clear i.e.
287 	 *    both family and mask are zero.
288 	 */
289 	plen = 32;
290 	addr = sk_extract_addr(&rt->rt_dst);
291 	if (!(rt->rt_flags & RTF_HOST)) {
292 		__be32 mask = sk_extract_addr(&rt->rt_genmask);
293 
294 		if (rt->rt_genmask.sa_family != AF_INET) {
295 			if (mask || rt->rt_genmask.sa_family)
296 				return -EAFNOSUPPORT;
297 		}
298 
299 		if (bad_mask(mask, addr))
300 			return -EINVAL;
301 
302 		plen = inet_mask_len(mask);
303 	}
304 
305 	cfg->fc_dst_len = plen;
306 	cfg->fc_dst = addr;
307 
308 	if (cmd != SIOCDELRT) {
309 		cfg->fc_nlflags = NLM_F_CREATE;
310 		cfg->fc_protocol = RTPROT_BOOT;
311 	}
312 
313 	if (rt->rt_metric)
314 		cfg->fc_priority = rt->rt_metric - 1;
315 
316 	if (rt->rt_flags & RTF_REJECT) {
317 		cfg->fc_scope = RT_SCOPE_HOST;
318 		cfg->fc_type = RTN_UNREACHABLE;
319 		return 0;
320 	}
321 
322 	cfg->fc_scope = RT_SCOPE_NOWHERE;
323 	cfg->fc_type = RTN_UNICAST;
324 
325 	if (rt->rt_dev) {
326 		char *colon;
327 		struct net_device *dev;
328 		char devname[IFNAMSIZ];
329 
330 		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
331 			return -EFAULT;
332 
333 		devname[IFNAMSIZ-1] = 0;
334 		colon = strchr(devname, ':');
335 		if (colon)
336 			*colon = 0;
337 		dev = __dev_get_by_name(devname);
338 		if (!dev)
339 			return -ENODEV;
340 		cfg->fc_oif = dev->ifindex;
341 		if (colon) {
342 			struct in_ifaddr *ifa;
343 			struct in_device *in_dev = __in_dev_get_rtnl(dev);
344 			if (!in_dev)
345 				return -ENODEV;
346 			*colon = ':';
347 			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
348 				if (strcmp(ifa->ifa_label, devname) == 0)
349 					break;
350 			if (ifa == NULL)
351 				return -ENODEV;
352 			cfg->fc_prefsrc = ifa->ifa_local;
353 		}
354 	}
355 
356 	addr = sk_extract_addr(&rt->rt_gateway);
357 	if (rt->rt_gateway.sa_family == AF_INET && addr) {
358 		cfg->fc_gw = addr;
359 		if (rt->rt_flags & RTF_GATEWAY &&
360 		    inet_addr_type(addr) == RTN_UNICAST)
361 			cfg->fc_scope = RT_SCOPE_UNIVERSE;
362 	}
363 
364 	if (cmd == SIOCDELRT)
365 		return 0;
366 
367 	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
368 		return -EINVAL;
369 
370 	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
371 		cfg->fc_scope = RT_SCOPE_LINK;
372 
373 	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
374 		struct nlattr *mx;
375 		int len = 0;
376 
377 		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
378 		if (mx == NULL)
379 			return -ENOMEM;
380 
381 		if (rt->rt_flags & RTF_MTU)
382 			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
383 
384 		if (rt->rt_flags & RTF_WINDOW)
385 			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
386 
387 		if (rt->rt_flags & RTF_IRTT)
388 			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
389 
390 		cfg->fc_mx = mx;
391 		cfg->fc_mx_len = len;
392 	}
393 
394 	return 0;
395 }
396 
397 /*
398  *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
399  */
400 
401 int ip_rt_ioctl(unsigned int cmd, void __user *arg)
402 {
403 	struct fib_config cfg;
404 	struct rtentry rt;
405 	int err;
406 
407 	switch (cmd) {
408 	case SIOCADDRT:		/* Add a route */
409 	case SIOCDELRT:		/* Delete a route */
410 		if (!capable(CAP_NET_ADMIN))
411 			return -EPERM;
412 
413 		if (copy_from_user(&rt, arg, sizeof(rt)))
414 			return -EFAULT;
415 
416 		rtnl_lock();
417 		err = rtentry_to_fib_config(cmd, &rt, &cfg);
418 		if (err == 0) {
419 			struct fib_table *tb;
420 
421 			if (cmd == SIOCDELRT) {
422 				tb = fib_get_table(cfg.fc_table);
423 				if (tb)
424 					err = tb->tb_delete(tb, &cfg);
425 				else
426 					err = -ESRCH;
427 			} else {
428 				tb = fib_new_table(cfg.fc_table);
429 				if (tb)
430 					err = tb->tb_insert(tb, &cfg);
431 				else
432 					err = -ENOBUFS;
433 			}
434 
435 			/* allocated by rtentry_to_fib_config() */
436 			kfree(cfg.fc_mx);
437 		}
438 		rtnl_unlock();
439 		return err;
440 	}
441 	return -EINVAL;
442 }
443 
444 const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
445 	[RTA_DST]		= { .type = NLA_U32 },
446 	[RTA_SRC]		= { .type = NLA_U32 },
447 	[RTA_IIF]		= { .type = NLA_U32 },
448 	[RTA_OIF]		= { .type = NLA_U32 },
449 	[RTA_GATEWAY]		= { .type = NLA_U32 },
450 	[RTA_PRIORITY]		= { .type = NLA_U32 },
451 	[RTA_PREFSRC]		= { .type = NLA_U32 },
452 	[RTA_METRICS]		= { .type = NLA_NESTED },
453 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
454 	[RTA_PROTOINFO]		= { .type = NLA_U32 },
455 	[RTA_FLOW]		= { .type = NLA_U32 },
456 };
457 
458 static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
459 			     struct fib_config *cfg)
460 {
461 	struct nlattr *attr;
462 	int err, remaining;
463 	struct rtmsg *rtm;
464 
465 	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
466 	if (err < 0)
467 		goto errout;
468 
469 	memset(cfg, 0, sizeof(*cfg));
470 
471 	rtm = nlmsg_data(nlh);
472 	cfg->fc_dst_len = rtm->rtm_dst_len;
473 	cfg->fc_tos = rtm->rtm_tos;
474 	cfg->fc_table = rtm->rtm_table;
475 	cfg->fc_protocol = rtm->rtm_protocol;
476 	cfg->fc_scope = rtm->rtm_scope;
477 	cfg->fc_type = rtm->rtm_type;
478 	cfg->fc_flags = rtm->rtm_flags;
479 	cfg->fc_nlflags = nlh->nlmsg_flags;
480 
481 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
482 	cfg->fc_nlinfo.nlh = nlh;
483 
484 	if (cfg->fc_type > RTN_MAX) {
485 		err = -EINVAL;
486 		goto errout;
487 	}
488 
489 	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
490 		switch (attr->nla_type) {
491 		case RTA_DST:
492 			cfg->fc_dst = nla_get_be32(attr);
493 			break;
494 		case RTA_OIF:
495 			cfg->fc_oif = nla_get_u32(attr);
496 			break;
497 		case RTA_GATEWAY:
498 			cfg->fc_gw = nla_get_be32(attr);
499 			break;
500 		case RTA_PRIORITY:
501 			cfg->fc_priority = nla_get_u32(attr);
502 			break;
503 		case RTA_PREFSRC:
504 			cfg->fc_prefsrc = nla_get_be32(attr);
505 			break;
506 		case RTA_METRICS:
507 			cfg->fc_mx = nla_data(attr);
508 			cfg->fc_mx_len = nla_len(attr);
509 			break;
510 		case RTA_MULTIPATH:
511 			cfg->fc_mp = nla_data(attr);
512 			cfg->fc_mp_len = nla_len(attr);
513 			break;
514 		case RTA_FLOW:
515 			cfg->fc_flow = nla_get_u32(attr);
516 			break;
517 		case RTA_TABLE:
518 			cfg->fc_table = nla_get_u32(attr);
519 			break;
520 		}
521 	}
522 
523 	return 0;
524 errout:
525 	return err;
526 }
527 
528 static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
529 {
530 	struct fib_config cfg;
531 	struct fib_table *tb;
532 	int err;
533 
534 	err = rtm_to_fib_config(skb, nlh, &cfg);
535 	if (err < 0)
536 		goto errout;
537 
538 	tb = fib_get_table(cfg.fc_table);
539 	if (tb == NULL) {
540 		err = -ESRCH;
541 		goto errout;
542 	}
543 
544 	err = tb->tb_delete(tb, &cfg);
545 errout:
546 	return err;
547 }
548 
549 static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
550 {
551 	struct fib_config cfg;
552 	struct fib_table *tb;
553 	int err;
554 
555 	err = rtm_to_fib_config(skb, nlh, &cfg);
556 	if (err < 0)
557 		goto errout;
558 
559 	tb = fib_new_table(cfg.fc_table);
560 	if (tb == NULL) {
561 		err = -ENOBUFS;
562 		goto errout;
563 	}
564 
565 	err = tb->tb_insert(tb, &cfg);
566 errout:
567 	return err;
568 }
569 
570 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
571 {
572 	unsigned int h, s_h;
573 	unsigned int e = 0, s_e;
574 	struct fib_table *tb;
575 	struct hlist_node *node;
576 	int dumped = 0;
577 
578 	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
579 	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
580 		return ip_rt_dump(skb, cb);
581 
582 	s_h = cb->args[0];
583 	s_e = cb->args[1];
584 
585 	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
586 		e = 0;
587 		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
588 			if (e < s_e)
589 				goto next;
590 			if (dumped)
591 				memset(&cb->args[2], 0, sizeof(cb->args) -
592 						 2 * sizeof(cb->args[0]));
593 			if (tb->tb_dump(tb, skb, cb) < 0)
594 				goto out;
595 			dumped = 1;
596 next:
597 			e++;
598 		}
599 	}
600 out:
601 	cb->args[1] = e;
602 	cb->args[0] = h;
603 
604 	return skb->len;
605 }
606 
607 /* Prepare and feed intra-kernel routing request.
608    Really, it should be netlink message, but :-( netlink
609    can be not configured, so that we feed it directly
610    to fib engine. It is legal, because all events occur
611    only when netlink is already locked.
612  */
613 
614 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
615 {
616 	struct fib_table *tb;
617 	struct fib_config cfg = {
618 		.fc_protocol = RTPROT_KERNEL,
619 		.fc_type = type,
620 		.fc_dst = dst,
621 		.fc_dst_len = dst_len,
622 		.fc_prefsrc = ifa->ifa_local,
623 		.fc_oif = ifa->ifa_dev->dev->ifindex,
624 		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
625 	};
626 
627 	if (type == RTN_UNICAST)
628 		tb = fib_new_table(RT_TABLE_MAIN);
629 	else
630 		tb = fib_new_table(RT_TABLE_LOCAL);
631 
632 	if (tb == NULL)
633 		return;
634 
635 	cfg.fc_table = tb->tb_id;
636 
637 	if (type != RTN_LOCAL)
638 		cfg.fc_scope = RT_SCOPE_LINK;
639 	else
640 		cfg.fc_scope = RT_SCOPE_HOST;
641 
642 	if (cmd == RTM_NEWROUTE)
643 		tb->tb_insert(tb, &cfg);
644 	else
645 		tb->tb_delete(tb, &cfg);
646 }
647 
648 void fib_add_ifaddr(struct in_ifaddr *ifa)
649 {
650 	struct in_device *in_dev = ifa->ifa_dev;
651 	struct net_device *dev = in_dev->dev;
652 	struct in_ifaddr *prim = ifa;
653 	__be32 mask = ifa->ifa_mask;
654 	__be32 addr = ifa->ifa_local;
655 	__be32 prefix = ifa->ifa_address&mask;
656 
657 	if (ifa->ifa_flags&IFA_F_SECONDARY) {
658 		prim = inet_ifa_byprefix(in_dev, prefix, mask);
659 		if (prim == NULL) {
660 			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
661 			return;
662 		}
663 	}
664 
665 	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
666 
667 	if (!(dev->flags&IFF_UP))
668 		return;
669 
670 	/* Add broadcast address, if it is explicitly assigned. */
671 	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
672 		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
673 
674 	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
675 	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
676 		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
677 			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
678 
679 		/* Add network specific broadcasts, when it takes a sense */
680 		if (ifa->ifa_prefixlen < 31) {
681 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
682 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
683 		}
684 	}
685 }
686 
687 static void fib_del_ifaddr(struct in_ifaddr *ifa)
688 {
689 	struct in_device *in_dev = ifa->ifa_dev;
690 	struct net_device *dev = in_dev->dev;
691 	struct in_ifaddr *ifa1;
692 	struct in_ifaddr *prim = ifa;
693 	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
694 	__be32 any = ifa->ifa_address&ifa->ifa_mask;
695 #define LOCAL_OK	1
696 #define BRD_OK		2
697 #define BRD0_OK		4
698 #define BRD1_OK		8
699 	unsigned ok = 0;
700 
701 	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
702 		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
703 			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
704 	else {
705 		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
706 		if (prim == NULL) {
707 			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
708 			return;
709 		}
710 	}
711 
712 	/* Deletion is more complicated than add.
713 	   We should take care of not to delete too much :-)
714 
715 	   Scan address list to be sure that addresses are really gone.
716 	 */
717 
718 	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
719 		if (ifa->ifa_local == ifa1->ifa_local)
720 			ok |= LOCAL_OK;
721 		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
722 			ok |= BRD_OK;
723 		if (brd == ifa1->ifa_broadcast)
724 			ok |= BRD1_OK;
725 		if (any == ifa1->ifa_broadcast)
726 			ok |= BRD0_OK;
727 	}
728 
729 	if (!(ok&BRD_OK))
730 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
731 	if (!(ok&BRD1_OK))
732 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
733 	if (!(ok&BRD0_OK))
734 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
735 	if (!(ok&LOCAL_OK)) {
736 		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
737 
738 		/* Check, that this local address finally disappeared. */
739 		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
740 			/* And the last, but not the least thing.
741 			   We must flush stray FIB entries.
742 
743 			   First of all, we scan fib_info list searching
744 			   for stray nexthop entries, then ignite fib_flush.
745 			*/
746 			if (fib_sync_down(ifa->ifa_local, NULL, 0))
747 				fib_flush();
748 		}
749 	}
750 #undef LOCAL_OK
751 #undef BRD_OK
752 #undef BRD0_OK
753 #undef BRD1_OK
754 }
755 
756 static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
757 {
758 
759 	struct fib_result       res;
760 	struct flowi            fl = { .mark = frn->fl_mark,
761 				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
762 							    .tos = frn->fl_tos,
763 							    .scope = frn->fl_scope } } };
764 
765 #ifdef CONFIG_IP_MULTIPLE_TABLES
766 	res.r = NULL;
767 #endif
768 
769 	frn->err = -ENOENT;
770 	if (tb) {
771 		local_bh_disable();
772 
773 		frn->tb_id = tb->tb_id;
774 		frn->err = tb->tb_lookup(tb, &fl, &res);
775 
776 		if (!frn->err) {
777 			frn->prefixlen = res.prefixlen;
778 			frn->nh_sel = res.nh_sel;
779 			frn->type = res.type;
780 			frn->scope = res.scope;
781 			fib_res_put(&res);
782 		}
783 		local_bh_enable();
784 	}
785 }
786 
787 static void nl_fib_input(struct sock *sk, int len)
788 {
789 	struct sk_buff *skb = NULL;
790 	struct nlmsghdr *nlh = NULL;
791 	struct fib_result_nl *frn;
792 	u32 pid;
793 	struct fib_table *tb;
794 
795 	skb = skb_dequeue(&sk->sk_receive_queue);
796 	if (skb == NULL)
797 		return;
798 
799 	nlh = nlmsg_hdr(skb);
800 	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
801 	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
802 		kfree_skb(skb);
803 		return;
804 	}
805 
806 	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
807 	tb = fib_get_table(frn->tb_id_in);
808 
809 	nl_fib_lookup(frn, tb);
810 
811 	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
812 	NETLINK_CB(skb).pid = 0;         /* from kernel */
813 	NETLINK_CB(skb).dst_group = 0;  /* unicast */
814 	netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
815 }
816 
817 static void nl_fib_lookup_init(void)
818 {
819       netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL,
820 			    THIS_MODULE);
821 }
822 
/*
 * Tear down IP routing state for a device: run fib_sync_down() for it
 * (flushing the tables when that reports a non-zero result), flush the
 * routing cache and call arp_ifdown() for the device.  force is passed
 * through to fib_sync_down() unchanged.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stale = fib_sync_down(0, dev, force);

	if (stale)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
830 
831 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
832 {
833 	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
834 
835 	switch (event) {
836 	case NETDEV_UP:
837 		fib_add_ifaddr(ifa);
838 #ifdef CONFIG_IP_ROUTE_MULTIPATH
839 		fib_sync_up(ifa->ifa_dev->dev);
840 #endif
841 		rt_cache_flush(-1);
842 		break;
843 	case NETDEV_DOWN:
844 		fib_del_ifaddr(ifa);
845 		if (ifa->ifa_dev->ifa_list == NULL) {
846 			/* Last address was deleted from this interface.
847 			   Disable IP.
848 			 */
849 			fib_disable_ip(ifa->ifa_dev->dev, 1);
850 		} else {
851 			rt_cache_flush(-1);
852 		}
853 		break;
854 	}
855 	return NOTIFY_DONE;
856 }
857 
858 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
859 {
860 	struct net_device *dev = ptr;
861 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
862 
863 	if (event == NETDEV_UNREGISTER) {
864 		fib_disable_ip(dev, 2);
865 		return NOTIFY_DONE;
866 	}
867 
868 	if (!in_dev)
869 		return NOTIFY_DONE;
870 
871 	switch (event) {
872 	case NETDEV_UP:
873 		for_ifa(in_dev) {
874 			fib_add_ifaddr(ifa);
875 		} endfor_ifa(in_dev);
876 #ifdef CONFIG_IP_ROUTE_MULTIPATH
877 		fib_sync_up(dev);
878 #endif
879 		rt_cache_flush(-1);
880 		break;
881 	case NETDEV_DOWN:
882 		fib_disable_ip(dev, 0);
883 		break;
884 	case NETDEV_CHANGEMTU:
885 	case NETDEV_CHANGE:
886 		rt_cache_flush(0);
887 		break;
888 	}
889 	return NOTIFY_DONE;
890 }
891 
892 static struct notifier_block fib_inetaddr_notifier = {
893 	.notifier_call =fib_inetaddr_event,
894 };
895 
896 static struct notifier_block fib_netdev_notifier = {
897 	.notifier_call =fib_netdev_event,
898 };
899 
900 void __init ip_fib_init(void)
901 {
902 	unsigned int i;
903 
904 	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
905 		INIT_HLIST_HEAD(&fib_table_hash[i]);
906 #ifndef CONFIG_IP_MULTIPLE_TABLES
907 	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
908 	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
909 	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
910 	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
911 #else
912 	fib4_rules_init();
913 #endif
914 
915 	register_netdevice_notifier(&fib_netdev_notifier);
916 	register_inetaddr_notifier(&fib_inetaddr_notifier);
917 	nl_fib_lookup_init();
918 
919 	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
920 	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
921 	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
922 }
923 
924 EXPORT_SYMBOL(inet_addr_type);
925 EXPORT_SYMBOL(ip_dev_find);
926