xref: /openbmc/linux/net/ipv4/fib_frontend.c (revision 96de0e252cedffad61b3cb5e05662c591898e69a)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		IPv4 Forwarding Information Base: FIB frontend.
7  *
8  * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9  *
10  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *		This program is free software; you can redistribute it and/or
13  *		modify it under the terms of the GNU General Public License
14  *		as published by the Free Software Foundation; either version
15  *		2 of the License, or (at your option) any later version.
16  */
17 
18 #include <linux/module.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <linux/bitops.h>
22 #include <linux/capability.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/inetdevice.h>
33 #include <linux/netdevice.h>
34 #include <linux/if_addr.h>
35 #include <linux/if_arp.h>
36 #include <linux/skbuff.h>
37 #include <linux/init.h>
38 #include <linux/list.h>
39 
40 #include <net/ip.h>
41 #include <net/protocol.h>
42 #include <net/route.h>
43 #include <net/tcp.h>
44 #include <net/sock.h>
45 #include <net/icmp.h>
46 #include <net/arp.h>
47 #include <net/ip_fib.h>
48 #include <net/rtnetlink.h>
49 
50 #define FFprint(a...) printk(KERN_DEBUG a)
51 
52 static struct sock *fibnl;
53 
54 #ifndef CONFIG_IP_MULTIPLE_TABLES
55 
56 struct fib_table *ip_fib_local_table;
57 struct fib_table *ip_fib_main_table;
58 
59 #define FIB_TABLE_HASHSZ 1
60 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
61 
62 #else
63 
64 #define FIB_TABLE_HASHSZ 256
65 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
66 
67 struct fib_table *fib_new_table(u32 id)
68 {
69 	struct fib_table *tb;
70 	unsigned int h;
71 
72 	if (id == 0)
73 		id = RT_TABLE_MAIN;
74 	tb = fib_get_table(id);
75 	if (tb)
76 		return tb;
77 	tb = fib_hash_init(id);
78 	if (!tb)
79 		return NULL;
80 	h = id & (FIB_TABLE_HASHSZ - 1);
81 	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
82 	return tb;
83 }
84 
85 struct fib_table *fib_get_table(u32 id)
86 {
87 	struct fib_table *tb;
88 	struct hlist_node *node;
89 	unsigned int h;
90 
91 	if (id == 0)
92 		id = RT_TABLE_MAIN;
93 	h = id & (FIB_TABLE_HASHSZ - 1);
94 	rcu_read_lock();
95 	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
96 		if (tb->tb_id == id) {
97 			rcu_read_unlock();
98 			return tb;
99 		}
100 	}
101 	rcu_read_unlock();
102 	return NULL;
103 }
104 #endif /* CONFIG_IP_MULTIPLE_TABLES */
105 
106 static void fib_flush(void)
107 {
108 	int flushed = 0;
109 	struct fib_table *tb;
110 	struct hlist_node *node;
111 	unsigned int h;
112 
113 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
114 		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
115 			flushed += tb->tb_flush(tb);
116 	}
117 
118 	if (flushed)
119 		rt_cache_flush(-1);
120 }
121 
122 /*
123  *	Find the first device with a given source address.
124  */
125 
126 struct net_device * ip_dev_find(__be32 addr)
127 {
128 	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
129 	struct fib_result res;
130 	struct net_device *dev = NULL;
131 
132 #ifdef CONFIG_IP_MULTIPLE_TABLES
133 	res.r = NULL;
134 #endif
135 
136 	if (!ip_fib_local_table ||
137 	    ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
138 		return NULL;
139 	if (res.type != RTN_LOCAL)
140 		goto out;
141 	dev = FIB_RES_DEV(res);
142 
143 	if (dev)
144 		dev_hold(dev);
145 out:
146 	fib_res_put(&res);
147 	return dev;
148 }
149 
150 unsigned inet_addr_type(__be32 addr)
151 {
152 	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
153 	struct fib_result	res;
154 	unsigned ret = RTN_BROADCAST;
155 
156 	if (ZERONET(addr) || BADCLASS(addr))
157 		return RTN_BROADCAST;
158 	if (MULTICAST(addr))
159 		return RTN_MULTICAST;
160 
161 #ifdef CONFIG_IP_MULTIPLE_TABLES
162 	res.r = NULL;
163 #endif
164 
165 	if (ip_fib_local_table) {
166 		ret = RTN_UNICAST;
167 		if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
168 						   &fl, &res)) {
169 			ret = res.type;
170 			fib_res_put(&res);
171 		}
172 	}
173 	return ret;
174 }
175 
176 /* Given (packet source, input interface) and optional (dst, oif, tos):
177    - (main) check, that source is valid i.e. not broadcast or our local
178      address.
179    - figure out what "logical" interface this packet arrived
180      and calculate "specific destination" address.
181    - check, that packet arrived from expected physical interface.
182  */
183 
184 int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
185 			struct net_device *dev, __be32 *spec_dst, u32 *itag)
186 {
187 	struct in_device *in_dev;
188 	struct flowi fl = { .nl_u = { .ip4_u =
189 				      { .daddr = src,
190 					.saddr = dst,
191 					.tos = tos } },
192 			    .iif = oif };
193 	struct fib_result res;
194 	int no_addr, rpf;
195 	int ret;
196 
197 	no_addr = rpf = 0;
198 	rcu_read_lock();
199 	in_dev = __in_dev_get_rcu(dev);
200 	if (in_dev) {
201 		no_addr = in_dev->ifa_list == NULL;
202 		rpf = IN_DEV_RPFILTER(in_dev);
203 	}
204 	rcu_read_unlock();
205 
206 	if (in_dev == NULL)
207 		goto e_inval;
208 
209 	if (fib_lookup(&fl, &res))
210 		goto last_resort;
211 	if (res.type != RTN_UNICAST)
212 		goto e_inval_res;
213 	*spec_dst = FIB_RES_PREFSRC(res);
214 	fib_combine_itag(itag, &res);
215 #ifdef CONFIG_IP_ROUTE_MULTIPATH
216 	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
217 #else
218 	if (FIB_RES_DEV(res) == dev)
219 #endif
220 	{
221 		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
222 		fib_res_put(&res);
223 		return ret;
224 	}
225 	fib_res_put(&res);
226 	if (no_addr)
227 		goto last_resort;
228 	if (rpf)
229 		goto e_inval;
230 	fl.oif = dev->ifindex;
231 
232 	ret = 0;
233 	if (fib_lookup(&fl, &res) == 0) {
234 		if (res.type == RTN_UNICAST) {
235 			*spec_dst = FIB_RES_PREFSRC(res);
236 			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
237 		}
238 		fib_res_put(&res);
239 	}
240 	return ret;
241 
242 last_resort:
243 	if (rpf)
244 		goto e_inval;
245 	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
246 	*itag = 0;
247 	return 0;
248 
249 e_inval_res:
250 	fib_res_put(&res);
251 e_inval:
252 	return -EINVAL;
253 }
254 
255 static inline __be32 sk_extract_addr(struct sockaddr *addr)
256 {
257 	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
258 }
259 
260 static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
261 {
262 	struct nlattr *nla;
263 
264 	nla = (struct nlattr *) ((char *) mx + len);
265 	nla->nla_type = type;
266 	nla->nla_len = nla_attr_size(4);
267 	*(u32 *) nla_data(nla) = value;
268 
269 	return len + nla_total_size(4);
270 }
271 
272 static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
273 				 struct fib_config *cfg)
274 {
275 	__be32 addr;
276 	int plen;
277 
278 	memset(cfg, 0, sizeof(*cfg));
279 
280 	if (rt->rt_dst.sa_family != AF_INET)
281 		return -EAFNOSUPPORT;
282 
283 	/*
284 	 * Check mask for validity:
285 	 * a) it must be contiguous.
286 	 * b) destination must have all host bits clear.
287 	 * c) if application forgot to set correct family (AF_INET),
288 	 *    reject request unless it is absolutely clear i.e.
289 	 *    both family and mask are zero.
290 	 */
291 	plen = 32;
292 	addr = sk_extract_addr(&rt->rt_dst);
293 	if (!(rt->rt_flags & RTF_HOST)) {
294 		__be32 mask = sk_extract_addr(&rt->rt_genmask);
295 
296 		if (rt->rt_genmask.sa_family != AF_INET) {
297 			if (mask || rt->rt_genmask.sa_family)
298 				return -EAFNOSUPPORT;
299 		}
300 
301 		if (bad_mask(mask, addr))
302 			return -EINVAL;
303 
304 		plen = inet_mask_len(mask);
305 	}
306 
307 	cfg->fc_dst_len = plen;
308 	cfg->fc_dst = addr;
309 
310 	if (cmd != SIOCDELRT) {
311 		cfg->fc_nlflags = NLM_F_CREATE;
312 		cfg->fc_protocol = RTPROT_BOOT;
313 	}
314 
315 	if (rt->rt_metric)
316 		cfg->fc_priority = rt->rt_metric - 1;
317 
318 	if (rt->rt_flags & RTF_REJECT) {
319 		cfg->fc_scope = RT_SCOPE_HOST;
320 		cfg->fc_type = RTN_UNREACHABLE;
321 		return 0;
322 	}
323 
324 	cfg->fc_scope = RT_SCOPE_NOWHERE;
325 	cfg->fc_type = RTN_UNICAST;
326 
327 	if (rt->rt_dev) {
328 		char *colon;
329 		struct net_device *dev;
330 		char devname[IFNAMSIZ];
331 
332 		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
333 			return -EFAULT;
334 
335 		devname[IFNAMSIZ-1] = 0;
336 		colon = strchr(devname, ':');
337 		if (colon)
338 			*colon = 0;
339 		dev = __dev_get_by_name(&init_net, devname);
340 		if (!dev)
341 			return -ENODEV;
342 		cfg->fc_oif = dev->ifindex;
343 		if (colon) {
344 			struct in_ifaddr *ifa;
345 			struct in_device *in_dev = __in_dev_get_rtnl(dev);
346 			if (!in_dev)
347 				return -ENODEV;
348 			*colon = ':';
349 			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
350 				if (strcmp(ifa->ifa_label, devname) == 0)
351 					break;
352 			if (ifa == NULL)
353 				return -ENODEV;
354 			cfg->fc_prefsrc = ifa->ifa_local;
355 		}
356 	}
357 
358 	addr = sk_extract_addr(&rt->rt_gateway);
359 	if (rt->rt_gateway.sa_family == AF_INET && addr) {
360 		cfg->fc_gw = addr;
361 		if (rt->rt_flags & RTF_GATEWAY &&
362 		    inet_addr_type(addr) == RTN_UNICAST)
363 			cfg->fc_scope = RT_SCOPE_UNIVERSE;
364 	}
365 
366 	if (cmd == SIOCDELRT)
367 		return 0;
368 
369 	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
370 		return -EINVAL;
371 
372 	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
373 		cfg->fc_scope = RT_SCOPE_LINK;
374 
375 	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
376 		struct nlattr *mx;
377 		int len = 0;
378 
379 		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
380 		if (mx == NULL)
381 			return -ENOMEM;
382 
383 		if (rt->rt_flags & RTF_MTU)
384 			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
385 
386 		if (rt->rt_flags & RTF_WINDOW)
387 			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
388 
389 		if (rt->rt_flags & RTF_IRTT)
390 			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
391 
392 		cfg->fc_mx = mx;
393 		cfg->fc_mx_len = len;
394 	}
395 
396 	return 0;
397 }
398 
399 /*
400  *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
401  */
402 
403 int ip_rt_ioctl(unsigned int cmd, void __user *arg)
404 {
405 	struct fib_config cfg;
406 	struct rtentry rt;
407 	int err;
408 
409 	switch (cmd) {
410 	case SIOCADDRT:		/* Add a route */
411 	case SIOCDELRT:		/* Delete a route */
412 		if (!capable(CAP_NET_ADMIN))
413 			return -EPERM;
414 
415 		if (copy_from_user(&rt, arg, sizeof(rt)))
416 			return -EFAULT;
417 
418 		rtnl_lock();
419 		err = rtentry_to_fib_config(cmd, &rt, &cfg);
420 		if (err == 0) {
421 			struct fib_table *tb;
422 
423 			if (cmd == SIOCDELRT) {
424 				tb = fib_get_table(cfg.fc_table);
425 				if (tb)
426 					err = tb->tb_delete(tb, &cfg);
427 				else
428 					err = -ESRCH;
429 			} else {
430 				tb = fib_new_table(cfg.fc_table);
431 				if (tb)
432 					err = tb->tb_insert(tb, &cfg);
433 				else
434 					err = -ENOBUFS;
435 			}
436 
437 			/* allocated by rtentry_to_fib_config() */
438 			kfree(cfg.fc_mx);
439 		}
440 		rtnl_unlock();
441 		return err;
442 	}
443 	return -EINVAL;
444 }
445 
446 const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
447 	[RTA_DST]		= { .type = NLA_U32 },
448 	[RTA_SRC]		= { .type = NLA_U32 },
449 	[RTA_IIF]		= { .type = NLA_U32 },
450 	[RTA_OIF]		= { .type = NLA_U32 },
451 	[RTA_GATEWAY]		= { .type = NLA_U32 },
452 	[RTA_PRIORITY]		= { .type = NLA_U32 },
453 	[RTA_PREFSRC]		= { .type = NLA_U32 },
454 	[RTA_METRICS]		= { .type = NLA_NESTED },
455 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
456 	[RTA_PROTOINFO]		= { .type = NLA_U32 },
457 	[RTA_FLOW]		= { .type = NLA_U32 },
458 };
459 
460 static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
461 			     struct fib_config *cfg)
462 {
463 	struct nlattr *attr;
464 	int err, remaining;
465 	struct rtmsg *rtm;
466 
467 	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
468 	if (err < 0)
469 		goto errout;
470 
471 	memset(cfg, 0, sizeof(*cfg));
472 
473 	rtm = nlmsg_data(nlh);
474 	cfg->fc_dst_len = rtm->rtm_dst_len;
475 	cfg->fc_tos = rtm->rtm_tos;
476 	cfg->fc_table = rtm->rtm_table;
477 	cfg->fc_protocol = rtm->rtm_protocol;
478 	cfg->fc_scope = rtm->rtm_scope;
479 	cfg->fc_type = rtm->rtm_type;
480 	cfg->fc_flags = rtm->rtm_flags;
481 	cfg->fc_nlflags = nlh->nlmsg_flags;
482 
483 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
484 	cfg->fc_nlinfo.nlh = nlh;
485 
486 	if (cfg->fc_type > RTN_MAX) {
487 		err = -EINVAL;
488 		goto errout;
489 	}
490 
491 	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
492 		switch (nla_type(attr)) {
493 		case RTA_DST:
494 			cfg->fc_dst = nla_get_be32(attr);
495 			break;
496 		case RTA_OIF:
497 			cfg->fc_oif = nla_get_u32(attr);
498 			break;
499 		case RTA_GATEWAY:
500 			cfg->fc_gw = nla_get_be32(attr);
501 			break;
502 		case RTA_PRIORITY:
503 			cfg->fc_priority = nla_get_u32(attr);
504 			break;
505 		case RTA_PREFSRC:
506 			cfg->fc_prefsrc = nla_get_be32(attr);
507 			break;
508 		case RTA_METRICS:
509 			cfg->fc_mx = nla_data(attr);
510 			cfg->fc_mx_len = nla_len(attr);
511 			break;
512 		case RTA_MULTIPATH:
513 			cfg->fc_mp = nla_data(attr);
514 			cfg->fc_mp_len = nla_len(attr);
515 			break;
516 		case RTA_FLOW:
517 			cfg->fc_flow = nla_get_u32(attr);
518 			break;
519 		case RTA_TABLE:
520 			cfg->fc_table = nla_get_u32(attr);
521 			break;
522 		}
523 	}
524 
525 	return 0;
526 errout:
527 	return err;
528 }
529 
530 static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
531 {
532 	struct fib_config cfg;
533 	struct fib_table *tb;
534 	int err;
535 
536 	err = rtm_to_fib_config(skb, nlh, &cfg);
537 	if (err < 0)
538 		goto errout;
539 
540 	tb = fib_get_table(cfg.fc_table);
541 	if (tb == NULL) {
542 		err = -ESRCH;
543 		goto errout;
544 	}
545 
546 	err = tb->tb_delete(tb, &cfg);
547 errout:
548 	return err;
549 }
550 
551 static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
552 {
553 	struct fib_config cfg;
554 	struct fib_table *tb;
555 	int err;
556 
557 	err = rtm_to_fib_config(skb, nlh, &cfg);
558 	if (err < 0)
559 		goto errout;
560 
561 	tb = fib_new_table(cfg.fc_table);
562 	if (tb == NULL) {
563 		err = -ENOBUFS;
564 		goto errout;
565 	}
566 
567 	err = tb->tb_insert(tb, &cfg);
568 errout:
569 	return err;
570 }
571 
572 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
573 {
574 	unsigned int h, s_h;
575 	unsigned int e = 0, s_e;
576 	struct fib_table *tb;
577 	struct hlist_node *node;
578 	int dumped = 0;
579 
580 	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
581 	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
582 		return ip_rt_dump(skb, cb);
583 
584 	s_h = cb->args[0];
585 	s_e = cb->args[1];
586 
587 	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
588 		e = 0;
589 		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
590 			if (e < s_e)
591 				goto next;
592 			if (dumped)
593 				memset(&cb->args[2], 0, sizeof(cb->args) -
594 						 2 * sizeof(cb->args[0]));
595 			if (tb->tb_dump(tb, skb, cb) < 0)
596 				goto out;
597 			dumped = 1;
598 next:
599 			e++;
600 		}
601 	}
602 out:
603 	cb->args[1] = e;
604 	cb->args[0] = h;
605 
606 	return skb->len;
607 }
608 
609 /* Prepare and feed intra-kernel routing request.
610    Really, it should be netlink message, but :-( netlink
611    can be not configured, so that we feed it directly
612    to fib engine. It is legal, because all events occur
613    only when netlink is already locked.
614  */
615 
616 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
617 {
618 	struct fib_table *tb;
619 	struct fib_config cfg = {
620 		.fc_protocol = RTPROT_KERNEL,
621 		.fc_type = type,
622 		.fc_dst = dst,
623 		.fc_dst_len = dst_len,
624 		.fc_prefsrc = ifa->ifa_local,
625 		.fc_oif = ifa->ifa_dev->dev->ifindex,
626 		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
627 	};
628 
629 	if (type == RTN_UNICAST)
630 		tb = fib_new_table(RT_TABLE_MAIN);
631 	else
632 		tb = fib_new_table(RT_TABLE_LOCAL);
633 
634 	if (tb == NULL)
635 		return;
636 
637 	cfg.fc_table = tb->tb_id;
638 
639 	if (type != RTN_LOCAL)
640 		cfg.fc_scope = RT_SCOPE_LINK;
641 	else
642 		cfg.fc_scope = RT_SCOPE_HOST;
643 
644 	if (cmd == RTM_NEWROUTE)
645 		tb->tb_insert(tb, &cfg);
646 	else
647 		tb->tb_delete(tb, &cfg);
648 }
649 
650 void fib_add_ifaddr(struct in_ifaddr *ifa)
651 {
652 	struct in_device *in_dev = ifa->ifa_dev;
653 	struct net_device *dev = in_dev->dev;
654 	struct in_ifaddr *prim = ifa;
655 	__be32 mask = ifa->ifa_mask;
656 	__be32 addr = ifa->ifa_local;
657 	__be32 prefix = ifa->ifa_address&mask;
658 
659 	if (ifa->ifa_flags&IFA_F_SECONDARY) {
660 		prim = inet_ifa_byprefix(in_dev, prefix, mask);
661 		if (prim == NULL) {
662 			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
663 			return;
664 		}
665 	}
666 
667 	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
668 
669 	if (!(dev->flags&IFF_UP))
670 		return;
671 
672 	/* Add broadcast address, if it is explicitly assigned. */
673 	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
674 		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
675 
676 	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
677 	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
678 		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
679 			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
680 
681 		/* Add network specific broadcasts, when it takes a sense */
682 		if (ifa->ifa_prefixlen < 31) {
683 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
684 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
685 		}
686 	}
687 }
688 
689 static void fib_del_ifaddr(struct in_ifaddr *ifa)
690 {
691 	struct in_device *in_dev = ifa->ifa_dev;
692 	struct net_device *dev = in_dev->dev;
693 	struct in_ifaddr *ifa1;
694 	struct in_ifaddr *prim = ifa;
695 	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
696 	__be32 any = ifa->ifa_address&ifa->ifa_mask;
697 #define LOCAL_OK	1
698 #define BRD_OK		2
699 #define BRD0_OK		4
700 #define BRD1_OK		8
701 	unsigned ok = 0;
702 
703 	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
704 		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
705 			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
706 	else {
707 		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
708 		if (prim == NULL) {
709 			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
710 			return;
711 		}
712 	}
713 
714 	/* Deletion is more complicated than add.
715 	   We should take care of not to delete too much :-)
716 
717 	   Scan address list to be sure that addresses are really gone.
718 	 */
719 
720 	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
721 		if (ifa->ifa_local == ifa1->ifa_local)
722 			ok |= LOCAL_OK;
723 		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
724 			ok |= BRD_OK;
725 		if (brd == ifa1->ifa_broadcast)
726 			ok |= BRD1_OK;
727 		if (any == ifa1->ifa_broadcast)
728 			ok |= BRD0_OK;
729 	}
730 
731 	if (!(ok&BRD_OK))
732 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
733 	if (!(ok&BRD1_OK))
734 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
735 	if (!(ok&BRD0_OK))
736 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
737 	if (!(ok&LOCAL_OK)) {
738 		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
739 
740 		/* Check, that this local address finally disappeared. */
741 		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
742 			/* And the last, but not the least thing.
743 			   We must flush stray FIB entries.
744 
745 			   First of all, we scan fib_info list searching
746 			   for stray nexthop entries, then ignite fib_flush.
747 			*/
748 			if (fib_sync_down(ifa->ifa_local, NULL, 0))
749 				fib_flush();
750 		}
751 	}
752 #undef LOCAL_OK
753 #undef BRD_OK
754 #undef BRD0_OK
755 #undef BRD1_OK
756 }
757 
758 static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
759 {
760 
761 	struct fib_result       res;
762 	struct flowi            fl = { .mark = frn->fl_mark,
763 				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
764 							    .tos = frn->fl_tos,
765 							    .scope = frn->fl_scope } } };
766 
767 #ifdef CONFIG_IP_MULTIPLE_TABLES
768 	res.r = NULL;
769 #endif
770 
771 	frn->err = -ENOENT;
772 	if (tb) {
773 		local_bh_disable();
774 
775 		frn->tb_id = tb->tb_id;
776 		frn->err = tb->tb_lookup(tb, &fl, &res);
777 
778 		if (!frn->err) {
779 			frn->prefixlen = res.prefixlen;
780 			frn->nh_sel = res.nh_sel;
781 			frn->type = res.type;
782 			frn->scope = res.scope;
783 			fib_res_put(&res);
784 		}
785 		local_bh_enable();
786 	}
787 }
788 
789 static void nl_fib_input(struct sk_buff *skb)
790 {
791 	struct fib_result_nl *frn;
792 	struct nlmsghdr *nlh;
793 	struct fib_table *tb;
794 	u32 pid;
795 
796 	nlh = nlmsg_hdr(skb);
797 	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
798 	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
799 		kfree_skb(skb);
800 		return;
801 	}
802 
803 	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
804 	tb = fib_get_table(frn->tb_id_in);
805 
806 	nl_fib_lookup(frn, tb);
807 
808 	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
809 	NETLINK_CB(skb).pid = 0;         /* from kernel */
810 	NETLINK_CB(skb).dst_group = 0;  /* unicast */
811 	netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
812 }
813 
814 static void nl_fib_lookup_init(void)
815 {
816 	fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0,
817 				      nl_fib_input, NULL, THIS_MODULE);
818 }
819 
/*
 * Take IPv4 routing down on a device: sync the FIB for the device
 * (flushing the tables if fib_sync_down() reports something to flush),
 * flush the routing cache and call arp_ifdown() for the device.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int need_flush = fib_sync_down(0, dev, force);

	if (need_flush)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
827 
828 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
829 {
830 	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
831 
832 	switch (event) {
833 	case NETDEV_UP:
834 		fib_add_ifaddr(ifa);
835 #ifdef CONFIG_IP_ROUTE_MULTIPATH
836 		fib_sync_up(ifa->ifa_dev->dev);
837 #endif
838 		rt_cache_flush(-1);
839 		break;
840 	case NETDEV_DOWN:
841 		fib_del_ifaddr(ifa);
842 		if (ifa->ifa_dev->ifa_list == NULL) {
843 			/* Last address was deleted from this interface.
844 			   Disable IP.
845 			 */
846 			fib_disable_ip(ifa->ifa_dev->dev, 1);
847 		} else {
848 			rt_cache_flush(-1);
849 		}
850 		break;
851 	}
852 	return NOTIFY_DONE;
853 }
854 
855 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
856 {
857 	struct net_device *dev = ptr;
858 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
859 
860 	if (dev->nd_net != &init_net)
861 		return NOTIFY_DONE;
862 
863 	if (event == NETDEV_UNREGISTER) {
864 		fib_disable_ip(dev, 2);
865 		return NOTIFY_DONE;
866 	}
867 
868 	if (!in_dev)
869 		return NOTIFY_DONE;
870 
871 	switch (event) {
872 	case NETDEV_UP:
873 		for_ifa(in_dev) {
874 			fib_add_ifaddr(ifa);
875 		} endfor_ifa(in_dev);
876 #ifdef CONFIG_IP_ROUTE_MULTIPATH
877 		fib_sync_up(dev);
878 #endif
879 		rt_cache_flush(-1);
880 		break;
881 	case NETDEV_DOWN:
882 		fib_disable_ip(dev, 0);
883 		break;
884 	case NETDEV_CHANGEMTU:
885 	case NETDEV_CHANGE:
886 		rt_cache_flush(0);
887 		break;
888 	}
889 	return NOTIFY_DONE;
890 }
891 
892 static struct notifier_block fib_inetaddr_notifier = {
893 	.notifier_call =fib_inetaddr_event,
894 };
895 
896 static struct notifier_block fib_netdev_notifier = {
897 	.notifier_call =fib_netdev_event,
898 };
899 
900 void __init ip_fib_init(void)
901 {
902 	unsigned int i;
903 
904 	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
905 		INIT_HLIST_HEAD(&fib_table_hash[i]);
906 #ifndef CONFIG_IP_MULTIPLE_TABLES
907 	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
908 	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
909 	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
910 	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
911 #else
912 	fib4_rules_init();
913 #endif
914 
915 	register_netdevice_notifier(&fib_netdev_notifier);
916 	register_inetaddr_notifier(&fib_inetaddr_notifier);
917 	nl_fib_lookup_init();
918 
919 	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
920 	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
921 	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
922 }
923 
924 EXPORT_SYMBOL(inet_addr_type);
925 EXPORT_SYMBOL(ip_dev_find);
926