xref: /openbmc/linux/net/ipv4/fib_frontend.c (revision 64c70b1c)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		IPv4 Forwarding Information Base: FIB frontend.
7  *
8  * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9  *
10  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *		This program is free software; you can redistribute it and/or
13  *		modify it under the terms of the GNU General Public License
14  *		as published by the Free Software Foundation; either version
15  *		2 of the License, or (at your option) any later version.
16  */
17 
18 #include <linux/module.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <linux/bitops.h>
22 #include <linux/capability.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/inetdevice.h>
33 #include <linux/netdevice.h>
34 #include <linux/if_addr.h>
35 #include <linux/if_arp.h>
36 #include <linux/skbuff.h>
37 #include <linux/init.h>
38 #include <linux/list.h>
39 
40 #include <net/ip.h>
41 #include <net/protocol.h>
42 #include <net/route.h>
43 #include <net/tcp.h>
44 #include <net/sock.h>
45 #include <net/icmp.h>
46 #include <net/arp.h>
47 #include <net/ip_fib.h>
48 #include <net/rtnetlink.h>
49 
/* Debug print helper: forwards its arguments to printk() at KERN_DEBUG. */
#define FFprint(fmt...) printk(KERN_DEBUG fmt)
51 
52 #ifndef CONFIG_IP_MULTIPLE_TABLES
53 
/*
 * Without CONFIG_IP_MULTIPLE_TABLES only two tables are defined here, the
 * local and the main one, reachable through these global pointers.
 */
struct fib_table *ip_fib_local_table;
struct fib_table *ip_fib_main_table;

/* A single hash bucket: both tables are chained on fib_table_hash[0]. */
#define FIB_TABLE_HASHSZ 1
static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
59 
60 #else
61 
/*
 * Number of hash buckets for FIB tables.  Lookups mask the table id with
 * (FIB_TABLE_HASHSZ - 1), so the value must stay a power of two.
 */
#define FIB_TABLE_HASHSZ 256
static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
64 
65 struct fib_table *fib_new_table(u32 id)
66 {
67 	struct fib_table *tb;
68 	unsigned int h;
69 
70 	if (id == 0)
71 		id = RT_TABLE_MAIN;
72 	tb = fib_get_table(id);
73 	if (tb)
74 		return tb;
75 	tb = fib_hash_init(id);
76 	if (!tb)
77 		return NULL;
78 	h = id & (FIB_TABLE_HASHSZ - 1);
79 	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
80 	return tb;
81 }
82 
83 struct fib_table *fib_get_table(u32 id)
84 {
85 	struct fib_table *tb;
86 	struct hlist_node *node;
87 	unsigned int h;
88 
89 	if (id == 0)
90 		id = RT_TABLE_MAIN;
91 	h = id & (FIB_TABLE_HASHSZ - 1);
92 	rcu_read_lock();
93 	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
94 		if (tb->tb_id == id) {
95 			rcu_read_unlock();
96 			return tb;
97 		}
98 	}
99 	rcu_read_unlock();
100 	return NULL;
101 }
102 #endif /* CONFIG_IP_MULTIPLE_TABLES */
103 
104 static void fib_flush(void)
105 {
106 	int flushed = 0;
107 	struct fib_table *tb;
108 	struct hlist_node *node;
109 	unsigned int h;
110 
111 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
112 		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
113 			flushed += tb->tb_flush(tb);
114 	}
115 
116 	if (flushed)
117 		rt_cache_flush(-1);
118 }
119 
120 /*
121  *	Find the first device with a given source address.
122  */
123 
124 struct net_device * ip_dev_find(__be32 addr)
125 {
126 	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
127 	struct fib_result res;
128 	struct net_device *dev = NULL;
129 
130 #ifdef CONFIG_IP_MULTIPLE_TABLES
131 	res.r = NULL;
132 #endif
133 
134 	if (!ip_fib_local_table ||
135 	    ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
136 		return NULL;
137 	if (res.type != RTN_LOCAL)
138 		goto out;
139 	dev = FIB_RES_DEV(res);
140 
141 	if (dev)
142 		dev_hold(dev);
143 out:
144 	fib_res_put(&res);
145 	return dev;
146 }
147 
148 unsigned inet_addr_type(__be32 addr)
149 {
150 	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
151 	struct fib_result	res;
152 	unsigned ret = RTN_BROADCAST;
153 
154 	if (ZERONET(addr) || BADCLASS(addr))
155 		return RTN_BROADCAST;
156 	if (MULTICAST(addr))
157 		return RTN_MULTICAST;
158 
159 #ifdef CONFIG_IP_MULTIPLE_TABLES
160 	res.r = NULL;
161 #endif
162 
163 	if (ip_fib_local_table) {
164 		ret = RTN_UNICAST;
165 		if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
166 						   &fl, &res)) {
167 			ret = res.type;
168 			fib_res_put(&res);
169 		}
170 	}
171 	return ret;
172 }
173 
174 /* Given (packet source, input interface) and optional (dst, oif, tos):
175    - (main) check, that source is valid i.e. not broadcast or our local
176      address.
177    - figure out what "logical" interface this packet arrived
178      and calculate "specific destination" address.
179    - check, that packet arrived from expected physical interface.
180  */
181 
182 int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
183 			struct net_device *dev, __be32 *spec_dst, u32 *itag)
184 {
185 	struct in_device *in_dev;
186 	struct flowi fl = { .nl_u = { .ip4_u =
187 				      { .daddr = src,
188 					.saddr = dst,
189 					.tos = tos } },
190 			    .iif = oif };
191 	struct fib_result res;
192 	int no_addr, rpf;
193 	int ret;
194 
195 	no_addr = rpf = 0;
196 	rcu_read_lock();
197 	in_dev = __in_dev_get_rcu(dev);
198 	if (in_dev) {
199 		no_addr = in_dev->ifa_list == NULL;
200 		rpf = IN_DEV_RPFILTER(in_dev);
201 	}
202 	rcu_read_unlock();
203 
204 	if (in_dev == NULL)
205 		goto e_inval;
206 
207 	if (fib_lookup(&fl, &res))
208 		goto last_resort;
209 	if (res.type != RTN_UNICAST)
210 		goto e_inval_res;
211 	*spec_dst = FIB_RES_PREFSRC(res);
212 	fib_combine_itag(itag, &res);
213 #ifdef CONFIG_IP_ROUTE_MULTIPATH
214 	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
215 #else
216 	if (FIB_RES_DEV(res) == dev)
217 #endif
218 	{
219 		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
220 		fib_res_put(&res);
221 		return ret;
222 	}
223 	fib_res_put(&res);
224 	if (no_addr)
225 		goto last_resort;
226 	if (rpf)
227 		goto e_inval;
228 	fl.oif = dev->ifindex;
229 
230 	ret = 0;
231 	if (fib_lookup(&fl, &res) == 0) {
232 		if (res.type == RTN_UNICAST) {
233 			*spec_dst = FIB_RES_PREFSRC(res);
234 			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
235 		}
236 		fib_res_put(&res);
237 	}
238 	return ret;
239 
240 last_resort:
241 	if (rpf)
242 		goto e_inval;
243 	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
244 	*itag = 0;
245 	return 0;
246 
247 e_inval_res:
248 	fib_res_put(&res);
249 e_inval:
250 	return -EINVAL;
251 }
252 
253 static inline __be32 sk_extract_addr(struct sockaddr *addr)
254 {
255 	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
256 }
257 
258 static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
259 {
260 	struct nlattr *nla;
261 
262 	nla = (struct nlattr *) ((char *) mx + len);
263 	nla->nla_type = type;
264 	nla->nla_len = nla_attr_size(4);
265 	*(u32 *) nla_data(nla) = value;
266 
267 	return len + nla_total_size(4);
268 }
269 
270 static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
271 				 struct fib_config *cfg)
272 {
273 	__be32 addr;
274 	int plen;
275 
276 	memset(cfg, 0, sizeof(*cfg));
277 
278 	if (rt->rt_dst.sa_family != AF_INET)
279 		return -EAFNOSUPPORT;
280 
281 	/*
282 	 * Check mask for validity:
283 	 * a) it must be contiguous.
284 	 * b) destination must have all host bits clear.
285 	 * c) if application forgot to set correct family (AF_INET),
286 	 *    reject request unless it is absolutely clear i.e.
287 	 *    both family and mask are zero.
288 	 */
289 	plen = 32;
290 	addr = sk_extract_addr(&rt->rt_dst);
291 	if (!(rt->rt_flags & RTF_HOST)) {
292 		__be32 mask = sk_extract_addr(&rt->rt_genmask);
293 
294 		if (rt->rt_genmask.sa_family != AF_INET) {
295 			if (mask || rt->rt_genmask.sa_family)
296 				return -EAFNOSUPPORT;
297 		}
298 
299 		if (bad_mask(mask, addr))
300 			return -EINVAL;
301 
302 		plen = inet_mask_len(mask);
303 	}
304 
305 	cfg->fc_dst_len = plen;
306 	cfg->fc_dst = addr;
307 
308 	if (cmd != SIOCDELRT) {
309 		cfg->fc_nlflags = NLM_F_CREATE;
310 		cfg->fc_protocol = RTPROT_BOOT;
311 	}
312 
313 	if (rt->rt_metric)
314 		cfg->fc_priority = rt->rt_metric - 1;
315 
316 	if (rt->rt_flags & RTF_REJECT) {
317 		cfg->fc_scope = RT_SCOPE_HOST;
318 		cfg->fc_type = RTN_UNREACHABLE;
319 		return 0;
320 	}
321 
322 	cfg->fc_scope = RT_SCOPE_NOWHERE;
323 	cfg->fc_type = RTN_UNICAST;
324 
325 	if (rt->rt_dev) {
326 		char *colon;
327 		struct net_device *dev;
328 		char devname[IFNAMSIZ];
329 
330 		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
331 			return -EFAULT;
332 
333 		devname[IFNAMSIZ-1] = 0;
334 		colon = strchr(devname, ':');
335 		if (colon)
336 			*colon = 0;
337 		dev = __dev_get_by_name(devname);
338 		if (!dev)
339 			return -ENODEV;
340 		cfg->fc_oif = dev->ifindex;
341 		if (colon) {
342 			struct in_ifaddr *ifa;
343 			struct in_device *in_dev = __in_dev_get_rtnl(dev);
344 			if (!in_dev)
345 				return -ENODEV;
346 			*colon = ':';
347 			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
348 				if (strcmp(ifa->ifa_label, devname) == 0)
349 					break;
350 			if (ifa == NULL)
351 				return -ENODEV;
352 			cfg->fc_prefsrc = ifa->ifa_local;
353 		}
354 	}
355 
356 	addr = sk_extract_addr(&rt->rt_gateway);
357 	if (rt->rt_gateway.sa_family == AF_INET && addr) {
358 		cfg->fc_gw = addr;
359 		if (rt->rt_flags & RTF_GATEWAY &&
360 		    inet_addr_type(addr) == RTN_UNICAST)
361 			cfg->fc_scope = RT_SCOPE_UNIVERSE;
362 	}
363 
364 	if (cmd == SIOCDELRT)
365 		return 0;
366 
367 	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
368 		return -EINVAL;
369 
370 	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
371 		cfg->fc_scope = RT_SCOPE_LINK;
372 
373 	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
374 		struct nlattr *mx;
375 		int len = 0;
376 
377 		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
378 		if (mx == NULL)
379 			return -ENOMEM;
380 
381 		if (rt->rt_flags & RTF_MTU)
382 			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
383 
384 		if (rt->rt_flags & RTF_WINDOW)
385 			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
386 
387 		if (rt->rt_flags & RTF_IRTT)
388 			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
389 
390 		cfg->fc_mx = mx;
391 		cfg->fc_mx_len = len;
392 	}
393 
394 	return 0;
395 }
396 
397 /*
398  *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
399  */
400 
401 int ip_rt_ioctl(unsigned int cmd, void __user *arg)
402 {
403 	struct fib_config cfg;
404 	struct rtentry rt;
405 	int err;
406 
407 	switch (cmd) {
408 	case SIOCADDRT:		/* Add a route */
409 	case SIOCDELRT:		/* Delete a route */
410 		if (!capable(CAP_NET_ADMIN))
411 			return -EPERM;
412 
413 		if (copy_from_user(&rt, arg, sizeof(rt)))
414 			return -EFAULT;
415 
416 		rtnl_lock();
417 		err = rtentry_to_fib_config(cmd, &rt, &cfg);
418 		if (err == 0) {
419 			struct fib_table *tb;
420 
421 			if (cmd == SIOCDELRT) {
422 				tb = fib_get_table(cfg.fc_table);
423 				if (tb)
424 					err = tb->tb_delete(tb, &cfg);
425 				else
426 					err = -ESRCH;
427 			} else {
428 				tb = fib_new_table(cfg.fc_table);
429 				if (tb)
430 					err = tb->tb_insert(tb, &cfg);
431 				else
432 					err = -ENOBUFS;
433 			}
434 
435 			/* allocated by rtentry_to_fib_config() */
436 			kfree(cfg.fc_mx);
437 		}
438 		rtnl_unlock();
439 		return err;
440 	}
441 	return -EINVAL;
442 }
443 
444 const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
445 	[RTA_DST]		= { .type = NLA_U32 },
446 	[RTA_SRC]		= { .type = NLA_U32 },
447 	[RTA_IIF]		= { .type = NLA_U32 },
448 	[RTA_OIF]		= { .type = NLA_U32 },
449 	[RTA_GATEWAY]		= { .type = NLA_U32 },
450 	[RTA_PRIORITY]		= { .type = NLA_U32 },
451 	[RTA_PREFSRC]		= { .type = NLA_U32 },
452 	[RTA_METRICS]		= { .type = NLA_NESTED },
453 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
454 	[RTA_PROTOINFO]		= { .type = NLA_U32 },
455 	[RTA_FLOW]		= { .type = NLA_U32 },
456 	[RTA_MP_ALGO]		= { .type = NLA_U32 },
457 };
458 
459 static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
460 			     struct fib_config *cfg)
461 {
462 	struct nlattr *attr;
463 	int err, remaining;
464 	struct rtmsg *rtm;
465 
466 	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
467 	if (err < 0)
468 		goto errout;
469 
470 	memset(cfg, 0, sizeof(*cfg));
471 
472 	rtm = nlmsg_data(nlh);
473 	cfg->fc_dst_len = rtm->rtm_dst_len;
474 	cfg->fc_tos = rtm->rtm_tos;
475 	cfg->fc_table = rtm->rtm_table;
476 	cfg->fc_protocol = rtm->rtm_protocol;
477 	cfg->fc_scope = rtm->rtm_scope;
478 	cfg->fc_type = rtm->rtm_type;
479 	cfg->fc_flags = rtm->rtm_flags;
480 	cfg->fc_nlflags = nlh->nlmsg_flags;
481 
482 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
483 	cfg->fc_nlinfo.nlh = nlh;
484 
485 	if (cfg->fc_type > RTN_MAX) {
486 		err = -EINVAL;
487 		goto errout;
488 	}
489 
490 	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
491 		switch (attr->nla_type) {
492 		case RTA_DST:
493 			cfg->fc_dst = nla_get_be32(attr);
494 			break;
495 		case RTA_OIF:
496 			cfg->fc_oif = nla_get_u32(attr);
497 			break;
498 		case RTA_GATEWAY:
499 			cfg->fc_gw = nla_get_be32(attr);
500 			break;
501 		case RTA_PRIORITY:
502 			cfg->fc_priority = nla_get_u32(attr);
503 			break;
504 		case RTA_PREFSRC:
505 			cfg->fc_prefsrc = nla_get_be32(attr);
506 			break;
507 		case RTA_METRICS:
508 			cfg->fc_mx = nla_data(attr);
509 			cfg->fc_mx_len = nla_len(attr);
510 			break;
511 		case RTA_MULTIPATH:
512 			cfg->fc_mp = nla_data(attr);
513 			cfg->fc_mp_len = nla_len(attr);
514 			break;
515 		case RTA_FLOW:
516 			cfg->fc_flow = nla_get_u32(attr);
517 			break;
518 		case RTA_MP_ALGO:
519 			cfg->fc_mp_alg = nla_get_u32(attr);
520 			break;
521 		case RTA_TABLE:
522 			cfg->fc_table = nla_get_u32(attr);
523 			break;
524 		}
525 	}
526 
527 	return 0;
528 errout:
529 	return err;
530 }
531 
532 static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
533 {
534 	struct fib_config cfg;
535 	struct fib_table *tb;
536 	int err;
537 
538 	err = rtm_to_fib_config(skb, nlh, &cfg);
539 	if (err < 0)
540 		goto errout;
541 
542 	tb = fib_get_table(cfg.fc_table);
543 	if (tb == NULL) {
544 		err = -ESRCH;
545 		goto errout;
546 	}
547 
548 	err = tb->tb_delete(tb, &cfg);
549 errout:
550 	return err;
551 }
552 
553 static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
554 {
555 	struct fib_config cfg;
556 	struct fib_table *tb;
557 	int err;
558 
559 	err = rtm_to_fib_config(skb, nlh, &cfg);
560 	if (err < 0)
561 		goto errout;
562 
563 	tb = fib_new_table(cfg.fc_table);
564 	if (tb == NULL) {
565 		err = -ENOBUFS;
566 		goto errout;
567 	}
568 
569 	err = tb->tb_insert(tb, &cfg);
570 errout:
571 	return err;
572 }
573 
574 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
575 {
576 	unsigned int h, s_h;
577 	unsigned int e = 0, s_e;
578 	struct fib_table *tb;
579 	struct hlist_node *node;
580 	int dumped = 0;
581 
582 	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
583 	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
584 		return ip_rt_dump(skb, cb);
585 
586 	s_h = cb->args[0];
587 	s_e = cb->args[1];
588 
589 	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
590 		e = 0;
591 		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
592 			if (e < s_e)
593 				goto next;
594 			if (dumped)
595 				memset(&cb->args[2], 0, sizeof(cb->args) -
596 						 2 * sizeof(cb->args[0]));
597 			if (tb->tb_dump(tb, skb, cb) < 0)
598 				goto out;
599 			dumped = 1;
600 next:
601 			e++;
602 		}
603 	}
604 out:
605 	cb->args[1] = e;
606 	cb->args[0] = h;
607 
608 	return skb->len;
609 }
610 
611 /* Prepare and feed intra-kernel routing request.
612    Really, it should be netlink message, but :-( netlink
613    can be not configured, so that we feed it directly
614    to fib engine. It is legal, because all events occur
615    only when netlink is already locked.
616  */
617 
618 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
619 {
620 	struct fib_table *tb;
621 	struct fib_config cfg = {
622 		.fc_protocol = RTPROT_KERNEL,
623 		.fc_type = type,
624 		.fc_dst = dst,
625 		.fc_dst_len = dst_len,
626 		.fc_prefsrc = ifa->ifa_local,
627 		.fc_oif = ifa->ifa_dev->dev->ifindex,
628 		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
629 	};
630 
631 	if (type == RTN_UNICAST)
632 		tb = fib_new_table(RT_TABLE_MAIN);
633 	else
634 		tb = fib_new_table(RT_TABLE_LOCAL);
635 
636 	if (tb == NULL)
637 		return;
638 
639 	cfg.fc_table = tb->tb_id;
640 
641 	if (type != RTN_LOCAL)
642 		cfg.fc_scope = RT_SCOPE_LINK;
643 	else
644 		cfg.fc_scope = RT_SCOPE_HOST;
645 
646 	if (cmd == RTM_NEWROUTE)
647 		tb->tb_insert(tb, &cfg);
648 	else
649 		tb->tb_delete(tb, &cfg);
650 }
651 
652 void fib_add_ifaddr(struct in_ifaddr *ifa)
653 {
654 	struct in_device *in_dev = ifa->ifa_dev;
655 	struct net_device *dev = in_dev->dev;
656 	struct in_ifaddr *prim = ifa;
657 	__be32 mask = ifa->ifa_mask;
658 	__be32 addr = ifa->ifa_local;
659 	__be32 prefix = ifa->ifa_address&mask;
660 
661 	if (ifa->ifa_flags&IFA_F_SECONDARY) {
662 		prim = inet_ifa_byprefix(in_dev, prefix, mask);
663 		if (prim == NULL) {
664 			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
665 			return;
666 		}
667 	}
668 
669 	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
670 
671 	if (!(dev->flags&IFF_UP))
672 		return;
673 
674 	/* Add broadcast address, if it is explicitly assigned. */
675 	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
676 		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
677 
678 	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
679 	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
680 		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
681 			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
682 
683 		/* Add network specific broadcasts, when it takes a sense */
684 		if (ifa->ifa_prefixlen < 31) {
685 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
686 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
687 		}
688 	}
689 }
690 
691 static void fib_del_ifaddr(struct in_ifaddr *ifa)
692 {
693 	struct in_device *in_dev = ifa->ifa_dev;
694 	struct net_device *dev = in_dev->dev;
695 	struct in_ifaddr *ifa1;
696 	struct in_ifaddr *prim = ifa;
697 	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
698 	__be32 any = ifa->ifa_address&ifa->ifa_mask;
699 #define LOCAL_OK	1
700 #define BRD_OK		2
701 #define BRD0_OK		4
702 #define BRD1_OK		8
703 	unsigned ok = 0;
704 
705 	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
706 		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
707 			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
708 	else {
709 		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
710 		if (prim == NULL) {
711 			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
712 			return;
713 		}
714 	}
715 
716 	/* Deletion is more complicated than add.
717 	   We should take care of not to delete too much :-)
718 
719 	   Scan address list to be sure that addresses are really gone.
720 	 */
721 
722 	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
723 		if (ifa->ifa_local == ifa1->ifa_local)
724 			ok |= LOCAL_OK;
725 		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
726 			ok |= BRD_OK;
727 		if (brd == ifa1->ifa_broadcast)
728 			ok |= BRD1_OK;
729 		if (any == ifa1->ifa_broadcast)
730 			ok |= BRD0_OK;
731 	}
732 
733 	if (!(ok&BRD_OK))
734 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
735 	if (!(ok&BRD1_OK))
736 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
737 	if (!(ok&BRD0_OK))
738 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
739 	if (!(ok&LOCAL_OK)) {
740 		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
741 
742 		/* Check, that this local address finally disappeared. */
743 		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
744 			/* And the last, but not the least thing.
745 			   We must flush stray FIB entries.
746 
747 			   First of all, we scan fib_info list searching
748 			   for stray nexthop entries, then ignite fib_flush.
749 			*/
750 			if (fib_sync_down(ifa->ifa_local, NULL, 0))
751 				fib_flush();
752 		}
753 	}
754 #undef LOCAL_OK
755 #undef BRD_OK
756 #undef BRD0_OK
757 #undef BRD1_OK
758 }
759 
760 static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
761 {
762 
763 	struct fib_result       res;
764 	struct flowi            fl = { .mark = frn->fl_mark,
765 				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
766 							    .tos = frn->fl_tos,
767 							    .scope = frn->fl_scope } } };
768 
769 #ifdef CONFIG_IP_MULTIPLE_TABLES
770 	res.r = NULL;
771 #endif
772 
773 	frn->err = -ENOENT;
774 	if (tb) {
775 		local_bh_disable();
776 
777 		frn->tb_id = tb->tb_id;
778 		frn->err = tb->tb_lookup(tb, &fl, &res);
779 
780 		if (!frn->err) {
781 			frn->prefixlen = res.prefixlen;
782 			frn->nh_sel = res.nh_sel;
783 			frn->type = res.type;
784 			frn->scope = res.scope;
785 			fib_res_put(&res);
786 		}
787 		local_bh_enable();
788 	}
789 }
790 
791 static void nl_fib_input(struct sock *sk, int len)
792 {
793 	struct sk_buff *skb = NULL;
794 	struct nlmsghdr *nlh = NULL;
795 	struct fib_result_nl *frn;
796 	u32 pid;
797 	struct fib_table *tb;
798 
799 	skb = skb_dequeue(&sk->sk_receive_queue);
800 	if (skb == NULL)
801 		return;
802 
803 	nlh = nlmsg_hdr(skb);
804 	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
805 	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
806 		kfree_skb(skb);
807 		return;
808 	}
809 
810 	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
811 	tb = fib_get_table(frn->tb_id_in);
812 
813 	nl_fib_lookup(frn, tb);
814 
815 	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
816 	NETLINK_CB(skb).pid = 0;         /* from kernel */
817 	NETLINK_CB(skb).dst_group = 0;  /* unicast */
818 	netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
819 }
820 
821 static void nl_fib_lookup_init(void)
822 {
823       netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL,
824       			    THIS_MODULE);
825 }
826 
/*
 * Take IP routing down on a device: sync down the nexthops that use it
 * (flushing the FIB if fib_sync_down() asks for it), flush the routing
 * cache and tell ARP that the interface went down.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	const int need_flush = fib_sync_down(0, dev, force);

	if (need_flush)
		fib_flush();
	rt_cache_flush(0);
	arp_ifdown(dev);
}
834 
835 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
836 {
837 	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
838 
839 	switch (event) {
840 	case NETDEV_UP:
841 		fib_add_ifaddr(ifa);
842 #ifdef CONFIG_IP_ROUTE_MULTIPATH
843 		fib_sync_up(ifa->ifa_dev->dev);
844 #endif
845 		rt_cache_flush(-1);
846 		break;
847 	case NETDEV_DOWN:
848 		fib_del_ifaddr(ifa);
849 		if (ifa->ifa_dev->ifa_list == NULL) {
850 			/* Last address was deleted from this interface.
851 			   Disable IP.
852 			 */
853 			fib_disable_ip(ifa->ifa_dev->dev, 1);
854 		} else {
855 			rt_cache_flush(-1);
856 		}
857 		break;
858 	}
859 	return NOTIFY_DONE;
860 }
861 
862 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
863 {
864 	struct net_device *dev = ptr;
865 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
866 
867 	if (event == NETDEV_UNREGISTER) {
868 		fib_disable_ip(dev, 2);
869 		return NOTIFY_DONE;
870 	}
871 
872 	if (!in_dev)
873 		return NOTIFY_DONE;
874 
875 	switch (event) {
876 	case NETDEV_UP:
877 		for_ifa(in_dev) {
878 			fib_add_ifaddr(ifa);
879 		} endfor_ifa(in_dev);
880 #ifdef CONFIG_IP_ROUTE_MULTIPATH
881 		fib_sync_up(dev);
882 #endif
883 		rt_cache_flush(-1);
884 		break;
885 	case NETDEV_DOWN:
886 		fib_disable_ip(dev, 0);
887 		break;
888 	case NETDEV_CHANGEMTU:
889 	case NETDEV_CHANGE:
890 		rt_cache_flush(0);
891 		break;
892 	}
893 	return NOTIFY_DONE;
894 }
895 
896 static struct notifier_block fib_inetaddr_notifier = {
897 	.notifier_call =fib_inetaddr_event,
898 };
899 
900 static struct notifier_block fib_netdev_notifier = {
901 	.notifier_call =fib_netdev_event,
902 };
903 
904 void __init ip_fib_init(void)
905 {
906 	unsigned int i;
907 
908 	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
909 		INIT_HLIST_HEAD(&fib_table_hash[i]);
910 #ifndef CONFIG_IP_MULTIPLE_TABLES
911 	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
912 	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
913 	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
914 	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
915 #else
916 	fib4_rules_init();
917 #endif
918 
919 	register_netdevice_notifier(&fib_netdev_notifier);
920 	register_inetaddr_notifier(&fib_inetaddr_notifier);
921 	nl_fib_lookup_init();
922 
923 	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
924 	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
925 	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
926 }
927 
/* Address classification and device lookup are exported for use by modules. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(ip_dev_find);
930