xref: /openbmc/linux/net/ipv4/fib_frontend.c (revision f42b3800)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		IPv4 Forwarding Information Base: FIB frontend.
7  *
8  * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9  *
10  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *		This program is free software; you can redistribute it and/or
13  *		modify it under the terms of the GNU General Public License
14  *		as published by the Free Software Foundation; either version
15  *		2 of the License, or (at your option) any later version.
16  */
17 
18 #include <linux/module.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <linux/bitops.h>
22 #include <linux/capability.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/inetdevice.h>
33 #include <linux/netdevice.h>
34 #include <linux/if_addr.h>
35 #include <linux/if_arp.h>
36 #include <linux/skbuff.h>
37 #include <linux/init.h>
38 #include <linux/list.h>
39 
40 #include <net/ip.h>
41 #include <net/protocol.h>
42 #include <net/route.h>
43 #include <net/tcp.h>
44 #include <net/sock.h>
45 #include <net/icmp.h>
46 #include <net/arp.h>
47 #include <net/ip_fib.h>
48 #include <net/rtnetlink.h>
49 
50 #ifndef CONFIG_IP_MULTIPLE_TABLES
51 
52 static int __net_init fib4_rules_init(struct net *net)
53 {
54 	struct fib_table *local_table, *main_table;
55 
56 	local_table = fib_hash_table(RT_TABLE_LOCAL);
57 	if (local_table == NULL)
58 		return -ENOMEM;
59 
60 	main_table  = fib_hash_table(RT_TABLE_MAIN);
61 	if (main_table == NULL)
62 		goto fail;
63 
64 	hlist_add_head_rcu(&local_table->tb_hlist,
65 				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
66 	hlist_add_head_rcu(&main_table->tb_hlist,
67 				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
68 	return 0;
69 
70 fail:
71 	kfree(local_table);
72 	return -ENOMEM;
73 }
74 #else
75 
76 struct fib_table *fib_new_table(struct net *net, u32 id)
77 {
78 	struct fib_table *tb;
79 	unsigned int h;
80 
81 	if (id == 0)
82 		id = RT_TABLE_MAIN;
83 	tb = fib_get_table(net, id);
84 	if (tb)
85 		return tb;
86 
87 	tb = fib_hash_table(id);
88 	if (!tb)
89 		return NULL;
90 	h = id & (FIB_TABLE_HASHSZ - 1);
91 	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
92 	return tb;
93 }
94 
95 struct fib_table *fib_get_table(struct net *net, u32 id)
96 {
97 	struct fib_table *tb;
98 	struct hlist_node *node;
99 	struct hlist_head *head;
100 	unsigned int h;
101 
102 	if (id == 0)
103 		id = RT_TABLE_MAIN;
104 	h = id & (FIB_TABLE_HASHSZ - 1);
105 
106 	rcu_read_lock();
107 	head = &net->ipv4.fib_table_hash[h];
108 	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
109 		if (tb->tb_id == id) {
110 			rcu_read_unlock();
111 			return tb;
112 		}
113 	}
114 	rcu_read_unlock();
115 	return NULL;
116 }
117 #endif /* CONFIG_IP_MULTIPLE_TABLES */
118 
119 void fib_select_default(struct net *net,
120 			const struct flowi *flp, struct fib_result *res)
121 {
122 	struct fib_table *tb;
123 	int table = RT_TABLE_MAIN;
124 #ifdef CONFIG_IP_MULTIPLE_TABLES
125 	if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
126 		return;
127 	table = res->r->table;
128 #endif
129 	tb = fib_get_table(net, table);
130 	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
131 		tb->tb_select_default(tb, flp, res);
132 }
133 
134 static void fib_flush(struct net *net)
135 {
136 	int flushed = 0;
137 	struct fib_table *tb;
138 	struct hlist_node *node;
139 	struct hlist_head *head;
140 	unsigned int h;
141 
142 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
143 		head = &net->ipv4.fib_table_hash[h];
144 		hlist_for_each_entry(tb, node, head, tb_hlist)
145 			flushed += tb->tb_flush(tb);
146 	}
147 
148 	if (flushed)
149 		rt_cache_flush(-1);
150 }
151 
152 /*
153  *	Find the first device with a given source address.
154  */
155 
156 struct net_device * ip_dev_find(struct net *net, __be32 addr)
157 {
158 	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
159 	struct fib_result res;
160 	struct net_device *dev = NULL;
161 	struct fib_table *local_table;
162 
163 #ifdef CONFIG_IP_MULTIPLE_TABLES
164 	res.r = NULL;
165 #endif
166 
167 	local_table = fib_get_table(net, RT_TABLE_LOCAL);
168 	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
169 		return NULL;
170 	if (res.type != RTN_LOCAL)
171 		goto out;
172 	dev = FIB_RES_DEV(res);
173 
174 	if (dev)
175 		dev_hold(dev);
176 out:
177 	fib_res_put(&res);
178 	return dev;
179 }
180 
181 /*
 * Find address type as if only "dev" was present in the system. If
 * dev is NULL then all interfaces are taken into consideration.
184  */
185 static inline unsigned __inet_dev_addr_type(struct net *net,
186 					    const struct net_device *dev,
187 					    __be32 addr)
188 {
189 	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
190 	struct fib_result	res;
191 	unsigned ret = RTN_BROADCAST;
192 	struct fib_table *local_table;
193 
194 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
195 		return RTN_BROADCAST;
196 	if (ipv4_is_multicast(addr))
197 		return RTN_MULTICAST;
198 
199 #ifdef CONFIG_IP_MULTIPLE_TABLES
200 	res.r = NULL;
201 #endif
202 
203 	local_table = fib_get_table(net, RT_TABLE_LOCAL);
204 	if (local_table) {
205 		ret = RTN_UNICAST;
206 		if (!local_table->tb_lookup(local_table, &fl, &res)) {
207 			if (!dev || dev == res.fi->fib_dev)
208 				ret = res.type;
209 			fib_res_put(&res);
210 		}
211 	}
212 	return ret;
213 }
214 
215 unsigned int inet_addr_type(struct net *net, __be32 addr)
216 {
217 	return __inet_dev_addr_type(net, NULL, addr);
218 }
219 
220 unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
221 				__be32 addr)
222 {
223        return __inet_dev_addr_type(net, dev, addr);
224 }
225 
226 /* Given (packet source, input interface) and optional (dst, oif, tos):
227    - (main) check, that source is valid i.e. not broadcast or our local
228      address.
229    - figure out what "logical" interface this packet arrived
230      and calculate "specific destination" address.
231    - check, that packet arrived from expected physical interface.
232  */
233 
234 int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
235 			struct net_device *dev, __be32 *spec_dst, u32 *itag)
236 {
237 	struct in_device *in_dev;
238 	struct flowi fl = { .nl_u = { .ip4_u =
239 				      { .daddr = src,
240 					.saddr = dst,
241 					.tos = tos } },
242 			    .iif = oif };
243 	struct fib_result res;
244 	int no_addr, rpf;
245 	int ret;
246 	struct net *net;
247 
248 	no_addr = rpf = 0;
249 	rcu_read_lock();
250 	in_dev = __in_dev_get_rcu(dev);
251 	if (in_dev) {
252 		no_addr = in_dev->ifa_list == NULL;
253 		rpf = IN_DEV_RPFILTER(in_dev);
254 	}
255 	rcu_read_unlock();
256 
257 	if (in_dev == NULL)
258 		goto e_inval;
259 
260 	net = dev_net(dev);
261 	if (fib_lookup(net, &fl, &res))
262 		goto last_resort;
263 	if (res.type != RTN_UNICAST)
264 		goto e_inval_res;
265 	*spec_dst = FIB_RES_PREFSRC(res);
266 	fib_combine_itag(itag, &res);
267 #ifdef CONFIG_IP_ROUTE_MULTIPATH
268 	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
269 #else
270 	if (FIB_RES_DEV(res) == dev)
271 #endif
272 	{
273 		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
274 		fib_res_put(&res);
275 		return ret;
276 	}
277 	fib_res_put(&res);
278 	if (no_addr)
279 		goto last_resort;
280 	if (rpf)
281 		goto e_inval;
282 	fl.oif = dev->ifindex;
283 
284 	ret = 0;
285 	if (fib_lookup(net, &fl, &res) == 0) {
286 		if (res.type == RTN_UNICAST) {
287 			*spec_dst = FIB_RES_PREFSRC(res);
288 			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
289 		}
290 		fib_res_put(&res);
291 	}
292 	return ret;
293 
294 last_resort:
295 	if (rpf)
296 		goto e_inval;
297 	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
298 	*itag = 0;
299 	return 0;
300 
301 e_inval_res:
302 	fib_res_put(&res);
303 e_inval:
304 	return -EINVAL;
305 }
306 
/* Return the IPv4 address stored in @addr, viewed as a struct sockaddr_in. */
static inline __be32 sk_extract_addr(struct sockaddr *addr)
{
	const struct sockaddr_in *sin = (const struct sockaddr_in *) addr;

	return sin->sin_addr.s_addr;
}
311 
312 static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
313 {
314 	struct nlattr *nla;
315 
316 	nla = (struct nlattr *) ((char *) mx + len);
317 	nla->nla_type = type;
318 	nla->nla_len = nla_attr_size(4);
319 	*(u32 *) nla_data(nla) = value;
320 
321 	return len + nla_total_size(4);
322 }
323 
324 static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
325 				 struct fib_config *cfg)
326 {
327 	__be32 addr;
328 	int plen;
329 
330 	memset(cfg, 0, sizeof(*cfg));
331 	cfg->fc_nlinfo.nl_net = net;
332 
333 	if (rt->rt_dst.sa_family != AF_INET)
334 		return -EAFNOSUPPORT;
335 
336 	/*
337 	 * Check mask for validity:
338 	 * a) it must be contiguous.
339 	 * b) destination must have all host bits clear.
340 	 * c) if application forgot to set correct family (AF_INET),
341 	 *    reject request unless it is absolutely clear i.e.
342 	 *    both family and mask are zero.
343 	 */
344 	plen = 32;
345 	addr = sk_extract_addr(&rt->rt_dst);
346 	if (!(rt->rt_flags & RTF_HOST)) {
347 		__be32 mask = sk_extract_addr(&rt->rt_genmask);
348 
349 		if (rt->rt_genmask.sa_family != AF_INET) {
350 			if (mask || rt->rt_genmask.sa_family)
351 				return -EAFNOSUPPORT;
352 		}
353 
354 		if (bad_mask(mask, addr))
355 			return -EINVAL;
356 
357 		plen = inet_mask_len(mask);
358 	}
359 
360 	cfg->fc_dst_len = plen;
361 	cfg->fc_dst = addr;
362 
363 	if (cmd != SIOCDELRT) {
364 		cfg->fc_nlflags = NLM_F_CREATE;
365 		cfg->fc_protocol = RTPROT_BOOT;
366 	}
367 
368 	if (rt->rt_metric)
369 		cfg->fc_priority = rt->rt_metric - 1;
370 
371 	if (rt->rt_flags & RTF_REJECT) {
372 		cfg->fc_scope = RT_SCOPE_HOST;
373 		cfg->fc_type = RTN_UNREACHABLE;
374 		return 0;
375 	}
376 
377 	cfg->fc_scope = RT_SCOPE_NOWHERE;
378 	cfg->fc_type = RTN_UNICAST;
379 
380 	if (rt->rt_dev) {
381 		char *colon;
382 		struct net_device *dev;
383 		char devname[IFNAMSIZ];
384 
385 		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
386 			return -EFAULT;
387 
388 		devname[IFNAMSIZ-1] = 0;
389 		colon = strchr(devname, ':');
390 		if (colon)
391 			*colon = 0;
392 		dev = __dev_get_by_name(net, devname);
393 		if (!dev)
394 			return -ENODEV;
395 		cfg->fc_oif = dev->ifindex;
396 		if (colon) {
397 			struct in_ifaddr *ifa;
398 			struct in_device *in_dev = __in_dev_get_rtnl(dev);
399 			if (!in_dev)
400 				return -ENODEV;
401 			*colon = ':';
402 			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
403 				if (strcmp(ifa->ifa_label, devname) == 0)
404 					break;
405 			if (ifa == NULL)
406 				return -ENODEV;
407 			cfg->fc_prefsrc = ifa->ifa_local;
408 		}
409 	}
410 
411 	addr = sk_extract_addr(&rt->rt_gateway);
412 	if (rt->rt_gateway.sa_family == AF_INET && addr) {
413 		cfg->fc_gw = addr;
414 		if (rt->rt_flags & RTF_GATEWAY &&
415 		    inet_addr_type(net, addr) == RTN_UNICAST)
416 			cfg->fc_scope = RT_SCOPE_UNIVERSE;
417 	}
418 
419 	if (cmd == SIOCDELRT)
420 		return 0;
421 
422 	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
423 		return -EINVAL;
424 
425 	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
426 		cfg->fc_scope = RT_SCOPE_LINK;
427 
428 	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
429 		struct nlattr *mx;
430 		int len = 0;
431 
432 		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
433 		if (mx == NULL)
434 			return -ENOMEM;
435 
436 		if (rt->rt_flags & RTF_MTU)
437 			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
438 
439 		if (rt->rt_flags & RTF_WINDOW)
440 			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
441 
442 		if (rt->rt_flags & RTF_IRTT)
443 			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
444 
445 		cfg->fc_mx = mx;
446 		cfg->fc_mx_len = len;
447 	}
448 
449 	return 0;
450 }
451 
452 /*
453  *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
454  */
455 
456 int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
457 {
458 	struct fib_config cfg;
459 	struct rtentry rt;
460 	int err;
461 
462 	switch (cmd) {
463 	case SIOCADDRT:		/* Add a route */
464 	case SIOCDELRT:		/* Delete a route */
465 		if (!capable(CAP_NET_ADMIN))
466 			return -EPERM;
467 
468 		if (copy_from_user(&rt, arg, sizeof(rt)))
469 			return -EFAULT;
470 
471 		rtnl_lock();
472 		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
473 		if (err == 0) {
474 			struct fib_table *tb;
475 
476 			if (cmd == SIOCDELRT) {
477 				tb = fib_get_table(net, cfg.fc_table);
478 				if (tb)
479 					err = tb->tb_delete(tb, &cfg);
480 				else
481 					err = -ESRCH;
482 			} else {
483 				tb = fib_new_table(net, cfg.fc_table);
484 				if (tb)
485 					err = tb->tb_insert(tb, &cfg);
486 				else
487 					err = -ENOBUFS;
488 			}
489 
490 			/* allocated by rtentry_to_fib_config() */
491 			kfree(cfg.fc_mx);
492 		}
493 		rtnl_unlock();
494 		return err;
495 	}
496 	return -EINVAL;
497 }
498 
499 const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
500 	[RTA_DST]		= { .type = NLA_U32 },
501 	[RTA_SRC]		= { .type = NLA_U32 },
502 	[RTA_IIF]		= { .type = NLA_U32 },
503 	[RTA_OIF]		= { .type = NLA_U32 },
504 	[RTA_GATEWAY]		= { .type = NLA_U32 },
505 	[RTA_PRIORITY]		= { .type = NLA_U32 },
506 	[RTA_PREFSRC]		= { .type = NLA_U32 },
507 	[RTA_METRICS]		= { .type = NLA_NESTED },
508 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
509 	[RTA_PROTOINFO]		= { .type = NLA_U32 },
510 	[RTA_FLOW]		= { .type = NLA_U32 },
511 };
512 
513 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
514 			    struct nlmsghdr *nlh, struct fib_config *cfg)
515 {
516 	struct nlattr *attr;
517 	int err, remaining;
518 	struct rtmsg *rtm;
519 
520 	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
521 	if (err < 0)
522 		goto errout;
523 
524 	memset(cfg, 0, sizeof(*cfg));
525 
526 	rtm = nlmsg_data(nlh);
527 	cfg->fc_dst_len = rtm->rtm_dst_len;
528 	cfg->fc_tos = rtm->rtm_tos;
529 	cfg->fc_table = rtm->rtm_table;
530 	cfg->fc_protocol = rtm->rtm_protocol;
531 	cfg->fc_scope = rtm->rtm_scope;
532 	cfg->fc_type = rtm->rtm_type;
533 	cfg->fc_flags = rtm->rtm_flags;
534 	cfg->fc_nlflags = nlh->nlmsg_flags;
535 
536 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
537 	cfg->fc_nlinfo.nlh = nlh;
538 	cfg->fc_nlinfo.nl_net = net;
539 
540 	if (cfg->fc_type > RTN_MAX) {
541 		err = -EINVAL;
542 		goto errout;
543 	}
544 
545 	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
546 		switch (nla_type(attr)) {
547 		case RTA_DST:
548 			cfg->fc_dst = nla_get_be32(attr);
549 			break;
550 		case RTA_OIF:
551 			cfg->fc_oif = nla_get_u32(attr);
552 			break;
553 		case RTA_GATEWAY:
554 			cfg->fc_gw = nla_get_be32(attr);
555 			break;
556 		case RTA_PRIORITY:
557 			cfg->fc_priority = nla_get_u32(attr);
558 			break;
559 		case RTA_PREFSRC:
560 			cfg->fc_prefsrc = nla_get_be32(attr);
561 			break;
562 		case RTA_METRICS:
563 			cfg->fc_mx = nla_data(attr);
564 			cfg->fc_mx_len = nla_len(attr);
565 			break;
566 		case RTA_MULTIPATH:
567 			cfg->fc_mp = nla_data(attr);
568 			cfg->fc_mp_len = nla_len(attr);
569 			break;
570 		case RTA_FLOW:
571 			cfg->fc_flow = nla_get_u32(attr);
572 			break;
573 		case RTA_TABLE:
574 			cfg->fc_table = nla_get_u32(attr);
575 			break;
576 		}
577 	}
578 
579 	return 0;
580 errout:
581 	return err;
582 }
583 
584 static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
585 {
586 	struct net *net = sock_net(skb->sk);
587 	struct fib_config cfg;
588 	struct fib_table *tb;
589 	int err;
590 
591 	err = rtm_to_fib_config(net, skb, nlh, &cfg);
592 	if (err < 0)
593 		goto errout;
594 
595 	tb = fib_get_table(net, cfg.fc_table);
596 	if (tb == NULL) {
597 		err = -ESRCH;
598 		goto errout;
599 	}
600 
601 	err = tb->tb_delete(tb, &cfg);
602 errout:
603 	return err;
604 }
605 
606 static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
607 {
608 	struct net *net = sock_net(skb->sk);
609 	struct fib_config cfg;
610 	struct fib_table *tb;
611 	int err;
612 
613 	err = rtm_to_fib_config(net, skb, nlh, &cfg);
614 	if (err < 0)
615 		goto errout;
616 
617 	tb = fib_new_table(net, cfg.fc_table);
618 	if (tb == NULL) {
619 		err = -ENOBUFS;
620 		goto errout;
621 	}
622 
623 	err = tb->tb_insert(tb, &cfg);
624 errout:
625 	return err;
626 }
627 
628 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
629 {
630 	struct net *net = sock_net(skb->sk);
631 	unsigned int h, s_h;
632 	unsigned int e = 0, s_e;
633 	struct fib_table *tb;
634 	struct hlist_node *node;
635 	struct hlist_head *head;
636 	int dumped = 0;
637 
638 	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
639 	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
640 		return ip_rt_dump(skb, cb);
641 
642 	s_h = cb->args[0];
643 	s_e = cb->args[1];
644 
645 	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
646 		e = 0;
647 		head = &net->ipv4.fib_table_hash[h];
648 		hlist_for_each_entry(tb, node, head, tb_hlist) {
649 			if (e < s_e)
650 				goto next;
651 			if (dumped)
652 				memset(&cb->args[2], 0, sizeof(cb->args) -
653 						 2 * sizeof(cb->args[0]));
654 			if (tb->tb_dump(tb, skb, cb) < 0)
655 				goto out;
656 			dumped = 1;
657 next:
658 			e++;
659 		}
660 	}
661 out:
662 	cb->args[1] = e;
663 	cb->args[0] = h;
664 
665 	return skb->len;
666 }
667 
/* Prepare and feed an intra-kernel routing request.
   Really, it should be a netlink message, but :-( netlink
   may not be configured, so we feed it directly
   to the fib engine. This is legal, because all events occur
   only when netlink is already locked.
 */
674 
675 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
676 {
677 	struct net *net = dev_net(ifa->ifa_dev->dev);
678 	struct fib_table *tb;
679 	struct fib_config cfg = {
680 		.fc_protocol = RTPROT_KERNEL,
681 		.fc_type = type,
682 		.fc_dst = dst,
683 		.fc_dst_len = dst_len,
684 		.fc_prefsrc = ifa->ifa_local,
685 		.fc_oif = ifa->ifa_dev->dev->ifindex,
686 		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
687 		.fc_nlinfo = {
688 			.nl_net = net,
689 		},
690 	};
691 
692 	if (type == RTN_UNICAST)
693 		tb = fib_new_table(net, RT_TABLE_MAIN);
694 	else
695 		tb = fib_new_table(net, RT_TABLE_LOCAL);
696 
697 	if (tb == NULL)
698 		return;
699 
700 	cfg.fc_table = tb->tb_id;
701 
702 	if (type != RTN_LOCAL)
703 		cfg.fc_scope = RT_SCOPE_LINK;
704 	else
705 		cfg.fc_scope = RT_SCOPE_HOST;
706 
707 	if (cmd == RTM_NEWROUTE)
708 		tb->tb_insert(tb, &cfg);
709 	else
710 		tb->tb_delete(tb, &cfg);
711 }
712 
713 void fib_add_ifaddr(struct in_ifaddr *ifa)
714 {
715 	struct in_device *in_dev = ifa->ifa_dev;
716 	struct net_device *dev = in_dev->dev;
717 	struct in_ifaddr *prim = ifa;
718 	__be32 mask = ifa->ifa_mask;
719 	__be32 addr = ifa->ifa_local;
720 	__be32 prefix = ifa->ifa_address&mask;
721 
722 	if (ifa->ifa_flags&IFA_F_SECONDARY) {
723 		prim = inet_ifa_byprefix(in_dev, prefix, mask);
724 		if (prim == NULL) {
725 			printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
726 			return;
727 		}
728 	}
729 
730 	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
731 
732 	if (!(dev->flags&IFF_UP))
733 		return;
734 
735 	/* Add broadcast address, if it is explicitly assigned. */
736 	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
737 		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
738 
739 	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
740 	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
741 		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
742 			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
743 
744 		/* Add network specific broadcasts, when it takes a sense */
745 		if (ifa->ifa_prefixlen < 31) {
746 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
747 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
748 		}
749 	}
750 }
751 
752 static void fib_del_ifaddr(struct in_ifaddr *ifa)
753 {
754 	struct in_device *in_dev = ifa->ifa_dev;
755 	struct net_device *dev = in_dev->dev;
756 	struct in_ifaddr *ifa1;
757 	struct in_ifaddr *prim = ifa;
758 	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
759 	__be32 any = ifa->ifa_address&ifa->ifa_mask;
760 #define LOCAL_OK	1
761 #define BRD_OK		2
762 #define BRD0_OK		4
763 #define BRD1_OK		8
764 	unsigned ok = 0;
765 
766 	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
767 		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
768 			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
769 	else {
770 		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
771 		if (prim == NULL) {
772 			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
773 			return;
774 		}
775 	}
776 
777 	/* Deletion is more complicated than add.
778 	   We should take care of not to delete too much :-)
779 
780 	   Scan address list to be sure that addresses are really gone.
781 	 */
782 
783 	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
784 		if (ifa->ifa_local == ifa1->ifa_local)
785 			ok |= LOCAL_OK;
786 		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
787 			ok |= BRD_OK;
788 		if (brd == ifa1->ifa_broadcast)
789 			ok |= BRD1_OK;
790 		if (any == ifa1->ifa_broadcast)
791 			ok |= BRD0_OK;
792 	}
793 
794 	if (!(ok&BRD_OK))
795 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
796 	if (!(ok&BRD1_OK))
797 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
798 	if (!(ok&BRD0_OK))
799 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
800 	if (!(ok&LOCAL_OK)) {
801 		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
802 
803 		/* Check, that this local address finally disappeared. */
804 		if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
805 			/* And the last, but not the least thing.
806 			   We must flush stray FIB entries.
807 
808 			   First of all, we scan fib_info list searching
809 			   for stray nexthop entries, then ignite fib_flush.
810 			*/
811 			if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
812 				fib_flush(dev_net(dev));
813 		}
814 	}
815 #undef LOCAL_OK
816 #undef BRD_OK
817 #undef BRD0_OK
818 #undef BRD1_OK
819 }
820 
821 static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
822 {
823 
824 	struct fib_result       res;
825 	struct flowi            fl = { .mark = frn->fl_mark,
826 				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
827 							    .tos = frn->fl_tos,
828 							    .scope = frn->fl_scope } } };
829 
830 #ifdef CONFIG_IP_MULTIPLE_TABLES
831 	res.r = NULL;
832 #endif
833 
834 	frn->err = -ENOENT;
835 	if (tb) {
836 		local_bh_disable();
837 
838 		frn->tb_id = tb->tb_id;
839 		frn->err = tb->tb_lookup(tb, &fl, &res);
840 
841 		if (!frn->err) {
842 			frn->prefixlen = res.prefixlen;
843 			frn->nh_sel = res.nh_sel;
844 			frn->type = res.type;
845 			frn->scope = res.scope;
846 			fib_res_put(&res);
847 		}
848 		local_bh_enable();
849 	}
850 }
851 
852 static void nl_fib_input(struct sk_buff *skb)
853 {
854 	struct net *net;
855 	struct fib_result_nl *frn;
856 	struct nlmsghdr *nlh;
857 	struct fib_table *tb;
858 	u32 pid;
859 
860 	net = sock_net(skb->sk);
861 	nlh = nlmsg_hdr(skb);
862 	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
863 	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
864 		return;
865 
866 	skb = skb_clone(skb, GFP_KERNEL);
867 	if (skb == NULL)
868 		return;
869 	nlh = nlmsg_hdr(skb);
870 
871 	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
872 	tb = fib_get_table(net, frn->tb_id_in);
873 
874 	nl_fib_lookup(frn, tb);
875 
876 	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
877 	NETLINK_CB(skb).pid = 0;         /* from kernel */
878 	NETLINK_CB(skb).dst_group = 0;  /* unicast */
879 	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
880 }
881 
882 static int nl_fib_lookup_init(struct net *net)
883 {
884 	struct sock *sk;
885 	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
886 				   nl_fib_input, NULL, THIS_MODULE);
887 	if (sk == NULL)
888 		return -EAFNOSUPPORT;
889 	net->ipv4.fibnl = sk;
890 	return 0;
891 }
892 
893 static void nl_fib_lookup_exit(struct net *net)
894 {
895 	netlink_kernel_release(net->ipv4.fibnl);
896 	net->ipv4.fibnl = NULL;
897 }
898 
/*
 * Take IPv4 routing down on @dev: sync the nexthops down (flushing the
 * tables when fib_sync_down_dev() reports work), then flush the route cache
 * and the device's ARP state. @force is passed through to
 * fib_sync_down_dev().
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int need_flush = fib_sync_down_dev(dev, force);

	if (need_flush)
		fib_flush(dev_net(dev));

	rt_cache_flush(0);
	arp_ifdown(dev);
}
906 
907 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
908 {
909 	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
910 
911 	switch (event) {
912 	case NETDEV_UP:
913 		fib_add_ifaddr(ifa);
914 #ifdef CONFIG_IP_ROUTE_MULTIPATH
915 		fib_sync_up(ifa->ifa_dev->dev);
916 #endif
917 		rt_cache_flush(-1);
918 		break;
919 	case NETDEV_DOWN:
920 		fib_del_ifaddr(ifa);
921 		if (ifa->ifa_dev->ifa_list == NULL) {
922 			/* Last address was deleted from this interface.
923 			   Disable IP.
924 			 */
925 			fib_disable_ip(ifa->ifa_dev->dev, 1);
926 		} else {
927 			rt_cache_flush(-1);
928 		}
929 		break;
930 	}
931 	return NOTIFY_DONE;
932 }
933 
934 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
935 {
936 	struct net_device *dev = ptr;
937 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
938 
939 	if (event == NETDEV_UNREGISTER) {
940 		fib_disable_ip(dev, 2);
941 		return NOTIFY_DONE;
942 	}
943 
944 	if (!in_dev)
945 		return NOTIFY_DONE;
946 
947 	switch (event) {
948 	case NETDEV_UP:
949 		for_ifa(in_dev) {
950 			fib_add_ifaddr(ifa);
951 		} endfor_ifa(in_dev);
952 #ifdef CONFIG_IP_ROUTE_MULTIPATH
953 		fib_sync_up(dev);
954 #endif
955 		rt_cache_flush(-1);
956 		break;
957 	case NETDEV_DOWN:
958 		fib_disable_ip(dev, 0);
959 		break;
960 	case NETDEV_CHANGEMTU:
961 	case NETDEV_CHANGE:
962 		rt_cache_flush(0);
963 		break;
964 	}
965 	return NOTIFY_DONE;
966 }
967 
968 static struct notifier_block fib_inetaddr_notifier = {
969 	.notifier_call =fib_inetaddr_event,
970 };
971 
972 static struct notifier_block fib_netdev_notifier = {
973 	.notifier_call =fib_netdev_event,
974 };
975 
976 static int __net_init ip_fib_net_init(struct net *net)
977 {
978 	int err;
979 	unsigned int i;
980 
981 	net->ipv4.fib_table_hash = kzalloc(
982 			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
983 	if (net->ipv4.fib_table_hash == NULL)
984 		return -ENOMEM;
985 
986 	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
987 		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
988 
989 	err = fib4_rules_init(net);
990 	if (err < 0)
991 		goto fail;
992 	return 0;
993 
994 fail:
995 	kfree(net->ipv4.fib_table_hash);
996 	return err;
997 }
998 
999 static void __net_exit ip_fib_net_exit(struct net *net)
1000 {
1001 	unsigned int i;
1002 
1003 #ifdef CONFIG_IP_MULTIPLE_TABLES
1004 	fib4_rules_exit(net);
1005 #endif
1006 
1007 	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1008 		struct fib_table *tb;
1009 		struct hlist_head *head;
1010 		struct hlist_node *node, *tmp;
1011 
1012 		head = &net->ipv4.fib_table_hash[i];
1013 		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1014 			hlist_del(node);
1015 			tb->tb_flush(tb);
1016 			kfree(tb);
1017 		}
1018 	}
1019 	kfree(net->ipv4.fib_table_hash);
1020 }
1021 
1022 static int __net_init fib_net_init(struct net *net)
1023 {
1024 	int error;
1025 
1026 	error = ip_fib_net_init(net);
1027 	if (error < 0)
1028 		goto out;
1029 	error = nl_fib_lookup_init(net);
1030 	if (error < 0)
1031 		goto out_nlfl;
1032 	error = fib_proc_init(net);
1033 	if (error < 0)
1034 		goto out_proc;
1035 out:
1036 	return error;
1037 
1038 out_proc:
1039 	nl_fib_lookup_exit(net);
1040 out_nlfl:
1041 	ip_fib_net_exit(net);
1042 	goto out;
1043 }
1044 
1045 static void __net_exit fib_net_exit(struct net *net)
1046 {
1047 	fib_proc_exit(net);
1048 	nl_fib_lookup_exit(net);
1049 	ip_fib_net_exit(net);
1050 }
1051 
1052 static struct pernet_operations fib_net_ops = {
1053 	.init = fib_net_init,
1054 	.exit = fib_net_exit,
1055 };
1056 
1057 void __init ip_fib_init(void)
1058 {
1059 	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1060 	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1061 	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1062 
1063 	register_pernet_subsys(&fib_net_ops);
1064 	register_netdevice_notifier(&fib_netdev_notifier);
1065 	register_inetaddr_notifier(&fib_inetaddr_notifier);
1066 
1067 	fib_hash_init();
1068 }
1069 
1070 EXPORT_SYMBOL(inet_addr_type);
1071 EXPORT_SYMBOL(inet_dev_addr_type);
1072 EXPORT_SYMBOL(ip_dev_find);
1073