xref: /openbmc/linux/net/ipv4/fib_frontend.c (revision 1da177e4)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		IPv4 Forwarding Information Base: FIB frontend.
7  *
8  * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9  *
10  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *		This program is free software; you can redistribute it and/or
13  *		modify it under the terms of the GNU General Public License
14  *		as published by the Free Software Foundation; either version
15  *		2 of the License, or (at your option) any later version.
16  */
17 
18 #include <linux/config.h>
19 #include <linux/module.h>
20 #include <asm/uaccess.h>
21 #include <asm/system.h>
22 #include <linux/bitops.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/sched.h>
26 #include <linux/mm.h>
27 #include <linux/string.h>
28 #include <linux/socket.h>
29 #include <linux/sockios.h>
30 #include <linux/errno.h>
31 #include <linux/in.h>
32 #include <linux/inet.h>
33 #include <linux/netdevice.h>
34 #include <linux/if_arp.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
38 
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/icmp.h>
45 #include <net/arp.h>
46 #include <net/ip_fib.h>
47 
/* Debug-level printk() shorthand. */
#define FFprint(a...) printk(KERN_DEBUG a)

#ifdef CONFIG_IP_MULTIPLE_TABLES

/* Lowest table id walked by inet_dump_fib(). */
#define RT_TABLE_MIN 1

/* FIB tables indexed by table id, filled in by __fib_new_table(). */
struct fib_table *fib_tables[RT_TABLE_MAX+1];

/*
 * Create the table numbered @id with fib_hash_init() and, on success,
 * store it in fib_tables[id].
 *
 * Returns the new table, or NULL when fib_hash_init() fails (the slot is
 * then left untouched). @id is not range-checked here; the caller is
 * expected to pass a valid index into fib_tables[].
 */
struct fib_table *__fib_new_table(int id)
{
	struct fib_table *created = fib_hash_init(id);

	if (created != NULL)
		fib_tables[id] = created;
	return created;
}

#else /* !CONFIG_IP_MULTIPLE_TABLES */

/* With a single pair of tables the dump starts at the main table. */
#define RT_TABLE_MIN RT_TABLE_MAIN

/* The two fixed tables, set up by ip_fib_init(). */
struct fib_table *ip_fib_local_table;
struct fib_table *ip_fib_main_table;

#endif /* CONFIG_IP_MULTIPLE_TABLES */
76 
77 
78 static void fib_flush(void)
79 {
80 	int flushed = 0;
81 #ifdef CONFIG_IP_MULTIPLE_TABLES
82 	struct fib_table *tb;
83 	int id;
84 
85 	for (id = RT_TABLE_MAX; id>0; id--) {
86 		if ((tb = fib_get_table(id))==NULL)
87 			continue;
88 		flushed += tb->tb_flush(tb);
89 	}
90 #else /* CONFIG_IP_MULTIPLE_TABLES */
91 	flushed += ip_fib_main_table->tb_flush(ip_fib_main_table);
92 	flushed += ip_fib_local_table->tb_flush(ip_fib_local_table);
93 #endif /* CONFIG_IP_MULTIPLE_TABLES */
94 
95 	if (flushed)
96 		rt_cache_flush(-1);
97 }
98 
99 /*
100  *	Find the first device with a given source address.
101  */
102 
103 struct net_device * ip_dev_find(u32 addr)
104 {
105 	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
106 	struct fib_result res;
107 	struct net_device *dev = NULL;
108 
109 #ifdef CONFIG_IP_MULTIPLE_TABLES
110 	res.r = NULL;
111 #endif
112 
113 	if (!ip_fib_local_table ||
114 	    ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
115 		return NULL;
116 	if (res.type != RTN_LOCAL)
117 		goto out;
118 	dev = FIB_RES_DEV(res);
119 
120 	if (dev)
121 		dev_hold(dev);
122 out:
123 	fib_res_put(&res);
124 	return dev;
125 }
126 
127 unsigned inet_addr_type(u32 addr)
128 {
129 	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
130 	struct fib_result	res;
131 	unsigned ret = RTN_BROADCAST;
132 
133 	if (ZERONET(addr) || BADCLASS(addr))
134 		return RTN_BROADCAST;
135 	if (MULTICAST(addr))
136 		return RTN_MULTICAST;
137 
138 #ifdef CONFIG_IP_MULTIPLE_TABLES
139 	res.r = NULL;
140 #endif
141 
142 	if (ip_fib_local_table) {
143 		ret = RTN_UNICAST;
144 		if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
145 						   &fl, &res)) {
146 			ret = res.type;
147 			fib_res_put(&res);
148 		}
149 	}
150 	return ret;
151 }
152 
153 /* Given (packet source, input interface) and optional (dst, oif, tos):
154    - (main) check, that source is valid i.e. not broadcast or our local
155      address.
156    - figure out what "logical" interface this packet arrived
157      and calculate "specific destination" address.
158    - check, that packet arrived from expected physical interface.
159  */
160 
161 int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
162 			struct net_device *dev, u32 *spec_dst, u32 *itag)
163 {
164 	struct in_device *in_dev;
165 	struct flowi fl = { .nl_u = { .ip4_u =
166 				      { .daddr = src,
167 					.saddr = dst,
168 					.tos = tos } },
169 			    .iif = oif };
170 	struct fib_result res;
171 	int no_addr, rpf;
172 	int ret;
173 
174 	no_addr = rpf = 0;
175 	rcu_read_lock();
176 	in_dev = __in_dev_get(dev);
177 	if (in_dev) {
178 		no_addr = in_dev->ifa_list == NULL;
179 		rpf = IN_DEV_RPFILTER(in_dev);
180 	}
181 	rcu_read_unlock();
182 
183 	if (in_dev == NULL)
184 		goto e_inval;
185 
186 	if (fib_lookup(&fl, &res))
187 		goto last_resort;
188 	if (res.type != RTN_UNICAST)
189 		goto e_inval_res;
190 	*spec_dst = FIB_RES_PREFSRC(res);
191 	fib_combine_itag(itag, &res);
192 #ifdef CONFIG_IP_ROUTE_MULTIPATH
193 	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
194 #else
195 	if (FIB_RES_DEV(res) == dev)
196 #endif
197 	{
198 		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
199 		fib_res_put(&res);
200 		return ret;
201 	}
202 	fib_res_put(&res);
203 	if (no_addr)
204 		goto last_resort;
205 	if (rpf)
206 		goto e_inval;
207 	fl.oif = dev->ifindex;
208 
209 	ret = 0;
210 	if (fib_lookup(&fl, &res) == 0) {
211 		if (res.type == RTN_UNICAST) {
212 			*spec_dst = FIB_RES_PREFSRC(res);
213 			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
214 		}
215 		fib_res_put(&res);
216 	}
217 	return ret;
218 
219 last_resort:
220 	if (rpf)
221 		goto e_inval;
222 	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
223 	*itag = 0;
224 	return 0;
225 
226 e_inval_res:
227 	fib_res_put(&res);
228 e_inval:
229 	return -EINVAL;
230 }
231 
232 #ifndef CONFIG_IP_NOSIOCRT
233 
234 /*
235  *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
236  */
237 
238 int ip_rt_ioctl(unsigned int cmd, void __user *arg)
239 {
240 	int err;
241 	struct kern_rta rta;
242 	struct rtentry  r;
243 	struct {
244 		struct nlmsghdr nlh;
245 		struct rtmsg	rtm;
246 	} req;
247 
248 	switch (cmd) {
249 	case SIOCADDRT:		/* Add a route */
250 	case SIOCDELRT:		/* Delete a route */
251 		if (!capable(CAP_NET_ADMIN))
252 			return -EPERM;
253 		if (copy_from_user(&r, arg, sizeof(struct rtentry)))
254 			return -EFAULT;
255 		rtnl_lock();
256 		err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
257 		if (err == 0) {
258 			if (cmd == SIOCDELRT) {
259 				struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
260 				err = -ESRCH;
261 				if (tb)
262 					err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
263 			} else {
264 				struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
265 				err = -ENOBUFS;
266 				if (tb)
267 					err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
268 			}
269 			if (rta.rta_mx)
270 				kfree(rta.rta_mx);
271 		}
272 		rtnl_unlock();
273 		return err;
274 	}
275 	return -EINVAL;
276 }
277 
278 #else
279 
/*
 * Stub used when the routing ioctls are compiled out
 * (CONFIG_IP_NOSIOCRT): every request is refused with -EINVAL.
 *
 * NOTE(review): the full variant declares @arg as "void __user *"; this
 * stub omits the annotation.
 */
int ip_rt_ioctl(unsigned int cmd, void *arg)
{
	/* Neither argument is inspected. */
	(void)cmd;
	(void)arg;

	return -EINVAL;
}
284 
285 #endif
286 
287 static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
288 {
289 	int i;
290 
291 	for (i=1; i<=RTA_MAX; i++) {
292 		struct rtattr *attr = rta[i-1];
293 		if (attr) {
294 			if (RTA_PAYLOAD(attr) < 4)
295 				return -EINVAL;
296 			if (i != RTA_MULTIPATH && i != RTA_METRICS)
297 				rta[i-1] = (struct rtattr*)RTA_DATA(attr);
298 		}
299 	}
300 	return 0;
301 }
302 
303 int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
304 {
305 	struct fib_table * tb;
306 	struct rtattr **rta = arg;
307 	struct rtmsg *r = NLMSG_DATA(nlh);
308 
309 	if (inet_check_attr(r, rta))
310 		return -EINVAL;
311 
312 	tb = fib_get_table(r->rtm_table);
313 	if (tb)
314 		return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
315 	return -ESRCH;
316 }
317 
318 int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
319 {
320 	struct fib_table * tb;
321 	struct rtattr **rta = arg;
322 	struct rtmsg *r = NLMSG_DATA(nlh);
323 
324 	if (inet_check_attr(r, rta))
325 		return -EINVAL;
326 
327 	tb = fib_new_table(r->rtm_table);
328 	if (tb)
329 		return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
330 	return -ENOBUFS;
331 }
332 
333 int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
334 {
335 	int t;
336 	int s_t;
337 	struct fib_table *tb;
338 
339 	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
340 	    ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
341 		return ip_rt_dump(skb, cb);
342 
343 	s_t = cb->args[0];
344 	if (s_t == 0)
345 		s_t = cb->args[0] = RT_TABLE_MIN;
346 
347 	for (t=s_t; t<=RT_TABLE_MAX; t++) {
348 		if (t < s_t) continue;
349 		if (t > s_t)
350 			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
351 		if ((tb = fib_get_table(t))==NULL)
352 			continue;
353 		if (tb->tb_dump(tb, skb, cb) < 0)
354 			break;
355 	}
356 
357 	cb->args[0] = t;
358 
359 	return skb->len;
360 }
361 
362 /* Prepare and feed intra-kernel routing request.
363    Really, it should be netlink message, but :-( netlink
364    can be not configured, so that we feed it directly
365    to fib engine. It is legal, because all events occur
366    only when netlink is already locked.
367  */
368 
369 static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
370 {
371 	struct fib_table * tb;
372 	struct {
373 		struct nlmsghdr	nlh;
374 		struct rtmsg	rtm;
375 	} req;
376 	struct kern_rta rta;
377 
378 	memset(&req.rtm, 0, sizeof(req.rtm));
379 	memset(&rta, 0, sizeof(rta));
380 
381 	if (type == RTN_UNICAST)
382 		tb = fib_new_table(RT_TABLE_MAIN);
383 	else
384 		tb = fib_new_table(RT_TABLE_LOCAL);
385 
386 	if (tb == NULL)
387 		return;
388 
389 	req.nlh.nlmsg_len = sizeof(req);
390 	req.nlh.nlmsg_type = cmd;
391 	req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
392 	req.nlh.nlmsg_pid = 0;
393 	req.nlh.nlmsg_seq = 0;
394 
395 	req.rtm.rtm_dst_len = dst_len;
396 	req.rtm.rtm_table = tb->tb_id;
397 	req.rtm.rtm_protocol = RTPROT_KERNEL;
398 	req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
399 	req.rtm.rtm_type = type;
400 
401 	rta.rta_dst = &dst;
402 	rta.rta_prefsrc = &ifa->ifa_local;
403 	rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
404 
405 	if (cmd == RTM_NEWROUTE)
406 		tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
407 	else
408 		tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
409 }
410 
411 static void fib_add_ifaddr(struct in_ifaddr *ifa)
412 {
413 	struct in_device *in_dev = ifa->ifa_dev;
414 	struct net_device *dev = in_dev->dev;
415 	struct in_ifaddr *prim = ifa;
416 	u32 mask = ifa->ifa_mask;
417 	u32 addr = ifa->ifa_local;
418 	u32 prefix = ifa->ifa_address&mask;
419 
420 	if (ifa->ifa_flags&IFA_F_SECONDARY) {
421 		prim = inet_ifa_byprefix(in_dev, prefix, mask);
422 		if (prim == NULL) {
423 			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
424 			return;
425 		}
426 	}
427 
428 	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
429 
430 	if (!(dev->flags&IFF_UP))
431 		return;
432 
433 	/* Add broadcast address, if it is explicitly assigned. */
434 	if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF)
435 		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
436 
437 	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
438 	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
439 		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
440 			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
441 
442 		/* Add network specific broadcasts, when it takes a sense */
443 		if (ifa->ifa_prefixlen < 31) {
444 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
445 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
446 		}
447 	}
448 }
449 
450 static void fib_del_ifaddr(struct in_ifaddr *ifa)
451 {
452 	struct in_device *in_dev = ifa->ifa_dev;
453 	struct net_device *dev = in_dev->dev;
454 	struct in_ifaddr *ifa1;
455 	struct in_ifaddr *prim = ifa;
456 	u32 brd = ifa->ifa_address|~ifa->ifa_mask;
457 	u32 any = ifa->ifa_address&ifa->ifa_mask;
458 #define LOCAL_OK	1
459 #define BRD_OK		2
460 #define BRD0_OK		4
461 #define BRD1_OK		8
462 	unsigned ok = 0;
463 
464 	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
465 		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
466 			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
467 	else {
468 		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
469 		if (prim == NULL) {
470 			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
471 			return;
472 		}
473 	}
474 
475 	/* Deletion is more complicated than add.
476 	   We should take care of not to delete too much :-)
477 
478 	   Scan address list to be sure that addresses are really gone.
479 	 */
480 
481 	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
482 		if (ifa->ifa_local == ifa1->ifa_local)
483 			ok |= LOCAL_OK;
484 		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
485 			ok |= BRD_OK;
486 		if (brd == ifa1->ifa_broadcast)
487 			ok |= BRD1_OK;
488 		if (any == ifa1->ifa_broadcast)
489 			ok |= BRD0_OK;
490 	}
491 
492 	if (!(ok&BRD_OK))
493 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
494 	if (!(ok&BRD1_OK))
495 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
496 	if (!(ok&BRD0_OK))
497 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
498 	if (!(ok&LOCAL_OK)) {
499 		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
500 
501 		/* Check, that this local address finally disappeared. */
502 		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
503 			/* And the last, but not the least thing.
504 			   We must flush stray FIB entries.
505 
506 			   First of all, we scan fib_info list searching
507 			   for stray nexthop entries, then ignite fib_flush.
508 			*/
509 			if (fib_sync_down(ifa->ifa_local, NULL, 0))
510 				fib_flush();
511 		}
512 	}
513 #undef LOCAL_OK
514 #undef BRD_OK
515 #undef BRD0_OK
516 #undef BRD1_OK
517 }
518 
/*
 * Tear down IP routing state for @dev.
 *
 * Runs fib_sync_down() for the device with the given @force level and
 * flushes the FIB when it returns non-zero. Afterwards it always calls
 * rt_cache_flush(0) and arp_ifdown() for the device.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int need_flush = fib_sync_down(0, dev, force);

	if (need_flush)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
526 
527 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
528 {
529 	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
530 
531 	switch (event) {
532 	case NETDEV_UP:
533 		fib_add_ifaddr(ifa);
534 #ifdef CONFIG_IP_ROUTE_MULTIPATH
535 		fib_sync_up(ifa->ifa_dev->dev);
536 #endif
537 		rt_cache_flush(-1);
538 		break;
539 	case NETDEV_DOWN:
540 		fib_del_ifaddr(ifa);
541 		if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) {
542 			/* Last address was deleted from this interface.
543 			   Disable IP.
544 			 */
545 			fib_disable_ip(ifa->ifa_dev->dev, 1);
546 		} else {
547 			rt_cache_flush(-1);
548 		}
549 		break;
550 	}
551 	return NOTIFY_DONE;
552 }
553 
554 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
555 {
556 	struct net_device *dev = ptr;
557 	struct in_device *in_dev = __in_dev_get(dev);
558 
559 	if (event == NETDEV_UNREGISTER) {
560 		fib_disable_ip(dev, 2);
561 		return NOTIFY_DONE;
562 	}
563 
564 	if (!in_dev)
565 		return NOTIFY_DONE;
566 
567 	switch (event) {
568 	case NETDEV_UP:
569 		for_ifa(in_dev) {
570 			fib_add_ifaddr(ifa);
571 		} endfor_ifa(in_dev);
572 #ifdef CONFIG_IP_ROUTE_MULTIPATH
573 		fib_sync_up(dev);
574 #endif
575 		rt_cache_flush(-1);
576 		break;
577 	case NETDEV_DOWN:
578 		fib_disable_ip(dev, 0);
579 		break;
580 	case NETDEV_CHANGEMTU:
581 	case NETDEV_CHANGE:
582 		rt_cache_flush(0);
583 		break;
584 	}
585 	return NOTIFY_DONE;
586 }
587 
588 static struct notifier_block fib_inetaddr_notifier = {
589 	.notifier_call =fib_inetaddr_event,
590 };
591 
592 static struct notifier_block fib_netdev_notifier = {
593 	.notifier_call =fib_netdev_event,
594 };
595 
596 void __init ip_fib_init(void)
597 {
598 #ifndef CONFIG_IP_MULTIPLE_TABLES
599 	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
600 	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
601 #else
602 	fib_rules_init();
603 #endif
604 
605 	register_netdevice_notifier(&fib_netdev_notifier);
606 	register_inetaddr_notifier(&fib_inetaddr_notifier);
607 }
608 
609 EXPORT_SYMBOL(inet_addr_type);
610 EXPORT_SYMBOL(ip_dev_find);
611 EXPORT_SYMBOL(ip_rt_ioctl);
612