xref: /openbmc/linux/net/ipv4/ipmr.c (revision c21b37f6)
1 /*
2  *	IP multicast routing support for mrouted 3.6/3.8
3  *
4  *		(c) 1995 Alan Cox, <alan@redhat.com>
5  *	  Linux Consultancy and Custom Driver Development
6  *
7  *	This program is free software; you can redistribute it and/or
8  *	modify it under the terms of the GNU General Public License
9  *	as published by the Free Software Foundation; either version
10  *	2 of the License, or (at your option) any later version.
11  *
12  *	Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
13  *
14  *	Fixes:
15  *	Michael Chastain	:	Incorrect size of copying.
16  *	Alan Cox		:	Added the cache manager code
17  *	Alan Cox		:	Fixed the clone/copy bug and device race.
18  *	Mike McLagan		:	Routing by source
19  *	Malcolm Beattie		:	Buffer handling fixes.
20  *	Alexey Kuznetsov	:	Double buffer free and other fixes.
21  *	SVR Anand		:	Fixed several multicast bugs and problems.
22  *	Alexey Kuznetsov	:	Status, optimisations and more.
23  *	Brad Parker		:	Better behaviour on mrouted upcall
24  *					overflow.
25  *	Carlos Picoto		:	PIMv1 Support
26  *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
27  *					Relax this requirement to work with older peers.
28  *
29  */
30 
31 #include <asm/system.h>
32 #include <asm/uaccess.h>
33 #include <linux/types.h>
34 #include <linux/capability.h>
35 #include <linux/errno.h>
36 #include <linux/timer.h>
37 #include <linux/mm.h>
38 #include <linux/kernel.h>
39 #include <linux/fcntl.h>
40 #include <linux/stat.h>
41 #include <linux/socket.h>
42 #include <linux/in.h>
43 #include <linux/inet.h>
44 #include <linux/netdevice.h>
45 #include <linux/inetdevice.h>
46 #include <linux/igmp.h>
47 #include <linux/proc_fs.h>
48 #include <linux/seq_file.h>
49 #include <linux/mroute.h>
50 #include <linux/init.h>
51 #include <linux/if_ether.h>
52 #include <net/ip.h>
53 #include <net/protocol.h>
54 #include <linux/skbuff.h>
55 #include <net/route.h>
56 #include <net/sock.h>
57 #include <net/icmp.h>
58 #include <net/udp.h>
59 #include <net/raw.h>
60 #include <linux/notifier.h>
61 #include <linux/if_arp.h>
62 #include <linux/netfilter_ipv4.h>
63 #include <net/ipip.h>
64 #include <net/checksum.h>
65 #include <net/netlink.h>
66 
67 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
68 #define CONFIG_IP_PIMSM	1
69 #endif
70 
71 static struct sock *mroute_socket;
72 
73 
74 /* Big lock, protecting vif table, mrt cache and mroute socket state.
75    Note that the changes are serialized via rtnl_lock.
76  */
77 
78 static DEFINE_RWLOCK(mrt_lock);
79 
80 /*
81  *	Multicast router control variables
82  */
83 
84 static struct vif_device vif_table[MAXVIFS];		/* Devices 		*/
85 static int maxvif;
86 
87 #define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
88 
89 static int mroute_do_assert;				/* Set in PIM assert	*/
90 static int mroute_do_pim;
91 
92 static struct mfc_cache *mfc_cache_array[MFC_LINES];	/* Forwarding cache	*/
93 
94 static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
95 static atomic_t cache_resolve_queue_len;		/* Size of unresolved	*/
96 
97 /* Special spinlock for queue of unresolved entries */
98 static DEFINE_SPINLOCK(mfc_unres_lock);
99 
100 /* We return to Alan's original scheme. The hash table of resolved
101    entries is changed only in process context and is protected
102    by the weak lock mrt_lock. The queue of unresolved entries is
103    protected by the strong spinlock mfc_unres_lock.
104 
105    This way the data path is free of exclusive locks altogether.
106  */
107 
108 static struct kmem_cache *mrt_cachep __read_mostly;
109 
110 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
111 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
112 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
113 
114 #ifdef CONFIG_IP_PIMSM_V2
115 static struct net_protocol pim_protocol;
116 #endif
117 
118 static struct timer_list ipmr_expire_timer;
119 
120 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
121 
122 static
123 struct net_device *ipmr_new_tunnel(struct vifctl *v)
124 {
125 	struct net_device  *dev;
126 
127 	dev = __dev_get_by_name("tunl0");
128 
129 	if (dev) {
130 		int err;
131 		struct ifreq ifr;
132 		mm_segment_t	oldfs;
133 		struct ip_tunnel_parm p;
134 		struct in_device  *in_dev;
135 
136 		memset(&p, 0, sizeof(p));
137 		p.iph.daddr = v->vifc_rmt_addr.s_addr;
138 		p.iph.saddr = v->vifc_lcl_addr.s_addr;
139 		p.iph.version = 4;
140 		p.iph.ihl = 5;
141 		p.iph.protocol = IPPROTO_IPIP;
142 		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
143 		ifr.ifr_ifru.ifru_data = (void*)&p;
144 
145 		oldfs = get_fs(); set_fs(KERNEL_DS);
146 		err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
147 		set_fs(oldfs);
148 
149 		dev = NULL;
150 
151 		if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
152 			dev->flags |= IFF_MULTICAST;
153 
154 			in_dev = __in_dev_get_rtnl(dev);
155 			if (in_dev == NULL)
156 				goto failure;
157 
158 			ipv4_devconf_setall(in_dev);
159 			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
160 
161 			if (dev_open(dev))
162 				goto failure;
163 		}
164 	}
165 	return dev;
166 
167 failure:
168 	/* allow the register to be completed before unregistering. */
169 	rtnl_unlock();
170 	rtnl_lock();
171 
172 	unregister_netdevice(dev);
173 	return NULL;
174 }
175 
176 #ifdef CONFIG_IP_PIMSM
177 
178 static int reg_vif_num = -1;
179 
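/* Transmit hook for the pimreg device: account the packet in the device
 * statistics, deliver it to the mroute daemon as an IGMPMSG_WHOLEPKT
 * upcall, then free it. */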
180 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
181 {
182 	read_lock(&mrt_lock);
183 	((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len;
184 	((struct net_device_stats*)netdev_priv(dev))->tx_packets++;
185 	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
186 	read_unlock(&mrt_lock);
187 	kfree_skb(skb);
188 	return 0;
189 }
190 
191 static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
192 {
193 	return (struct net_device_stats*)netdev_priv(dev);
194 }
195 
196 static void reg_vif_setup(struct net_device *dev)
197 {
198 	dev->type		= ARPHRD_PIMREG;
199 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
200 	dev->flags		= IFF_NOARP;
201 	dev->hard_start_xmit	= reg_vif_xmit;
202 	dev->get_stats		= reg_vif_get_stats;
203 	dev->destructor		= free_netdev;
204 }
205 
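/* Allocate, register and bring up the "pimreg" pseudo-device that backs
 * the PIM register VIF. */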
206 static struct net_device *ipmr_reg_vif(void)
207 {
208 	struct net_device *dev;
209 	struct in_device *in_dev;
210 
211 	dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg",
212 			   reg_vif_setup);
213 
214 	if (dev == NULL)
215 		return NULL;
216 
217 	if (register_netdevice(dev)) {
218 		free_netdev(dev);
219 		return NULL;
220 	}
221 	dev->iflink = 0;
222 
223 	rcu_read_lock();
224 	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
225 		rcu_read_unlock();
226 		goto failure;
227 	}
228 
229 	ipv4_devconf_setall(in_dev);
230 	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
231 	rcu_read_unlock();
232 
233 	if (dev_open(dev))
234 		goto failure;
235 
236 	return dev;
237 
238 failure:
239 	/* allow the register to be completed before unregistering. */
240 	rtnl_unlock();
241 	rtnl_lock();
242 
243 	unregister_netdevice(dev);
244 	return NULL;
245 }
246 #endif
247 
248 /*
249  *	Delete a VIF entry
250  */
251 
252 static int vif_delete(int vifi)
253 {
254 	struct vif_device *v;
255 	struct net_device *dev;
256 	struct in_device *in_dev;
257 
258 	if (vifi < 0 || vifi >= maxvif)
259 		return -EADDRNOTAVAIL;
260 
261 	v = &vif_table[vifi];
262 
263 	write_lock_bh(&mrt_lock);
264 	dev = v->dev;
265 	v->dev = NULL;
266 
267 	if (!dev) {
268 		write_unlock_bh(&mrt_lock);
269 		return -EADDRNOTAVAIL;
270 	}
271 
272 #ifdef CONFIG_IP_PIMSM
273 	if (vifi == reg_vif_num)
274 		reg_vif_num = -1;
275 #endif
276 
277 	if (vifi+1 == maxvif) {
278 		int tmp;
279 		for (tmp=vifi-1; tmp>=0; tmp--) {
280 			if (VIF_EXISTS(tmp))
281 				break;
282 		}
283 		maxvif = tmp+1;
284 	}
285 
286 	write_unlock_bh(&mrt_lock);
287 
288 	dev_set_allmulti(dev, -1);
289 
290 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
291 		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
292 		ip_rt_multicast_event(in_dev);
293 	}
294 
295 	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
296 		unregister_netdevice(dev);
297 
298 	dev_put(dev);
299 	return 0;
300 }
301 
302 /* Destroy an unresolved cache entry, killing queued skbs
303    and reporting error to netlink readers.
304  */
305 
306 static void ipmr_destroy_unres(struct mfc_cache *c)
307 {
308 	struct sk_buff *skb;
309 	struct nlmsgerr *e;
310 
311 	atomic_dec(&cache_resolve_queue_len);
312 
313 	while ((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
314 		if (ip_hdr(skb)->version == 0) {
315 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
316 			nlh->nlmsg_type = NLMSG_ERROR;
317 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
318 			skb_trim(skb, nlh->nlmsg_len);
319 			e = NLMSG_DATA(nlh);
320 			e->error = -ETIMEDOUT;
321 			memset(&e->msg, 0, sizeof(e->msg));
322 
323 			rtnl_unicast(skb, NETLINK_CB(skb).pid);
324 		} else
325 			kfree_skb(skb);
326 	}
327 
328 	kmem_cache_free(mrt_cachep, c);
329 }
330 
331 
332 /* Single timer process for all the unresolved queue. */
333 
334 static void ipmr_expire_process(unsigned long dummy)
335 {
336 	unsigned long now;
337 	unsigned long expires;
338 	struct mfc_cache *c, **cp;
339 
340 	if (!spin_trylock(&mfc_unres_lock)) {
341 		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
342 		return;
343 	}
344 
345 	if (atomic_read(&cache_resolve_queue_len) == 0)
346 		goto out;
347 
348 	now = jiffies;
349 	expires = 10*HZ;
350 	cp = &mfc_unres_queue;
351 
352 	while ((c=*cp) != NULL) {
353 		if (time_after(c->mfc_un.unres.expires, now)) {
354 			unsigned long interval = c->mfc_un.unres.expires - now;
355 			if (interval < expires)
356 				expires = interval;
357 			cp = &c->next;
358 			continue;
359 		}
360 
361 		*cp = c->next;
362 
363 		ipmr_destroy_unres(c);
364 	}
365 
366 	if (atomic_read(&cache_resolve_queue_len))
367 		mod_timer(&ipmr_expire_timer, jiffies + expires);
368 
369 out:
370 	spin_unlock(&mfc_unres_lock);
371 }
372 
373 /* Fill oifs list. It is called under write locked mrt_lock. */
374 
375 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
376 {
377 	int vifi;
378 
379 	cache->mfc_un.res.minvif = MAXVIFS;
380 	cache->mfc_un.res.maxvif = 0;
381 	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
382 
383 	for (vifi=0; vifi<maxvif; vifi++) {
384 		if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
385 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
386 			if (cache->mfc_un.res.minvif > vifi)
387 				cache->mfc_un.res.minvif = vifi;
388 			if (cache->mfc_un.res.maxvif <= vifi)
389 				cache->mfc_un.res.maxvif = vifi + 1;
390 		}
391 	}
392 }
393 
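/* Add a VIF: create or look up the underlying device according to
 * vifc_flags, enable multicast forwarding and allmulti on it, then fill
 * in vif_table[vifi] under mrt_lock. */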
394 static int vif_add(struct vifctl *vifc, int mrtsock)
395 {
396 	int vifi = vifc->vifc_vifi;
397 	struct vif_device *v = &vif_table[vifi];
398 	struct net_device *dev;
399 	struct in_device *in_dev;
400 
401 	/* Is vif busy ? */
402 	if (VIF_EXISTS(vifi))
403 		return -EADDRINUSE;
404 
405 	switch (vifc->vifc_flags) {
406 #ifdef CONFIG_IP_PIMSM
407 	case VIFF_REGISTER:
408 		/*
409 		 * Special Purpose VIF in PIM
410 		 * All the packets will be sent to the daemon
411 		 */
412 		if (reg_vif_num >= 0)
413 			return -EADDRINUSE;
414 		dev = ipmr_reg_vif();
415 		if (!dev)
416 			return -ENOBUFS;
417 		break;
418 #endif
419 	case VIFF_TUNNEL:
420 		dev = ipmr_new_tunnel(vifc);
421 		if (!dev)
422 			return -ENOBUFS;
423 		break;
424 	case 0:
425 		dev = ip_dev_find(vifc->vifc_lcl_addr.s_addr);
426 		if (!dev)
427 			return -EADDRNOTAVAIL;
428 		dev_put(dev);
429 		break;
430 	default:
431 		return -EINVAL;
432 	}
433 
434 	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
435 		return -EADDRNOTAVAIL;
436 	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
437 	dev_set_allmulti(dev, +1);
438 	ip_rt_multicast_event(in_dev);
439 
440 	/*
441 	 *	Fill in the VIF structures
442 	 */
443 	v->rate_limit=vifc->vifc_rate_limit;
444 	v->local=vifc->vifc_lcl_addr.s_addr;
445 	v->remote=vifc->vifc_rmt_addr.s_addr;
446 	v->flags=vifc->vifc_flags;
447 	if (!mrtsock)
448 		v->flags |= VIFF_STATIC;
449 	v->threshold=vifc->vifc_threshold;
450 	v->bytes_in = 0;
451 	v->bytes_out = 0;
452 	v->pkt_in = 0;
453 	v->pkt_out = 0;
454 	v->link = dev->ifindex;
455 	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
456 		v->link = dev->iflink;
457 
458 	/* And finish update writing critical data */
459 	write_lock_bh(&mrt_lock);
460 	dev_hold(dev);
461 	v->dev=dev;
462 #ifdef CONFIG_IP_PIMSM
463 	if (v->flags&VIFF_REGISTER)
464 		reg_vif_num = vifi;
465 #endif
466 	if (vifi+1 > maxvif)
467 		maxvif = vifi+1;
468 	write_unlock_bh(&mrt_lock);
469 	return 0;
470 }
471 
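/* Look up a resolved (origin, group) entry in the MFC hash; callers hold
 * mrt_lock. */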
472 static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
473 {
474 	int line=MFC_HASH(mcastgrp,origin);
475 	struct mfc_cache *c;
476 
477 	for (c=mfc_cache_array[line]; c; c = c->next) {
478 		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
479 			break;
480 	}
481 	return c;
482 }
483 
484 /*
485  *	Allocate a multicast cache entry
486  */
487 static struct mfc_cache *ipmr_cache_alloc(void)
488 {
489 	struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
490 	if (c==NULL)
491 		return NULL;
492 	c->mfc_un.res.minvif = MAXVIFS;
493 	return c;
494 }
495 
496 static struct mfc_cache *ipmr_cache_alloc_unres(void)
497 {
498 	struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
499 	if (c==NULL)
500 		return NULL;
501 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
502 	c->mfc_un.unres.expires = jiffies + 10*HZ;
503 	return c;
504 }
505 
506 /*
507  *	A cache entry has gone into a resolved state from queued
508  */
509 
510 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
511 {
512 	struct sk_buff *skb;
513 	struct nlmsgerr *e;
514 
515 	/*
516 	 *	Play the pending entries through our router
517 	 */
518 
519 	while ((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
520 		if (ip_hdr(skb)->version == 0) {
521 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
522 
523 			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
524 				nlh->nlmsg_len = (skb_tail_pointer(skb) -
525 						  (u8 *)nlh);
526 			} else {
527 				nlh->nlmsg_type = NLMSG_ERROR;
528 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
529 				skb_trim(skb, nlh->nlmsg_len);
530 				e = NLMSG_DATA(nlh);
531 				e->error = -EMSGSIZE;
532 				memset(&e->msg, 0, sizeof(e->msg));
533 			}
534 
535 			rtnl_unicast(skb, NETLINK_CB(skb).pid);
536 		} else
537 			ip_mr_forward(skb, c, 0);
538 	}
539 }
540 
541 /*
542  *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
543  *	expects the following bizarre scheme.
544  *
545  *	Called under mrt_lock.
546  */
547 
548 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
549 {
550 	struct sk_buff *skb;
551 	const int ihl = ip_hdrlen(pkt);
552 	struct igmphdr *igmp;
553 	struct igmpmsg *msg;
554 	int ret;
555 
556 #ifdef CONFIG_IP_PIMSM
557 	if (assert == IGMPMSG_WHOLEPKT)
558 		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
559 	else
560 #endif
561 		skb = alloc_skb(128, GFP_ATOMIC);
562 
563 	if (!skb)
564 		return -ENOBUFS;
565 
566 #ifdef CONFIG_IP_PIMSM
567 	if (assert == IGMPMSG_WHOLEPKT) {
568 		/* Ugly, but we have no choice with this interface.
569 		   Duplicate old header, fix ihl, length etc.
570 		   And all this only to mangle msg->im_msgtype and
571 		   to set msg->im_mbz to "mbz" :-)
572 		 */
573 		skb_push(skb, sizeof(struct iphdr));
574 		skb_reset_network_header(skb);
575 		skb_reset_transport_header(skb);
576 		msg = (struct igmpmsg *)skb_network_header(skb);
577 		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
578 		msg->im_msgtype = IGMPMSG_WHOLEPKT;
579 		msg->im_mbz = 0;
580 		msg->im_vif = reg_vif_num;
581 		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
582 		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
583 					     sizeof(struct iphdr));
584 	} else
585 #endif
586 	{
587 
588 	/*
589 	 *	Copy the IP header
590 	 */
591 
592 	skb->network_header = skb->tail;
593 	skb_put(skb, ihl);
594 	skb_copy_to_linear_data(skb, pkt->data, ihl);
595 	ip_hdr(skb)->protocol = 0;			/* Flag to the kernel this is a route add */
596 	msg = (struct igmpmsg *)skb_network_header(skb);
597 	msg->im_vif = vifi;
598 	skb->dst = dst_clone(pkt->dst);
599 
600 	/*
601 	 *	Add our header
602 	 */
603 
604 	igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
605 	igmp->type	=
606 	msg->im_msgtype = assert;
607 	igmp->code 	=	0;
608 	ip_hdr(skb)->tot_len = htons(skb->len);			/* Fix the length */
609 	skb->transport_header = skb->network_header;
610 	}
611 
612 	if (mroute_socket == NULL) {
613 		kfree_skb(skb);
614 		return -EINVAL;
615 	}
616 
617 	/*
618 	 *	Deliver to mrouted
619 	 */
620 	if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
621 		if (net_ratelimit())
622 			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
623 		kfree_skb(skb);
624 	}
625 
626 	return ret;
627 }
628 
629 /*
630  *	Queue a packet for resolution, creating an unresolved cache entry for it if necessary.
631  */
632 
633 static int
634 ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
635 {
636 	int err;
637 	struct mfc_cache *c;
638 	const struct iphdr *iph = ip_hdr(skb);
639 
640 	spin_lock_bh(&mfc_unres_lock);
641 	for (c=mfc_unres_queue; c; c=c->next) {
642 		if (c->mfc_mcastgrp == iph->daddr &&
643 		    c->mfc_origin == iph->saddr)
644 			break;
645 	}
646 
647 	if (c == NULL) {
648 		/*
649 		 *	Create a new entry if allowable
650 		 */
651 
652 		if (atomic_read(&cache_resolve_queue_len)>=10 ||
653 		    (c=ipmr_cache_alloc_unres())==NULL) {
654 			spin_unlock_bh(&mfc_unres_lock);
655 
656 			kfree_skb(skb);
657 			return -ENOBUFS;
658 		}
659 
660 		/*
661 		 *	Fill in the new cache entry
662 		 */
663 		c->mfc_parent	= -1;
664 		c->mfc_origin	= iph->saddr;
665 		c->mfc_mcastgrp	= iph->daddr;
666 
667 		/*
668 		 *	Reflect first query at mrouted.
669 		 */
670 		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
671 			/* If the report failed throw the cache entry
672 			   out - Brad Parker
673 			 */
674 			spin_unlock_bh(&mfc_unres_lock);
675 
676 			kmem_cache_free(mrt_cachep, c);
677 			kfree_skb(skb);
678 			return err;
679 		}
680 
681 		atomic_inc(&cache_resolve_queue_len);
682 		c->next = mfc_unres_queue;
683 		mfc_unres_queue = c;
684 
685 		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
686 	}
687 
688 	/*
689 	 *	See if we can append the packet
690 	 */
691 	if (c->mfc_un.unres.unresolved.qlen>3) {
692 		kfree_skb(skb);
693 		err = -ENOBUFS;
694 	} else {
695 		skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
696 		err = 0;
697 	}
698 
699 	spin_unlock_bh(&mfc_unres_lock);
700 	return err;
701 }
702 
703 /*
704  *	MFC cache manipulation by user space mroute daemon
705  */
706 
707 static int ipmr_mfc_delete(struct mfcctl *mfc)
708 {
709 	int line;
710 	struct mfc_cache *c, **cp;
711 
712 	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
713 
714 	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
715 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
716 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
717 			write_lock_bh(&mrt_lock);
718 			*cp = c->next;
719 			write_unlock_bh(&mrt_lock);
720 
721 			kmem_cache_free(mrt_cachep, c);
722 			return 0;
723 		}
724 	}
725 	return -ENOENT;
726 }
727 
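/* Add or update an (origin, group) forwarding entry. If an unresolved
 * entry for the same pair is queued, resolve it and replay its pending
 * packets. */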
728 static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
729 {
730 	int line;
731 	struct mfc_cache *uc, *c, **cp;
732 
733 	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
734 
735 	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
736 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
737 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
738 			break;
739 	}
740 
741 	if (c != NULL) {
742 		write_lock_bh(&mrt_lock);
743 		c->mfc_parent = mfc->mfcc_parent;
744 		ipmr_update_thresholds(c, mfc->mfcc_ttls);
745 		if (!mrtsock)
746 			c->mfc_flags |= MFC_STATIC;
747 		write_unlock_bh(&mrt_lock);
748 		return 0;
749 	}
750 
751 	if (!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
752 		return -EINVAL;
753 
754 	c=ipmr_cache_alloc();
755 	if (c==NULL)
756 		return -ENOMEM;
757 
758 	c->mfc_origin=mfc->mfcc_origin.s_addr;
759 	c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
760 	c->mfc_parent=mfc->mfcc_parent;
761 	ipmr_update_thresholds(c, mfc->mfcc_ttls);
762 	if (!mrtsock)
763 		c->mfc_flags |= MFC_STATIC;
764 
765 	write_lock_bh(&mrt_lock);
766 	c->next = mfc_cache_array[line];
767 	mfc_cache_array[line] = c;
768 	write_unlock_bh(&mrt_lock);
769 
770 	/*
771 	 *	Check to see if we resolved a queued list. If so we
772 	 *	need to send on the frames and tidy up.
773 	 */
774 	spin_lock_bh(&mfc_unres_lock);
775 	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
776 	     cp = &uc->next) {
777 		if (uc->mfc_origin == c->mfc_origin &&
778 		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
779 			*cp = uc->next;
780 			if (atomic_dec_and_test(&cache_resolve_queue_len))
781 				del_timer(&ipmr_expire_timer);
782 			break;
783 		}
784 	}
785 	spin_unlock_bh(&mfc_unres_lock);
786 
787 	if (uc) {
788 		ipmr_cache_resolve(uc, c);
789 		kmem_cache_free(mrt_cachep, uc);
790 	}
791 	return 0;
792 }
793 
794 /*
795  *	Close the multicast socket, and clear the vif tables etc
796  */
797 
798 static void mroute_clean_tables(struct sock *sk)
799 {
800 	int i;
801 
802 	/*
803 	 *	Shut down all active vif entries
804 	 */
805 	for (i=0; i<maxvif; i++) {
806 		if (!(vif_table[i].flags&VIFF_STATIC))
807 			vif_delete(i);
808 	}
809 
810 	/*
811 	 *	Wipe the cache
812 	 */
813 	for (i=0;i<MFC_LINES;i++) {
814 		struct mfc_cache *c, **cp;
815 
816 		cp = &mfc_cache_array[i];
817 		while ((c = *cp) != NULL) {
818 			if (c->mfc_flags&MFC_STATIC) {
819 				cp = &c->next;
820 				continue;
821 			}
822 			write_lock_bh(&mrt_lock);
823 			*cp = c->next;
824 			write_unlock_bh(&mrt_lock);
825 
826 			kmem_cache_free(mrt_cachep, c);
827 		}
828 	}
829 
830 	if (atomic_read(&cache_resolve_queue_len) != 0) {
831 		struct mfc_cache *c;
832 
833 		spin_lock_bh(&mfc_unres_lock);
834 		while (mfc_unres_queue != NULL) {
835 			c = mfc_unres_queue;
836 			mfc_unres_queue = c->next;
837 			spin_unlock_bh(&mfc_unres_lock);
838 
839 			ipmr_destroy_unres(c);
840 
841 			spin_lock_bh(&mfc_unres_lock);
842 		}
843 		spin_unlock_bh(&mfc_unres_lock);
844 	}
845 }
846 
847 static void mrtsock_destruct(struct sock *sk)
848 {
849 	rtnl_lock();
850 	if (sk == mroute_socket) {
851 		IPV4_DEVCONF_ALL(MC_FORWARDING)--;
852 
853 		write_lock_bh(&mrt_lock);
854 		mroute_socket=NULL;
855 		write_unlock_bh(&mrt_lock);
856 
857 		mroute_clean_tables(sk);
858 	}
859 	rtnl_unlock();
860 }
861 
862 /*
863  *	Socket options and virtual interface manipulation. The whole
864  *	virtual interface system is a complete heap, but unfortunately
865  *	that's how BSD mrouted happens to think. Maybe one day with a proper
866  *	MOSPF/PIM router set up we can clean this up.
867  */
868 
869 int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen)
870 {
871 	int ret;
872 	struct vifctl vif;
873 	struct mfcctl mfc;
874 
875 	if (optname != MRT_INIT) {
876 		if (sk != mroute_socket && !capable(CAP_NET_ADMIN))
877 			return -EACCES;
878 	}
879 
880 	switch (optname) {
881 	case MRT_INIT:
882 		if (sk->sk_type != SOCK_RAW ||
883 		    inet_sk(sk)->num != IPPROTO_IGMP)
884 			return -EOPNOTSUPP;
885 		if (optlen!=sizeof(int))
886 			return -ENOPROTOOPT;
887 
888 		rtnl_lock();
889 		if (mroute_socket) {
890 			rtnl_unlock();
891 			return -EADDRINUSE;
892 		}
893 
894 		ret = ip_ra_control(sk, 1, mrtsock_destruct);
895 		if (ret == 0) {
896 			write_lock_bh(&mrt_lock);
897 			mroute_socket=sk;
898 			write_unlock_bh(&mrt_lock);
899 
900 			IPV4_DEVCONF_ALL(MC_FORWARDING)++;
901 		}
902 		rtnl_unlock();
903 		return ret;
904 	case MRT_DONE:
905 		if (sk!=mroute_socket)
906 			return -EACCES;
907 		return ip_ra_control(sk, 0, NULL);
908 	case MRT_ADD_VIF:
909 	case MRT_DEL_VIF:
910 		if (optlen!=sizeof(vif))
911 			return -EINVAL;
912 		if (copy_from_user(&vif,optval,sizeof(vif)))
913 			return -EFAULT;
914 		if (vif.vifc_vifi >= MAXVIFS)
915 			return -ENFILE;
916 		rtnl_lock();
917 		if (optname==MRT_ADD_VIF) {
918 			ret = vif_add(&vif, sk==mroute_socket);
919 		} else {
920 			ret = vif_delete(vif.vifc_vifi);
921 		}
922 		rtnl_unlock();
923 		return ret;
924 
925 		/*
926 		 *	Manipulate the forwarding caches. These live
927 		 *	in a sort of kernel/user symbiosis.
928 		 */
929 	case MRT_ADD_MFC:
930 	case MRT_DEL_MFC:
931 		if (optlen!=sizeof(mfc))
932 			return -EINVAL;
933 		if (copy_from_user(&mfc,optval, sizeof(mfc)))
934 			return -EFAULT;
935 		rtnl_lock();
936 		if (optname==MRT_DEL_MFC)
937 			ret = ipmr_mfc_delete(&mfc);
938 		else
939 			ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
940 		rtnl_unlock();
941 		return ret;
942 		/*
943 		 *	Control PIM assert.
944 		 */
945 	case MRT_ASSERT:
946 	{
947 		int v;
948 		if (get_user(v,(int __user *)optval))
949 			return -EFAULT;
950 		mroute_do_assert=(v)?1:0;
951 		return 0;
952 	}
953 #ifdef CONFIG_IP_PIMSM
954 	case MRT_PIM:
955 	{
956 		int v, ret;
957 		if (get_user(v,(int __user *)optval))
958 			return -EFAULT;
959 		v = (v)?1:0;
960 		rtnl_lock();
961 		ret = 0;
962 		if (v != mroute_do_pim) {
963 			mroute_do_pim = v;
964 			mroute_do_assert = v;
965 #ifdef CONFIG_IP_PIMSM_V2
966 			if (mroute_do_pim)
967 				ret = inet_add_protocol(&pim_protocol,
968 							IPPROTO_PIM);
969 			else
970 				ret = inet_del_protocol(&pim_protocol,
971 							IPPROTO_PIM);
972 			if (ret < 0)
973 				ret = -EAGAIN;
974 #endif
975 		}
976 		rtnl_unlock();
977 		return ret;
978 	}
979 #endif
980 	/*
981 	 *	Spurious command, or MRT_VERSION which you cannot
982 	 *	set.
983 	 */
984 	default:
985 		return -ENOPROTOOPT;
986 	}
987 }
988 
989 /*
990  *	Getsock opt support for the multicast routing system.
991  */
992 
993 int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen)
994 {
995 	int olr;
996 	int val;
997 
998 	if (optname!=MRT_VERSION &&
999 #ifdef CONFIG_IP_PIMSM
1000 	   optname!=MRT_PIM &&
1001 #endif
1002 	   optname!=MRT_ASSERT)
1003 		return -ENOPROTOOPT;
1004 
1005 	if (get_user(olr, optlen))
1006 		return -EFAULT;
1007 
1008 	olr = min_t(unsigned int, olr, sizeof(int));
1009 	if (olr < 0)
1010 		return -EINVAL;
1011 
1012 	if (put_user(olr,optlen))
1013 		return -EFAULT;
1014 	if (optname==MRT_VERSION)
1015 		val=0x0305;
1016 #ifdef CONFIG_IP_PIMSM
1017 	else if (optname==MRT_PIM)
1018 		val=mroute_do_pim;
1019 #endif
1020 	else
1021 		val=mroute_do_assert;
1022 	if (copy_to_user(optval,&val,olr))
1023 		return -EFAULT;
1024 	return 0;
1025 }
1026 
1027 /*
1028  *	The IP multicast ioctl support routines.
1029  */
1030 
1031 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1032 {
1033 	struct sioc_sg_req sr;
1034 	struct sioc_vif_req vr;
1035 	struct vif_device *vif;
1036 	struct mfc_cache *c;
1037 
1038 	switch (cmd) {
1039 	case SIOCGETVIFCNT:
1040 		if (copy_from_user(&vr,arg,sizeof(vr)))
1041 			return -EFAULT;
1042 		if (vr.vifi>=maxvif)
1043 			return -EINVAL;
1044 		read_lock(&mrt_lock);
1045 		vif=&vif_table[vr.vifi];
1046 		if (VIF_EXISTS(vr.vifi))	{
1047 			vr.icount=vif->pkt_in;
1048 			vr.ocount=vif->pkt_out;
1049 			vr.ibytes=vif->bytes_in;
1050 			vr.obytes=vif->bytes_out;
1051 			read_unlock(&mrt_lock);
1052 
1053 			if (copy_to_user(arg,&vr,sizeof(vr)))
1054 				return -EFAULT;
1055 			return 0;
1056 		}
1057 		read_unlock(&mrt_lock);
1058 		return -EADDRNOTAVAIL;
1059 	case SIOCGETSGCNT:
1060 		if (copy_from_user(&sr,arg,sizeof(sr)))
1061 			return -EFAULT;
1062 
1063 		read_lock(&mrt_lock);
1064 		c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
1065 		if (c) {
1066 			sr.pktcnt = c->mfc_un.res.pkt;
1067 			sr.bytecnt = c->mfc_un.res.bytes;
1068 			sr.wrong_if = c->mfc_un.res.wrong_if;
1069 			read_unlock(&mrt_lock);
1070 
1071 			if (copy_to_user(arg,&sr,sizeof(sr)))
1072 				return -EFAULT;
1073 			return 0;
1074 		}
1075 		read_unlock(&mrt_lock);
1076 		return -EADDRNOTAVAIL;
1077 	default:
1078 		return -ENOIOCTLCMD;
1079 	}
1080 }
1081 
1082 
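/* netdevice notifier: delete any VIF whose underlying device is being
 * unregistered. */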
1083 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1084 {
1085 	struct vif_device *v;
1086 	int ct;
1087 	if (event != NETDEV_UNREGISTER)
1088 		return NOTIFY_DONE;
1089 	v=&vif_table[0];
1090 	for (ct=0;ct<maxvif;ct++,v++) {
1091 		if (v->dev==ptr)
1092 			vif_delete(ct);
1093 	}
1094 	return NOTIFY_DONE;
1095 }
1096 
1097 
1098 static struct notifier_block ip_mr_notifier={
1099 	.notifier_call = ipmr_device_event,
1100 };
1101 
1102 /*
1103  * 	Encapsulate a packet by attaching a valid IPIP header to it.
1104  *	This avoids tunnel drivers and other mess and gives us the speed so
1105  *	important for multicast video.
1106  */
1107 
1108 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1109 {
1110 	struct iphdr *iph;
1111 	struct iphdr *old_iph = ip_hdr(skb);
1112 
1113 	skb_push(skb, sizeof(struct iphdr));
1114 	skb->transport_header = skb->network_header;
1115 	skb_reset_network_header(skb);
1116 	iph = ip_hdr(skb);
1117 
1118 	iph->version	= 	4;
1119 	iph->tos	=	old_iph->tos;
1120 	iph->ttl	=	old_iph->ttl;
1121 	iph->frag_off	=	0;
1122 	iph->daddr	=	daddr;
1123 	iph->saddr	=	saddr;
1124 	iph->protocol	=	IPPROTO_IPIP;
1125 	iph->ihl	=	5;
1126 	iph->tot_len	=	htons(skb->len);
1127 	ip_select_ident(iph, skb->dst, NULL);
1128 	ip_send_check(iph);
1129 
1130 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1131 	nf_reset(skb);
1132 }
1133 
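/* Final output step: count the forwarded datagram, process any IP options
 * and pass the skb to dst_output(). */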
1134 static inline int ipmr_forward_finish(struct sk_buff *skb)
1135 {
1136 	struct ip_options * opt	= &(IPCB(skb)->opt);
1137 
1138 	IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
1139 
1140 	if (unlikely(opt->optlen))
1141 		ip_forward_options(skb);
1142 
1143 	return dst_output(skb);
1144 }
1145 
1146 /*
1147  *	Processing handlers for ipmr_forward
1148  */
1149 
1150 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1151 {
1152 	const struct iphdr *iph = ip_hdr(skb);
1153 	struct vif_device *vif = &vif_table[vifi];
1154 	struct net_device *dev;
1155 	struct rtable *rt;
1156 	int    encap = 0;
1157 
1158 	if (vif->dev == NULL)
1159 		goto out_free;
1160 
1161 #ifdef CONFIG_IP_PIMSM
1162 	if (vif->flags & VIFF_REGISTER) {
1163 		vif->pkt_out++;
1164 		vif->bytes_out+=skb->len;
1165 		((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len;
1166 		((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++;
1167 		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1168 		kfree_skb(skb);
1169 		return;
1170 	}
1171 #endif
1172 
1173 	if (vif->flags&VIFF_TUNNEL) {
1174 		struct flowi fl = { .oif = vif->link,
1175 				    .nl_u = { .ip4_u =
1176 					      { .daddr = vif->remote,
1177 						.saddr = vif->local,
1178 						.tos = RT_TOS(iph->tos) } },
1179 				    .proto = IPPROTO_IPIP };
1180 		if (ip_route_output_key(&rt, &fl))
1181 			goto out_free;
1182 		encap = sizeof(struct iphdr);
1183 	} else {
1184 		struct flowi fl = { .oif = vif->link,
1185 				    .nl_u = { .ip4_u =
1186 					      { .daddr = iph->daddr,
1187 						.tos = RT_TOS(iph->tos) } },
1188 				    .proto = IPPROTO_IPIP };
1189 		if (ip_route_output_key(&rt, &fl))
1190 			goto out_free;
1191 	}
1192 
1193 	dev = rt->u.dst.dev;
1194 
1195 	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1196 		/* Do not fragment multicasts. Alas, IPv4 does not
1197 		   allow us to send ICMP here, so oversized packets simply
1198 		   disappear into a black hole.
1199 		 */
1200 
1201 		IP_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
1202 		ip_rt_put(rt);
1203 		goto out_free;
1204 	}
1205 
1206 	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1207 
1208 	if (skb_cow(skb, encap)) {
1209 		ip_rt_put(rt);
1210 		goto out_free;
1211 	}
1212 
1213 	vif->pkt_out++;
1214 	vif->bytes_out+=skb->len;
1215 
1216 	dst_release(skb->dst);
1217 	skb->dst = &rt->u.dst;
1218 	ip_decrease_ttl(ip_hdr(skb));
1219 
1220 	/* FIXME: forward and output firewalls used to be called here.
1221 	 * What do we do with netfilter? -- RR */
1222 	if (vif->flags & VIFF_TUNNEL) {
1223 		ip_encap(skb, vif->local, vif->remote);
1224 		/* FIXME: extra output firewall step used to be here. --RR */
1225 		((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++;
1226 		((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len;
1227 	}
1228 
1229 	IPCB(skb)->flags |= IPSKB_FORWARDED;
1230 
1231 	/*
1232 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1233 	 * not only before forwarding, but also after forwarding on all output
1234 	 * interfaces. Clearly, if the mrouter runs a multicast application,
1235 	 * that application should receive packets no matter which interface
1236 	 * it joined on.
1237 	 * If we did not do this, the application would have to join on all
1238 	 * interfaces. On the other hand, a multihomed host (or router, but
1239 	 * not an mrouter) cannot join on more than one interface - it would
1240 	 * end up receiving duplicate packets.
1241 	 */
1242 	NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, dev,
1243 		ipmr_forward_finish);
1244 	return;
1245 
1246 out_free:
1247 	kfree_skb(skb);
1248 	return;
1249 }
1250 
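/* Map a network device back to its VIF index; returns -1 if the device is
 * not a VIF. */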
1251 static int ipmr_find_vif(struct net_device *dev)
1252 {
1253 	int ct;
1254 	for (ct=maxvif-1; ct>=0; ct--) {
1255 		if (vif_table[ct].dev == dev)
1256 			break;
1257 	}
1258 	return ct;
1259 }
1260 
1261 /* "local" means that we should preserve one skb (for local delivery) */
1262 
1263 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1264 {
1265 	int psend = -1;
1266 	int vif, ct;
1267 
1268 	vif = cache->mfc_parent;
1269 	cache->mfc_un.res.pkt++;
1270 	cache->mfc_un.res.bytes += skb->len;
1271 
1272 	/*
1273 	 * Wrong interface: drop packet and (maybe) send PIM assert.
1274 	 */
1275 	if (vif_table[vif].dev != skb->dev) {
1276 		int true_vifi;
1277 
1278 		if (((struct rtable*)skb->dst)->fl.iif == 0) {
1279 			/* It is our own packet, looped back.
1280 			   Very complicated situation...
1281 
1282 			   The best workaround, until the routing daemons are
1283 			   fixed, is not to redistribute a packet if it was
1284 			   sent through the wrong interface. This means that
1285 			   multicast applications WILL NOT work for an
1286 			   (S,G) whose default multicast route points to the
1287 			   wrong oif. In any case, it is not a good idea to
1288 			   run multicast applications on a router.
1289 			 */
1290 			goto dont_forward;
1291 		}
1292 
1293 		cache->mfc_un.res.wrong_if++;
1294 		true_vifi = ipmr_find_vif(skb->dev);
1295 
1296 		if (true_vifi >= 0 && mroute_do_assert &&
1297 		    /* PIM-SM uses asserts when switching from the RPT to the SPT,
1298 		       so we cannot check that the packet arrived on an oif.
1299 		       That is bad, but otherwise we would need to move a pretty
1300 		       large chunk of pimd into the kernel. Ough... --ANK
1301 		     */
1302 		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
1303 		    time_after(jiffies,
1304 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1305 			cache->mfc_un.res.last_assert = jiffies;
1306 			ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
1307 		}
1308 		goto dont_forward;
1309 	}
1310 
1311 	vif_table[vif].pkt_in++;
1312 	vif_table[vif].bytes_in+=skb->len;
1313 
1314 	/*
1315 	 *	Forward the frame
1316 	 */
1317 	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1318 		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1319 			if (psend != -1) {
1320 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1321 				if (skb2)
1322 					ipmr_queue_xmit(skb2, cache, psend);
1323 			}
1324 			psend=ct;
1325 		}
1326 	}
1327 	if (psend != -1) {
1328 		if (local) {
1329 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1330 			if (skb2)
1331 				ipmr_queue_xmit(skb2, cache, psend);
1332 		} else {
1333 			ipmr_queue_xmit(skb, cache, psend);
1334 			return 0;
1335 		}
1336 	}
1337 
1338 dont_forward:
1339 	if (!local)
1340 		kfree_skb(skb);
1341 	return 0;
1342 }
1343 
1344 
1345 /*
1346  *	Multicast packets for forwarding arrive here
1347  */
1348 
1349 int ip_mr_input(struct sk_buff *skb)
1350 {
1351 	struct mfc_cache *cache;
1352 	int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;
1353 
1354 	/* Packet is looped back after forwarding; it should not be
1355 	   forwarded a second time, but it can still be delivered locally.
1356 	 */
1357 	if (IPCB(skb)->flags&IPSKB_FORWARDED)
1358 		goto dont_forward;
1359 
1360 	if (!local) {
1361 		    if (IPCB(skb)->opt.router_alert) {
1362 			    if (ip_call_ra_chain(skb))
1363 				    return 0;
1364 		    } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1365 			    /* IGMPv1 (and broken IGMPv2 implementations such as
1366 			       Cisco IOS <= 11.2(8)) do not put the router alert
1367 			       option into IGMP packets destined to routable
1368 			       groups. It is very bad, because it means
1369 			       that we can forward NO IGMP messages.
1370 			     */
1371 			    read_lock(&mrt_lock);
1372 			    if (mroute_socket) {
1373 				    nf_reset(skb);
1374 				    raw_rcv(mroute_socket, skb);
1375 				    read_unlock(&mrt_lock);
1376 				    return 0;
1377 			    }
1378 			    read_unlock(&mrt_lock);
1379 		    }
1380 	}
1381 
1382 	read_lock(&mrt_lock);
1383 	cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1384 
1385 	/*
1386 	 *	No usable cache entry
1387 	 */
1388 	if (cache==NULL) {
1389 		int vif;
1390 
1391 		if (local) {
1392 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1393 			ip_local_deliver(skb);
1394 			if (skb2 == NULL) {
1395 				read_unlock(&mrt_lock);
1396 				return -ENOBUFS;
1397 			}
1398 			skb = skb2;
1399 		}
1400 
1401 		vif = ipmr_find_vif(skb->dev);
1402 		if (vif >= 0) {
1403 			int err = ipmr_cache_unresolved(vif, skb);
1404 			read_unlock(&mrt_lock);
1405 
1406 			return err;
1407 		}
1408 		read_unlock(&mrt_lock);
1409 		kfree_skb(skb);
1410 		return -ENODEV;
1411 	}
1412 
1413 	ip_mr_forward(skb, cache, local);
1414 
1415 	read_unlock(&mrt_lock);
1416 
1417 	if (local)
1418 		return ip_local_deliver(skb);
1419 
1420 	return 0;
1421 
1422 dont_forward:
1423 	if (local)
1424 		return ip_local_deliver(skb);
1425 	kfree_skb(skb);
1426 	return 0;
1427 }
1428 
1429 #ifdef CONFIG_IP_PIMSM_V1
1430 /*
1431  * Handle IGMP messages of PIMv1
1432  */
1433 
1434 int pim_rcv_v1(struct sk_buff * skb)
1435 {
1436 	struct igmphdr *pim;
1437 	struct iphdr   *encap;
1438 	struct net_device  *reg_dev = NULL;
1439 
1440 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
1441 		goto drop;
1442 
1443 	pim = igmp_hdr(skb);
1444 
1445 	if (!mroute_do_pim ||
1446 	    skb->len < sizeof(*pim) + sizeof(*encap) ||
1447 	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1448 		goto drop;
1449 
1450 	encap = (struct iphdr *)(skb_transport_header(skb) +
1451 				 sizeof(struct igmphdr));
1452 	/*
1453 	   Check that:
1454 	   a. packet is really destined to a multicast group
1455 	   b. packet is not a NULL-REGISTER
1456 	   c. packet is not truncated
1457 	 */
1458 	if (!MULTICAST(encap->daddr) ||
1459 	    encap->tot_len == 0 ||
1460 	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
1461 		goto drop;
1462 
1463 	read_lock(&mrt_lock);
1464 	if (reg_vif_num >= 0)
1465 		reg_dev = vif_table[reg_vif_num].dev;
1466 	if (reg_dev)
1467 		dev_hold(reg_dev);
1468 	read_unlock(&mrt_lock);
1469 
1470 	if (reg_dev == NULL)
1471 		goto drop;
1472 
1473 	skb->mac_header = skb->network_header;
1474 	skb_pull(skb, (u8*)encap - skb->data);
1475 	skb_reset_network_header(skb);
1476 	skb->dev = reg_dev;
1477 	skb->protocol = htons(ETH_P_IP);
1478 	skb->ip_summed = 0;
1479 	skb->pkt_type = PACKET_HOST;
1480 	dst_release(skb->dst);
1481 	skb->dst = NULL;
1482 	((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
1483 	((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
1484 	nf_reset(skb);
1485 	netif_rx(skb);
1486 	dev_put(reg_dev);
1487 	return 0;
1488  drop:
1489 	kfree_skb(skb);
1490 	return 0;
1491 }
1492 #endif
1493 
1494 #ifdef CONFIG_IP_PIMSM_V2
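/* Handle PIMv2 REGISTER messages: validate type, flags and checksum, then
 * re-inject the encapsulated multicast packet on the pimreg device. */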
1495 static int pim_rcv(struct sk_buff * skb)
1496 {
1497 	struct pimreghdr *pim;
1498 	struct iphdr   *encap;
1499 	struct net_device  *reg_dev = NULL;
1500 
1501 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
1502 		goto drop;
1503 
1504 	pim = (struct pimreghdr *)skb_transport_header(skb);
1505 	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1506 	    (pim->flags&PIM_NULL_REGISTER) ||
1507 	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1508 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1509 		goto drop;
1510 
1511 	/* check if the inner packet is destined to mcast group */
1512 	encap = (struct iphdr *)(skb_transport_header(skb) +
1513 				 sizeof(struct pimreghdr));
1514 	if (!MULTICAST(encap->daddr) ||
1515 	    encap->tot_len == 0 ||
1516 	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
1517 		goto drop;
1518 
1519 	read_lock(&mrt_lock);
1520 	if (reg_vif_num >= 0)
1521 		reg_dev = vif_table[reg_vif_num].dev;
1522 	if (reg_dev)
1523 		dev_hold(reg_dev);
1524 	read_unlock(&mrt_lock);
1525 
1526 	if (reg_dev == NULL)
1527 		goto drop;
1528 
1529 	skb->mac_header = skb->network_header;
1530 	skb_pull(skb, (u8*)encap - skb->data);
1531 	skb_reset_network_header(skb);
1532 	skb->dev = reg_dev;
1533 	skb->protocol = htons(ETH_P_IP);
1534 	skb->ip_summed = 0;
1535 	skb->pkt_type = PACKET_HOST;
1536 	dst_release(skb->dst);
1537 	((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
1538 	((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
1539 	skb->dst = NULL;
1540 	nf_reset(skb);
1541 	netif_rx(skb);
1542 	dev_put(reg_dev);
1543 	return 0;
1544  drop:
1545 	kfree_skb(skb);
1546 	return 0;
1547 }
1548 #endif
1549 
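/* Fill a routing netlink reply with the MFC state: RTA_IIF for the parent
 * interface and an RTA_MULTIPATH list of output interfaces with their TTL
 * thresholds. */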
1550 static int
1551 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1552 {
1553 	int ct;
1554 	struct rtnexthop *nhp;
1555 	struct net_device *dev = vif_table[c->mfc_parent].dev;
1556 	u8 *b = skb_tail_pointer(skb);
1557 	struct rtattr *mp_head;
1558 
1559 	if (dev)
1560 		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1561 
1562 	mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));
1563 
1564 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1565 		if (c->mfc_un.res.ttls[ct] < 255) {
1566 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1567 				goto rtattr_failure;
1568 			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1569 			nhp->rtnh_flags = 0;
1570 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1571 			nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
1572 			nhp->rtnh_len = sizeof(*nhp);
1573 		}
1574 	}
1575 	mp_head->rta_type = RTA_MULTIPATH;
1576 	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1577 	rtm->rtm_type = RTN_MULTICAST;
1578 	return 1;
1579 
1580 rtattr_failure:
1581 	nlmsg_trim(skb, b);
1582 	return -EMSGSIZE;
1583 }
1584 
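/* rtnetlink route lookup. If no MFC entry exists yet, build a version-0
 * pseudo IP header carrying the query and queue it on the unresolved list
 * so the multicast routing daemon can resolve it. */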
1585 int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1586 {
1587 	int err;
1588 	struct mfc_cache *cache;
1589 	struct rtable *rt = (struct rtable*)skb->dst;
1590 
1591 	read_lock(&mrt_lock);
1592 	cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1593 
1594 	if (cache==NULL) {
1595 		struct sk_buff *skb2;
1596 		struct iphdr *iph;
1597 		struct net_device *dev;
1598 		int vif;
1599 
1600 		if (nowait) {
1601 			read_unlock(&mrt_lock);
1602 			return -EAGAIN;
1603 		}
1604 
1605 		dev = skb->dev;
1606 		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1607 			read_unlock(&mrt_lock);
1608 			return -ENODEV;
1609 		}
1610 		skb2 = skb_clone(skb, GFP_ATOMIC);
1611 		if (!skb2) {
1612 			read_unlock(&mrt_lock);
1613 			return -ENOMEM;
1614 		}
1615 
1616 		skb_push(skb2, sizeof(struct iphdr));
1617 		skb_reset_network_header(skb2);
1618 		iph = ip_hdr(skb2);
1619 		iph->ihl = sizeof(struct iphdr) >> 2;
1620 		iph->saddr = rt->rt_src;
1621 		iph->daddr = rt->rt_dst;
1622 		iph->version = 0;
1623 		err = ipmr_cache_unresolved(vif, skb2);
1624 		read_unlock(&mrt_lock);
1625 		return err;
1626 	}
1627 
1628 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1629 		cache->mfc_flags |= MFC_NOTIFY;
1630 	err = ipmr_fill_mroute(skb, cache, rtm);
1631 	read_unlock(&mrt_lock);
1632 	return err;
1633 }
1634 
1635 #ifdef CONFIG_PROC_FS
1636 /*
1637  *	The /proc interfaces to multicast routing: /proc/net/ip_mr_vif and /proc/net/ip_mr_cache
1638  */
1639 struct ipmr_vif_iter {
1640 	int ct;
1641 };
1642 
1643 static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1644 					   loff_t pos)
1645 {
1646 	for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
1647 		if (!VIF_EXISTS(iter->ct))
1648 			continue;
1649 		if (pos-- == 0)
1650 			return &vif_table[iter->ct];
1651 	}
1652 	return NULL;
1653 }
1654 
1655 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1656 {
1657 	read_lock(&mrt_lock);
1658 	return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
1659 		: SEQ_START_TOKEN;
1660 }
1661 
1662 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1663 {
1664 	struct ipmr_vif_iter *iter = seq->private;
1665 
1666 	++*pos;
1667 	if (v == SEQ_START_TOKEN)
1668 		return ipmr_vif_seq_idx(iter, 0);
1669 
1670 	while (++iter->ct < maxvif) {
1671 		if (!VIF_EXISTS(iter->ct))
1672 			continue;
1673 		return &vif_table[iter->ct];
1674 	}
1675 	return NULL;
1676 }
1677 
1678 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1679 {
1680 	read_unlock(&mrt_lock);
1681 }
1682 
1683 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1684 {
1685 	if (v == SEQ_START_TOKEN) {
1686 		seq_puts(seq,
1687 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1688 	} else {
1689 		const struct vif_device *vif = v;
1690 		const char *name =  vif->dev ? vif->dev->name : "none";
1691 
1692 		seq_printf(seq,
1693 			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1694 			   vif - vif_table,
1695 			   name, vif->bytes_in, vif->pkt_in,
1696 			   vif->bytes_out, vif->pkt_out,
1697 			   vif->flags, vif->local, vif->remote);
1698 	}
1699 	return 0;
1700 }
1701 
1702 static const struct seq_operations ipmr_vif_seq_ops = {
1703 	.start = ipmr_vif_seq_start,
1704 	.next  = ipmr_vif_seq_next,
1705 	.stop  = ipmr_vif_seq_stop,
1706 	.show  = ipmr_vif_seq_show,
1707 };
1708 
1709 static int ipmr_vif_open(struct inode *inode, struct file *file)
1710 {
1711 	struct seq_file *seq;
1712 	int rc = -ENOMEM;
1713 	struct ipmr_vif_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
1714 
1715 	if (!s)
1716 		goto out;
1717 
1718 	rc = seq_open(file, &ipmr_vif_seq_ops);
1719 	if (rc)
1720 		goto out_kfree;
1721 
1722 	s->ct = 0;
1723 	seq = file->private_data;
1724 	seq->private = s;
1725 out:
1726 	return rc;
1727 out_kfree:
1728 	kfree(s);
1729 	goto out;
1730 
1731 }
1732 
1733 static const struct file_operations ipmr_vif_fops = {
1734 	.owner	 = THIS_MODULE,
1735 	.open    = ipmr_vif_open,
1736 	.read    = seq_read,
1737 	.llseek  = seq_lseek,
1738 	.release = seq_release_private,
1739 };
1740 
1741 struct ipmr_mfc_iter {
1742 	struct mfc_cache **cache;
1743 	int ct;
1744 };
1745 
1746 
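/* Position the /proc/net/ip_mr_cache iterator: walk the resolved hash
 * under mrt_lock, then the unresolved queue under mfc_unres_lock. */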
1747 static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1748 {
1749 	struct mfc_cache *mfc;
1750 
1751 	it->cache = mfc_cache_array;
1752 	read_lock(&mrt_lock);
1753 	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1754 		for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
1755 			if (pos-- == 0)
1756 				return mfc;
1757 	read_unlock(&mrt_lock);
1758 
1759 	it->cache = &mfc_unres_queue;
1760 	spin_lock_bh(&mfc_unres_lock);
1761 	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1762 		if (pos-- == 0)
1763 			return mfc;
1764 	spin_unlock_bh(&mfc_unres_lock);
1765 
1766 	it->cache = NULL;
1767 	return NULL;
1768 }
1769 
1770 
1771 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1772 {
1773 	struct ipmr_mfc_iter *it = seq->private;
1774 	it->cache = NULL;
1775 	it->ct = 0;
1776 	return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
1777 		: SEQ_START_TOKEN;
1778 }
1779 
1780 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1781 {
1782 	struct mfc_cache *mfc = v;
1783 	struct ipmr_mfc_iter *it = seq->private;
1784 
1785 	++*pos;
1786 
1787 	if (v == SEQ_START_TOKEN)
1788 		return ipmr_mfc_seq_idx(seq->private, 0);
1789 
1790 	if (mfc->next)
1791 		return mfc->next;
1792 
1793 	if (it->cache == &mfc_unres_queue)
1794 		goto end_of_list;
1795 
1796 	BUG_ON(it->cache != mfc_cache_array);
1797 
1798 	while (++it->ct < MFC_LINES) {
1799 		mfc = mfc_cache_array[it->ct];
1800 		if (mfc)
1801 			return mfc;
1802 	}
1803 
1804 	/* exhausted cache_array, show unresolved */
1805 	read_unlock(&mrt_lock);
1806 	it->cache = &mfc_unres_queue;
1807 	it->ct = 0;
1808 
1809 	spin_lock_bh(&mfc_unres_lock);
1810 	mfc = mfc_unres_queue;
1811 	if (mfc)
1812 		return mfc;
1813 
1814  end_of_list:
1815 	spin_unlock_bh(&mfc_unres_lock);
1816 	it->cache = NULL;
1817 
1818 	return NULL;
1819 }
1820 
1821 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1822 {
1823 	struct ipmr_mfc_iter *it = seq->private;
1824 
1825 	if (it->cache == &mfc_unres_queue)
1826 		spin_unlock_bh(&mfc_unres_lock);
1827 	else if (it->cache == mfc_cache_array)
1828 		read_unlock(&mrt_lock);
1829 }
1830 
1831 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1832 {
1833 	int n;
1834 
1835 	if (v == SEQ_START_TOKEN) {
1836 		seq_puts(seq,
1837 		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1838 	} else {
1839 		const struct mfc_cache *mfc = v;
1840 		const struct ipmr_mfc_iter *it = seq->private;
1841 
1842 		seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld",
1843 			   (unsigned long) mfc->mfc_mcastgrp,
1844 			   (unsigned long) mfc->mfc_origin,
1845 			   mfc->mfc_parent,
1846 			   mfc->mfc_un.res.pkt,
1847 			   mfc->mfc_un.res.bytes,
1848 			   mfc->mfc_un.res.wrong_if);
1849 
1850 		if (it->cache != &mfc_unres_queue) {
1851 			for (n = mfc->mfc_un.res.minvif;
1852 			     n < mfc->mfc_un.res.maxvif; n++ ) {
1853 				if (VIF_EXISTS(n)
1854 				   && mfc->mfc_un.res.ttls[n] < 255)
1855 				seq_printf(seq,
1856 					   " %2d:%-3d",
1857 					   n, mfc->mfc_un.res.ttls[n]);
1858 			}
1859 		}
1860 		seq_putc(seq, '\n');
1861 	}
1862 	return 0;
1863 }
1864 
1865 static const struct seq_operations ipmr_mfc_seq_ops = {
1866 	.start = ipmr_mfc_seq_start,
1867 	.next  = ipmr_mfc_seq_next,
1868 	.stop  = ipmr_mfc_seq_stop,
1869 	.show  = ipmr_mfc_seq_show,
1870 };
1871 
1872 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1873 {
1874 	struct seq_file *seq;
1875 	int rc = -ENOMEM;
1876 	struct ipmr_mfc_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
1877 
1878 	if (!s)
1879 		goto out;
1880 
1881 	rc = seq_open(file, &ipmr_mfc_seq_ops);
1882 	if (rc)
1883 		goto out_kfree;
1884 
1885 	seq = file->private_data;
1886 	seq->private = s;
1887 out:
1888 	return rc;
1889 out_kfree:
1890 	kfree(s);
1891 	goto out;
1892 
1893 }
1894 
1895 static const struct file_operations ipmr_mfc_fops = {
1896 	.owner	 = THIS_MODULE,
1897 	.open    = ipmr_mfc_open,
1898 	.read    = seq_read,
1899 	.llseek  = seq_lseek,
1900 	.release = seq_release_private,
1901 };
1902 #endif
1903 
1904 #ifdef CONFIG_IP_PIMSM_V2
1905 static struct net_protocol pim_protocol = {
1906 	.handler	=	pim_rcv,
1907 };
1908 #endif
1909 
1910 
1911 /*
1912  *	Setup for IP multicast routing
1913  */
1914 
1915 void __init ip_mr_init(void)
1916 {
1917 	mrt_cachep = kmem_cache_create("ip_mrt_cache",
1918 				       sizeof(struct mfc_cache),
1919 				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1920 				       NULL);
1921 	init_timer(&ipmr_expire_timer);
1922 	ipmr_expire_timer.function=ipmr_expire_process;
1923 	register_netdevice_notifier(&ip_mr_notifier);
1924 #ifdef CONFIG_PROC_FS
1925 	proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops);
1926 	proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops);
1927 #endif
1928 }
1929