xref: /openbmc/linux/net/ipv4/ipmr.c (revision 82ced6fd)
1 /*
2  *	IP multicast routing support for mrouted 3.6/3.8
3  *
4  *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *	  Linux Consultancy and Custom Driver Development
6  *
7  *	This program is free software; you can redistribute it and/or
8  *	modify it under the terms of the GNU General Public License
9  *	as published by the Free Software Foundation; either version
10  *	2 of the License, or (at your option) any later version.
11  *
12  *	Fixes:
13  *	Michael Chastain	:	Incorrect size of copying.
14  *	Alan Cox		:	Added the cache manager code
15  *	Alan Cox		:	Fixed the clone/copy bug and device race.
16  *	Mike McLagan		:	Routing by source
17  *	Malcolm Beattie		:	Buffer handling fixes.
18  *	Alexey Kuznetsov	:	Double buffer free and other fixes.
19  *	SVR Anand		:	Fixed several multicast bugs and problems.
20  *	Alexey Kuznetsov	:	Status, optimisations and more.
21  *	Brad Parker		:	Better behaviour on mrouted upcall
22  *					overflow.
23  *      Carlos Picoto           :       PIMv1 Support
24  *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
25  *					Relax this requirement to work with older peers.
26  *
27  */
28 
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <linux/mm.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
40 #include <linux/in.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <net/net_namespace.h>
51 #include <net/ip.h>
52 #include <net/protocol.h>
53 #include <linux/skbuff.h>
54 #include <net/route.h>
55 #include <net/sock.h>
56 #include <net/icmp.h>
57 #include <net/udp.h>
58 #include <net/raw.h>
59 #include <linux/notifier.h>
60 #include <linux/if_arp.h>
61 #include <linux/netfilter_ipv4.h>
62 #include <net/ipip.h>
63 #include <net/checksum.h>
64 #include <net/netlink.h>
65 
66 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67 #define CONFIG_IP_PIMSM	1
68 #endif
69 
70 /* Big lock, protecting the vif table, the mrt cache and the mroute socket state.
71    Note that changes to them are serialized via rtnl_lock.
72  */
73 
74 static DEFINE_RWLOCK(mrt_lock);
75 
76 /*
77  *	Multicast router control variables
78  */
79 
80 #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
81 
82 static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
83 
84 /* Special spinlock for queue of unresolved entries */
85 static DEFINE_SPINLOCK(mfc_unres_lock);
86 
87 /* We return to Alan's original scheme. The hash table of resolved
88    entries is changed only in process context and is protected
89    by the weak lock mrt_lock. The queue of unresolved entries is
90    protected by the strong spinlock mfc_unres_lock.
91 
92    This way the data path is entirely free of exclusive locks.
93  */
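/* A minimal sketch of the resulting lock discipline (illustrative only and
 * compiled out; the function names below are hypothetical stand-ins for the
 * real users, ip_mr_input()/ipmr_queue_xmit() on the data path and
 * vif_add()/ipmr_mfc_add() on the control path further down in this file):
 */
#if 0
static void example_datapath_lookup(struct net *net)
{
	read_lock(&mrt_lock);		/* weak lock: many concurrent readers */
	/* ... look up net->ipv4.mfc_cache_array[] and forward ... */
	read_unlock(&mrt_lock);
}

static void example_control_path_update(struct net *net, struct mfc_cache *c)
{
	write_lock_bh(&mrt_lock);	/* process context only, BHs disabled */
	/* ... splice @c into its hash chain ... */
	write_unlock_bh(&mrt_lock);
}
#endif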
94 
95 static struct kmem_cache *mrt_cachep __read_mostly;
96 
97 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
98 static int ipmr_cache_report(struct net *net,
99 			     struct sk_buff *pkt, vifi_t vifi, int assert);
100 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
101 
102 #ifdef CONFIG_IP_PIMSM_V2
103 static struct net_protocol pim_protocol;
104 #endif
105 
106 static struct timer_list ipmr_expire_timer;
107 
108 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
109 
110 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
111 {
112 	struct net *net = dev_net(dev);
113 
114 	dev_close(dev);
115 
116 	dev = __dev_get_by_name(net, "tunl0");
117 	if (dev) {
118 		const struct net_device_ops *ops = dev->netdev_ops;
119 		struct ifreq ifr;
120 		struct ip_tunnel_parm p;
121 
122 		memset(&p, 0, sizeof(p));
123 		p.iph.daddr = v->vifc_rmt_addr.s_addr;
124 		p.iph.saddr = v->vifc_lcl_addr.s_addr;
125 		p.iph.version = 4;
126 		p.iph.ihl = 5;
127 		p.iph.protocol = IPPROTO_IPIP;
128 		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
129 		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
130 
131 		if (ops->ndo_do_ioctl) {
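			/* ndo_do_ioctl() expects a __user pointer in
			 * ifr_data; temporarily lifting the address-space
			 * limit with set_fs(KERNEL_DS) lets us pass the
			 * on-stack ip_tunnel_parm instead.
			 */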
132 			mm_segment_t oldfs = get_fs();
133 
134 			set_fs(KERNEL_DS);
135 			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
136 			set_fs(oldfs);
137 		}
138 	}
139 }
140 
141 static
142 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
143 {
144 	struct net_device  *dev;
145 
146 	dev = __dev_get_by_name(net, "tunl0");
147 
148 	if (dev) {
149 		const struct net_device_ops *ops = dev->netdev_ops;
150 		int err;
151 		struct ifreq ifr;
152 		struct ip_tunnel_parm p;
153 		struct in_device  *in_dev;
154 
155 		memset(&p, 0, sizeof(p));
156 		p.iph.daddr = v->vifc_rmt_addr.s_addr;
157 		p.iph.saddr = v->vifc_lcl_addr.s_addr;
158 		p.iph.version = 4;
159 		p.iph.ihl = 5;
160 		p.iph.protocol = IPPROTO_IPIP;
161 		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
162 		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
163 
164 		if (ops->ndo_do_ioctl) {
165 			mm_segment_t oldfs = get_fs();
166 
167 			set_fs(KERNEL_DS);
168 			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
169 			set_fs(oldfs);
170 		} else
171 			err = -EOPNOTSUPP;
172 
173 		dev = NULL;
174 
175 		if (err == 0 &&
176 		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
177 			dev->flags |= IFF_MULTICAST;
178 
179 			in_dev = __in_dev_get_rtnl(dev);
180 			if (in_dev == NULL)
181 				goto failure;
182 
183 			ipv4_devconf_setall(in_dev);
184 			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
185 
186 			if (dev_open(dev))
187 				goto failure;
188 			dev_hold(dev);
189 		}
190 	}
191 	return dev;
192 
193 failure:
194 	/* allow the register to be completed before unregistering. */
195 	rtnl_unlock();
196 	rtnl_lock();
197 
198 	unregister_netdevice(dev);
199 	return NULL;
200 }
201 
202 #ifdef CONFIG_IP_PIMSM
203 
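/* The "pimreg" device: packets routed to the register vif are never sent on
 * the wire; its xmit handler below just bounces them up to the PIM daemon
 * as IGMPMSG_WHOLEPKT upcalls so that it can build Register messages.
 */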
204 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
205 {
206 	struct net *net = dev_net(dev);
207 
208 	read_lock(&mrt_lock);
209 	dev->stats.tx_bytes += skb->len;
210 	dev->stats.tx_packets++;
211 	ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
212 			  IGMPMSG_WHOLEPKT);
213 	read_unlock(&mrt_lock);
214 	kfree_skb(skb);
215 	return 0;
216 }
217 
218 static const struct net_device_ops reg_vif_netdev_ops = {
219 	.ndo_start_xmit	= reg_vif_xmit,
220 };
221 
222 static void reg_vif_setup(struct net_device *dev)
223 {
224 	dev->type		= ARPHRD_PIMREG;
225 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
226 	dev->flags		= IFF_NOARP;
227 	dev->netdev_ops		= &reg_vif_netdev_ops;
228 	dev->destructor		= free_netdev;
229 }
230 
231 static struct net_device *ipmr_reg_vif(void)
232 {
233 	struct net_device *dev;
234 	struct in_device *in_dev;
235 
236 	dev = alloc_netdev(0, "pimreg", reg_vif_setup);
237 
238 	if (dev == NULL)
239 		return NULL;
240 
241 	if (register_netdevice(dev)) {
242 		free_netdev(dev);
243 		return NULL;
244 	}
245 	dev->iflink = 0;
246 
247 	rcu_read_lock();
248 	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
249 		rcu_read_unlock();
250 		goto failure;
251 	}
252 
253 	ipv4_devconf_setall(in_dev);
254 	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
255 	rcu_read_unlock();
256 
257 	if (dev_open(dev))
258 		goto failure;
259 
260 	dev_hold(dev);
261 
262 	return dev;
263 
264 failure:
265 	/* allow the register to be completed before unregistering. */
266 	rtnl_unlock();
267 	rtnl_lock();
268 
269 	unregister_netdevice(dev);
270 	return NULL;
271 }
272 #endif
273 
274 /*
275  *	Delete a VIF entry
276  *	@notify: Set to 1 if the caller is a notifier_call
277  */
278 
279 static int vif_delete(struct net *net, int vifi, int notify)
280 {
281 	struct vif_device *v;
282 	struct net_device *dev;
283 	struct in_device *in_dev;
284 
285 	if (vifi < 0 || vifi >= net->ipv4.maxvif)
286 		return -EADDRNOTAVAIL;
287 
288 	v = &net->ipv4.vif_table[vifi];
289 
290 	write_lock_bh(&mrt_lock);
291 	dev = v->dev;
292 	v->dev = NULL;
293 
294 	if (!dev) {
295 		write_unlock_bh(&mrt_lock);
296 		return -EADDRNOTAVAIL;
297 	}
298 
299 #ifdef CONFIG_IP_PIMSM
300 	if (vifi == net->ipv4.mroute_reg_vif_num)
301 		net->ipv4.mroute_reg_vif_num = -1;
302 #endif
303 
304 	if (vifi+1 == net->ipv4.maxvif) {
305 		int tmp;
306 		for (tmp=vifi-1; tmp>=0; tmp--) {
307 			if (VIF_EXISTS(net, tmp))
308 				break;
309 		}
310 		net->ipv4.maxvif = tmp+1;
311 	}
312 
313 	write_unlock_bh(&mrt_lock);
314 
315 	dev_set_allmulti(dev, -1);
316 
317 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
318 		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
319 		ip_rt_multicast_event(in_dev);
320 	}
321 
322 	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
323 		unregister_netdevice(dev);
324 
325 	dev_put(dev);
326 	return 0;
327 }
328 
329 static inline void ipmr_cache_free(struct mfc_cache *c)
330 {
331 	release_net(mfc_net(c));
332 	kmem_cache_free(mrt_cachep, c);
333 }
334 
335 /* Destroy an unresolved cache entry, killing queued skbs
336    and reporting error to netlink readers.
337  */
338 
339 static void ipmr_destroy_unres(struct mfc_cache *c)
340 {
341 	struct sk_buff *skb;
342 	struct nlmsgerr *e;
343 	struct net *net = mfc_net(c);
344 
345 	atomic_dec(&net->ipv4.cache_resolve_queue_len);
346 
347 	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
348 		if (ip_hdr(skb)->version == 0) {
349 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
350 			nlh->nlmsg_type = NLMSG_ERROR;
351 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
352 			skb_trim(skb, nlh->nlmsg_len);
353 			e = NLMSG_DATA(nlh);
354 			e->error = -ETIMEDOUT;
355 			memset(&e->msg, 0, sizeof(e->msg));
356 
357 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
358 		} else
359 			kfree_skb(skb);
360 	}
361 
362 	ipmr_cache_free(c);
363 }
364 
365 
366 /* Single timer process for all the unresolved queue. */
367 
368 static void ipmr_expire_process(unsigned long dummy)
369 {
370 	unsigned long now;
371 	unsigned long expires;
372 	struct mfc_cache *c, **cp;
373 
374 	if (!spin_trylock(&mfc_unres_lock)) {
375 		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
376 		return;
377 	}
378 
379 	if (mfc_unres_queue == NULL)
380 		goto out;
381 
382 	now = jiffies;
383 	expires = 10*HZ;
384 	cp = &mfc_unres_queue;
385 
386 	while ((c=*cp) != NULL) {
387 		if (time_after(c->mfc_un.unres.expires, now)) {
388 			unsigned long interval = c->mfc_un.unres.expires - now;
389 			if (interval < expires)
390 				expires = interval;
391 			cp = &c->next;
392 			continue;
393 		}
394 
395 		*cp = c->next;
396 
397 		ipmr_destroy_unres(c);
398 	}
399 
400 	if (mfc_unres_queue != NULL)
401 		mod_timer(&ipmr_expire_timer, jiffies + expires);
402 
403 out:
404 	spin_unlock(&mfc_unres_lock);
405 }
406 
407 /* Fill oifs list. It is called under write locked mrt_lock. */
408 
409 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
410 {
411 	int vifi;
412 	struct net *net = mfc_net(cache);
413 
414 	cache->mfc_un.res.minvif = MAXVIFS;
415 	cache->mfc_un.res.maxvif = 0;
416 	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
417 
418 	for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
419 		if (VIF_EXISTS(net, vifi) &&
420 		    ttls[vifi] && ttls[vifi] < 255) {
421 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
422 			if (cache->mfc_un.res.minvif > vifi)
423 				cache->mfc_un.res.minvif = vifi;
424 			if (cache->mfc_un.res.maxvif <= vifi)
425 				cache->mfc_un.res.maxvif = vifi + 1;
426 		}
427 	}
428 }
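/* A small worked example of the ttls[] semantics above (illustrative only;
 * the numbers are made up and assume vifs 0-3 all exist): with maxvif == 4
 * and ttls = {0, 2, 255, 1}, vif 0 is skipped (ttl 0 == not an oif), vif 2
 * is skipped (255 == "do not forward"), and vifs 1 and 3 become oifs, so
 * res.minvif == 1 and res.maxvif == 4. A packet is later forwarded on
 * vif i only if its IP TTL is greater than res.ttls[i] (see
 * ip_mr_forward() below).
 */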
429 
430 static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
431 {
432 	int vifi = vifc->vifc_vifi;
433 	struct vif_device *v = &net->ipv4.vif_table[vifi];
434 	struct net_device *dev;
435 	struct in_device *in_dev;
436 	int err;
437 
438 	/* Is vif busy ? */
439 	if (VIF_EXISTS(net, vifi))
440 		return -EADDRINUSE;
441 
442 	switch (vifc->vifc_flags) {
443 #ifdef CONFIG_IP_PIMSM
444 	case VIFF_REGISTER:
445 		/*
446 		 * Special Purpose VIF in PIM
447 		 * All the packets will be sent to the daemon
448 		 */
449 		if (net->ipv4.mroute_reg_vif_num >= 0)
450 			return -EADDRINUSE;
451 		dev = ipmr_reg_vif();
452 		if (!dev)
453 			return -ENOBUFS;
454 		err = dev_set_allmulti(dev, 1);
455 		if (err) {
456 			unregister_netdevice(dev);
457 			dev_put(dev);
458 			return err;
459 		}
460 		break;
461 #endif
462 	case VIFF_TUNNEL:
463 		dev = ipmr_new_tunnel(net, vifc);
464 		if (!dev)
465 			return -ENOBUFS;
466 		err = dev_set_allmulti(dev, 1);
467 		if (err) {
468 			ipmr_del_tunnel(dev, vifc);
469 			dev_put(dev);
470 			return err;
471 		}
472 		break;
473 	case 0:
474 		dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
475 		if (!dev)
476 			return -EADDRNOTAVAIL;
477 		err = dev_set_allmulti(dev, 1);
478 		if (err) {
479 			dev_put(dev);
480 			return err;
481 		}
482 		break;
483 	default:
484 		return -EINVAL;
485 	}
486 
487 	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
488 		return -EADDRNOTAVAIL;
489 	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
490 	ip_rt_multicast_event(in_dev);
491 
492 	/*
493 	 *	Fill in the VIF structures
494 	 */
495 	v->rate_limit = vifc->vifc_rate_limit;
496 	v->local = vifc->vifc_lcl_addr.s_addr;
497 	v->remote = vifc->vifc_rmt_addr.s_addr;
498 	v->flags = vifc->vifc_flags;
499 	if (!mrtsock)
500 		v->flags |= VIFF_STATIC;
501 	v->threshold = vifc->vifc_threshold;
502 	v->bytes_in = 0;
503 	v->bytes_out = 0;
504 	v->pkt_in = 0;
505 	v->pkt_out = 0;
506 	v->link = dev->ifindex;
507 	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
508 		v->link = dev->iflink;
509 
510 	/* And finish update writing critical data */
511 	write_lock_bh(&mrt_lock);
512 	v->dev = dev;
513 #ifdef CONFIG_IP_PIMSM
514 	if (v->flags&VIFF_REGISTER)
515 		net->ipv4.mroute_reg_vif_num = vifi;
516 #endif
517 	if (vifi+1 > net->ipv4.maxvif)
518 		net->ipv4.maxvif = vifi+1;
519 	write_unlock_bh(&mrt_lock);
520 	return 0;
521 }
522 
523 static struct mfc_cache *ipmr_cache_find(struct net *net,
524 					 __be32 origin,
525 					 __be32 mcastgrp)
526 {
527 	int line = MFC_HASH(mcastgrp, origin);
528 	struct mfc_cache *c;
529 
530 	for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
531 		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
532 			break;
533 	}
534 	return c;
535 }
536 
537 /*
538  *	Allocate a multicast cache entry
539  */
540 static struct mfc_cache *ipmr_cache_alloc(struct net *net)
541 {
542 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
543 	if (c == NULL)
544 		return NULL;
545 	c->mfc_un.res.minvif = MAXVIFS;
546 	mfc_net_set(c, net);
547 	return c;
548 }
549 
550 static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
551 {
552 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
553 	if (c == NULL)
554 		return NULL;
555 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
556 	c->mfc_un.unres.expires = jiffies + 10*HZ;
557 	mfc_net_set(c, net);
558 	return c;
559 }
560 
561 /*
562  *	A cache entry has gone into a resolved state from queued
563  */
564 
565 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
566 {
567 	struct sk_buff *skb;
568 	struct nlmsgerr *e;
569 
570 	/*
571 	 *	Play the pending entries through our router
572 	 */
573 
574 	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
575 		if (ip_hdr(skb)->version == 0) {
576 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
577 
578 			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
579 				nlh->nlmsg_len = (skb_tail_pointer(skb) -
580 						  (u8 *)nlh);
581 			} else {
582 				nlh->nlmsg_type = NLMSG_ERROR;
583 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
584 				skb_trim(skb, nlh->nlmsg_len);
585 				e = NLMSG_DATA(nlh);
586 				e->error = -EMSGSIZE;
587 				memset(&e->msg, 0, sizeof(e->msg));
588 			}
589 
590 			rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
591 		} else
592 			ip_mr_forward(skb, c, 0);
593 	}
594 }
595 
596 /*
597  *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
598  *	expects the following bizarre scheme.
599  *
600  *	Called under mrt_lock.
601  */
602 
603 static int ipmr_cache_report(struct net *net,
604 			     struct sk_buff *pkt, vifi_t vifi, int assert)
605 {
606 	struct sk_buff *skb;
607 	const int ihl = ip_hdrlen(pkt);
608 	struct igmphdr *igmp;
609 	struct igmpmsg *msg;
610 	int ret;
611 
612 #ifdef CONFIG_IP_PIMSM
613 	if (assert == IGMPMSG_WHOLEPKT)
614 		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
615 	else
616 #endif
617 		skb = alloc_skb(128, GFP_ATOMIC);
618 
619 	if (!skb)
620 		return -ENOBUFS;
621 
622 #ifdef CONFIG_IP_PIMSM
623 	if (assert == IGMPMSG_WHOLEPKT) {
624 		/* Ugly, but we have no choice with this interface.
625 		   Duplicate old header, fix ihl, length etc.
626 		   And all this only to mangle msg->im_msgtype and
627 		   to set msg->im_mbz to "mbz" :-)
628 		 */
629 		skb_push(skb, sizeof(struct iphdr));
630 		skb_reset_network_header(skb);
631 		skb_reset_transport_header(skb);
632 		msg = (struct igmpmsg *)skb_network_header(skb);
633 		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
634 		msg->im_msgtype = IGMPMSG_WHOLEPKT;
635 		msg->im_mbz = 0;
636 		msg->im_vif = net->ipv4.mroute_reg_vif_num;
637 		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
638 		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
639 					     sizeof(struct iphdr));
640 	} else
641 #endif
642 	{
643 
644 	/*
645 	 *	Copy the IP header
646 	 */
647 
648 	skb->network_header = skb->tail;
649 	skb_put(skb, ihl);
650 	skb_copy_to_linear_data(skb, pkt->data, ihl);
651 	ip_hdr(skb)->protocol = 0;			/* Flag to the kernel this is a route add */
652 	msg = (struct igmpmsg *)skb_network_header(skb);
653 	msg->im_vif = vifi;
654 	skb->dst = dst_clone(pkt->dst);
655 
656 	/*
657 	 *	Add our header
658 	 */
659 
660 	igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
661 	igmp->type	=
662 	msg->im_msgtype = assert;
663 	igmp->code 	=	0;
664 	ip_hdr(skb)->tot_len = htons(skb->len);			/* Fix the length */
665 	skb->transport_header = skb->network_header;
666 	}
667 
668 	if (net->ipv4.mroute_sk == NULL) {
669 		kfree_skb(skb);
670 		return -EINVAL;
671 	}
672 
673 	/*
674 	 *	Deliver to mrouted
675 	 */
676 	ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
677 	if (ret < 0) {
678 		if (net_ratelimit())
679 			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
680 		kfree_skb(skb);
681 	}
682 
683 	return ret;
684 }
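/* An illustrative userspace-side sketch (compiled out, not kernel code) of
 * how an mrouted-style daemon consumes the upcalls queued above: they
 * arrive on the very raw IGMP socket that issued MRT_INIT, as packets whose
 * IP header has protocol 0 and which are laid out as struct igmpmsg.
 * example_read_upcall() and mrouter_fd are hypothetical names.
 */
#if 0
#include <sys/socket.h>
#include <linux/mroute.h>

static void example_read_upcall(int mrouter_fd)
{
	char buf[2048];
	ssize_t n = recv(mrouter_fd, buf, sizeof(buf), 0);
	struct igmpmsg *msg = (struct igmpmsg *)buf;

	if (n >= (ssize_t)sizeof(*msg) && msg->im_msgtype == IGMPMSG_NOCACHE) {
		/* msg->im_src/im_dst describe the (S,G) pair that missed
		 * the cache; the daemon resolves it and replies with
		 * setsockopt(mrouter_fd, IPPROTO_IP, MRT_ADD_MFC, ...).
		 */
	}
}
#endif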
685 
686 /*
687  *	Queue a packet for resolution. The unresolved cache entry is created and used under mfc_unres_lock!
688  */
689 
690 static int
691 ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
692 {
693 	int err;
694 	struct mfc_cache *c;
695 	const struct iphdr *iph = ip_hdr(skb);
696 
697 	spin_lock_bh(&mfc_unres_lock);
698 	for (c=mfc_unres_queue; c; c=c->next) {
699 		if (net_eq(mfc_net(c), net) &&
700 		    c->mfc_mcastgrp == iph->daddr &&
701 		    c->mfc_origin == iph->saddr)
702 			break;
703 	}
704 
705 	if (c == NULL) {
706 		/*
707 		 *	Create a new entry if allowable
708 		 */
709 
710 		if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
711 		    (c = ipmr_cache_alloc_unres(net)) == NULL) {
712 			spin_unlock_bh(&mfc_unres_lock);
713 
714 			kfree_skb(skb);
715 			return -ENOBUFS;
716 		}
717 
718 		/*
719 		 *	Fill in the new cache entry
720 		 */
721 		c->mfc_parent	= -1;
722 		c->mfc_origin	= iph->saddr;
723 		c->mfc_mcastgrp	= iph->daddr;
724 
725 		/*
726 		 *	Reflect first query at mrouted.
727 		 */
728 		err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
729 		if (err < 0) {
730 			/* If the report failed throw the cache entry
731 			   out - Brad Parker
732 			 */
733 			spin_unlock_bh(&mfc_unres_lock);
734 
735 			ipmr_cache_free(c);
736 			kfree_skb(skb);
737 			return err;
738 		}
739 
740 		atomic_inc(&net->ipv4.cache_resolve_queue_len);
741 		c->next = mfc_unres_queue;
742 		mfc_unres_queue = c;
743 
744 		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
745 	}
746 
747 	/*
748 	 *	See if we can append the packet
749 	 */
750 	if (c->mfc_un.unres.unresolved.qlen>3) {
751 		kfree_skb(skb);
752 		err = -ENOBUFS;
753 	} else {
754 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
755 		err = 0;
756 	}
757 
758 	spin_unlock_bh(&mfc_unres_lock);
759 	return err;
760 }
761 
762 /*
763  *	MFC cache manipulation by user space mroute daemon
764  */
765 
766 static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
767 {
768 	int line;
769 	struct mfc_cache *c, **cp;
770 
771 	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
772 
773 	for (cp = &net->ipv4.mfc_cache_array[line];
774 	     (c = *cp) != NULL; cp = &c->next) {
775 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
776 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
777 			write_lock_bh(&mrt_lock);
778 			*cp = c->next;
779 			write_unlock_bh(&mrt_lock);
780 
781 			ipmr_cache_free(c);
782 			return 0;
783 		}
784 	}
785 	return -ENOENT;
786 }
787 
788 static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
789 {
790 	int line;
791 	struct mfc_cache *uc, *c, **cp;
792 
793 	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
794 
795 	for (cp = &net->ipv4.mfc_cache_array[line];
796 	     (c = *cp) != NULL; cp = &c->next) {
797 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
798 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
799 			break;
800 	}
801 
802 	if (c != NULL) {
803 		write_lock_bh(&mrt_lock);
804 		c->mfc_parent = mfc->mfcc_parent;
805 		ipmr_update_thresholds(c, mfc->mfcc_ttls);
806 		if (!mrtsock)
807 			c->mfc_flags |= MFC_STATIC;
808 		write_unlock_bh(&mrt_lock);
809 		return 0;
810 	}
811 
812 	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
813 		return -EINVAL;
814 
815 	c = ipmr_cache_alloc(net);
816 	if (c == NULL)
817 		return -ENOMEM;
818 
819 	c->mfc_origin = mfc->mfcc_origin.s_addr;
820 	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
821 	c->mfc_parent = mfc->mfcc_parent;
822 	ipmr_update_thresholds(c, mfc->mfcc_ttls);
823 	if (!mrtsock)
824 		c->mfc_flags |= MFC_STATIC;
825 
826 	write_lock_bh(&mrt_lock);
827 	c->next = net->ipv4.mfc_cache_array[line];
828 	net->ipv4.mfc_cache_array[line] = c;
829 	write_unlock_bh(&mrt_lock);
830 
831 	/*
832 	 *	Check to see if we resolved a queued list. If so we
833 	 *	need to send on the frames and tidy up.
834 	 */
835 	spin_lock_bh(&mfc_unres_lock);
836 	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
837 	     cp = &uc->next) {
838 		if (net_eq(mfc_net(uc), net) &&
839 		    uc->mfc_origin == c->mfc_origin &&
840 		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
841 			*cp = uc->next;
842 			atomic_dec(&net->ipv4.cache_resolve_queue_len);
843 			break;
844 		}
845 	}
846 	if (mfc_unres_queue == NULL)
847 		del_timer(&ipmr_expire_timer);
848 	spin_unlock_bh(&mfc_unres_lock);
849 
850 	if (uc) {
851 		ipmr_cache_resolve(uc, c);
852 		ipmr_cache_free(uc);
853 	}
854 	return 0;
855 }
856 
857 /*
858  *	Close the multicast socket, and clear the vif tables etc
859  */
860 
861 static void mroute_clean_tables(struct net *net)
862 {
863 	int i;
864 
865 	/*
866 	 *	Shut down all active vif entries
867 	 */
868 	for (i = 0; i < net->ipv4.maxvif; i++) {
869 		if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
870 			vif_delete(net, i, 0);
871 	}
872 
873 	/*
874 	 *	Wipe the cache
875 	 */
876 	for (i=0; i<MFC_LINES; i++) {
877 		struct mfc_cache *c, **cp;
878 
879 		cp = &net->ipv4.mfc_cache_array[i];
880 		while ((c = *cp) != NULL) {
881 			if (c->mfc_flags&MFC_STATIC) {
882 				cp = &c->next;
883 				continue;
884 			}
885 			write_lock_bh(&mrt_lock);
886 			*cp = c->next;
887 			write_unlock_bh(&mrt_lock);
888 
889 			ipmr_cache_free(c);
890 		}
891 	}
892 
893 	if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
894 		struct mfc_cache *c, **cp;
895 
896 		spin_lock_bh(&mfc_unres_lock);
897 		cp = &mfc_unres_queue;
898 		while ((c = *cp) != NULL) {
899 			if (!net_eq(mfc_net(c), net)) {
900 				cp = &c->next;
901 				continue;
902 			}
903 			*cp = c->next;
904 
905 			ipmr_destroy_unres(c);
906 		}
907 		spin_unlock_bh(&mfc_unres_lock);
908 	}
909 }
910 
911 static void mrtsock_destruct(struct sock *sk)
912 {
913 	struct net *net = sock_net(sk);
914 
915 	rtnl_lock();
916 	if (sk == net->ipv4.mroute_sk) {
917 		IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
918 
919 		write_lock_bh(&mrt_lock);
920 		net->ipv4.mroute_sk = NULL;
921 		write_unlock_bh(&mrt_lock);
922 
923 		mroute_clean_tables(net);
924 	}
925 	rtnl_unlock();
926 }
927 
928 /*
929  *	Socket options and virtual interface manipulation. The whole
930  *	virtual interface system is a complete heap, but unfortunately
931  *	that's how BSD mrouted happens to think. Maybe one day with a proper
932  *	MOSPF/PIM router set up we can clean this up.
933  */
934 
935 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
936 {
937 	int ret;
938 	struct vifctl vif;
939 	struct mfcctl mfc;
940 	struct net *net = sock_net(sk);
941 
942 	if (optname != MRT_INIT) {
943 		if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
944 			return -EACCES;
945 	}
946 
947 	switch (optname) {
948 	case MRT_INIT:
949 		if (sk->sk_type != SOCK_RAW ||
950 		    inet_sk(sk)->num != IPPROTO_IGMP)
951 			return -EOPNOTSUPP;
952 		if (optlen != sizeof(int))
953 			return -ENOPROTOOPT;
954 
955 		rtnl_lock();
956 		if (net->ipv4.mroute_sk) {
957 			rtnl_unlock();
958 			return -EADDRINUSE;
959 		}
960 
961 		ret = ip_ra_control(sk, 1, mrtsock_destruct);
962 		if (ret == 0) {
963 			write_lock_bh(&mrt_lock);
964 			net->ipv4.mroute_sk = sk;
965 			write_unlock_bh(&mrt_lock);
966 
967 			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
968 		}
969 		rtnl_unlock();
970 		return ret;
971 	case MRT_DONE:
972 		if (sk != net->ipv4.mroute_sk)
973 			return -EACCES;
974 		return ip_ra_control(sk, 0, NULL);
975 	case MRT_ADD_VIF:
976 	case MRT_DEL_VIF:
977 		if (optlen != sizeof(vif))
978 			return -EINVAL;
979 		if (copy_from_user(&vif, optval, sizeof(vif)))
980 			return -EFAULT;
981 		if (vif.vifc_vifi >= MAXVIFS)
982 			return -ENFILE;
983 		rtnl_lock();
984 		if (optname == MRT_ADD_VIF) {
985 			ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
986 		} else {
987 			ret = vif_delete(net, vif.vifc_vifi, 0);
988 		}
989 		rtnl_unlock();
990 		return ret;
991 
992 		/*
993 		 *	Manipulate the forwarding caches. These live
994 		 *	in a sort of kernel/user symbiosis.
995 		 */
996 	case MRT_ADD_MFC:
997 	case MRT_DEL_MFC:
998 		if (optlen != sizeof(mfc))
999 			return -EINVAL;
1000 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1001 			return -EFAULT;
1002 		rtnl_lock();
1003 		if (optname == MRT_DEL_MFC)
1004 			ret = ipmr_mfc_delete(net, &mfc);
1005 		else
1006 			ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
1007 		rtnl_unlock();
1008 		return ret;
1009 		/*
1010 		 *	Control PIM assert.
1011 		 */
1012 	case MRT_ASSERT:
1013 	{
1014 		int v;
1015 		if (get_user(v,(int __user *)optval))
1016 			return -EFAULT;
1017 		net->ipv4.mroute_do_assert = (v) ? 1 : 0;
1018 		return 0;
1019 	}
1020 #ifdef CONFIG_IP_PIMSM
1021 	case MRT_PIM:
1022 	{
1023 		int v;
1024 
1025 		if (get_user(v,(int __user *)optval))
1026 			return -EFAULT;
1027 		v = (v) ? 1 : 0;
1028 
1029 		rtnl_lock();
1030 		ret = 0;
1031 		if (v != net->ipv4.mroute_do_pim) {
1032 			net->ipv4.mroute_do_pim = v;
1033 			net->ipv4.mroute_do_assert = v;
1034 #ifdef CONFIG_IP_PIMSM_V2
1035 			if (net->ipv4.mroute_do_pim)
1036 				ret = inet_add_protocol(&pim_protocol,
1037 							IPPROTO_PIM);
1038 			else
1039 				ret = inet_del_protocol(&pim_protocol,
1040 							IPPROTO_PIM);
1041 			if (ret < 0)
1042 				ret = -EAGAIN;
1043 #endif
1044 		}
1045 		rtnl_unlock();
1046 		return ret;
1047 	}
1048 #endif
1049 	/*
1050 	 *	Spurious command, or MRT_VERSION which you cannot
1051 	 *	set.
1052 	 */
1053 	default:
1054 		return -ENOPROTOOPT;
1055 	}
1056 }
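/* An illustrative userspace sketch (compiled out) of the option sequence
 * this function implements; example_mrouter_setup() is a hypothetical name
 * and the vif values and address are made-up examples.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <linux/mroute.h>

static int example_mrouter_setup(void)
{
	int one = 1;
	struct vifctl vc = { .vifc_vifi = 0, .vifc_threshold = 1 };
	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);

	if (fd < 0)
		return -1;
	/* Become "the" mrouter socket; only one per netns is allowed. */
	if (setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one)) < 0)
		return -1;
	vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");	/* example */
	return setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
}
#endif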
1057 
1058 /*
1059  *	Getsock opt support for the multicast routing system.
1060  */
1061 
1062 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1063 {
1064 	int olr;
1065 	int val;
1066 	struct net *net = sock_net(sk);
1067 
1068 	if (optname != MRT_VERSION &&
1069 #ifdef CONFIG_IP_PIMSM
1070 	   optname!=MRT_PIM &&
1071 #endif
1072 	   optname!=MRT_ASSERT)
1073 		return -ENOPROTOOPT;
1074 
1075 	if (get_user(olr, optlen))
1076 		return -EFAULT;
1077 
1078 	olr = min_t(unsigned int, olr, sizeof(int));
1079 	if (olr < 0)
1080 		return -EINVAL;
1081 
1082 	if (put_user(olr, optlen))
1083 		return -EFAULT;
1084 	if (optname == MRT_VERSION)
1085 		val = 0x0305;
1086 #ifdef CONFIG_IP_PIMSM
1087 	else if (optname == MRT_PIM)
1088 		val = net->ipv4.mroute_do_pim;
1089 #endif
1090 	else
1091 		val = net->ipv4.mroute_do_assert;
1092 	if (copy_to_user(optval, &val, olr))
1093 		return -EFAULT;
1094 	return 0;
1095 }
1096 
1097 /*
1098  *	The IP multicast ioctl support routines.
1099  */
1100 
1101 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1102 {
1103 	struct sioc_sg_req sr;
1104 	struct sioc_vif_req vr;
1105 	struct vif_device *vif;
1106 	struct mfc_cache *c;
1107 	struct net *net = sock_net(sk);
1108 
1109 	switch (cmd) {
1110 	case SIOCGETVIFCNT:
1111 		if (copy_from_user(&vr, arg, sizeof(vr)))
1112 			return -EFAULT;
1113 		if (vr.vifi >= net->ipv4.maxvif)
1114 			return -EINVAL;
1115 		read_lock(&mrt_lock);
1116 		vif = &net->ipv4.vif_table[vr.vifi];
1117 		if (VIF_EXISTS(net, vr.vifi)) {
1118 			vr.icount = vif->pkt_in;
1119 			vr.ocount = vif->pkt_out;
1120 			vr.ibytes = vif->bytes_in;
1121 			vr.obytes = vif->bytes_out;
1122 			read_unlock(&mrt_lock);
1123 
1124 			if (copy_to_user(arg, &vr, sizeof(vr)))
1125 				return -EFAULT;
1126 			return 0;
1127 		}
1128 		read_unlock(&mrt_lock);
1129 		return -EADDRNOTAVAIL;
1130 	case SIOCGETSGCNT:
1131 		if (copy_from_user(&sr, arg, sizeof(sr)))
1132 			return -EFAULT;
1133 
1134 		read_lock(&mrt_lock);
1135 		c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
1136 		if (c) {
1137 			sr.pktcnt = c->mfc_un.res.pkt;
1138 			sr.bytecnt = c->mfc_un.res.bytes;
1139 			sr.wrong_if = c->mfc_un.res.wrong_if;
1140 			read_unlock(&mrt_lock);
1141 
1142 			if (copy_to_user(arg, &sr, sizeof(sr)))
1143 				return -EFAULT;
1144 			return 0;
1145 		}
1146 		read_unlock(&mrt_lock);
1147 		return -EADDRNOTAVAIL;
1148 	default:
1149 		return -ENOIOCTLCMD;
1150 	}
1151 }
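/* An illustrative userspace sketch (compiled out) of the SIOCGETSGCNT
 * counter query handled above; the addresses are made-up examples.
 */
#if 0
#include <stdio.h>
#include <sys/ioctl.h>
#include <arpa/inet.h>
#include <linux/mroute.h>

static void example_sg_counters(int mrouter_fd)
{
	struct sioc_sg_req sr = {
		.src = { .s_addr = inet_addr("192.0.2.1") },
		.grp = { .s_addr = inet_addr("233.252.0.1") },
	};

	if (ioctl(mrouter_fd, SIOCGETSGCNT, &sr) == 0)
		printf("pkts=%lu bytes=%lu wrong_if=%lu\n",
		       sr.pktcnt, sr.bytecnt, sr.wrong_if);
}
#endif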
1152 
1153 
1154 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1155 {
1156 	struct net_device *dev = ptr;
1157 	struct net *net = dev_net(dev);
1158 	struct vif_device *v;
1159 	int ct;
1160 
1161 	if (!net_eq(dev_net(dev), net))
1162 		return NOTIFY_DONE;
1163 
1164 	if (event != NETDEV_UNREGISTER)
1165 		return NOTIFY_DONE;
1166 	v = &net->ipv4.vif_table[0];
1167 	for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
1168 		if (v->dev == dev)
1169 			vif_delete(net, ct, 1);
1170 	}
1171 	return NOTIFY_DONE;
1172 }
1173 
1174 
1175 static struct notifier_block ip_mr_notifier = {
1176 	.notifier_call = ipmr_device_event,
1177 };
1178 
1179 /*
1180  * 	Encapsulate a packet by attaching a valid IPIP header to it.
1181  *	This avoids tunnel drivers and other mess and gives us the speed so
1182  *	important for multicast video.
1183  */
1184 
1185 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1186 {
1187 	struct iphdr *iph;
1188 	struct iphdr *old_iph = ip_hdr(skb);
1189 
1190 	skb_push(skb, sizeof(struct iphdr));
1191 	skb->transport_header = skb->network_header;
1192 	skb_reset_network_header(skb);
1193 	iph = ip_hdr(skb);
1194 
1195 	iph->version	= 	4;
1196 	iph->tos	=	old_iph->tos;
1197 	iph->ttl	=	old_iph->ttl;
1198 	iph->frag_off	=	0;
1199 	iph->daddr	=	daddr;
1200 	iph->saddr	=	saddr;
1201 	iph->protocol	=	IPPROTO_IPIP;
1202 	iph->ihl	=	5;
1203 	iph->tot_len	=	htons(skb->len);
1204 	ip_select_ident(iph, skb->dst, NULL);
1205 	ip_send_check(iph);
1206 
1207 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1208 	nf_reset(skb);
1209 }
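/* The resulting on-the-wire layout, as a sketch:
 *
 *	| outer iphdr (IPPROTO_IPIP, saddr=vif->local, daddr=vif->remote) |
 *	| original multicast iphdr | original payload |
 *
 * i.e. a plain RFC 2003 IP-in-IP packet that the peer's tunl device can
 * decapsulate without any per-tunnel driver involvement.
 */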
1210 
1211 static inline int ipmr_forward_finish(struct sk_buff *skb)
1212 {
1213 	struct ip_options * opt	= &(IPCB(skb)->opt);
1214 
1215 	IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1216 
1217 	if (unlikely(opt->optlen))
1218 		ip_forward_options(skb);
1219 
1220 	return dst_output(skb);
1221 }
1222 
1223 /*
1224  *	Processing handlers for ipmr_forward
1225  */
1226 
1227 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1228 {
1229 	struct net *net = mfc_net(c);
1230 	const struct iphdr *iph = ip_hdr(skb);
1231 	struct vif_device *vif = &net->ipv4.vif_table[vifi];
1232 	struct net_device *dev;
1233 	struct rtable *rt;
1234 	int    encap = 0;
1235 
1236 	if (vif->dev == NULL)
1237 		goto out_free;
1238 
1239 #ifdef CONFIG_IP_PIMSM
1240 	if (vif->flags & VIFF_REGISTER) {
1241 		vif->pkt_out++;
1242 		vif->bytes_out += skb->len;
1243 		vif->dev->stats.tx_bytes += skb->len;
1244 		vif->dev->stats.tx_packets++;
1245 		ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
1246 		goto out_free;
1247 	}
1248 #endif
1249 
1250 	if (vif->flags&VIFF_TUNNEL) {
1251 		struct flowi fl = { .oif = vif->link,
1252 				    .nl_u = { .ip4_u =
1253 					      { .daddr = vif->remote,
1254 						.saddr = vif->local,
1255 						.tos = RT_TOS(iph->tos) } },
1256 				    .proto = IPPROTO_IPIP };
1257 		if (ip_route_output_key(net, &rt, &fl))
1258 			goto out_free;
1259 		encap = sizeof(struct iphdr);
1260 	} else {
1261 		struct flowi fl = { .oif = vif->link,
1262 				    .nl_u = { .ip4_u =
1263 					      { .daddr = iph->daddr,
1264 						.tos = RT_TOS(iph->tos) } },
1265 				    .proto = IPPROTO_IPIP };
1266 		if (ip_route_output_key(net, &rt, &fl))
1267 			goto out_free;
1268 	}
1269 
1270 	dev = rt->u.dst.dev;
1271 
1272 	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1273 		/* Do not fragment multicasts. Alas, IPv4 does not
1274 		   allow us to send ICMP here, so such packets will
1275 		   disappear into a black hole.
1276 		 */
1277 
1278 		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1279 		ip_rt_put(rt);
1280 		goto out_free;
1281 	}
1282 
1283 	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1284 
1285 	if (skb_cow(skb, encap)) {
1286 		ip_rt_put(rt);
1287 		goto out_free;
1288 	}
1289 
1290 	vif->pkt_out++;
1291 	vif->bytes_out += skb->len;
1292 
1293 	dst_release(skb->dst);
1294 	skb->dst = &rt->u.dst;
1295 	ip_decrease_ttl(ip_hdr(skb));
1296 
1297 	/* FIXME: forward and output firewalls used to be called here.
1298 	 * What do we do with netfilter? -- RR */
1299 	if (vif->flags & VIFF_TUNNEL) {
1300 		ip_encap(skb, vif->local, vif->remote);
1301 		/* FIXME: extra output firewall step used to be here. --RR */
1302 		vif->dev->stats.tx_packets++;
1303 		vif->dev->stats.tx_bytes += skb->len;
1304 	}
1305 
1306 	IPCB(skb)->flags |= IPSKB_FORWARDED;
1307 
1308 	/*
1309 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1310 	 * not only before forwarding, but also after forwarding on all output
1311 	 * interfaces. Clearly, if the mrouter runs a multicasting
1312 	 * program, that program should receive packets regardless of which
1313 	 * interface it is joined on.
1314 	 * If we do not do this, the program will have to join on all
1315 	 * interfaces. On the other hand, a multihomed host (or router, but
1316 	 * not an mrouter) cannot join on more than one interface - that would
1317 	 * result in receiving multiple copies of each packet.
1318 	 */
1319 	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1320 		ipmr_forward_finish);
1321 	return;
1322 
1323 out_free:
1324 	kfree_skb(skb);
1325 	return;
1326 }
1327 
1328 static int ipmr_find_vif(struct net_device *dev)
1329 {
1330 	struct net *net = dev_net(dev);
1331 	int ct;
1332 	for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1333 		if (net->ipv4.vif_table[ct].dev == dev)
1334 			break;
1335 	}
1336 	return ct;
1337 }
1338 
1339 /* "local" means that we should preserve one skb (for local delivery) */
1340 
1341 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1342 {
1343 	int psend = -1;
1344 	int vif, ct;
1345 	struct net *net = mfc_net(cache);
1346 
1347 	vif = cache->mfc_parent;
1348 	cache->mfc_un.res.pkt++;
1349 	cache->mfc_un.res.bytes += skb->len;
1350 
1351 	/*
1352 	 * Wrong interface: drop packet and (maybe) send PIM assert.
1353 	 */
1354 	if (net->ipv4.vif_table[vif].dev != skb->dev) {
1355 		int true_vifi;
1356 
1357 		if (skb->rtable->fl.iif == 0) {
1358 			/* It is our own packet, looped back.
1359 			   A very complicated situation...
1360 
1361 			   The best workaround until the routing daemons are
1362 			   fixed is not to redistribute a packet if it was
1363 			   sent through the wrong interface. It means that
1364 			   multicast applications WILL NOT work for
1365 			   (S,G) entries whose default multicast route points
1366 			   to the wrong oif. In any case, it is not a good
1367 			   idea to run multicasting applications on a router.
1368 			 */
1369 			goto dont_forward;
1370 		}
1371 
1372 		cache->mfc_un.res.wrong_if++;
1373 		true_vifi = ipmr_find_vif(skb->dev);
1374 
1375 		if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
1376 		    /* PIM-SM uses asserts when switching from RPT to SPT,
1377 		       so we cannot check that the packet arrived on an oif.
1378 		       It is bad, but otherwise we would need to move a pretty
1379 		       large chunk of pimd into the kernel. Ough... --ANK
1380 		     */
1381 		    (net->ipv4.mroute_do_pim ||
1382 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
1383 		    time_after(jiffies,
1384 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1385 			cache->mfc_un.res.last_assert = jiffies;
1386 			ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
1387 		}
1388 		goto dont_forward;
1389 	}
1390 
1391 	net->ipv4.vif_table[vif].pkt_in++;
1392 	net->ipv4.vif_table[vif].bytes_in += skb->len;
1393 
1394 	/*
1395 	 *	Forward the frame
1396 	 */
1397 	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1398 		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1399 			if (psend != -1) {
1400 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1401 				if (skb2)
1402 					ipmr_queue_xmit(skb2, cache, psend);
1403 			}
1404 			psend = ct;
1405 		}
1406 	}
1407 	if (psend != -1) {
1408 		if (local) {
1409 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1410 			if (skb2)
1411 				ipmr_queue_xmit(skb2, cache, psend);
1412 		} else {
1413 			ipmr_queue_xmit(skb, cache, psend);
1414 			return 0;
1415 		}
1416 	}
1417 
1418 dont_forward:
1419 	if (!local)
1420 		kfree_skb(skb);
1421 	return 0;
1422 }
1423 
1424 
1425 /*
1426  *	Multicast packets for forwarding arrive here
1427  */
1428 
1429 int ip_mr_input(struct sk_buff *skb)
1430 {
1431 	struct mfc_cache *cache;
1432 	struct net *net = dev_net(skb->dev);
1433 	int local = skb->rtable->rt_flags&RTCF_LOCAL;
1434 
1435 	/* Packet is looped back after forwarding; it should not be
1436 	   forwarded a second time, but it can still be delivered locally.
1437 	 */
1438 	if (IPCB(skb)->flags&IPSKB_FORWARDED)
1439 		goto dont_forward;
1440 
1441 	if (!local) {
1442 		    if (IPCB(skb)->opt.router_alert) {
1443 			    if (ip_call_ra_chain(skb))
1444 				    return 0;
1445 		    } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1446 			    /* IGMPv1 (and broken IGMPv2 implementations such as
1447 			       Cisco IOS <= 11.2(8)) do not put the router alert
1448 			       option into IGMP packets destined to routable
1449 			       groups. It is very bad, because it means
1450 			       that otherwise we could forward NO IGMP messages.
1451 			     */
1452 			    read_lock(&mrt_lock);
1453 			    if (net->ipv4.mroute_sk) {
1454 				    nf_reset(skb);
1455 				    raw_rcv(net->ipv4.mroute_sk, skb);
1456 				    read_unlock(&mrt_lock);
1457 				    return 0;
1458 			    }
1459 			    read_unlock(&mrt_lock);
1460 		    }
1461 	}
1462 
1463 	read_lock(&mrt_lock);
1464 	cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1465 
1466 	/*
1467 	 *	No usable cache entry
1468 	 */
1469 	if (cache == NULL) {
1470 		int vif;
1471 
1472 		if (local) {
1473 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1474 			ip_local_deliver(skb);
1475 			if (skb2 == NULL) {
1476 				read_unlock(&mrt_lock);
1477 				return -ENOBUFS;
1478 			}
1479 			skb = skb2;
1480 		}
1481 
1482 		vif = ipmr_find_vif(skb->dev);
1483 		if (vif >= 0) {
1484 			int err = ipmr_cache_unresolved(net, vif, skb);
1485 			read_unlock(&mrt_lock);
1486 
1487 			return err;
1488 		}
1489 		read_unlock(&mrt_lock);
1490 		kfree_skb(skb);
1491 		return -ENODEV;
1492 	}
1493 
1494 	ip_mr_forward(skb, cache, local);
1495 
1496 	read_unlock(&mrt_lock);
1497 
1498 	if (local)
1499 		return ip_local_deliver(skb);
1500 
1501 	return 0;
1502 
1503 dont_forward:
1504 	if (local)
1505 		return ip_local_deliver(skb);
1506 	kfree_skb(skb);
1507 	return 0;
1508 }
1509 
1510 #ifdef CONFIG_IP_PIMSM
1511 static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1512 {
1513 	struct net_device *reg_dev = NULL;
1514 	struct iphdr *encap;
1515 	struct net *net = dev_net(skb->dev);
1516 
1517 	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1518 	/*
1519 	   Check that:
1520 	   a. packet is really destined to a multicast group
1521 	   b. packet is not a NULL-REGISTER
1522 	   c. packet is not truncated
1523 	 */
1524 	if (!ipv4_is_multicast(encap->daddr) ||
1525 	    encap->tot_len == 0 ||
1526 	    ntohs(encap->tot_len) + pimlen > skb->len)
1527 		return 1;
1528 
1529 	read_lock(&mrt_lock);
1530 	if (net->ipv4.mroute_reg_vif_num >= 0)
1531 		reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
1532 	if (reg_dev)
1533 		dev_hold(reg_dev);
1534 	read_unlock(&mrt_lock);
1535 
1536 	if (reg_dev == NULL)
1537 		return 1;
1538 
1539 	skb->mac_header = skb->network_header;
1540 	skb_pull(skb, (u8*)encap - skb->data);
1541 	skb_reset_network_header(skb);
1542 	skb->dev = reg_dev;
1543 	skb->protocol = htons(ETH_P_IP);
1544 	skb->ip_summed = 0;
1545 	skb->pkt_type = PACKET_HOST;
1546 	dst_release(skb->dst);
1547 	skb->dst = NULL;
1548 	reg_dev->stats.rx_bytes += skb->len;
1549 	reg_dev->stats.rx_packets++;
1550 	nf_reset(skb);
1551 	netif_rx(skb);
1552 	dev_put(reg_dev);
1553 
1554 	return 0;
1555 }
1556 #endif
1557 
1558 #ifdef CONFIG_IP_PIMSM_V1
1559 /*
1560  * Handle IGMP messages of PIMv1
1561  */
1562 
1563 int pim_rcv_v1(struct sk_buff * skb)
1564 {
1565 	struct igmphdr *pim;
1566 	struct net *net = dev_net(skb->dev);
1567 
1568 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1569 		goto drop;
1570 
1571 	pim = igmp_hdr(skb);
1572 
1573 	if (!net->ipv4.mroute_do_pim ||
1574 	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1575 		goto drop;
1576 
1577 	if (__pim_rcv(skb, sizeof(*pim))) {
1578 drop:
1579 		kfree_skb(skb);
1580 	}
1581 	return 0;
1582 }
1583 #endif
1584 
1585 #ifdef CONFIG_IP_PIMSM_V2
1586 static int pim_rcv(struct sk_buff * skb)
1587 {
1588 	struct pimreghdr *pim;
1589 
1590 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1591 		goto drop;
1592 
1593 	pim = (struct pimreghdr *)skb_transport_header(skb);
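	/* Accept the register if either the checksum over the PIM header
	 * alone (per the PIMv2 spec) or over the whole packet (older peers;
	 * see the relaxation credited to Pavlin Radoslavov in the header
	 * comment) verifies.
	 */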
1594 	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1595 	    (pim->flags&PIM_NULL_REGISTER) ||
1596 	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1597 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1598 		goto drop;
1599 
1600 	if (__pim_rcv(skb, sizeof(*pim))) {
1601 drop:
1602 		kfree_skb(skb);
1603 	}
1604 	return 0;
1605 }
1606 #endif
1607 
1608 static int
1609 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1610 {
1611 	int ct;
1612 	struct rtnexthop *nhp;
1613 	struct net *net = mfc_net(c);
1614 	struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
1615 	u8 *b = skb_tail_pointer(skb);
1616 	struct rtattr *mp_head;
1617 
1618 	if (dev)
1619 		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1620 
1621 	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1622 
1623 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1624 		if (c->mfc_un.res.ttls[ct] < 255) {
1625 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1626 				goto rtattr_failure;
1627 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1628 			nhp->rtnh_flags = 0;
1629 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1630 			nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1631 			nhp->rtnh_len = sizeof(*nhp);
1632 		}
1633 	}
1634 	mp_head->rta_type = RTA_MULTIPATH;
1635 	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1636 	rtm->rtm_type = RTN_MULTICAST;
1637 	return 1;
1638 
1639 rtattr_failure:
1640 	nlmsg_trim(skb, b);
1641 	return -EMSGSIZE;
1642 }
1643 
1644 int ipmr_get_route(struct net *net,
1645 		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1646 {
1647 	int err;
1648 	struct mfc_cache *cache;
1649 	struct rtable *rt = skb->rtable;
1650 
1651 	read_lock(&mrt_lock);
1652 	cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1653 
1654 	if (cache == NULL) {
1655 		struct sk_buff *skb2;
1656 		struct iphdr *iph;
1657 		struct net_device *dev;
1658 		int vif;
1659 
1660 		if (nowait) {
1661 			read_unlock(&mrt_lock);
1662 			return -EAGAIN;
1663 		}
1664 
1665 		dev = skb->dev;
1666 		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1667 			read_unlock(&mrt_lock);
1668 			return -ENODEV;
1669 		}
1670 		skb2 = skb_clone(skb, GFP_ATOMIC);
1671 		if (!skb2) {
1672 			read_unlock(&mrt_lock);
1673 			return -ENOMEM;
1674 		}
1675 
1676 		skb_push(skb2, sizeof(struct iphdr));
1677 		skb_reset_network_header(skb2);
1678 		iph = ip_hdr(skb2);
1679 		iph->ihl = sizeof(struct iphdr) >> 2;
1680 		iph->saddr = rt->rt_src;
1681 		iph->daddr = rt->rt_dst;
1682 		iph->version = 0;
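		/* version 0 cannot appear on the wire; it marks this skb as
		 * a pending netlink request, so that ipmr_cache_resolve() or
		 * ipmr_destroy_unres() later completes it via rtnl_unicast()
		 * instead of forwarding it.
		 */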
1683 		err = ipmr_cache_unresolved(net, vif, skb2);
1684 		read_unlock(&mrt_lock);
1685 		return err;
1686 	}
1687 
1688 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1689 		cache->mfc_flags |= MFC_NOTIFY;
1690 	err = ipmr_fill_mroute(skb, cache, rtm);
1691 	read_unlock(&mrt_lock);
1692 	return err;
1693 }
1694 
1695 #ifdef CONFIG_PROC_FS
1696 /*
1697  *	The /proc interfaces to multicast routing: /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
1698  */
1699 struct ipmr_vif_iter {
1700 	struct seq_net_private p;
1701 	int ct;
1702 };
1703 
1704 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1705 					   struct ipmr_vif_iter *iter,
1706 					   loff_t pos)
1707 {
1708 	for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1709 		if (!VIF_EXISTS(net, iter->ct))
1710 			continue;
1711 		if (pos-- == 0)
1712 			return &net->ipv4.vif_table[iter->ct];
1713 	}
1714 	return NULL;
1715 }
1716 
1717 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1718 	__acquires(mrt_lock)
1719 {
1720 	struct net *net = seq_file_net(seq);
1721 
1722 	read_lock(&mrt_lock);
1723 	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1724 		: SEQ_START_TOKEN;
1725 }
1726 
1727 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1728 {
1729 	struct ipmr_vif_iter *iter = seq->private;
1730 	struct net *net = seq_file_net(seq);
1731 
1732 	++*pos;
1733 	if (v == SEQ_START_TOKEN)
1734 		return ipmr_vif_seq_idx(net, iter, 0);
1735 
1736 	while (++iter->ct < net->ipv4.maxvif) {
1737 		if (!VIF_EXISTS(net, iter->ct))
1738 			continue;
1739 		return &net->ipv4.vif_table[iter->ct];
1740 	}
1741 	return NULL;
1742 }
1743 
1744 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1745 	__releases(mrt_lock)
1746 {
1747 	read_unlock(&mrt_lock);
1748 }
1749 
1750 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1751 {
1752 	struct net *net = seq_file_net(seq);
1753 
1754 	if (v == SEQ_START_TOKEN) {
1755 		seq_puts(seq,
1756 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1757 	} else {
1758 		const struct vif_device *vif = v;
1759 		const char *name =  vif->dev ? vif->dev->name : "none";
1760 
1761 		seq_printf(seq,
1762 			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1763 			   vif - net->ipv4.vif_table,
1764 			   name, vif->bytes_in, vif->pkt_in,
1765 			   vif->bytes_out, vif->pkt_out,
1766 			   vif->flags, vif->local, vif->remote);
1767 	}
1768 	return 0;
1769 }
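/* For illustration, a hypothetical /proc/net/ip_mr_vif dump produced by the
 * show function above (interface name and all values are made up):
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *	 0 eth0         144000    1200    144000    1200 00000 C0000201 00000000
 */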
1770 
1771 static const struct seq_operations ipmr_vif_seq_ops = {
1772 	.start = ipmr_vif_seq_start,
1773 	.next  = ipmr_vif_seq_next,
1774 	.stop  = ipmr_vif_seq_stop,
1775 	.show  = ipmr_vif_seq_show,
1776 };
1777 
1778 static int ipmr_vif_open(struct inode *inode, struct file *file)
1779 {
1780 	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1781 			    sizeof(struct ipmr_vif_iter));
1782 }
1783 
1784 static const struct file_operations ipmr_vif_fops = {
1785 	.owner	 = THIS_MODULE,
1786 	.open    = ipmr_vif_open,
1787 	.read    = seq_read,
1788 	.llseek  = seq_lseek,
1789 	.release = seq_release_net,
1790 };
1791 
1792 struct ipmr_mfc_iter {
1793 	struct seq_net_private p;
1794 	struct mfc_cache **cache;
1795 	int ct;
1796 };
1797 
1798 
1799 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1800 					  struct ipmr_mfc_iter *it, loff_t pos)
1801 {
1802 	struct mfc_cache *mfc;
1803 
1804 	it->cache = net->ipv4.mfc_cache_array;
1805 	read_lock(&mrt_lock);
1806 	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1807 		for (mfc = net->ipv4.mfc_cache_array[it->ct];
1808 		     mfc; mfc = mfc->next)
1809 			if (pos-- == 0)
1810 				return mfc;
1811 	read_unlock(&mrt_lock);
1812 
1813 	it->cache = &mfc_unres_queue;
1814 	spin_lock_bh(&mfc_unres_lock);
1815 	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1816 		if (net_eq(mfc_net(mfc), net) &&
1817 		    pos-- == 0)
1818 			return mfc;
1819 	spin_unlock_bh(&mfc_unres_lock);
1820 
1821 	it->cache = NULL;
1822 	return NULL;
1823 }
1824 
1825 
1826 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1827 {
1828 	struct ipmr_mfc_iter *it = seq->private;
1829 	struct net *net = seq_file_net(seq);
1830 
1831 	it->cache = NULL;
1832 	it->ct = 0;
1833 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1834 		: SEQ_START_TOKEN;
1835 }
1836 
1837 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1838 {
1839 	struct mfc_cache *mfc = v;
1840 	struct ipmr_mfc_iter *it = seq->private;
1841 	struct net *net = seq_file_net(seq);
1842 
1843 	++*pos;
1844 
1845 	if (v == SEQ_START_TOKEN)
1846 		return ipmr_mfc_seq_idx(net, seq->private, 0);
1847 
1848 	if (mfc->next)
1849 		return mfc->next;
1850 
1851 	if (it->cache == &mfc_unres_queue)
1852 		goto end_of_list;
1853 
1854 	BUG_ON(it->cache != net->ipv4.mfc_cache_array);
1855 
1856 	while (++it->ct < MFC_LINES) {
1857 		mfc = net->ipv4.mfc_cache_array[it->ct];
1858 		if (mfc)
1859 			return mfc;
1860 	}
1861 
1862 	/* exhausted cache_array, show unresolved */
1863 	read_unlock(&mrt_lock);
1864 	it->cache = &mfc_unres_queue;
1865 	it->ct = 0;
1866 
1867 	spin_lock_bh(&mfc_unres_lock);
1868 	mfc = mfc_unres_queue;
1869 	while (mfc && !net_eq(mfc_net(mfc), net))
1870 		mfc = mfc->next;
1871 	if (mfc)
1872 		return mfc;
1873 
1874  end_of_list:
1875 	spin_unlock_bh(&mfc_unres_lock);
1876 	it->cache = NULL;
1877 
1878 	return NULL;
1879 }
1880 
1881 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1882 {
1883 	struct ipmr_mfc_iter *it = seq->private;
1884 	struct net *net = seq_file_net(seq);
1885 
1886 	if (it->cache == &mfc_unres_queue)
1887 		spin_unlock_bh(&mfc_unres_lock);
1888 	else if (it->cache == net->ipv4.mfc_cache_array)
1889 		read_unlock(&mrt_lock);
1890 }
1891 
1892 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1893 {
1894 	int n;
1895 	struct net *net = seq_file_net(seq);
1896 
1897 	if (v == SEQ_START_TOKEN) {
1898 		seq_puts(seq,
1899 		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1900 	} else {
1901 		const struct mfc_cache *mfc = v;
1902 		const struct ipmr_mfc_iter *it = seq->private;
1903 
1904 		seq_printf(seq, "%08lX %08lX %-3hd",
1905 			   (unsigned long) mfc->mfc_mcastgrp,
1906 			   (unsigned long) mfc->mfc_origin,
1907 			   mfc->mfc_parent);
1908 
1909 		if (it->cache != &mfc_unres_queue) {
1910 			seq_printf(seq, " %8lu %8lu %8lu",
1911 				   mfc->mfc_un.res.pkt,
1912 				   mfc->mfc_un.res.bytes,
1913 				   mfc->mfc_un.res.wrong_if);
1914 			for (n = mfc->mfc_un.res.minvif;
1915 			     n < mfc->mfc_un.res.maxvif; n++ ) {
1916 				if (VIF_EXISTS(net, n) &&
1917 				    mfc->mfc_un.res.ttls[n] < 255)
1918 					seq_printf(seq,
1919 					   " %2d:%-3d",
1920 					   n, mfc->mfc_un.res.ttls[n]);
1921 			}
1922 		} else {
1923 			/* unresolved mfc_caches don't contain
1924 			 * pkt, bytes and wrong_if values
1925 			 */
1926 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1927 		}
1928 		seq_putc(seq, '\n');
1929 	}
1930 	return 0;
1931 }
1932 
1933 static const struct seq_operations ipmr_mfc_seq_ops = {
1934 	.start = ipmr_mfc_seq_start,
1935 	.next  = ipmr_mfc_seq_next,
1936 	.stop  = ipmr_mfc_seq_stop,
1937 	.show  = ipmr_mfc_seq_show,
1938 };
1939 
1940 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1941 {
1942 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1943 			    sizeof(struct ipmr_mfc_iter));
1944 }
1945 
1946 static const struct file_operations ipmr_mfc_fops = {
1947 	.owner	 = THIS_MODULE,
1948 	.open    = ipmr_mfc_open,
1949 	.read    = seq_read,
1950 	.llseek  = seq_lseek,
1951 	.release = seq_release_net,
1952 };
1953 #endif
1954 
1955 #ifdef CONFIG_IP_PIMSM_V2
1956 static struct net_protocol pim_protocol = {
1957 	.handler	=	pim_rcv,
1958 };
1959 #endif
1960 
1961 
1962 /*
1963  *	Setup for IP multicast routing
1964  */
1965 static int __net_init ipmr_net_init(struct net *net)
1966 {
1967 	int err = 0;
1968 
1969 	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1970 				      GFP_KERNEL);
1971 	if (!net->ipv4.vif_table) {
1972 		err = -ENOMEM;
1973 		goto fail;
1974 	}
1975 
1976 	/* Forwarding cache */
1977 	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1978 					    sizeof(struct mfc_cache *),
1979 					    GFP_KERNEL);
1980 	if (!net->ipv4.mfc_cache_array) {
1981 		err = -ENOMEM;
1982 		goto fail_mfc_cache;
1983 	}
1984 
1985 #ifdef CONFIG_IP_PIMSM
1986 	net->ipv4.mroute_reg_vif_num = -1;
1987 #endif
1988 
1989 #ifdef CONFIG_PROC_FS
1990 	err = -ENOMEM;
1991 	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1992 		goto proc_vif_fail;
1993 	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1994 		goto proc_cache_fail;
1995 #endif
1996 	return 0;
1997 
1998 #ifdef CONFIG_PROC_FS
1999 proc_cache_fail:
2000 	proc_net_remove(net, "ip_mr_vif");
2001 proc_vif_fail:
2002 	kfree(net->ipv4.mfc_cache_array);
2003 #endif
2004 fail_mfc_cache:
2005 	kfree(net->ipv4.vif_table);
2006 fail:
2007 	return err;
2008 }
2009 
2010 static void __net_exit ipmr_net_exit(struct net *net)
2011 {
2012 #ifdef CONFIG_PROC_FS
2013 	proc_net_remove(net, "ip_mr_cache");
2014 	proc_net_remove(net, "ip_mr_vif");
2015 #endif
2016 	kfree(net->ipv4.mfc_cache_array);
2017 	kfree(net->ipv4.vif_table);
2018 }
2019 
2020 static struct pernet_operations ipmr_net_ops = {
2021 	.init = ipmr_net_init,
2022 	.exit = ipmr_net_exit,
2023 };
2024 
2025 int __init ip_mr_init(void)
2026 {
2027 	int err;
2028 
2029 	mrt_cachep = kmem_cache_create("ip_mrt_cache",
2030 				       sizeof(struct mfc_cache),
2031 				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2032 				       NULL);
2033 	if (!mrt_cachep)
2034 		return -ENOMEM;
2035 
2036 	err = register_pernet_subsys(&ipmr_net_ops);
2037 	if (err)
2038 		goto reg_pernet_fail;
2039 
2040 	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
2041 	err = register_netdevice_notifier(&ip_mr_notifier);
2042 	if (err)
2043 		goto reg_notif_fail;
2044 	return 0;
2045 
2046 reg_notif_fail:
2047 	del_timer(&ipmr_expire_timer);
2048 	unregister_pernet_subsys(&ipmr_net_ops);
2049 reg_pernet_fail:
2050 	kmem_cache_destroy(mrt_cachep);
2051 	return err;
2052 }
2053