xref: /openbmc/linux/net/ipv4/ipmr.c (revision e8e0929d)
1 /*
2  *	IP multicast routing support for mrouted 3.6/3.8
3  *
4  *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *	  Linux Consultancy and Custom Driver Development
6  *
7  *	This program is free software; you can redistribute it and/or
8  *	modify it under the terms of the GNU General Public License
9  *	as published by the Free Software Foundation; either version
10  *	2 of the License, or (at your option) any later version.
11  *
12  *	Fixes:
13  *	Michael Chastain	:	Incorrect size of copying.
14  *	Alan Cox		:	Added the cache manager code
15  *	Alan Cox		:	Fixed the clone/copy bug and device race.
16  *	Mike McLagan		:	Routing by source
17  *	Malcolm Beattie		:	Buffer handling fixes.
18  *	Alexey Kuznetsov	:	Double buffer free and other fixes.
19  *	SVR Anand		:	Fixed several multicast bugs and problems.
20  *	Alexey Kuznetsov	:	Status, optimisations and more.
21  *	Brad Parker		:	Better behaviour on mrouted upcall
22  *					overflow.
23  *      Carlos Picoto           :       PIMv1 Support
24  *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
25  *					Relax this requirement to work with older peers.
26  *
27  */
28 
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <linux/mm.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
40 #include <linux/in.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <net/net_namespace.h>
51 #include <net/ip.h>
52 #include <net/protocol.h>
53 #include <linux/skbuff.h>
54 #include <net/route.h>
55 #include <net/sock.h>
56 #include <net/icmp.h>
57 #include <net/udp.h>
58 #include <net/raw.h>
59 #include <linux/notifier.h>
60 #include <linux/if_arp.h>
61 #include <linux/netfilter_ipv4.h>
62 #include <net/ipip.h>
63 #include <net/checksum.h>
64 #include <net/netlink.h>
65 
66 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67 #define CONFIG_IP_PIMSM	1
68 #endif
69 
70 /* Big lock, protecting the vif table, the mrt cache and the mroute
71    socket state. Note that changes are serialized via rtnl_lock.
72  */
73 
74 static DEFINE_RWLOCK(mrt_lock);
75 
76 /*
77  *	Multicast router control variables
78  */
79 
80 #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
81 
82 static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
83 
84 /* Special spinlock for queue of unresolved entries */
85 static DEFINE_SPINLOCK(mfc_unres_lock);
86 
87 /* We return to Alan's original scheme. The hash table of resolved
88    entries is changed only in process context and is protected
89    by the weak rwlock mrt_lock. The queue of unresolved entries
90    is protected by the strong spinlock mfc_unres_lock.
91 
92    This way the data path is entirely free of exclusive locks.
93  */
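
/* A minimal sketch of the resulting locking pattern (illustration only):
 *
 *	read_lock(&mrt_lock);		// data path: look up vifs and resolved entries
 *	...
 *	read_unlock(&mrt_lock);
 *
 *	write_lock_bh(&mrt_lock);	// process context: mutate vif table / hash table
 *	...
 *	write_unlock_bh(&mrt_lock);
 *
 *	spin_lock_bh(&mfc_unres_lock);	// either context: touch the unresolved queue
 *	...
 *	spin_unlock_bh(&mfc_unres_lock);
 */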
94 
95 static struct kmem_cache *mrt_cachep __read_mostly;
96 
97 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
98 static int ipmr_cache_report(struct net *net,
99 			     struct sk_buff *pkt, vifi_t vifi, int assert);
100 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
101 
102 static struct timer_list ipmr_expire_timer;
103 
104 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
105 
106 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
107 {
108 	struct net *net = dev_net(dev);
109 
110 	dev_close(dev);
111 
112 	dev = __dev_get_by_name(net, "tunl0");
113 	if (dev) {
114 		const struct net_device_ops *ops = dev->netdev_ops;
115 		struct ifreq ifr;
116 		struct ip_tunnel_parm p;
117 
118 		memset(&p, 0, sizeof(p));
119 		p.iph.daddr = v->vifc_rmt_addr.s_addr;
120 		p.iph.saddr = v->vifc_lcl_addr.s_addr;
121 		p.iph.version = 4;
122 		p.iph.ihl = 5;
123 		p.iph.protocol = IPPROTO_IPIP;
124 		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
125 		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
126 
127 		if (ops->ndo_do_ioctl) {
128 			mm_segment_t oldfs = get_fs();
129 
130 			set_fs(KERNEL_DS);
131 			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
132 			set_fs(oldfs);
133 		}
134 	}
135 }
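
/*
 * Both tunnel helpers (ipmr_del_tunnel() above and ipmr_new_tunnel()
 * below) drive the tunl0 driver's ioctl handler from kernel space.
 * Because ndo_do_ioctl() expects a user pointer in ifr_ifru.ifru_data,
 * the callers temporarily widen the address limit:
 *
 *	mm_segment_t oldfs = get_fs();
 *	set_fs(KERNEL_DS);	// let copy_from_user() accept kernel pointers
 *	ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
 *	set_fs(oldfs);
 */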
136 
137 static
138 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
139 {
140 	struct net_device  *dev;
141 
142 	dev = __dev_get_by_name(net, "tunl0");
143 
144 	if (dev) {
145 		const struct net_device_ops *ops = dev->netdev_ops;
146 		int err;
147 		struct ifreq ifr;
148 		struct ip_tunnel_parm p;
149 		struct in_device  *in_dev;
150 
151 		memset(&p, 0, sizeof(p));
152 		p.iph.daddr = v->vifc_rmt_addr.s_addr;
153 		p.iph.saddr = v->vifc_lcl_addr.s_addr;
154 		p.iph.version = 4;
155 		p.iph.ihl = 5;
156 		p.iph.protocol = IPPROTO_IPIP;
157 		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
158 		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
159 
160 		if (ops->ndo_do_ioctl) {
161 			mm_segment_t oldfs = get_fs();
162 
163 			set_fs(KERNEL_DS);
164 			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
165 			set_fs(oldfs);
166 		} else
167 			err = -EOPNOTSUPP;
168 
169 		dev = NULL;
170 
171 		if (err == 0 &&
172 		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
173 			dev->flags |= IFF_MULTICAST;
174 
175 			in_dev = __in_dev_get_rtnl(dev);
176 			if (in_dev == NULL)
177 				goto failure;
178 
179 			ipv4_devconf_setall(in_dev);
180 			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
181 
182 			if (dev_open(dev))
183 				goto failure;
184 			dev_hold(dev);
185 		}
186 	}
187 	return dev;
188 
189 failure:
190 	/* allow the register to be completed before unregistering. */
191 	rtnl_unlock();
192 	rtnl_lock();
193 
194 	unregister_netdevice(dev);
195 	return NULL;
196 }
197 
198 #ifdef CONFIG_IP_PIMSM
199 
200 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
201 {
202 	struct net *net = dev_net(dev);
203 
204 	read_lock(&mrt_lock);
205 	dev->stats.tx_bytes += skb->len;
206 	dev->stats.tx_packets++;
207 	ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
208 			  IGMPMSG_WHOLEPKT);
209 	read_unlock(&mrt_lock);
210 	kfree_skb(skb);
211 	return NETDEV_TX_OK;
212 }
213 
214 static const struct net_device_ops reg_vif_netdev_ops = {
215 	.ndo_start_xmit	= reg_vif_xmit,
216 };
217 
218 static void reg_vif_setup(struct net_device *dev)
219 {
220 	dev->type		= ARPHRD_PIMREG;
221 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
222 	dev->flags		= IFF_NOARP;
223 	dev->netdev_ops		= &reg_vif_netdev_ops;
224 	dev->destructor		= free_netdev;
225 	dev->features		|= NETIF_F_NETNS_LOCAL;
226 }
227 
228 static struct net_device *ipmr_reg_vif(struct net *net)
229 {
230 	struct net_device *dev;
231 	struct in_device *in_dev;
232 
233 	dev = alloc_netdev(0, "pimreg", reg_vif_setup);
234 
235 	if (dev == NULL)
236 		return NULL;
237 
238 	dev_net_set(dev, net);
239 
240 	if (register_netdevice(dev)) {
241 		free_netdev(dev);
242 		return NULL;
243 	}
244 	dev->iflink = 0;
245 
246 	rcu_read_lock();
247 	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
248 		rcu_read_unlock();
249 		goto failure;
250 	}
251 
252 	ipv4_devconf_setall(in_dev);
253 	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
254 	rcu_read_unlock();
255 
256 	if (dev_open(dev))
257 		goto failure;
258 
259 	dev_hold(dev);
260 
261 	return dev;
262 
263 failure:
264 	/* allow the register to be completed before unregistering. */
265 	rtnl_unlock();
266 	rtnl_lock();
267 
268 	unregister_netdevice(dev);
269 	return NULL;
270 }
271 #endif
272 
273 /*
274  *	Delete a VIF entry
275  *	@notify: Set to 1 if the caller is a notifier_call
276  */
277 
278 static int vif_delete(struct net *net, int vifi, int notify)
279 {
280 	struct vif_device *v;
281 	struct net_device *dev;
282 	struct in_device *in_dev;
283 
284 	if (vifi < 0 || vifi >= net->ipv4.maxvif)
285 		return -EADDRNOTAVAIL;
286 
287 	v = &net->ipv4.vif_table[vifi];
288 
289 	write_lock_bh(&mrt_lock);
290 	dev = v->dev;
291 	v->dev = NULL;
292 
293 	if (!dev) {
294 		write_unlock_bh(&mrt_lock);
295 		return -EADDRNOTAVAIL;
296 	}
297 
298 #ifdef CONFIG_IP_PIMSM
299 	if (vifi == net->ipv4.mroute_reg_vif_num)
300 		net->ipv4.mroute_reg_vif_num = -1;
301 #endif
302 
303 	if (vifi+1 == net->ipv4.maxvif) {
304 		int tmp;
305 		for (tmp=vifi-1; tmp>=0; tmp--) {
306 			if (VIF_EXISTS(net, tmp))
307 				break;
308 		}
309 		net->ipv4.maxvif = tmp+1;
310 	}
311 
312 	write_unlock_bh(&mrt_lock);
313 
314 	dev_set_allmulti(dev, -1);
315 
316 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
317 		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
318 		ip_rt_multicast_event(in_dev);
319 	}
320 
321 	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
322 		unregister_netdevice(dev);
323 
324 	dev_put(dev);
325 	return 0;
326 }
327 
328 static inline void ipmr_cache_free(struct mfc_cache *c)
329 {
330 	release_net(mfc_net(c));
331 	kmem_cache_free(mrt_cachep, c);
332 }
333 
334 /* Destroy an unresolved cache entry, killing queued skbs
335    and reporting an error to netlink readers.
336  */
337 
338 static void ipmr_destroy_unres(struct mfc_cache *c)
339 {
340 	struct sk_buff *skb;
341 	struct nlmsgerr *e;
342 	struct net *net = mfc_net(c);
343 
344 	atomic_dec(&net->ipv4.cache_resolve_queue_len);
345 
346 	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
347 		if (ip_hdr(skb)->version == 0) {
348 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
349 			nlh->nlmsg_type = NLMSG_ERROR;
350 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
351 			skb_trim(skb, nlh->nlmsg_len);
352 			e = NLMSG_DATA(nlh);
353 			e->error = -ETIMEDOUT;
354 			memset(&e->msg, 0, sizeof(e->msg));
355 
356 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
357 		} else
358 			kfree_skb(skb);
359 	}
360 
361 	ipmr_cache_free(c);
362 }
363 
364 
365 /* Single timer process for all the unresolved queue. */
366 
367 static void ipmr_expire_process(unsigned long dummy)
368 {
369 	unsigned long now;
370 	unsigned long expires;
371 	struct mfc_cache *c, **cp;
372 
373 	if (!spin_trylock(&mfc_unres_lock)) {
374 		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
375 		return;
376 	}
377 
378 	if (mfc_unres_queue == NULL)
379 		goto out;
380 
381 	now = jiffies;
382 	expires = 10*HZ;
383 	cp = &mfc_unres_queue;
384 
385 	while ((c=*cp) != NULL) {
386 		if (time_after(c->mfc_un.unres.expires, now)) {
387 			unsigned long interval = c->mfc_un.unres.expires - now;
388 			if (interval < expires)
389 				expires = interval;
390 			cp = &c->next;
391 			continue;
392 		}
393 
394 		*cp = c->next;
395 
396 		ipmr_destroy_unres(c);
397 	}
398 
399 	if (mfc_unres_queue != NULL)
400 		mod_timer(&ipmr_expire_timer, jiffies + expires);
401 
402 out:
403 	spin_unlock(&mfc_unres_lock);
404 }
405 
406 /* Fill the oif list. Called with mrt_lock held for writing. */
407 
408 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
409 {
410 	int vifi;
411 	struct net *net = mfc_net(cache);
412 
413 	cache->mfc_un.res.minvif = MAXVIFS;
414 	cache->mfc_un.res.maxvif = 0;
415 	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
416 
417 	for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
418 		if (VIF_EXISTS(net, vifi) &&
419 		    ttls[vifi] && ttls[vifi] < 255) {
420 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
421 			if (cache->mfc_un.res.minvif > vifi)
422 				cache->mfc_un.res.minvif = vifi;
423 			if (cache->mfc_un.res.maxvif <= vifi)
424 				cache->mfc_un.res.maxvif = vifi + 1;
425 		}
426 	}
427 }
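
/*
 * Worked example with hypothetical values, assuming vifs 1 and 3 exist:
 * given ttls = {0, 2, 255, 3, 0, ...}, the loop above skips vifs 0, 2
 * and 4 (a TTL of 0 or 255 means "never forward here"), records
 * res.ttls[1] = 2 and res.ttls[3] = 3, and leaves minvif = 1 and
 * maxvif = 4, i.e. the half-open vif range [1, 4).
 */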
428 
429 static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
430 {
431 	int vifi = vifc->vifc_vifi;
432 	struct vif_device *v = &net->ipv4.vif_table[vifi];
433 	struct net_device *dev;
434 	struct in_device *in_dev;
435 	int err;
436 
437 	/* Is vif busy ? */
438 	if (VIF_EXISTS(net, vifi))
439 		return -EADDRINUSE;
440 
441 	switch (vifc->vifc_flags) {
442 #ifdef CONFIG_IP_PIMSM
443 	case VIFF_REGISTER:
444 		/*
445 		 * Special Purpose VIF in PIM
446 		 * All the packets will be sent to the daemon
447 		 */
448 		if (net->ipv4.mroute_reg_vif_num >= 0)
449 			return -EADDRINUSE;
450 		dev = ipmr_reg_vif(net);
451 		if (!dev)
452 			return -ENOBUFS;
453 		err = dev_set_allmulti(dev, 1);
454 		if (err) {
455 			unregister_netdevice(dev);
456 			dev_put(dev);
457 			return err;
458 		}
459 		break;
460 #endif
461 	case VIFF_TUNNEL:
462 		dev = ipmr_new_tunnel(net, vifc);
463 		if (!dev)
464 			return -ENOBUFS;
465 		err = dev_set_allmulti(dev, 1);
466 		if (err) {
467 			ipmr_del_tunnel(dev, vifc);
468 			dev_put(dev);
469 			return err;
470 		}
471 		break;
472 	case 0:
473 		dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
474 		if (!dev)
475 			return -EADDRNOTAVAIL;
476 		err = dev_set_allmulti(dev, 1);
477 		if (err) {
478 			dev_put(dev);
479 			return err;
480 		}
481 		break;
482 	default:
483 		return -EINVAL;
484 	}
485 
486 	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
487 		return -EADDRNOTAVAIL;
488 	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
489 	ip_rt_multicast_event(in_dev);
490 
491 	/*
492 	 *	Fill in the VIF structures
493 	 */
494 	v->rate_limit = vifc->vifc_rate_limit;
495 	v->local = vifc->vifc_lcl_addr.s_addr;
496 	v->remote = vifc->vifc_rmt_addr.s_addr;
497 	v->flags = vifc->vifc_flags;
498 	if (!mrtsock)
499 		v->flags |= VIFF_STATIC;
500 	v->threshold = vifc->vifc_threshold;
501 	v->bytes_in = 0;
502 	v->bytes_out = 0;
503 	v->pkt_in = 0;
504 	v->pkt_out = 0;
505 	v->link = dev->ifindex;
506 	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
507 		v->link = dev->iflink;
508 
509 	/* And finish update writing critical data */
510 	write_lock_bh(&mrt_lock);
511 	v->dev = dev;
512 #ifdef CONFIG_IP_PIMSM
513 	if (v->flags&VIFF_REGISTER)
514 		net->ipv4.mroute_reg_vif_num = vifi;
515 #endif
516 	if (vifi+1 > net->ipv4.maxvif)
517 		net->ipv4.maxvif = vifi+1;
518 	write_unlock_bh(&mrt_lock);
519 	return 0;
520 }
521 
522 static struct mfc_cache *ipmr_cache_find(struct net *net,
523 					 __be32 origin,
524 					 __be32 mcastgrp)
525 {
526 	int line = MFC_HASH(mcastgrp, origin);
527 	struct mfc_cache *c;
528 
529 	for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
530 		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
531 			break;
532 	}
533 	return c;
534 }
535 
536 /*
537  *	Allocate a multicast cache entry
538  */
539 static struct mfc_cache *ipmr_cache_alloc(struct net *net)
540 {
541 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
542 	if (c == NULL)
543 		return NULL;
544 	c->mfc_un.res.minvif = MAXVIFS;
545 	mfc_net_set(c, net);
546 	return c;
547 }
548 
549 static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
550 {
551 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
552 	if (c == NULL)
553 		return NULL;
554 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
555 	c->mfc_un.unres.expires = jiffies + 10*HZ;
556 	mfc_net_set(c, net);
557 	return c;
558 }
559 
560 /*
561  *	A cache entry has moved from the unresolved queue into a resolved state
562  */
563 
564 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
565 {
566 	struct sk_buff *skb;
567 	struct nlmsgerr *e;
568 
569 	/*
570 	 *	Play the pending entries through our router
571 	 */
572 
573 	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
574 		if (ip_hdr(skb)->version == 0) {
575 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
576 
577 			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
578 				nlh->nlmsg_len = (skb_tail_pointer(skb) -
579 						  (u8 *)nlh);
580 			} else {
581 				nlh->nlmsg_type = NLMSG_ERROR;
582 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
583 				skb_trim(skb, nlh->nlmsg_len);
584 				e = NLMSG_DATA(nlh);
585 				e->error = -EMSGSIZE;
586 				memset(&e->msg, 0, sizeof(e->msg));
587 			}
588 
589 			rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
590 		} else
591 			ip_mr_forward(skb, c, 0);
592 	}
593 }
594 
595 /*
596  *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
597  *	expects the following bizarre scheme.
598  *
599  *	Called under mrt_lock.
600  */
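
/*
 * What mrouted actually receives is a struct igmpmsg (see
 * linux/mroute.h), laid out so that it overlays the copied IP header;
 * roughly:
 *
 *	struct igmpmsg {
 *		__u32 unused1, unused2;
 *		unsigned char im_msgtype;	// IGMPMSG_NOCACHE/WRONGVIF/WHOLEPKT
 *		unsigned char im_mbz;		// must be zero
 *		unsigned char im_vif;		// vif the packet arrived on
 *		unsigned char unused3;
 *		struct in_addr im_src, im_dst;	// overlay iph->saddr/daddr
 *	};
 */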
601 
602 static int ipmr_cache_report(struct net *net,
603 			     struct sk_buff *pkt, vifi_t vifi, int assert)
604 {
605 	struct sk_buff *skb;
606 	const int ihl = ip_hdrlen(pkt);
607 	struct igmphdr *igmp;
608 	struct igmpmsg *msg;
609 	int ret;
610 
611 #ifdef CONFIG_IP_PIMSM
612 	if (assert == IGMPMSG_WHOLEPKT)
613 		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
614 	else
615 #endif
616 		skb = alloc_skb(128, GFP_ATOMIC);
617 
618 	if (!skb)
619 		return -ENOBUFS;
620 
621 #ifdef CONFIG_IP_PIMSM
622 	if (assert == IGMPMSG_WHOLEPKT) {
623 		/* Ugly, but we have no choice with this interface.
624 		   Duplicate old header, fix ihl, length etc.
625 		   And all this only to mangle msg->im_msgtype and
626 		   to set msg->im_mbz to "mbz" :-)
627 		 */
628 		skb_push(skb, sizeof(struct iphdr));
629 		skb_reset_network_header(skb);
630 		skb_reset_transport_header(skb);
631 		msg = (struct igmpmsg *)skb_network_header(skb);
632 		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
633 		msg->im_msgtype = IGMPMSG_WHOLEPKT;
634 		msg->im_mbz = 0;
635 		msg->im_vif = net->ipv4.mroute_reg_vif_num;
636 		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
637 		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
638 					     sizeof(struct iphdr));
639 	} else
640 #endif
641 	{
642 
643 	/*
644 	 *	Copy the IP header
645 	 */
646 
647 	skb->network_header = skb->tail;
648 	skb_put(skb, ihl);
649 	skb_copy_to_linear_data(skb, pkt->data, ihl);
650 	ip_hdr(skb)->protocol = 0;			/* Flag to the kernel this is a route add */
651 	msg = (struct igmpmsg *)skb_network_header(skb);
652 	msg->im_vif = vifi;
653 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
654 
655 	/*
656 	 *	Add our header
657 	 */
658 
659 	igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
660 	igmp->type	=
661 	msg->im_msgtype = assert;
662 	igmp->code 	=	0;
663 	ip_hdr(skb)->tot_len = htons(skb->len);			/* Fix the length */
664 	skb->transport_header = skb->network_header;
665 	}
666 
667 	if (net->ipv4.mroute_sk == NULL) {
668 		kfree_skb(skb);
669 		return -EINVAL;
670 	}
671 
672 	/*
673 	 *	Deliver to mrouted
674 	 */
675 	ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
676 	if (ret < 0) {
677 		if (net_ratelimit())
678 			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
679 		kfree_skb(skb);
680 	}
681 
682 	return ret;
683 }
684 
685 /*
686  *	Queue a packet for resolution; the unresolved cache entry is created and handled under mfc_unres_lock!
687  */
688 
689 static int
690 ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
691 {
692 	int err;
693 	struct mfc_cache *c;
694 	const struct iphdr *iph = ip_hdr(skb);
695 
696 	spin_lock_bh(&mfc_unres_lock);
697 	for (c=mfc_unres_queue; c; c=c->next) {
698 		if (net_eq(mfc_net(c), net) &&
699 		    c->mfc_mcastgrp == iph->daddr &&
700 		    c->mfc_origin == iph->saddr)
701 			break;
702 	}
703 
704 	if (c == NULL) {
705 		/*
706 		 *	Create a new entry if allowable
707 		 */
708 
709 		if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
710 		    (c = ipmr_cache_alloc_unres(net)) == NULL) {
711 			spin_unlock_bh(&mfc_unres_lock);
712 
713 			kfree_skb(skb);
714 			return -ENOBUFS;
715 		}
716 
717 		/*
718 		 *	Fill in the new cache entry
719 		 */
720 		c->mfc_parent	= -1;
721 		c->mfc_origin	= iph->saddr;
722 		c->mfc_mcastgrp	= iph->daddr;
723 
724 		/*
725 		 *	Reflect first query at mrouted.
726 		 */
727 		err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
728 		if (err < 0) {
729 			/* If the report failed throw the cache entry
730 			   out - Brad Parker
731 			 */
732 			spin_unlock_bh(&mfc_unres_lock);
733 
734 			ipmr_cache_free(c);
735 			kfree_skb(skb);
736 			return err;
737 		}
738 
739 		atomic_inc(&net->ipv4.cache_resolve_queue_len);
740 		c->next = mfc_unres_queue;
741 		mfc_unres_queue = c;
742 
743 		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
744 	}
745 
746 	/*
747 	 *	See if we can append the packet
748 	 */
749 	if (c->mfc_un.unres.unresolved.qlen>3) {
750 		kfree_skb(skb);
751 		err = -ENOBUFS;
752 	} else {
753 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
754 		err = 0;
755 	}
756 
757 	spin_unlock_bh(&mfc_unres_lock);
758 	return err;
759 }
760 
761 /*
762  *	MFC cache manipulation by user space mroute daemon
763  */
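
/*
 * Userspace sketch (illustration only, addresses hypothetical): the
 * daemon installs an (S,G) entry via MRT_ADD_MFC, which lands in
 * ipmr_mfc_add() below:
 *
 *	struct mfcctl mc = {0};
 *	mc.mfcc_origin.s_addr   = inet_addr("10.0.0.1");	// S
 *	mc.mfcc_mcastgrp.s_addr = inet_addr("239.1.1.1");	// G
 *	mc.mfcc_parent  = 0;	// expected incoming vif
 *	mc.mfcc_ttls[1] = 1;	// forward on vif 1, TTL threshold 1
 *	setsockopt(mroute_sk, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 */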
764 
765 static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
766 {
767 	int line;
768 	struct mfc_cache *c, **cp;
769 
770 	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
771 
772 	for (cp = &net->ipv4.mfc_cache_array[line];
773 	     (c = *cp) != NULL; cp = &c->next) {
774 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
775 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
776 			write_lock_bh(&mrt_lock);
777 			*cp = c->next;
778 			write_unlock_bh(&mrt_lock);
779 
780 			ipmr_cache_free(c);
781 			return 0;
782 		}
783 	}
784 	return -ENOENT;
785 }
786 
787 static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
788 {
789 	int line;
790 	struct mfc_cache *uc, *c, **cp;
791 
792 	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
793 
794 	for (cp = &net->ipv4.mfc_cache_array[line];
795 	     (c = *cp) != NULL; cp = &c->next) {
796 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
797 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
798 			break;
799 	}
800 
801 	if (c != NULL) {
802 		write_lock_bh(&mrt_lock);
803 		c->mfc_parent = mfc->mfcc_parent;
804 		ipmr_update_thresholds(c, mfc->mfcc_ttls);
805 		if (!mrtsock)
806 			c->mfc_flags |= MFC_STATIC;
807 		write_unlock_bh(&mrt_lock);
808 		return 0;
809 	}
810 
811 	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
812 		return -EINVAL;
813 
814 	c = ipmr_cache_alloc(net);
815 	if (c == NULL)
816 		return -ENOMEM;
817 
818 	c->mfc_origin = mfc->mfcc_origin.s_addr;
819 	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
820 	c->mfc_parent = mfc->mfcc_parent;
821 	ipmr_update_thresholds(c, mfc->mfcc_ttls);
822 	if (!mrtsock)
823 		c->mfc_flags |= MFC_STATIC;
824 
825 	write_lock_bh(&mrt_lock);
826 	c->next = net->ipv4.mfc_cache_array[line];
827 	net->ipv4.mfc_cache_array[line] = c;
828 	write_unlock_bh(&mrt_lock);
829 
830 	/*
831 	 *	Check whether we have just resolved a queued entry. If so we
832 	 *	need to send the queued frames on and tidy up.
833 	 */
834 	spin_lock_bh(&mfc_unres_lock);
835 	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
836 	     cp = &uc->next) {
837 		if (net_eq(mfc_net(uc), net) &&
838 		    uc->mfc_origin == c->mfc_origin &&
839 		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
840 			*cp = uc->next;
841 			atomic_dec(&net->ipv4.cache_resolve_queue_len);
842 			break;
843 		}
844 	}
845 	if (mfc_unres_queue == NULL)
846 		del_timer(&ipmr_expire_timer);
847 	spin_unlock_bh(&mfc_unres_lock);
848 
849 	if (uc) {
850 		ipmr_cache_resolve(uc, c);
851 		ipmr_cache_free(uc);
852 	}
853 	return 0;
854 }
855 
856 /*
857  *	Close the multicast socket, and clear the vif tables etc
858  */
859 
860 static void mroute_clean_tables(struct net *net)
861 {
862 	int i;
863 
864 	/*
865 	 *	Shut down all active vif entries
866 	 */
867 	for (i = 0; i < net->ipv4.maxvif; i++) {
868 		if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
869 			vif_delete(net, i, 0);
870 	}
871 
872 	/*
873 	 *	Wipe the cache
874 	 */
875 	for (i=0; i<MFC_LINES; i++) {
876 		struct mfc_cache *c, **cp;
877 
878 		cp = &net->ipv4.mfc_cache_array[i];
879 		while ((c = *cp) != NULL) {
880 			if (c->mfc_flags&MFC_STATIC) {
881 				cp = &c->next;
882 				continue;
883 			}
884 			write_lock_bh(&mrt_lock);
885 			*cp = c->next;
886 			write_unlock_bh(&mrt_lock);
887 
888 			ipmr_cache_free(c);
889 		}
890 	}
891 
892 	if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
893 		struct mfc_cache *c, **cp;
894 
895 		spin_lock_bh(&mfc_unres_lock);
896 		cp = &mfc_unres_queue;
897 		while ((c = *cp) != NULL) {
898 			if (!net_eq(mfc_net(c), net)) {
899 				cp = &c->next;
900 				continue;
901 			}
902 			*cp = c->next;
903 
904 			ipmr_destroy_unres(c);
905 		}
906 		spin_unlock_bh(&mfc_unres_lock);
907 	}
908 }
909 
910 static void mrtsock_destruct(struct sock *sk)
911 {
912 	struct net *net = sock_net(sk);
913 
914 	rtnl_lock();
915 	if (sk == net->ipv4.mroute_sk) {
916 		IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
917 
918 		write_lock_bh(&mrt_lock);
919 		net->ipv4.mroute_sk = NULL;
920 		write_unlock_bh(&mrt_lock);
921 
922 		mroute_clean_tables(net);
923 	}
924 	rtnl_unlock();
925 }
926 
927 /*
928  *	Socket options and virtual interface manipulation. The whole
929  *	virtual interface system is a complete heap, but unfortunately
930  *	that's how BSD mrouted happens to think. Maybe one day with a proper
931  *	MOSPF/PIM router set up we can clean this up.
932  */
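
/*
 * Userspace sketch (illustration only): a daemon becomes the mroute
 * socket with MRT_INIT on a raw IGMP socket, exactly as checked below:
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int on = 1;
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &on, sizeof(on));
 *	...
 *	setsockopt(s, IPPROTO_IP, MRT_DONE, NULL, 0);	// tear down on exit
 */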
933 
934 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
935 {
936 	int ret;
937 	struct vifctl vif;
938 	struct mfcctl mfc;
939 	struct net *net = sock_net(sk);
940 
941 	if (optname != MRT_INIT) {
942 		if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
943 			return -EACCES;
944 	}
945 
946 	switch (optname) {
947 	case MRT_INIT:
948 		if (sk->sk_type != SOCK_RAW ||
949 		    inet_sk(sk)->num != IPPROTO_IGMP)
950 			return -EOPNOTSUPP;
951 		if (optlen != sizeof(int))
952 			return -ENOPROTOOPT;
953 
954 		rtnl_lock();
955 		if (net->ipv4.mroute_sk) {
956 			rtnl_unlock();
957 			return -EADDRINUSE;
958 		}
959 
960 		ret = ip_ra_control(sk, 1, mrtsock_destruct);
961 		if (ret == 0) {
962 			write_lock_bh(&mrt_lock);
963 			net->ipv4.mroute_sk = sk;
964 			write_unlock_bh(&mrt_lock);
965 
966 			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
967 		}
968 		rtnl_unlock();
969 		return ret;
970 	case MRT_DONE:
971 		if (sk != net->ipv4.mroute_sk)
972 			return -EACCES;
973 		return ip_ra_control(sk, 0, NULL);
974 	case MRT_ADD_VIF:
975 	case MRT_DEL_VIF:
976 		if (optlen != sizeof(vif))
977 			return -EINVAL;
978 		if (copy_from_user(&vif, optval, sizeof(vif)))
979 			return -EFAULT;
980 		if (vif.vifc_vifi >= MAXVIFS)
981 			return -ENFILE;
982 		rtnl_lock();
983 		if (optname == MRT_ADD_VIF) {
984 			ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
985 		} else {
986 			ret = vif_delete(net, vif.vifc_vifi, 0);
987 		}
988 		rtnl_unlock();
989 		return ret;
990 
991 		/*
992 		 *	Manipulate the forwarding caches. These live
993 		 *	in a sort of kernel/user symbiosis.
994 		 */
995 	case MRT_ADD_MFC:
996 	case MRT_DEL_MFC:
997 		if (optlen != sizeof(mfc))
998 			return -EINVAL;
999 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1000 			return -EFAULT;
1001 		rtnl_lock();
1002 		if (optname == MRT_DEL_MFC)
1003 			ret = ipmr_mfc_delete(net, &mfc);
1004 		else
1005 			ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
1006 		rtnl_unlock();
1007 		return ret;
1008 		/*
1009 		 *	Control PIM assert.
1010 		 */
1011 	case MRT_ASSERT:
1012 	{
1013 		int v;
1014 		if (get_user(v,(int __user *)optval))
1015 			return -EFAULT;
1016 		net->ipv4.mroute_do_assert = (v) ? 1 : 0;
1017 		return 0;
1018 	}
1019 #ifdef CONFIG_IP_PIMSM
1020 	case MRT_PIM:
1021 	{
1022 		int v;
1023 
1024 		if (get_user(v,(int __user *)optval))
1025 			return -EFAULT;
1026 		v = (v) ? 1 : 0;
1027 
1028 		rtnl_lock();
1029 		ret = 0;
1030 		if (v != net->ipv4.mroute_do_pim) {
1031 			net->ipv4.mroute_do_pim = v;
1032 			net->ipv4.mroute_do_assert = v;
1033 		}
1034 		rtnl_unlock();
1035 		return ret;
1036 	}
1037 #endif
1038 	/*
1039 	 *	Spurious command, or MRT_VERSION which you cannot
1040 	 *	set.
1041 	 */
1042 	default:
1043 		return -ENOPROTOOPT;
1044 	}
1045 }
1046 
1047 /*
1048  *	Getsockopt support for the multicast routing system.
1049  */
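
/*
 * For instance (userspace sketch), reading the interface version:
 *
 *	int ver;
 *	socklen_t len = sizeof(ver);
 *	getsockopt(s, IPPROTO_IP, MRT_VERSION, &ver, &len);
 *	// ver == 0x0305, the constant returned by the code below
 */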
1050 
1051 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1052 {
1053 	int olr;
1054 	int val;
1055 	struct net *net = sock_net(sk);
1056 
1057 	if (optname != MRT_VERSION &&
1058 #ifdef CONFIG_IP_PIMSM
1059 	   optname!=MRT_PIM &&
1060 #endif
1061 	   optname!=MRT_ASSERT)
1062 		return -ENOPROTOOPT;
1063 
1064 	if (get_user(olr, optlen))
1065 		return -EFAULT;
1066 
1067 	if (olr < 0)
1068 		return -EINVAL;
1069 	olr = min_t(unsigned int, olr, sizeof(int));
1070 
1071 	if (put_user(olr, optlen))
1072 		return -EFAULT;
1073 	if (optname == MRT_VERSION)
1074 		val = 0x0305;
1075 #ifdef CONFIG_IP_PIMSM
1076 	else if (optname == MRT_PIM)
1077 		val = net->ipv4.mroute_do_pim;
1078 #endif
1079 	else
1080 		val = net->ipv4.mroute_do_assert;
1081 	if (copy_to_user(optval, &val, olr))
1082 		return -EFAULT;
1083 	return 0;
1084 }
1085 
1086 /*
1087  *	The IP multicast ioctl support routines.
1088  */
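
/*
 * Userspace sketch (illustration only): per-vif counters are fetched
 * with SIOCGETVIFCNT on the mroute socket:
 *
 *	struct sioc_vif_req vr = { .vifi = 0 };
 *	if (ioctl(mroute_sk, SIOCGETVIFCNT, &vr) == 0)
 *		printf("%lu pkts in, %lu pkts out\n", vr.icount, vr.ocount);
 */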
1089 
1090 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1091 {
1092 	struct sioc_sg_req sr;
1093 	struct sioc_vif_req vr;
1094 	struct vif_device *vif;
1095 	struct mfc_cache *c;
1096 	struct net *net = sock_net(sk);
1097 
1098 	switch (cmd) {
1099 	case SIOCGETVIFCNT:
1100 		if (copy_from_user(&vr, arg, sizeof(vr)))
1101 			return -EFAULT;
1102 		if (vr.vifi >= net->ipv4.maxvif)
1103 			return -EINVAL;
1104 		read_lock(&mrt_lock);
1105 		vif = &net->ipv4.vif_table[vr.vifi];
1106 		if (VIF_EXISTS(net, vr.vifi)) {
1107 			vr.icount = vif->pkt_in;
1108 			vr.ocount = vif->pkt_out;
1109 			vr.ibytes = vif->bytes_in;
1110 			vr.obytes = vif->bytes_out;
1111 			read_unlock(&mrt_lock);
1112 
1113 			if (copy_to_user(arg, &vr, sizeof(vr)))
1114 				return -EFAULT;
1115 			return 0;
1116 		}
1117 		read_unlock(&mrt_lock);
1118 		return -EADDRNOTAVAIL;
1119 	case SIOCGETSGCNT:
1120 		if (copy_from_user(&sr, arg, sizeof(sr)))
1121 			return -EFAULT;
1122 
1123 		read_lock(&mrt_lock);
1124 		c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
1125 		if (c) {
1126 			sr.pktcnt = c->mfc_un.res.pkt;
1127 			sr.bytecnt = c->mfc_un.res.bytes;
1128 			sr.wrong_if = c->mfc_un.res.wrong_if;
1129 			read_unlock(&mrt_lock);
1130 
1131 			if (copy_to_user(arg, &sr, sizeof(sr)))
1132 				return -EFAULT;
1133 			return 0;
1134 		}
1135 		read_unlock(&mrt_lock);
1136 		return -EADDRNOTAVAIL;
1137 	default:
1138 		return -ENOIOCTLCMD;
1139 	}
1140 }
1141 
1142 
1143 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1144 {
1145 	struct net_device *dev = ptr;
1146 	struct net *net = dev_net(dev);
1147 	struct vif_device *v;
1148 	int ct;
1149 
1150 	if (!net_eq(dev_net(dev), net))
1151 		return NOTIFY_DONE;
1152 
1153 	if (event != NETDEV_UNREGISTER)
1154 		return NOTIFY_DONE;
1155 	v = &net->ipv4.vif_table[0];
1156 	for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
1157 		if (v->dev == dev)
1158 			vif_delete(net, ct, 1);
1159 	}
1160 	return NOTIFY_DONE;
1161 }
1162 
1163 
1164 static struct notifier_block ip_mr_notifier = {
1165 	.notifier_call = ipmr_device_event,
1166 };
1167 
1168 /*
1169  * 	Encapsulate a packet by attaching a valid IPIP header to it.
1170  *	This avoids tunnel drivers and other mess and gives us the speed so
1171  *	important for multicast video.
1172  */
1173 
1174 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1175 {
1176 	struct iphdr *iph;
1177 	struct iphdr *old_iph = ip_hdr(skb);
1178 
1179 	skb_push(skb, sizeof(struct iphdr));
1180 	skb->transport_header = skb->network_header;
1181 	skb_reset_network_header(skb);
1182 	iph = ip_hdr(skb);
1183 
1184 	iph->version	= 	4;
1185 	iph->tos	=	old_iph->tos;
1186 	iph->ttl	=	old_iph->ttl;
1187 	iph->frag_off	=	0;
1188 	iph->daddr	=	daddr;
1189 	iph->saddr	=	saddr;
1190 	iph->protocol	=	IPPROTO_IPIP;
1191 	iph->ihl	=	5;
1192 	iph->tot_len	=	htons(skb->len);
1193 	ip_select_ident(iph, skb_dst(skb), NULL);
1194 	ip_send_check(iph);
1195 
1196 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1197 	nf_reset(skb);
1198 }
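
/* After ip_encap() the frame is, from the front:
 *
 *	+-----------------------------+-------------+---------+
 *	| outer iphdr (IPPROTO_IPIP)  | inner iphdr | payload |
 *	+-----------------------------+-------------+---------+
 *
 * TOS and TTL are copied from the inner header; saddr/daddr come from
 * the vif's local/remote tunnel endpoints supplied by the caller.
 */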
1199 
1200 static inline int ipmr_forward_finish(struct sk_buff *skb)
1201 {
1202 	struct ip_options * opt	= &(IPCB(skb)->opt);
1203 
1204 	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1205 
1206 	if (unlikely(opt->optlen))
1207 		ip_forward_options(skb);
1208 
1209 	return dst_output(skb);
1210 }
1211 
1212 /*
1213  *	Processing handlers for ipmr_forward
1214  */
1215 
1216 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1217 {
1218 	struct net *net = mfc_net(c);
1219 	const struct iphdr *iph = ip_hdr(skb);
1220 	struct vif_device *vif = &net->ipv4.vif_table[vifi];
1221 	struct net_device *dev;
1222 	struct rtable *rt;
1223 	int    encap = 0;
1224 
1225 	if (vif->dev == NULL)
1226 		goto out_free;
1227 
1228 #ifdef CONFIG_IP_PIMSM
1229 	if (vif->flags & VIFF_REGISTER) {
1230 		vif->pkt_out++;
1231 		vif->bytes_out += skb->len;
1232 		vif->dev->stats.tx_bytes += skb->len;
1233 		vif->dev->stats.tx_packets++;
1234 		ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
1235 		goto out_free;
1236 	}
1237 #endif
1238 
1239 	if (vif->flags&VIFF_TUNNEL) {
1240 		struct flowi fl = { .oif = vif->link,
1241 				    .nl_u = { .ip4_u =
1242 					      { .daddr = vif->remote,
1243 						.saddr = vif->local,
1244 						.tos = RT_TOS(iph->tos) } },
1245 				    .proto = IPPROTO_IPIP };
1246 		if (ip_route_output_key(net, &rt, &fl))
1247 			goto out_free;
1248 		encap = sizeof(struct iphdr);
1249 	} else {
1250 		struct flowi fl = { .oif = vif->link,
1251 				    .nl_u = { .ip4_u =
1252 					      { .daddr = iph->daddr,
1253 						.tos = RT_TOS(iph->tos) } },
1254 				    .proto = IPPROTO_IPIP };
1255 		if (ip_route_output_key(net, &rt, &fl))
1256 			goto out_free;
1257 	}
1258 
1259 	dev = rt->u.dst.dev;
1260 
1261 	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1262 		/* Do not fragment multicasts. Alas, IPv4 does not
1263 		   allow us to send ICMP here, so oversized packets
1264 		   silently disappear into a black hole.
1265 		 */
1266 
1267 		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1268 		ip_rt_put(rt);
1269 		goto out_free;
1270 	}
1271 
1272 	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1273 
1274 	if (skb_cow(skb, encap)) {
1275 		ip_rt_put(rt);
1276 		goto out_free;
1277 	}
1278 
1279 	vif->pkt_out++;
1280 	vif->bytes_out += skb->len;
1281 
1282 	skb_dst_drop(skb);
1283 	skb_dst_set(skb, &rt->u.dst);
1284 	ip_decrease_ttl(ip_hdr(skb));
1285 
1286 	/* FIXME: forward and output firewalls used to be called here.
1287 	 * What do we do with netfilter? -- RR */
1288 	if (vif->flags & VIFF_TUNNEL) {
1289 		ip_encap(skb, vif->local, vif->remote);
1290 		/* FIXME: extra output firewall step used to be here. --RR */
1291 		vif->dev->stats.tx_packets++;
1292 		vif->dev->stats.tx_bytes += skb->len;
1293 	}
1294 
1295 	IPCB(skb)->flags |= IPSKB_FORWARDED;
1296 
1297 	/*
1298 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1299 	 * not only before forwarding, but also after forwarding on all output
1300 	 * interfaces. Clearly, if the mrouter runs a multicast application,
1301 	 * the application should receive packets regardless of which
1302 	 * interface it is joined on.
1303 	 * If we did not do this, the application would have to join on all
1304 	 * interfaces. On the other hand, a multihomed host (or router, but
1305 	 * not an mrouter) cannot join on more than one interface - that
1306 	 * would result in receiving multiple copies of each packet.
1307 	 */
1308 	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1309 		ipmr_forward_finish);
1310 	return;
1311 
1312 out_free:
1313 	kfree_skb(skb);
1314 	return;
1315 }
1316 
1317 static int ipmr_find_vif(struct net_device *dev)
1318 {
1319 	struct net *net = dev_net(dev);
1320 	int ct;
1321 	for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1322 		if (net->ipv4.vif_table[ct].dev == dev)
1323 			break;
1324 	}
1325 	return ct;
1326 }
1327 
1328 /* "local" means that we should preserve one skb (for local delivery) */
1329 
1330 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1331 {
1332 	int psend = -1;
1333 	int vif, ct;
1334 	struct net *net = mfc_net(cache);
1335 
1336 	vif = cache->mfc_parent;
1337 	cache->mfc_un.res.pkt++;
1338 	cache->mfc_un.res.bytes += skb->len;
1339 
1340 	/*
1341 	 * Wrong interface: drop packet and (maybe) send PIM assert.
1342 	 */
1343 	if (net->ipv4.vif_table[vif].dev != skb->dev) {
1344 		int true_vifi;
1345 
1346 		if (skb_rtable(skb)->fl.iif == 0) {
1347 			/* It is our own packet, looped back.
1348 			   Very complicated situation...
1349 
1350 			   The best workaround until routing daemons are
1351 			   fixed is not to redistribute a packet if it was
1352 			   sent through the wrong interface. It means that
1353 			   multicast applications WILL NOT work for
1354 			   (S,G) entries whose default multicast route points
1355 			   to the wrong oif. In any case, it is not a good
1356 			   idea to run multicast applications on a router.
1357 			 */
1358 			goto dont_forward;
1359 		}
1360 
1361 		cache->mfc_un.res.wrong_if++;
1362 		true_vifi = ipmr_find_vif(skb->dev);
1363 
1364 		if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
1365 		    /* PIM-SM uses asserts when switching from RPT to SPT,
1366 		       so we cannot require that the packet arrived on an oif.
1367 		       That is bad, but otherwise we would need to move a
1368 		       pretty large chunk of pimd into the kernel. Ough... --ANK
1369 		     */
1370 		    (net->ipv4.mroute_do_pim ||
1371 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
1372 		    time_after(jiffies,
1373 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1374 			cache->mfc_un.res.last_assert = jiffies;
1375 			ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
1376 		}
1377 		goto dont_forward;
1378 	}
1379 
1380 	net->ipv4.vif_table[vif].pkt_in++;
1381 	net->ipv4.vif_table[vif].bytes_in += skb->len;
1382 
1383 	/*
1384 	 *	Forward the frame
1385 	 */
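	/*
	 * The descending loop below delays each transmit by one step: the
	 * skb is cloned for every eligible vif except the last one handled
	 * (tracked in psend), which consumes the original skb - saving one
	 * clone in the common single-oif case.
	 */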
1386 	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1387 		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1388 			if (psend != -1) {
1389 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1390 				if (skb2)
1391 					ipmr_queue_xmit(skb2, cache, psend);
1392 			}
1393 			psend = ct;
1394 		}
1395 	}
1396 	if (psend != -1) {
1397 		if (local) {
1398 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1399 			if (skb2)
1400 				ipmr_queue_xmit(skb2, cache, psend);
1401 		} else {
1402 			ipmr_queue_xmit(skb, cache, psend);
1403 			return 0;
1404 		}
1405 	}
1406 
1407 dont_forward:
1408 	if (!local)
1409 		kfree_skb(skb);
1410 	return 0;
1411 }
1412 
1413 
1414 /*
1415  *	Multicast packets for forwarding arrive here
1416  */
1417 
1418 int ip_mr_input(struct sk_buff *skb)
1419 {
1420 	struct mfc_cache *cache;
1421 	struct net *net = dev_net(skb->dev);
1422 	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1423 
1424 	/* A packet looped back after forwarding must not be forwarded
1425 	   a second time, but it may still be delivered locally.
1426 	 */
1427 	if (IPCB(skb)->flags&IPSKB_FORWARDED)
1428 		goto dont_forward;
1429 
1430 	if (!local) {
1431 		    if (IPCB(skb)->opt.router_alert) {
1432 			    if (ip_call_ra_chain(skb))
1433 				    return 0;
1434 		    } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1435 			    /* IGMPv1 (and broken IGMPv2 implementations such as
1436 			       Cisco IOS <= 11.2(8)) do not put the router alert
1437 			       option in IGMP packets destined for routable
1438 			       groups. This is very bad, because it means
1439 			       that we can forward NO IGMP messages at all.
1440 			     */
1441 			    read_lock(&mrt_lock);
1442 			    if (net->ipv4.mroute_sk) {
1443 				    nf_reset(skb);
1444 				    raw_rcv(net->ipv4.mroute_sk, skb);
1445 				    read_unlock(&mrt_lock);
1446 				    return 0;
1447 			    }
1448 			    read_unlock(&mrt_lock);
1449 		    }
1450 	}
1451 
1452 	read_lock(&mrt_lock);
1453 	cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1454 
1455 	/*
1456 	 *	No usable cache entry
1457 	 */
1458 	if (cache == NULL) {
1459 		int vif;
1460 
1461 		if (local) {
1462 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1463 			ip_local_deliver(skb);
1464 			if (skb2 == NULL) {
1465 				read_unlock(&mrt_lock);
1466 				return -ENOBUFS;
1467 			}
1468 			skb = skb2;
1469 		}
1470 
1471 		vif = ipmr_find_vif(skb->dev);
1472 		if (vif >= 0) {
1473 			int err = ipmr_cache_unresolved(net, vif, skb);
1474 			read_unlock(&mrt_lock);
1475 
1476 			return err;
1477 		}
1478 		read_unlock(&mrt_lock);
1479 		kfree_skb(skb);
1480 		return -ENODEV;
1481 	}
1482 
1483 	ip_mr_forward(skb, cache, local);
1484 
1485 	read_unlock(&mrt_lock);
1486 
1487 	if (local)
1488 		return ip_local_deliver(skb);
1489 
1490 	return 0;
1491 
1492 dont_forward:
1493 	if (local)
1494 		return ip_local_deliver(skb);
1495 	kfree_skb(skb);
1496 	return 0;
1497 }
1498 
1499 #ifdef CONFIG_IP_PIMSM
1500 static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1501 {
1502 	struct net_device *reg_dev = NULL;
1503 	struct iphdr *encap;
1504 	struct net *net = dev_net(skb->dev);
1505 
1506 	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1507 	/*
1508 	   Check that:
1509 	   a. the packet is really destined to a multicast group
1510 	   b. the packet is not a NULL-REGISTER
1511 	   c. the packet is not truncated
1512 	 */
1513 	if (!ipv4_is_multicast(encap->daddr) ||
1514 	    encap->tot_len == 0 ||
1515 	    ntohs(encap->tot_len) + pimlen > skb->len)
1516 		return 1;
1517 
1518 	read_lock(&mrt_lock);
1519 	if (net->ipv4.mroute_reg_vif_num >= 0)
1520 		reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
1521 	if (reg_dev)
1522 		dev_hold(reg_dev);
1523 	read_unlock(&mrt_lock);
1524 
1525 	if (reg_dev == NULL)
1526 		return 1;
1527 
1528 	skb->mac_header = skb->network_header;
1529 	skb_pull(skb, (u8*)encap - skb->data);
1530 	skb_reset_network_header(skb);
1531 	skb->dev = reg_dev;
1532 	skb->protocol = htons(ETH_P_IP);
1533 	skb->ip_summed = CHECKSUM_NONE;
1534 	skb->pkt_type = PACKET_HOST;
1535 	skb_dst_drop(skb);
1536 	reg_dev->stats.rx_bytes += skb->len;
1537 	reg_dev->stats.rx_packets++;
1538 	nf_reset(skb);
1539 	netif_rx(skb);
1540 	dev_put(reg_dev);
1541 
1542 	return 0;
1543 }
1544 #endif
1545 
1546 #ifdef CONFIG_IP_PIMSM_V1
1547 /*
1548  * Handle PIMv1 messages, which arrive encapsulated in IGMP
1549  */
1550 
1551 int pim_rcv_v1(struct sk_buff * skb)
1552 {
1553 	struct igmphdr *pim;
1554 	struct net *net = dev_net(skb->dev);
1555 
1556 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1557 		goto drop;
1558 
1559 	pim = igmp_hdr(skb);
1560 
1561 	if (!net->ipv4.mroute_do_pim ||
1562 	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1563 		goto drop;
1564 
1565 	if (__pim_rcv(skb, sizeof(*pim))) {
1566 drop:
1567 		kfree_skb(skb);
1568 	}
1569 	return 0;
1570 }
1571 #endif
1572 
1573 #ifdef CONFIG_IP_PIMSM_V2
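/*
 * Note on the checksum test in pim_rcv(): per the PIM-SM spec, the
 * checksum of a Register message covers only the PIM header, not the
 * encapsulated data packet, but some older peers checksum the whole
 * packet - hence both forms are accepted below (the "relaxation" noted
 * in the changelog at the top of this file).
 */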
1574 static int pim_rcv(struct sk_buff * skb)
1575 {
1576 	struct pimreghdr *pim;
1577 
1578 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1579 		goto drop;
1580 
1581 	pim = (struct pimreghdr *)skb_transport_header(skb);
1582 	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1583 	    (pim->flags&PIM_NULL_REGISTER) ||
1584 	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1585 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1586 		goto drop;
1587 
1588 	if (__pim_rcv(skb, sizeof(*pim))) {
1589 drop:
1590 		kfree_skb(skb);
1591 	}
1592 	return 0;
1593 }
1594 #endif
1595 
1596 static int
1597 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1598 {
1599 	int ct;
1600 	struct rtnexthop *nhp;
1601 	struct net *net = mfc_net(c);
1602 	struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
1603 	u8 *b = skb_tail_pointer(skb);
1604 	struct rtattr *mp_head;
1605 
1606 	if (dev)
1607 		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1608 
1609 	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1610 
1611 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1612 		if (c->mfc_un.res.ttls[ct] < 255) {
1613 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1614 				goto rtattr_failure;
1615 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1616 			nhp->rtnh_flags = 0;
1617 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1618 			nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1619 			nhp->rtnh_len = sizeof(*nhp);
1620 		}
1621 	}
1622 	mp_head->rta_type = RTA_MULTIPATH;
1623 	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1624 	rtm->rtm_type = RTN_MULTICAST;
1625 	return 1;
1626 
1627 rtattr_failure:
1628 	nlmsg_trim(skb, b);
1629 	return -EMSGSIZE;
1630 }
1631 
1632 int ipmr_get_route(struct net *net,
1633 		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1634 {
1635 	int err;
1636 	struct mfc_cache *cache;
1637 	struct rtable *rt = skb_rtable(skb);
1638 
1639 	read_lock(&mrt_lock);
1640 	cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1641 
1642 	if (cache == NULL) {
1643 		struct sk_buff *skb2;
1644 		struct iphdr *iph;
1645 		struct net_device *dev;
1646 		int vif;
1647 
1648 		if (nowait) {
1649 			read_unlock(&mrt_lock);
1650 			return -EAGAIN;
1651 		}
1652 
1653 		dev = skb->dev;
1654 		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1655 			read_unlock(&mrt_lock);
1656 			return -ENODEV;
1657 		}
1658 		skb2 = skb_clone(skb, GFP_ATOMIC);
1659 		if (!skb2) {
1660 			read_unlock(&mrt_lock);
1661 			return -ENOMEM;
1662 		}
1663 
1664 		skb_push(skb2, sizeof(struct iphdr));
1665 		skb_reset_network_header(skb2);
1666 		iph = ip_hdr(skb2);
1667 		iph->ihl = sizeof(struct iphdr) >> 2;
1668 		iph->saddr = rt->rt_src;
1669 		iph->daddr = rt->rt_dst;
1670 		iph->version = 0;
1671 		err = ipmr_cache_unresolved(net, vif, skb2);
1672 		read_unlock(&mrt_lock);
1673 		return err;
1674 	}
1675 
1676 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1677 		cache->mfc_flags |= MFC_NOTIFY;
1678 	err = ipmr_fill_mroute(skb, cache, rtm);
1679 	read_unlock(&mrt_lock);
1680 	return err;
1681 }
1682 
1683 #ifdef CONFIG_PROC_FS
1684 /*
1685  *	The /proc interfaces to multicast routing: /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
1686  */
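
/*
 * Illustrative /proc/net/ip_mr_vif output (fields as printed by
 * ipmr_vif_seq_show() below; addresses are raw hex, values hypothetical):
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *	 0 eth0          151936     148    182528     178 00000 0A00000A 00000000
 */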
1687 struct ipmr_vif_iter {
1688 	struct seq_net_private p;
1689 	int ct;
1690 };
1691 
1692 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1693 					   struct ipmr_vif_iter *iter,
1694 					   loff_t pos)
1695 {
1696 	for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1697 		if (!VIF_EXISTS(net, iter->ct))
1698 			continue;
1699 		if (pos-- == 0)
1700 			return &net->ipv4.vif_table[iter->ct];
1701 	}
1702 	return NULL;
1703 }
1704 
1705 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1706 	__acquires(mrt_lock)
1707 {
1708 	struct net *net = seq_file_net(seq);
1709 
1710 	read_lock(&mrt_lock);
1711 	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1712 		: SEQ_START_TOKEN;
1713 }
1714 
1715 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1716 {
1717 	struct ipmr_vif_iter *iter = seq->private;
1718 	struct net *net = seq_file_net(seq);
1719 
1720 	++*pos;
1721 	if (v == SEQ_START_TOKEN)
1722 		return ipmr_vif_seq_idx(net, iter, 0);
1723 
1724 	while (++iter->ct < net->ipv4.maxvif) {
1725 		if (!VIF_EXISTS(net, iter->ct))
1726 			continue;
1727 		return &net->ipv4.vif_table[iter->ct];
1728 	}
1729 	return NULL;
1730 }
1731 
1732 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1733 	__releases(mrt_lock)
1734 {
1735 	read_unlock(&mrt_lock);
1736 }
1737 
1738 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1739 {
1740 	struct net *net = seq_file_net(seq);
1741 
1742 	if (v == SEQ_START_TOKEN) {
1743 		seq_puts(seq,
1744 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1745 	} else {
1746 		const struct vif_device *vif = v;
1747 		const char *name =  vif->dev ? vif->dev->name : "none";
1748 
1749 		seq_printf(seq,
1750 			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1751 			   vif - net->ipv4.vif_table,
1752 			   name, vif->bytes_in, vif->pkt_in,
1753 			   vif->bytes_out, vif->pkt_out,
1754 			   vif->flags, vif->local, vif->remote);
1755 	}
1756 	return 0;
1757 }
1758 
1759 static const struct seq_operations ipmr_vif_seq_ops = {
1760 	.start = ipmr_vif_seq_start,
1761 	.next  = ipmr_vif_seq_next,
1762 	.stop  = ipmr_vif_seq_stop,
1763 	.show  = ipmr_vif_seq_show,
1764 };
1765 
1766 static int ipmr_vif_open(struct inode *inode, struct file *file)
1767 {
1768 	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1769 			    sizeof(struct ipmr_vif_iter));
1770 }
1771 
1772 static const struct file_operations ipmr_vif_fops = {
1773 	.owner	 = THIS_MODULE,
1774 	.open    = ipmr_vif_open,
1775 	.read    = seq_read,
1776 	.llseek  = seq_lseek,
1777 	.release = seq_release_net,
1778 };
1779 
1780 struct ipmr_mfc_iter {
1781 	struct seq_net_private p;
1782 	struct mfc_cache **cache;
1783 	int ct;
1784 };
1785 
1786 
1787 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1788 					  struct ipmr_mfc_iter *it, loff_t pos)
1789 {
1790 	struct mfc_cache *mfc;
1791 
1792 	it->cache = net->ipv4.mfc_cache_array;
1793 	read_lock(&mrt_lock);
1794 	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1795 		for (mfc = net->ipv4.mfc_cache_array[it->ct];
1796 		     mfc; mfc = mfc->next)
1797 			if (pos-- == 0)
1798 				return mfc;
1799 	read_unlock(&mrt_lock);
1800 
1801 	it->cache = &mfc_unres_queue;
1802 	spin_lock_bh(&mfc_unres_lock);
1803 	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1804 		if (net_eq(mfc_net(mfc), net) &&
1805 		    pos-- == 0)
1806 			return mfc;
1807 	spin_unlock_bh(&mfc_unres_lock);
1808 
1809 	it->cache = NULL;
1810 	return NULL;
1811 }
1812 
1813 
1814 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1815 {
1816 	struct ipmr_mfc_iter *it = seq->private;
1817 	struct net *net = seq_file_net(seq);
1818 
1819 	it->cache = NULL;
1820 	it->ct = 0;
1821 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1822 		: SEQ_START_TOKEN;
1823 }
1824 
1825 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1826 {
1827 	struct mfc_cache *mfc = v;
1828 	struct ipmr_mfc_iter *it = seq->private;
1829 	struct net *net = seq_file_net(seq);
1830 
1831 	++*pos;
1832 
1833 	if (v == SEQ_START_TOKEN)
1834 		return ipmr_mfc_seq_idx(net, seq->private, 0);
1835 
1836 	if (mfc->next)
1837 		return mfc->next;
1838 
1839 	if (it->cache == &mfc_unres_queue)
1840 		goto end_of_list;
1841 
1842 	BUG_ON(it->cache != net->ipv4.mfc_cache_array);
1843 
1844 	while (++it->ct < MFC_LINES) {
1845 		mfc = net->ipv4.mfc_cache_array[it->ct];
1846 		if (mfc)
1847 			return mfc;
1848 	}
1849 
1850 	/* exhausted cache_array, show unresolved */
1851 	read_unlock(&mrt_lock);
1852 	it->cache = &mfc_unres_queue;
1853 	it->ct = 0;
1854 
1855 	spin_lock_bh(&mfc_unres_lock);
1856 	mfc = mfc_unres_queue;
1857 	while (mfc && !net_eq(mfc_net(mfc), net))
1858 		mfc = mfc->next;
1859 	if (mfc)
1860 		return mfc;
1861 
1862  end_of_list:
1863 	spin_unlock_bh(&mfc_unres_lock);
1864 	it->cache = NULL;
1865 
1866 	return NULL;
1867 }
1868 
1869 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1870 {
1871 	struct ipmr_mfc_iter *it = seq->private;
1872 	struct net *net = seq_file_net(seq);
1873 
1874 	if (it->cache == &mfc_unres_queue)
1875 		spin_unlock_bh(&mfc_unres_lock);
1876 	else if (it->cache == net->ipv4.mfc_cache_array)
1877 		read_unlock(&mrt_lock);
1878 }
1879 
1880 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1881 {
1882 	int n;
1883 	struct net *net = seq_file_net(seq);
1884 
1885 	if (v == SEQ_START_TOKEN) {
1886 		seq_puts(seq,
1887 		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1888 	} else {
1889 		const struct mfc_cache *mfc = v;
1890 		const struct ipmr_mfc_iter *it = seq->private;
1891 
1892 		seq_printf(seq, "%08lX %08lX %-3hd",
1893 			   (unsigned long) mfc->mfc_mcastgrp,
1894 			   (unsigned long) mfc->mfc_origin,
1895 			   mfc->mfc_parent);
1896 
1897 		if (it->cache != &mfc_unres_queue) {
1898 			seq_printf(seq, " %8lu %8lu %8lu",
1899 				   mfc->mfc_un.res.pkt,
1900 				   mfc->mfc_un.res.bytes,
1901 				   mfc->mfc_un.res.wrong_if);
1902 			for (n = mfc->mfc_un.res.minvif;
1903 			     n < mfc->mfc_un.res.maxvif; n++ ) {
1904 				if (VIF_EXISTS(net, n) &&
1905 				    mfc->mfc_un.res.ttls[n] < 255)
1906 					seq_printf(seq,
1907 					   " %2d:%-3d",
1908 					   n, mfc->mfc_un.res.ttls[n]);
1909 			}
1910 		} else {
1911 			/* unresolved mfc_caches don't contain
1912 			 * pkt, bytes and wrong_if values
1913 			 */
1914 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1915 		}
1916 		seq_putc(seq, '\n');
1917 	}
1918 	return 0;
1919 }
1920 
1921 static const struct seq_operations ipmr_mfc_seq_ops = {
1922 	.start = ipmr_mfc_seq_start,
1923 	.next  = ipmr_mfc_seq_next,
1924 	.stop  = ipmr_mfc_seq_stop,
1925 	.show  = ipmr_mfc_seq_show,
1926 };
1927 
1928 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1929 {
1930 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1931 			    sizeof(struct ipmr_mfc_iter));
1932 }
1933 
1934 static const struct file_operations ipmr_mfc_fops = {
1935 	.owner	 = THIS_MODULE,
1936 	.open    = ipmr_mfc_open,
1937 	.read    = seq_read,
1938 	.llseek  = seq_lseek,
1939 	.release = seq_release_net,
1940 };
1941 #endif
1942 
1943 #ifdef CONFIG_IP_PIMSM_V2
1944 static const struct net_protocol pim_protocol = {
1945 	.handler	=	pim_rcv,
1946 	.netns_ok	=	1,
1947 };
1948 #endif
1949 
1950 
1951 /*
1952  *	Setup for IP multicast routing
1953  */
1954 static int __net_init ipmr_net_init(struct net *net)
1955 {
1956 	int err = 0;
1957 
1958 	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1959 				      GFP_KERNEL);
1960 	if (!net->ipv4.vif_table) {
1961 		err = -ENOMEM;
1962 		goto fail;
1963 	}
1964 
1965 	/* Forwarding cache */
1966 	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1967 					    sizeof(struct mfc_cache *),
1968 					    GFP_KERNEL);
1969 	if (!net->ipv4.mfc_cache_array) {
1970 		err = -ENOMEM;
1971 		goto fail_mfc_cache;
1972 	}
1973 
1974 #ifdef CONFIG_IP_PIMSM
1975 	net->ipv4.mroute_reg_vif_num = -1;
1976 #endif
1977 
1978 #ifdef CONFIG_PROC_FS
1979 	err = -ENOMEM;
1980 	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1981 		goto proc_vif_fail;
1982 	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1983 		goto proc_cache_fail;
1984 #endif
1985 	return 0;
1986 
1987 #ifdef CONFIG_PROC_FS
1988 proc_cache_fail:
1989 	proc_net_remove(net, "ip_mr_vif");
1990 proc_vif_fail:
1991 	kfree(net->ipv4.mfc_cache_array);
1992 #endif
1993 fail_mfc_cache:
1994 	kfree(net->ipv4.vif_table);
1995 fail:
1996 	return err;
1997 }
1998 
1999 static void __net_exit ipmr_net_exit(struct net *net)
2000 {
2001 #ifdef CONFIG_PROC_FS
2002 	proc_net_remove(net, "ip_mr_cache");
2003 	proc_net_remove(net, "ip_mr_vif");
2004 #endif
2005 	kfree(net->ipv4.mfc_cache_array);
2006 	kfree(net->ipv4.vif_table);
2007 }
2008 
2009 static struct pernet_operations ipmr_net_ops = {
2010 	.init = ipmr_net_init,
2011 	.exit = ipmr_net_exit,
2012 };
2013 
2014 int __init ip_mr_init(void)
2015 {
2016 	int err;
2017 
2018 	mrt_cachep = kmem_cache_create("ip_mrt_cache",
2019 				       sizeof(struct mfc_cache),
2020 				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2021 				       NULL);
2022 	if (!mrt_cachep)
2023 		return -ENOMEM;
2024 
2025 	err = register_pernet_subsys(&ipmr_net_ops);
2026 	if (err)
2027 		goto reg_pernet_fail;
2028 
2029 	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
2030 	err = register_netdevice_notifier(&ip_mr_notifier);
2031 	if (err)
2032 		goto reg_notif_fail;
2033 #ifdef CONFIG_IP_PIMSM_V2
2034 	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2035 		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2036 		err = -EAGAIN;
2037 		goto add_proto_fail;
2038 	}
2039 #endif
2040 	return 0;
2041 
2042 #ifdef CONFIG_IP_PIMSM_V2
2043 add_proto_fail:
2044 	unregister_netdevice_notifier(&ip_mr_notifier);
2045 #endif
2046 reg_notif_fail:
2047 	del_timer(&ipmr_expire_timer);
2048 	unregister_pernet_subsys(&ipmr_net_ops);
2049 reg_pernet_fail:
2050 	kmem_cache_destroy(mrt_cachep);
2051 	return err;
2052 }
2053