xref: /openbmc/linux/net/ipv4/ipmr.c (revision 6ee73861)
1 /*
2  *	IP multicast routing support for mrouted 3.6/3.8
3  *
4  *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *	  Linux Consultancy and Custom Driver Development
6  *
7  *	This program is free software; you can redistribute it and/or
8  *	modify it under the terms of the GNU General Public License
9  *	as published by the Free Software Foundation; either version
10  *	2 of the License, or (at your option) any later version.
11  *
12  *	Fixes:
13  *	Michael Chastain	:	Incorrect size of copying.
14  *	Alan Cox		:	Added the cache manager code
15  *	Alan Cox		:	Fixed the clone/copy bug and device race.
16  *	Mike McLagan		:	Routing by source
17  *	Malcolm Beattie		:	Buffer handling fixes.
18  *	Alexey Kuznetsov	:	Double buffer free and other fixes.
19  *	SVR Anand		:	Fixed several multicast bugs and problems.
20  *	Alexey Kuznetsov	:	Status, optimisations and more.
21  *	Brad Parker		:	Better behaviour on mrouted upcall
22  *					overflow.
23  *      Carlos Picoto           :       PIMv1 Support
24  *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
25  *					Relax this requirement to work with older peers.
26  *
27  */
28 
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <linux/mm.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
40 #include <linux/in.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <net/net_namespace.h>
51 #include <net/ip.h>
52 #include <net/protocol.h>
53 #include <linux/skbuff.h>
54 #include <net/route.h>
55 #include <net/sock.h>
56 #include <net/icmp.h>
57 #include <net/udp.h>
58 #include <net/raw.h>
59 #include <linux/notifier.h>
60 #include <linux/if_arp.h>
61 #include <linux/netfilter_ipv4.h>
62 #include <net/ipip.h>
63 #include <net/checksum.h>
64 #include <net/netlink.h>
65 
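/* Either PIM-SM version pulls in the common PIM-SM support code below. */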
66 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67 #define CONFIG_IP_PIMSM	1
68 #endif
69 
70 /* Big lock, protecting the vif table, mrt cache and mroute socket state.
71    Note that changes are serialized via rtnl_lock.
72  */
73 
74 static DEFINE_RWLOCK(mrt_lock);
75 
76 /*
77  *	Multicast router control variables
78  */
79 
80 #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
81 
82 static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
83 
84 /* Special spinlock for queue of unresolved entries */
85 static DEFINE_SPINLOCK(mfc_unres_lock);
86 
87 /* We return to Alan's original scheme. The hash table of resolved
88    entries is changed only in process context and is protected
89    by the weak rwlock mrt_lock. The queue of unresolved entries is
90    protected by the strong spinlock mfc_unres_lock.
91 
92    This keeps the data path entirely free of exclusive locks.
93  */
94 
95 static struct kmem_cache *mrt_cachep __read_mostly;
96 
97 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
98 static int ipmr_cache_report(struct net *net,
99 			     struct sk_buff *pkt, vifi_t vifi, int assert);
100 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
101 
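/* Single timer that walks the unresolved queue and reaps timed-out entries. */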
102 static struct timer_list ipmr_expire_timer;
103 
104 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
105 
106 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
107 {
108 	struct net *net = dev_net(dev);
109 
110 	dev_close(dev);
111 
112 	dev = __dev_get_by_name(net, "tunl0");
113 	if (dev) {
114 		const struct net_device_ops *ops = dev->netdev_ops;
115 		struct ifreq ifr;
116 		struct ip_tunnel_parm p;
117 
118 		memset(&p, 0, sizeof(p));
119 		p.iph.daddr = v->vifc_rmt_addr.s_addr;
120 		p.iph.saddr = v->vifc_lcl_addr.s_addr;
121 		p.iph.version = 4;
122 		p.iph.ihl = 5;
123 		p.iph.protocol = IPPROTO_IPIP;
124 		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
125 		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
126 
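		/* The tunnel ioctl handler expects a user-space pointer, so
		 * temporarily lift the address-space limit to pass it our
		 * kernel buffer. */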
127 		if (ops->ndo_do_ioctl) {
128 			mm_segment_t oldfs = get_fs();
129 
130 			set_fs(KERNEL_DS);
131 			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
132 			set_fs(oldfs);
133 		}
134 	}
135 }
136 
137 static
138 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
139 {
140 	struct net_device  *dev;
141 
142 	dev = __dev_get_by_name(net, "tunl0");
143 
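	/* Create the DVMRP tunnel by asking the generic tunl0 device to
	 * spawn a dvmrp%d IPIP tunnel via the SIOCADDTUNNEL ioctl. */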
144 	if (dev) {
145 		const struct net_device_ops *ops = dev->netdev_ops;
146 		int err;
147 		struct ifreq ifr;
148 		struct ip_tunnel_parm p;
149 		struct in_device  *in_dev;
150 
151 		memset(&p, 0, sizeof(p));
152 		p.iph.daddr = v->vifc_rmt_addr.s_addr;
153 		p.iph.saddr = v->vifc_lcl_addr.s_addr;
154 		p.iph.version = 4;
155 		p.iph.ihl = 5;
156 		p.iph.protocol = IPPROTO_IPIP;
157 		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
158 		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
159 
160 		if (ops->ndo_do_ioctl) {
161 			mm_segment_t oldfs = get_fs();
162 
163 			set_fs(KERNEL_DS);
164 			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
165 			set_fs(oldfs);
166 		} else
167 			err = -EOPNOTSUPP;
168 
169 		dev = NULL;
170 
171 		if (err == 0 &&
172 		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
173 			dev->flags |= IFF_MULTICAST;
174 
175 			in_dev = __in_dev_get_rtnl(dev);
176 			if (in_dev == NULL)
177 				goto failure;
178 
179 			ipv4_devconf_setall(in_dev);
180 			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
181 
182 			if (dev_open(dev))
183 				goto failure;
184 			dev_hold(dev);
185 		}
186 	}
187 	return dev;
188 
189 failure:
190 	/* allow the register to be completed before unregistering. */
191 	rtnl_unlock();
192 	rtnl_lock();
193 
194 	unregister_netdevice(dev);
195 	return NULL;
196 }
197 
198 #ifdef CONFIG_IP_PIMSM
199 
200 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
201 {
202 	struct net *net = dev_net(dev);
203 
204 	read_lock(&mrt_lock);
205 	dev->stats.tx_bytes += skb->len;
206 	dev->stats.tx_packets++;
207 	ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
208 			  IGMPMSG_WHOLEPKT);
209 	read_unlock(&mrt_lock);
210 	kfree_skb(skb);
211 	return NETDEV_TX_OK;
212 }
213 
214 static const struct net_device_ops reg_vif_netdev_ops = {
215 	.ndo_start_xmit	= reg_vif_xmit,
216 };
217 
218 static void reg_vif_setup(struct net_device *dev)
219 {
220 	dev->type		= ARPHRD_PIMREG;
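	/* Leave room for an outer IP header plus the 8-byte PIM register header. */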
221 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
222 	dev->flags		= IFF_NOARP;
223 	dev->netdev_ops		= &reg_vif_netdev_ops;
224 	dev->destructor		= free_netdev;
225 	dev->features		|= NETIF_F_NETNS_LOCAL;
226 }
227 
228 static struct net_device *ipmr_reg_vif(struct net *net)
229 {
230 	struct net_device *dev;
231 	struct in_device *in_dev;
232 
233 	dev = alloc_netdev(0, "pimreg", reg_vif_setup);
234 
235 	if (dev == NULL)
236 		return NULL;
237 
238 	dev_net_set(dev, net);
239 
240 	if (register_netdevice(dev)) {
241 		free_netdev(dev);
242 		return NULL;
243 	}
244 	dev->iflink = 0;
245 
246 	rcu_read_lock();
247 	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
248 		rcu_read_unlock();
249 		goto failure;
250 	}
251 
252 	ipv4_devconf_setall(in_dev);
253 	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
254 	rcu_read_unlock();
255 
256 	if (dev_open(dev))
257 		goto failure;
258 
259 	dev_hold(dev);
260 
261 	return dev;
262 
263 failure:
264 	/* allow the register to be completed before unregistering. */
265 	rtnl_unlock();
266 	rtnl_lock();
267 
268 	unregister_netdevice(dev);
269 	return NULL;
270 }
271 #endif
272 
273 /*
274  *	Delete a VIF entry
275  *	@notify: Set to 1 if the caller is a notifier_call
276  */
277 
278 static int vif_delete(struct net *net, int vifi, int notify)
279 {
280 	struct vif_device *v;
281 	struct net_device *dev;
282 	struct in_device *in_dev;
283 
284 	if (vifi < 0 || vifi >= net->ipv4.maxvif)
285 		return -EADDRNOTAVAIL;
286 
287 	v = &net->ipv4.vif_table[vifi];
288 
289 	write_lock_bh(&mrt_lock);
290 	dev = v->dev;
291 	v->dev = NULL;
292 
293 	if (!dev) {
294 		write_unlock_bh(&mrt_lock);
295 		return -EADDRNOTAVAIL;
296 	}
297 
298 #ifdef CONFIG_IP_PIMSM
299 	if (vifi == net->ipv4.mroute_reg_vif_num)
300 		net->ipv4.mroute_reg_vif_num = -1;
301 #endif
302 
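	/* If the last slot was freed, shrink maxvif down to the highest
	 * vif that still exists. */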
303 	if (vifi+1 == net->ipv4.maxvif) {
304 		int tmp;
305 		for (tmp=vifi-1; tmp>=0; tmp--) {
306 			if (VIF_EXISTS(net, tmp))
307 				break;
308 		}
309 		net->ipv4.maxvif = tmp+1;
310 	}
311 
312 	write_unlock_bh(&mrt_lock);
313 
314 	dev_set_allmulti(dev, -1);
315 
316 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
317 		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
318 		ip_rt_multicast_event(in_dev);
319 	}
320 
321 	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
322 		unregister_netdevice(dev);
323 
324 	dev_put(dev);
325 	return 0;
326 }
327 
328 static inline void ipmr_cache_free(struct mfc_cache *c)
329 {
330 	release_net(mfc_net(c));
331 	kmem_cache_free(mrt_cachep, c);
332 }
333 
334 /* Destroy an unresolved cache entry, killing queued skbs
335    and reporting an error to netlink readers.
336  */
337 
338 static void ipmr_destroy_unres(struct mfc_cache *c)
339 {
340 	struct sk_buff *skb;
341 	struct nlmsgerr *e;
342 	struct net *net = mfc_net(c);
343 
344 	atomic_dec(&net->ipv4.cache_resolve_queue_len);
345 
346 	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
347 		if (ip_hdr(skb)->version == 0) {
348 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
349 			nlh->nlmsg_type = NLMSG_ERROR;
350 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
351 			skb_trim(skb, nlh->nlmsg_len);
352 			e = NLMSG_DATA(nlh);
353 			e->error = -ETIMEDOUT;
354 			memset(&e->msg, 0, sizeof(e->msg));
355 
356 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
357 		} else
358 			kfree_skb(skb);
359 	}
360 
361 	ipmr_cache_free(c);
362 }
363 
364 
365 /* Single timer process for all the unresolved queue. */
366 
367 static void ipmr_expire_process(unsigned long dummy)
368 {
369 	unsigned long now;
370 	unsigned long expires;
371 	struct mfc_cache *c, **cp;
372 
373 	if (!spin_trylock(&mfc_unres_lock)) {
374 		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
375 		return;
376 	}
377 
378 	if (mfc_unres_queue == NULL)
379 		goto out;
380 
381 	now = jiffies;
382 	expires = 10*HZ;
383 	cp = &mfc_unres_queue;
384 
385 	while ((c=*cp) != NULL) {
386 		if (time_after(c->mfc_un.unres.expires, now)) {
387 			unsigned long interval = c->mfc_un.unres.expires - now;
388 			if (interval < expires)
389 				expires = interval;
390 			cp = &c->next;
391 			continue;
392 		}
393 
394 		*cp = c->next;
395 
396 		ipmr_destroy_unres(c);
397 	}
398 
399 	if (mfc_unres_queue != NULL)
400 		mod_timer(&ipmr_expire_timer, jiffies + expires);
401 
402 out:
403 	spin_unlock(&mfc_unres_lock);
404 }
405 
406 /* Fill the oif list. Called with mrt_lock held for writing. */
407 
408 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
409 {
410 	int vifi;
411 	struct net *net = mfc_net(cache);
412 
413 	cache->mfc_un.res.minvif = MAXVIFS;
414 	cache->mfc_un.res.maxvif = 0;
415 	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
416 
417 	for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
418 		if (VIF_EXISTS(net, vifi) &&
419 		    ttls[vifi] && ttls[vifi] < 255) {
420 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
421 			if (cache->mfc_un.res.minvif > vifi)
422 				cache->mfc_un.res.minvif = vifi;
423 			if (cache->mfc_un.res.maxvif <= vifi)
424 				cache->mfc_un.res.maxvif = vifi + 1;
425 		}
426 	}
427 }
428 
429 static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
430 {
431 	int vifi = vifc->vifc_vifi;
432 	struct vif_device *v = &net->ipv4.vif_table[vifi];
433 	struct net_device *dev;
434 	struct in_device *in_dev;
435 	int err;
436 
437 	/* Is vif busy ? */
438 	if (VIF_EXISTS(net, vifi))
439 		return -EADDRINUSE;
440 
441 	switch (vifc->vifc_flags) {
442 #ifdef CONFIG_IP_PIMSM
443 	case VIFF_REGISTER:
444 		/*
445 		 * Special Purpose VIF in PIM
446 		 * All the packets will be sent to the daemon
447 		 */
448 		if (net->ipv4.mroute_reg_vif_num >= 0)
449 			return -EADDRINUSE;
450 		dev = ipmr_reg_vif(net);
451 		if (!dev)
452 			return -ENOBUFS;
453 		err = dev_set_allmulti(dev, 1);
454 		if (err) {
455 			unregister_netdevice(dev);
456 			dev_put(dev);
457 			return err;
458 		}
459 		break;
460 #endif
461 	case VIFF_TUNNEL:
462 		dev = ipmr_new_tunnel(net, vifc);
463 		if (!dev)
464 			return -ENOBUFS;
465 		err = dev_set_allmulti(dev, 1);
466 		if (err) {
467 			ipmr_del_tunnel(dev, vifc);
468 			dev_put(dev);
469 			return err;
470 		}
471 		break;
472 	case 0:
473 		dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
474 		if (!dev)
475 			return -EADDRNOTAVAIL;
476 		err = dev_set_allmulti(dev, 1);
477 		if (err) {
478 			dev_put(dev);
479 			return err;
480 		}
481 		break;
482 	default:
483 		return -EINVAL;
484 	}
485 
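	/* Account the new forwarder on the underlying device and notify
	 * the routing cache of the change. */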
486 	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
487 		dev_put(dev);
488 		return -EADDRNOTAVAIL;
489 	}
490 	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
491 	ip_rt_multicast_event(in_dev);
492 
493 	/*
494 	 *	Fill in the VIF structures
495 	 */
496 	v->rate_limit = vifc->vifc_rate_limit;
497 	v->local = vifc->vifc_lcl_addr.s_addr;
498 	v->remote = vifc->vifc_rmt_addr.s_addr;
499 	v->flags = vifc->vifc_flags;
500 	if (!mrtsock)
501 		v->flags |= VIFF_STATIC;
502 	v->threshold = vifc->vifc_threshold;
503 	v->bytes_in = 0;
504 	v->bytes_out = 0;
505 	v->pkt_in = 0;
506 	v->pkt_out = 0;
507 	v->link = dev->ifindex;
508 	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
509 		v->link = dev->iflink;
510 
511 	/* Finish the update by writing the critical data under mrt_lock */
512 	write_lock_bh(&mrt_lock);
513 	v->dev = dev;
514 #ifdef CONFIG_IP_PIMSM
515 	if (v->flags&VIFF_REGISTER)
516 		net->ipv4.mroute_reg_vif_num = vifi;
517 #endif
518 	if (vifi+1 > net->ipv4.maxvif)
519 		net->ipv4.maxvif = vifi+1;
520 	write_unlock_bh(&mrt_lock);
521 	return 0;
522 }
523 
524 static struct mfc_cache *ipmr_cache_find(struct net *net,
525 					 __be32 origin,
526 					 __be32 mcastgrp)
527 {
528 	int line = MFC_HASH(mcastgrp, origin);
529 	struct mfc_cache *c;
530 
531 	for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
532 		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
533 			break;
534 	}
535 	return c;
536 }
537 
538 /*
539  *	Allocate a multicast cache entry
540  */
541 static struct mfc_cache *ipmr_cache_alloc(struct net *net)
542 {
543 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
544 	if (c == NULL)
545 		return NULL;
546 	c->mfc_un.res.minvif = MAXVIFS;
547 	mfc_net_set(c, net);
548 	return c;
549 }
550 
551 static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
552 {
553 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
554 	if (c == NULL)
555 		return NULL;
556 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
557 	c->mfc_un.unres.expires = jiffies + 10*HZ;
558 	mfc_net_set(c, net);
559 	return c;
560 }
561 
562 /*
563  *	A cache entry has gone into a resolved state from queued
564  */
565 
566 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
567 {
568 	struct sk_buff *skb;
569 	struct nlmsgerr *e;
570 
571 	/*
572 	 *	Play the pending entries through our router
573 	 */
574 
575 	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
576 		if (ip_hdr(skb)->version == 0) {
577 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
578 
579 			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
580 				nlh->nlmsg_len = (skb_tail_pointer(skb) -
581 						  (u8 *)nlh);
582 			} else {
583 				nlh->nlmsg_type = NLMSG_ERROR;
584 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
585 				skb_trim(skb, nlh->nlmsg_len);
586 				e = NLMSG_DATA(nlh);
587 				e->error = -EMSGSIZE;
588 				memset(&e->msg, 0, sizeof(e->msg));
589 			}
590 
591 			rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
592 		} else
593 			ip_mr_forward(skb, c, 0);
594 	}
595 }
596 
597 /*
598  *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
599  *	expects the following bizarre scheme.
600  *
601  *	Called under mrt_lock.
602  */
603 
604 static int ipmr_cache_report(struct net *net,
605 			     struct sk_buff *pkt, vifi_t vifi, int assert)
606 {
607 	struct sk_buff *skb;
608 	const int ihl = ip_hdrlen(pkt);
609 	struct igmphdr *igmp;
610 	struct igmpmsg *msg;
611 	int ret;
612 
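	/* For IGMPMSG_WHOLEPKT reports, wrap the original packet; for other
	 * asserts, build a small IGMP-style message from scratch. */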
613 #ifdef CONFIG_IP_PIMSM
614 	if (assert == IGMPMSG_WHOLEPKT)
615 		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
616 	else
617 #endif
618 		skb = alloc_skb(128, GFP_ATOMIC);
619 
620 	if (!skb)
621 		return -ENOBUFS;
622 
623 #ifdef CONFIG_IP_PIMSM
624 	if (assert == IGMPMSG_WHOLEPKT) {
625 		/* Ugly, but we have no choice with this interface.
626 		   Duplicate old header, fix ihl, length etc.
627 		   And all this only to mangle msg->im_msgtype and
628 		   to set msg->im_mbz to "mbz" :-)
629 		 */
630 		skb_push(skb, sizeof(struct iphdr));
631 		skb_reset_network_header(skb);
632 		skb_reset_transport_header(skb);
633 		msg = (struct igmpmsg *)skb_network_header(skb);
634 		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
635 		msg->im_msgtype = IGMPMSG_WHOLEPKT;
636 		msg->im_mbz = 0;
637 		msg->im_vif = net->ipv4.mroute_reg_vif_num;
638 		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
639 		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
640 					     sizeof(struct iphdr));
641 	} else
642 #endif
643 	{
644 
645 	/*
646 	 *	Copy the IP header
647 	 */
648 
649 	skb->network_header = skb->tail;
650 	skb_put(skb, ihl);
651 	skb_copy_to_linear_data(skb, pkt->data, ihl);
652 	ip_hdr(skb)->protocol = 0;			/* Flag to the kernel this is a route add */
653 	msg = (struct igmpmsg *)skb_network_header(skb);
654 	msg->im_vif = vifi;
655 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
656 
657 	/*
658 	 *	Add our header
659 	 */
660 
661 	igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
662 	igmp->type	=
663 	msg->im_msgtype = assert;
664 	igmp->code 	=	0;
665 	ip_hdr(skb)->tot_len = htons(skb->len);			/* Fix the length */
666 	skb->transport_header = skb->network_header;
667 	}
668 
669 	if (net->ipv4.mroute_sk == NULL) {
670 		kfree_skb(skb);
671 		return -EINVAL;
672 	}
673 
674 	/*
675 	 *	Deliver to mrouted
676 	 */
677 	ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
678 	if (ret < 0) {
679 		if (net_ratelimit())
680 			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
681 		kfree_skb(skb);
682 	}
683 
684 	return ret;
685 }
686 
687 /*
688  *	Queue a packet for resolution; the cache entry is locked while we work on it!
689  */
690 
691 static int
692 ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
693 {
694 	int err;
695 	struct mfc_cache *c;
696 	const struct iphdr *iph = ip_hdr(skb);
697 
698 	spin_lock_bh(&mfc_unres_lock);
699 	for (c=mfc_unres_queue; c; c=c->next) {
700 		if (net_eq(mfc_net(c), net) &&
701 		    c->mfc_mcastgrp == iph->daddr &&
702 		    c->mfc_origin == iph->saddr)
703 			break;
704 	}
705 
706 	if (c == NULL) {
707 		/*
708 		 *	Create a new entry if allowable
709 		 */
710 
711 		if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
712 		    (c = ipmr_cache_alloc_unres(net)) == NULL) {
713 			spin_unlock_bh(&mfc_unres_lock);
714 
715 			kfree_skb(skb);
716 			return -ENOBUFS;
717 		}
718 
719 		/*
720 		 *	Fill in the new cache entry
721 		 */
722 		c->mfc_parent	= -1;
723 		c->mfc_origin	= iph->saddr;
724 		c->mfc_mcastgrp	= iph->daddr;
725 
726 		/*
727 		 *	Reflect first query at mrouted.
728 		 */
729 		err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
730 		if (err < 0) {
731 			/* If the report failed, throw the cache entry
732 			   out - Brad Parker
733 			 */
734 			spin_unlock_bh(&mfc_unres_lock);
735 
736 			ipmr_cache_free(c);
737 			kfree_skb(skb);
738 			return err;
739 		}
740 
741 		atomic_inc(&net->ipv4.cache_resolve_queue_len);
742 		c->next = mfc_unres_queue;
743 		mfc_unres_queue = c;
744 
745 		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
746 	}
747 
748 	/*
749 	 *	See if we can append the packet
750 	 */
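	/* At most four packets may be queued per unresolved entry. */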
751 	if (c->mfc_un.unres.unresolved.qlen>3) {
752 		kfree_skb(skb);
753 		err = -ENOBUFS;
754 	} else {
755 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
756 		err = 0;
757 	}
758 
759 	spin_unlock_bh(&mfc_unres_lock);
760 	return err;
761 }
762 
763 /*
764  *	MFC cache manipulation by user space mroute daemon
765  */
766 
767 static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
768 {
769 	int line;
770 	struct mfc_cache *c, **cp;
771 
772 	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
773 
774 	for (cp = &net->ipv4.mfc_cache_array[line];
775 	     (c = *cp) != NULL; cp = &c->next) {
776 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
777 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
778 			write_lock_bh(&mrt_lock);
779 			*cp = c->next;
780 			write_unlock_bh(&mrt_lock);
781 
782 			ipmr_cache_free(c);
783 			return 0;
784 		}
785 	}
786 	return -ENOENT;
787 }
788 
789 static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
790 {
791 	int line;
792 	struct mfc_cache *uc, *c, **cp;
793 
794 	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
795 
796 	for (cp = &net->ipv4.mfc_cache_array[line];
797 	     (c = *cp) != NULL; cp = &c->next) {
798 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
799 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
800 			break;
801 	}
802 
803 	if (c != NULL) {
804 		write_lock_bh(&mrt_lock);
805 		c->mfc_parent = mfc->mfcc_parent;
806 		ipmr_update_thresholds(c, mfc->mfcc_ttls);
807 		if (!mrtsock)
808 			c->mfc_flags |= MFC_STATIC;
809 		write_unlock_bh(&mrt_lock);
810 		return 0;
811 	}
812 
813 	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
814 		return -EINVAL;
815 
816 	c = ipmr_cache_alloc(net);
817 	if (c == NULL)
818 		return -ENOMEM;
819 
820 	c->mfc_origin = mfc->mfcc_origin.s_addr;
821 	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
822 	c->mfc_parent = mfc->mfcc_parent;
823 	ipmr_update_thresholds(c, mfc->mfcc_ttls);
824 	if (!mrtsock)
825 		c->mfc_flags |= MFC_STATIC;
826 
827 	write_lock_bh(&mrt_lock);
828 	c->next = net->ipv4.mfc_cache_array[line];
829 	net->ipv4.mfc_cache_array[line] = c;
830 	write_unlock_bh(&mrt_lock);
831 
832 	/*
833 	 *	Check to see if we resolved a queued entry. If so we
834 	 *	need to send the queued frames on and tidy up.
835 	 */
836 	spin_lock_bh(&mfc_unres_lock);
837 	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
838 	     cp = &uc->next) {
839 		if (net_eq(mfc_net(uc), net) &&
840 		    uc->mfc_origin == c->mfc_origin &&
841 		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
842 			*cp = uc->next;
843 			atomic_dec(&net->ipv4.cache_resolve_queue_len);
844 			break;
845 		}
846 	}
847 	if (mfc_unres_queue == NULL)
848 		del_timer(&ipmr_expire_timer);
849 	spin_unlock_bh(&mfc_unres_lock);
850 
851 	if (uc) {
852 		ipmr_cache_resolve(uc, c);
853 		ipmr_cache_free(uc);
854 	}
855 	return 0;
856 }
857 
858 /*
859  *	Close the multicast socket, and clear the vif tables etc
860  */
861 
862 static void mroute_clean_tables(struct net *net)
863 {
864 	int i;
865 
866 	/*
867 	 *	Shut down all active vif entries
868 	 */
869 	for (i = 0; i < net->ipv4.maxvif; i++) {
870 		if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
871 			vif_delete(net, i, 0);
872 	}
873 
874 	/*
875 	 *	Wipe the cache
876 	 */
877 	for (i=0; i<MFC_LINES; i++) {
878 		struct mfc_cache *c, **cp;
879 
880 		cp = &net->ipv4.mfc_cache_array[i];
881 		while ((c = *cp) != NULL) {
882 			if (c->mfc_flags&MFC_STATIC) {
883 				cp = &c->next;
884 				continue;
885 			}
886 			write_lock_bh(&mrt_lock);
887 			*cp = c->next;
888 			write_unlock_bh(&mrt_lock);
889 
890 			ipmr_cache_free(c);
891 		}
892 	}
893 
894 	if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
895 		struct mfc_cache *c, **cp;
896 
897 		spin_lock_bh(&mfc_unres_lock);
898 		cp = &mfc_unres_queue;
899 		while ((c = *cp) != NULL) {
900 			if (!net_eq(mfc_net(c), net)) {
901 				cp = &c->next;
902 				continue;
903 			}
904 			*cp = c->next;
905 
906 			ipmr_destroy_unres(c);
907 		}
908 		spin_unlock_bh(&mfc_unres_lock);
909 	}
910 }
911 
912 static void mrtsock_destruct(struct sock *sk)
913 {
914 	struct net *net = sock_net(sk);
915 
916 	rtnl_lock();
917 	if (sk == net->ipv4.mroute_sk) {
918 		IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
919 
920 		write_lock_bh(&mrt_lock);
921 		net->ipv4.mroute_sk = NULL;
922 		write_unlock_bh(&mrt_lock);
923 
924 		mroute_clean_tables(net);
925 	}
926 	rtnl_unlock();
927 }
928 
929 /*
930  *	Socket options and virtual interface manipulation. The whole
931  *	virtual interface system is a complete heap, but unfortunately
932  *	that's how BSD mrouted happens to think. Maybe one day with a proper
933  *	MOSPF/PIM router set up we can clean this up.
934  */
935 
936 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
937 {
938 	int ret;
939 	struct vifctl vif;
940 	struct mfcctl mfc;
941 	struct net *net = sock_net(sk);
942 
943 	if (optname != MRT_INIT) {
944 		if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
945 			return -EACCES;
946 	}
947 
948 	switch (optname) {
949 	case MRT_INIT:
950 		if (sk->sk_type != SOCK_RAW ||
951 		    inet_sk(sk)->num != IPPROTO_IGMP)
952 			return -EOPNOTSUPP;
953 		if (optlen != sizeof(int))
954 			return -ENOPROTOOPT;
955 
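		/* Only one mrouted socket may be active per namespace. */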
956 		rtnl_lock();
957 		if (net->ipv4.mroute_sk) {
958 			rtnl_unlock();
959 			return -EADDRINUSE;
960 		}
961 
962 		ret = ip_ra_control(sk, 1, mrtsock_destruct);
963 		if (ret == 0) {
964 			write_lock_bh(&mrt_lock);
965 			net->ipv4.mroute_sk = sk;
966 			write_unlock_bh(&mrt_lock);
967 
968 			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
969 		}
970 		rtnl_unlock();
971 		return ret;
972 	case MRT_DONE:
973 		if (sk != net->ipv4.mroute_sk)
974 			return -EACCES;
975 		return ip_ra_control(sk, 0, NULL);
976 	case MRT_ADD_VIF:
977 	case MRT_DEL_VIF:
978 		if (optlen != sizeof(vif))
979 			return -EINVAL;
980 		if (copy_from_user(&vif, optval, sizeof(vif)))
981 			return -EFAULT;
982 		if (vif.vifc_vifi >= MAXVIFS)
983 			return -ENFILE;
984 		rtnl_lock();
985 		if (optname == MRT_ADD_VIF) {
986 			ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
987 		} else {
988 			ret = vif_delete(net, vif.vifc_vifi, 0);
989 		}
990 		rtnl_unlock();
991 		return ret;
992 
993 		/*
994 		 *	Manipulate the forwarding caches. These live
995 		 *	in a sort of kernel/user symbiosis.
996 		 */
997 	case MRT_ADD_MFC:
998 	case MRT_DEL_MFC:
999 		if (optlen != sizeof(mfc))
1000 			return -EINVAL;
1001 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1002 			return -EFAULT;
1003 		rtnl_lock();
1004 		if (optname == MRT_DEL_MFC)
1005 			ret = ipmr_mfc_delete(net, &mfc);
1006 		else
1007 			ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
1008 		rtnl_unlock();
1009 		return ret;
1010 		/*
1011 		 *	Control PIM assert.
1012 		 */
1013 	case MRT_ASSERT:
1014 	{
1015 		int v;
1016 		if (get_user(v,(int __user *)optval))
1017 			return -EFAULT;
1018 		net->ipv4.mroute_do_assert = (v) ? 1 : 0;
1019 		return 0;
1020 	}
1021 #ifdef CONFIG_IP_PIMSM
1022 	case MRT_PIM:
1023 	{
1024 		int v;
1025 
1026 		if (get_user(v,(int __user *)optval))
1027 			return -EFAULT;
1028 		v = (v) ? 1 : 0;
1029 
1030 		rtnl_lock();
1031 		ret = 0;
1032 		if (v != net->ipv4.mroute_do_pim) {
1033 			net->ipv4.mroute_do_pim = v;
1034 			net->ipv4.mroute_do_assert = v;
1035 		}
1036 		rtnl_unlock();
1037 		return ret;
1038 	}
1039 #endif
1040 	/*
1041 	 *	Spurious command, or MRT_VERSION which you cannot
1042 	 *	set.
1043 	 */
1044 	default:
1045 		return -ENOPROTOOPT;
1046 	}
1047 }
1048 
1049 /*
1050  *	Getsock opt support for the multicast routing system.
1051  */
1052 
1053 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1054 {
1055 	int olr;
1056 	int val;
1057 	struct net *net = sock_net(sk);
1058 
1059 	if (optname != MRT_VERSION &&
1060 #ifdef CONFIG_IP_PIMSM
1061 	   optname!=MRT_PIM &&
1062 #endif
1063 	   optname!=MRT_ASSERT)
1064 		return -ENOPROTOOPT;
1065 
1066 	if (get_user(olr, optlen))
1067 		return -EFAULT;
1068 
1069 	olr = min_t(unsigned int, olr, sizeof(int));
1070 	if (olr < 0)
1071 		return -EINVAL;
1072 
1073 	if (put_user(olr, optlen))
1074 		return -EFAULT;
1075 	if (optname == MRT_VERSION)
1076 		val = 0x0305;
1077 #ifdef CONFIG_IP_PIMSM
1078 	else if (optname == MRT_PIM)
1079 		val = net->ipv4.mroute_do_pim;
1080 #endif
1081 	else
1082 		val = net->ipv4.mroute_do_assert;
1083 	if (copy_to_user(optval, &val, olr))
1084 		return -EFAULT;
1085 	return 0;
1086 }
1087 
1088 /*
1089  *	The IP multicast ioctl support routines.
1090  */
1091 
1092 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1093 {
1094 	struct sioc_sg_req sr;
1095 	struct sioc_vif_req vr;
1096 	struct vif_device *vif;
1097 	struct mfc_cache *c;
1098 	struct net *net = sock_net(sk);
1099 
1100 	switch (cmd) {
1101 	case SIOCGETVIFCNT:
1102 		if (copy_from_user(&vr, arg, sizeof(vr)))
1103 			return -EFAULT;
1104 		if (vr.vifi >= net->ipv4.maxvif)
1105 			return -EINVAL;
1106 		read_lock(&mrt_lock);
1107 		vif = &net->ipv4.vif_table[vr.vifi];
1108 		if (VIF_EXISTS(net, vr.vifi)) {
1109 			vr.icount = vif->pkt_in;
1110 			vr.ocount = vif->pkt_out;
1111 			vr.ibytes = vif->bytes_in;
1112 			vr.obytes = vif->bytes_out;
1113 			read_unlock(&mrt_lock);
1114 
1115 			if (copy_to_user(arg, &vr, sizeof(vr)))
1116 				return -EFAULT;
1117 			return 0;
1118 		}
1119 		read_unlock(&mrt_lock);
1120 		return -EADDRNOTAVAIL;
1121 	case SIOCGETSGCNT:
1122 		if (copy_from_user(&sr, arg, sizeof(sr)))
1123 			return -EFAULT;
1124 
1125 		read_lock(&mrt_lock);
1126 		c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
1127 		if (c) {
1128 			sr.pktcnt = c->mfc_un.res.pkt;
1129 			sr.bytecnt = c->mfc_un.res.bytes;
1130 			sr.wrong_if = c->mfc_un.res.wrong_if;
1131 			read_unlock(&mrt_lock);
1132 
1133 			if (copy_to_user(arg, &sr, sizeof(sr)))
1134 				return -EFAULT;
1135 			return 0;
1136 		}
1137 		read_unlock(&mrt_lock);
1138 		return -EADDRNOTAVAIL;
1139 	default:
1140 		return -ENOIOCTLCMD;
1141 	}
1142 }
1143 
1144 
1145 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1146 {
1147 	struct net_device *dev = ptr;
1148 	struct net *net = dev_net(dev);
1149 	struct vif_device *v;
1150 	int ct;
1151 
1152 	if (!net_eq(dev_net(dev), net))
1153 		return NOTIFY_DONE;
1154 
1155 	if (event != NETDEV_UNREGISTER)
1156 		return NOTIFY_DONE;
1157 	v = &net->ipv4.vif_table[0];
1158 	for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
1159 		if (v->dev == dev)
1160 			vif_delete(net, ct, 1);
1161 	}
1162 	return NOTIFY_DONE;
1163 }
1164 
1165 
1166 static struct notifier_block ip_mr_notifier = {
1167 	.notifier_call = ipmr_device_event,
1168 };
1169 
1170 /*
1171  * 	Encapsulate a packet by attaching a valid IPIP header to it.
1172  *	This avoids tunnel drivers and other mess and gives us the speed so
1173  *	important for multicast video.
1174  */
1175 
1176 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1177 {
1178 	struct iphdr *iph;
1179 	struct iphdr *old_iph = ip_hdr(skb);
1180 
1181 	skb_push(skb, sizeof(struct iphdr));
1182 	skb->transport_header = skb->network_header;
1183 	skb_reset_network_header(skb);
1184 	iph = ip_hdr(skb);
1185 
1186 	iph->version	= 	4;
1187 	iph->tos	=	old_iph->tos;
1188 	iph->ttl	=	old_iph->ttl;
1189 	iph->frag_off	=	0;
1190 	iph->daddr	=	daddr;
1191 	iph->saddr	=	saddr;
1192 	iph->protocol	=	IPPROTO_IPIP;
1193 	iph->ihl	=	5;
1194 	iph->tot_len	=	htons(skb->len);
1195 	ip_select_ident(iph, skb_dst(skb), NULL);
1196 	ip_send_check(iph);
1197 
1198 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1199 	nf_reset(skb);
1200 }
1201 
1202 static inline int ipmr_forward_finish(struct sk_buff *skb)
1203 {
1204 	struct ip_options * opt	= &(IPCB(skb)->opt);
1205 
1206 	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1207 
1208 	if (unlikely(opt->optlen))
1209 		ip_forward_options(skb);
1210 
1211 	return dst_output(skb);
1212 }
1213 
1214 /*
1215  *	Processing handlers for ipmr_forward
1216  */
1217 
1218 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1219 {
1220 	struct net *net = mfc_net(c);
1221 	const struct iphdr *iph = ip_hdr(skb);
1222 	struct vif_device *vif = &net->ipv4.vif_table[vifi];
1223 	struct net_device *dev;
1224 	struct rtable *rt;
1225 	int    encap = 0;
1226 
1227 	if (vif->dev == NULL)
1228 		goto out_free;
1229 
1230 #ifdef CONFIG_IP_PIMSM
1231 	if (vif->flags & VIFF_REGISTER) {
1232 		vif->pkt_out++;
1233 		vif->bytes_out += skb->len;
1234 		vif->dev->stats.tx_bytes += skb->len;
1235 		vif->dev->stats.tx_packets++;
1236 		ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
1237 		goto out_free;
1238 	}
1239 #endif
1240 
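	/* Route the packet out through this vif: tunnel vifs route towards
	 * the remote tunnel endpoint, physical vifs towards the group. */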
1241 	if (vif->flags&VIFF_TUNNEL) {
1242 		struct flowi fl = { .oif = vif->link,
1243 				    .nl_u = { .ip4_u =
1244 					      { .daddr = vif->remote,
1245 						.saddr = vif->local,
1246 						.tos = RT_TOS(iph->tos) } },
1247 				    .proto = IPPROTO_IPIP };
1248 		if (ip_route_output_key(net, &rt, &fl))
1249 			goto out_free;
1250 		encap = sizeof(struct iphdr);
1251 	} else {
1252 		struct flowi fl = { .oif = vif->link,
1253 				    .nl_u = { .ip4_u =
1254 					      { .daddr = iph->daddr,
1255 						.tos = RT_TOS(iph->tos) } },
1256 				    .proto = IPPROTO_IPIP };
1257 		if (ip_route_output_key(net, &rt, &fl))
1258 			goto out_free;
1259 	}
1260 
1261 	dev = rt->u.dst.dev;
1262 
1263 	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1264 		/* Do not fragment multicasts. Alas, IPv4 does not
1265 		   allow us to send ICMP here, so oversized packets
1266 		   simply disappear into a black hole.
1267 		 */
1268 
1269 		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1270 		ip_rt_put(rt);
1271 		goto out_free;
1272 	}
1273 
1274 	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1275 
1276 	if (skb_cow(skb, encap)) {
1277 		ip_rt_put(rt);
1278 		goto out_free;
1279 	}
1280 
1281 	vif->pkt_out++;
1282 	vif->bytes_out += skb->len;
1283 
1284 	skb_dst_drop(skb);
1285 	skb_dst_set(skb, &rt->u.dst);
1286 	ip_decrease_ttl(ip_hdr(skb));
1287 
1288 	/* FIXME: forward and output firewalls used to be called here.
1289 	 * What do we do with netfilter? -- RR */
1290 	if (vif->flags & VIFF_TUNNEL) {
1291 		ip_encap(skb, vif->local, vif->remote);
1292 		/* FIXME: extra output firewall step used to be here. --RR */
1293 		vif->dev->stats.tx_packets++;
1294 		vif->dev->stats.tx_bytes += skb->len;
1295 	}
1296 
1297 	IPCB(skb)->flags |= IPSKB_FORWARDED;
1298 
1299 	/*
1300 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1301 	 * not only before forwarding, but also after forwarding on all output
1302 	 * interfaces. Clearly, if the mrouter runs a multicast
1303 	 * program, that program should receive packets regardless of the
1304 	 * interface it joined on.
1305 	 * If we did not do this, the program would have to join on all
1306 	 * interfaces. On the other hand, a multihomed host (or router, but
1307 	 * not mrouter) must not join on more than one interface - it would
1308 	 * receive duplicate packets.
1309 	 */
1310 	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1311 		ipmr_forward_finish);
1312 	return;
1313 
1314 out_free:
1315 	kfree_skb(skb);
1316 	return;
1317 }
1318 
1319 static int ipmr_find_vif(struct net_device *dev)
1320 {
1321 	struct net *net = dev_net(dev);
1322 	int ct;
1323 	for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1324 		if (net->ipv4.vif_table[ct].dev == dev)
1325 			break;
1326 	}
1327 	return ct;
1328 }
1329 
1330 /* "local" means that we should preserve one skb (for local delivery) */
1331 
1332 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1333 {
1334 	int psend = -1;
1335 	int vif, ct;
1336 	struct net *net = mfc_net(cache);
1337 
1338 	vif = cache->mfc_parent;
1339 	cache->mfc_un.res.pkt++;
1340 	cache->mfc_un.res.bytes += skb->len;
1341 
1342 	/*
1343 	 * Wrong interface: drop packet and (maybe) send PIM assert.
1344 	 */
1345 	if (net->ipv4.vif_table[vif].dev != skb->dev) {
1346 		int true_vifi;
1347 
1348 		if (skb_rtable(skb)->fl.iif == 0) {
1349 			/* It is our own packet, looped back.
1350 			   Very complicated situation...
1351 
1352 			   The best workaround until routing daemons are
1353 			   fixed is not to redistribute a packet if it was
1354 			   sent through the wrong interface. It means that
1355 			   multicast applications WILL NOT work for
1356 			   (S,G) entries whose default multicast route points
1357 			   to the wrong oif. In any case, it is not a good
1358 			   idea to run multicast applications on a router.
1359 			 */
1360 			goto dont_forward;
1361 		}
1362 
1363 		cache->mfc_un.res.wrong_if++;
1364 		true_vifi = ipmr_find_vif(skb->dev);
1365 
1366 		if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
1367 		    /* PIM-SM uses asserts when switching from RPT to SPT,
1368 		       so we cannot check that the packet arrived on an oif.
1369 		       That is bad, but otherwise we would need to move a
1370 		       pretty large chunk of pimd into the kernel. Ough... --ANK
1371 		     */
1372 		    (net->ipv4.mroute_do_pim ||
1373 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
1374 		    time_after(jiffies,
1375 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1376 			cache->mfc_un.res.last_assert = jiffies;
1377 			ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
1378 		}
1379 		goto dont_forward;
1380 	}
1381 
1382 	net->ipv4.vif_table[vif].pkt_in++;
1383 	net->ipv4.vif_table[vif].bytes_in += skb->len;
1384 
1385 	/*
1386 	 *	Forward the frame
1387 	 */
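	/* Transmit a clone for every eligible vif except the last one,
	 * which consumes the original skb (unless a local copy is needed). */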
1388 	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1389 		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1390 			if (psend != -1) {
1391 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1392 				if (skb2)
1393 					ipmr_queue_xmit(skb2, cache, psend);
1394 			}
1395 			psend = ct;
1396 		}
1397 	}
1398 	if (psend != -1) {
1399 		if (local) {
1400 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1401 			if (skb2)
1402 				ipmr_queue_xmit(skb2, cache, psend);
1403 		} else {
1404 			ipmr_queue_xmit(skb, cache, psend);
1405 			return 0;
1406 		}
1407 	}
1408 
1409 dont_forward:
1410 	if (!local)
1411 		kfree_skb(skb);
1412 	return 0;
1413 }
1414 
1415 
1416 /*
1417  *	Multicast packets for forwarding arrive here
1418  */
1419 
1420 int ip_mr_input(struct sk_buff *skb)
1421 {
1422 	struct mfc_cache *cache;
1423 	struct net *net = dev_net(skb->dev);
1424 	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1425 
1426 	/* Packet is looped back after forwarding; it must not be
1427 	   forwarded a second time, but it can still be delivered locally.
1428 	 */
1429 	if (IPCB(skb)->flags&IPSKB_FORWARDED)
1430 		goto dont_forward;
1431 
1432 	if (!local) {
1433 		    if (IPCB(skb)->opt.router_alert) {
1434 			    if (ip_call_ra_chain(skb))
1435 				    return 0;
1436 		    } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1437 			    /* IGMPv1 (and broken IGMPv2 implementations such as
1438 			       Cisco IOS <= 11.2(8)) do not put the router alert
1439 			       option in IGMP packets destined for routable
1440 			       groups. That is very bad, because it means
1441 			       that we can forward NO IGMP messages.
1442 			     */
1443 			    read_lock(&mrt_lock);
1444 			    if (net->ipv4.mroute_sk) {
1445 				    nf_reset(skb);
1446 				    raw_rcv(net->ipv4.mroute_sk, skb);
1447 				    read_unlock(&mrt_lock);
1448 				    return 0;
1449 			    }
1450 			    read_unlock(&mrt_lock);
1451 		    }
1452 	}
1453 
1454 	read_lock(&mrt_lock);
1455 	cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1456 
1457 	/*
1458 	 *	No usable cache entry
1459 	 */
1460 	if (cache == NULL) {
1461 		int vif;
1462 
1463 		if (local) {
1464 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1465 			ip_local_deliver(skb);
1466 			if (skb2 == NULL) {
1467 				read_unlock(&mrt_lock);
1468 				return -ENOBUFS;
1469 			}
1470 			skb = skb2;
1471 		}
1472 
1473 		vif = ipmr_find_vif(skb->dev);
1474 		if (vif >= 0) {
1475 			int err = ipmr_cache_unresolved(net, vif, skb);
1476 			read_unlock(&mrt_lock);
1477 
1478 			return err;
1479 		}
1480 		read_unlock(&mrt_lock);
1481 		kfree_skb(skb);
1482 		return -ENODEV;
1483 	}
1484 
1485 	ip_mr_forward(skb, cache, local);
1486 
1487 	read_unlock(&mrt_lock);
1488 
1489 	if (local)
1490 		return ip_local_deliver(skb);
1491 
1492 	return 0;
1493 
1494 dont_forward:
1495 	if (local)
1496 		return ip_local_deliver(skb);
1497 	kfree_skb(skb);
1498 	return 0;
1499 }
1500 
1501 #ifdef CONFIG_IP_PIMSM
1502 static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1503 {
1504 	struct net_device *reg_dev = NULL;
1505 	struct iphdr *encap;
1506 	struct net *net = dev_net(skb->dev);
1507 
1508 	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1509 	/*
1510 	   Check that:
1511 	   a. packet is really destined to a multicast group
1512 	   b. packet is not a NULL-REGISTER
1513 	   c. packet is not truncated
1514 	 */
1515 	if (!ipv4_is_multicast(encap->daddr) ||
1516 	    encap->tot_len == 0 ||
1517 	    ntohs(encap->tot_len) + pimlen > skb->len)
1518 		return 1;
1519 
1520 	read_lock(&mrt_lock);
1521 	if (net->ipv4.mroute_reg_vif_num >= 0)
1522 		reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
1523 	if (reg_dev)
1524 		dev_hold(reg_dev);
1525 	read_unlock(&mrt_lock);
1526 
1527 	if (reg_dev == NULL)
1528 		return 1;
1529 
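	/* Decapsulate: make the inner multicast packet look as if it was
	 * received on the pimreg device. */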
1530 	skb->mac_header = skb->network_header;
1531 	skb_pull(skb, (u8*)encap - skb->data);
1532 	skb_reset_network_header(skb);
1533 	skb->dev = reg_dev;
1534 	skb->protocol = htons(ETH_P_IP);
1535 	skb->ip_summed = CHECKSUM_NONE;
1536 	skb->pkt_type = PACKET_HOST;
1537 	skb_dst_drop(skb);
1538 	reg_dev->stats.rx_bytes += skb->len;
1539 	reg_dev->stats.rx_packets++;
1540 	nf_reset(skb);
1541 	netif_rx(skb);
1542 	dev_put(reg_dev);
1543 
1544 	return 0;
1545 }
1546 #endif
1547 
1548 #ifdef CONFIG_IP_PIMSM_V1
1549 /*
1550  * Handle IGMP messages of PIMv1
1551  */
1552 
1553 int pim_rcv_v1(struct sk_buff * skb)
1554 {
1555 	struct igmphdr *pim;
1556 	struct net *net = dev_net(skb->dev);
1557 
1558 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1559 		goto drop;
1560 
1561 	pim = igmp_hdr(skb);
1562 
1563 	if (!net->ipv4.mroute_do_pim ||
1564 	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1565 		goto drop;
1566 
1567 	if (__pim_rcv(skb, sizeof(*pim))) {
1568 drop:
1569 		kfree_skb(skb);
1570 	}
1571 	return 0;
1572 }
1573 #endif
1574 
1575 #ifdef CONFIG_IP_PIMSM_V2
1576 static int pim_rcv(struct sk_buff * skb)
1577 {
1578 	struct pimreghdr *pim;
1579 
1580 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1581 		goto drop;
1582 
1583 	pim = (struct pimreghdr *)skb_transport_header(skb);
1584 	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1585 	    (pim->flags&PIM_NULL_REGISTER) ||
1586 	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1587 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1588 		goto drop;
1589 
1590 	if (__pim_rcv(skb, sizeof(*pim))) {
1591 drop:
1592 		kfree_skb(skb);
1593 	}
1594 	return 0;
1595 }
1596 #endif
1597 
1598 static int
1599 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1600 {
1601 	int ct;
1602 	struct rtnexthop *nhp;
1603 	struct net *net = mfc_net(c);
1604 	struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
1605 	u8 *b = skb_tail_pointer(skb);
1606 	struct rtattr *mp_head;
1607 
1608 	if (dev)
1609 		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1610 
1611 	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1612 
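	/* Emit one rtnexthop per output vif that has a finite TTL threshold. */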
1613 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1614 		if (c->mfc_un.res.ttls[ct] < 255) {
1615 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1616 				goto rtattr_failure;
1617 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1618 			nhp->rtnh_flags = 0;
1619 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1620 			nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1621 			nhp->rtnh_len = sizeof(*nhp);
1622 		}
1623 	}
1624 	mp_head->rta_type = RTA_MULTIPATH;
1625 	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1626 	rtm->rtm_type = RTN_MULTICAST;
1627 	return 1;
1628 
1629 rtattr_failure:
1630 	nlmsg_trim(skb, b);
1631 	return -EMSGSIZE;
1632 }
1633 
1634 int ipmr_get_route(struct net *net,
1635 		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1636 {
1637 	int err;
1638 	struct mfc_cache *cache;
1639 	struct rtable *rt = skb_rtable(skb);
1640 
1641 	read_lock(&mrt_lock);
1642 	cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1643 
1644 	if (cache == NULL) {
1645 		struct sk_buff *skb2;
1646 		struct iphdr *iph;
1647 		struct net_device *dev;
1648 		int vif;
1649 
1650 		if (nowait) {
1651 			read_unlock(&mrt_lock);
1652 			return -EAGAIN;
1653 		}
1654 
1655 		dev = skb->dev;
1656 		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1657 			read_unlock(&mrt_lock);
1658 			return -ENODEV;
1659 		}
1660 		skb2 = skb_clone(skb, GFP_ATOMIC);
1661 		if (!skb2) {
1662 			read_unlock(&mrt_lock);
1663 			return -ENOMEM;
1664 		}
1665 
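		/* Build a skeleton IP header with version 0 so that the
		 * netlink reply can be completed once the route resolves. */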
1666 		skb_push(skb2, sizeof(struct iphdr));
1667 		skb_reset_network_header(skb2);
1668 		iph = ip_hdr(skb2);
1669 		iph->ihl = sizeof(struct iphdr) >> 2;
1670 		iph->saddr = rt->rt_src;
1671 		iph->daddr = rt->rt_dst;
1672 		iph->version = 0;
1673 		err = ipmr_cache_unresolved(net, vif, skb2);
1674 		read_unlock(&mrt_lock);
1675 		return err;
1676 	}
1677 
1678 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1679 		cache->mfc_flags |= MFC_NOTIFY;
1680 	err = ipmr_fill_mroute(skb, cache, rtm);
1681 	read_unlock(&mrt_lock);
1682 	return err;
1683 }
1684 
1685 #ifdef CONFIG_PROC_FS
1686 /*
1687  *	The /proc interfaces to multicast routing: /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
1688  */
1689 struct ipmr_vif_iter {
1690 	struct seq_net_private p;
1691 	int ct;
1692 };
1693 
1694 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1695 					   struct ipmr_vif_iter *iter,
1696 					   loff_t pos)
1697 {
1698 	for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1699 		if (!VIF_EXISTS(net, iter->ct))
1700 			continue;
1701 		if (pos-- == 0)
1702 			return &net->ipv4.vif_table[iter->ct];
1703 	}
1704 	return NULL;
1705 }
1706 
1707 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1708 	__acquires(mrt_lock)
1709 {
1710 	struct net *net = seq_file_net(seq);
1711 
1712 	read_lock(&mrt_lock);
1713 	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1714 		: SEQ_START_TOKEN;
1715 }
1716 
1717 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1718 {
1719 	struct ipmr_vif_iter *iter = seq->private;
1720 	struct net *net = seq_file_net(seq);
1721 
1722 	++*pos;
1723 	if (v == SEQ_START_TOKEN)
1724 		return ipmr_vif_seq_idx(net, iter, 0);
1725 
1726 	while (++iter->ct < net->ipv4.maxvif) {
1727 		if (!VIF_EXISTS(net, iter->ct))
1728 			continue;
1729 		return &net->ipv4.vif_table[iter->ct];
1730 	}
1731 	return NULL;
1732 }
1733 
1734 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1735 	__releases(mrt_lock)
1736 {
1737 	read_unlock(&mrt_lock);
1738 }
1739 
1740 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1741 {
1742 	struct net *net = seq_file_net(seq);
1743 
1744 	if (v == SEQ_START_TOKEN) {
1745 		seq_puts(seq,
1746 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1747 	} else {
1748 		const struct vif_device *vif = v;
1749 		const char *name =  vif->dev ? vif->dev->name : "none";
1750 
1751 		seq_printf(seq,
1752 			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1753 			   vif - net->ipv4.vif_table,
1754 			   name, vif->bytes_in, vif->pkt_in,
1755 			   vif->bytes_out, vif->pkt_out,
1756 			   vif->flags, vif->local, vif->remote);
1757 	}
1758 	return 0;
1759 }
1760 
1761 static const struct seq_operations ipmr_vif_seq_ops = {
1762 	.start = ipmr_vif_seq_start,
1763 	.next  = ipmr_vif_seq_next,
1764 	.stop  = ipmr_vif_seq_stop,
1765 	.show  = ipmr_vif_seq_show,
1766 };
1767 
1768 static int ipmr_vif_open(struct inode *inode, struct file *file)
1769 {
1770 	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1771 			    sizeof(struct ipmr_vif_iter));
1772 }
1773 
1774 static const struct file_operations ipmr_vif_fops = {
1775 	.owner	 = THIS_MODULE,
1776 	.open    = ipmr_vif_open,
1777 	.read    = seq_read,
1778 	.llseek  = seq_lseek,
1779 	.release = seq_release_net,
1780 };
1781 
1782 struct ipmr_mfc_iter {
1783 	struct seq_net_private p;
1784 	struct mfc_cache **cache;
1785 	int ct;
1786 };
1787 
1788 
1789 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1790 					  struct ipmr_mfc_iter *it, loff_t pos)
1791 {
1792 	struct mfc_cache *mfc;
1793 
1794 	it->cache = net->ipv4.mfc_cache_array;
1795 	read_lock(&mrt_lock);
1796 	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1797 		for (mfc = net->ipv4.mfc_cache_array[it->ct];
1798 		     mfc; mfc = mfc->next)
1799 			if (pos-- == 0)
1800 				return mfc;
1801 	read_unlock(&mrt_lock);
1802 
1803 	it->cache = &mfc_unres_queue;
1804 	spin_lock_bh(&mfc_unres_lock);
1805 	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1806 		if (net_eq(mfc_net(mfc), net) &&
1807 		    pos-- == 0)
1808 			return mfc;
1809 	spin_unlock_bh(&mfc_unres_lock);
1810 
1811 	it->cache = NULL;
1812 	return NULL;
1813 }
1814 
1815 
1816 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1817 {
1818 	struct ipmr_mfc_iter *it = seq->private;
1819 	struct net *net = seq_file_net(seq);
1820 
1821 	it->cache = NULL;
1822 	it->ct = 0;
1823 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1824 		: SEQ_START_TOKEN;
1825 }
1826 
1827 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1828 {
1829 	struct mfc_cache *mfc = v;
1830 	struct ipmr_mfc_iter *it = seq->private;
1831 	struct net *net = seq_file_net(seq);
1832 
1833 	++*pos;
1834 
1835 	if (v == SEQ_START_TOKEN)
1836 		return ipmr_mfc_seq_idx(net, seq->private, 0);
1837 
1838 	if (mfc->next)
1839 		return mfc->next;
1840 
1841 	if (it->cache == &mfc_unres_queue)
1842 		goto end_of_list;
1843 
1844 	BUG_ON(it->cache != net->ipv4.mfc_cache_array);
1845 
1846 	while (++it->ct < MFC_LINES) {
1847 		mfc = net->ipv4.mfc_cache_array[it->ct];
1848 		if (mfc)
1849 			return mfc;
1850 	}
1851 
1852 	/* exhausted cache_array, show unresolved */
1853 	read_unlock(&mrt_lock);
1854 	it->cache = &mfc_unres_queue;
1855 	it->ct = 0;
1856 
1857 	spin_lock_bh(&mfc_unres_lock);
1858 	mfc = mfc_unres_queue;
1859 	while (mfc && !net_eq(mfc_net(mfc), net))
1860 		mfc = mfc->next;
1861 	if (mfc)
1862 		return mfc;
1863 
1864  end_of_list:
1865 	spin_unlock_bh(&mfc_unres_lock);
1866 	it->cache = NULL;
1867 
1868 	return NULL;
1869 }
1870 
1871 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1872 {
1873 	struct ipmr_mfc_iter *it = seq->private;
1874 	struct net *net = seq_file_net(seq);
1875 
1876 	if (it->cache == &mfc_unres_queue)
1877 		spin_unlock_bh(&mfc_unres_lock);
1878 	else if (it->cache == net->ipv4.mfc_cache_array)
1879 		read_unlock(&mrt_lock);
1880 }
1881 
1882 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1883 {
1884 	int n;
1885 	struct net *net = seq_file_net(seq);
1886 
1887 	if (v == SEQ_START_TOKEN) {
1888 		seq_puts(seq,
1889 		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1890 	} else {
1891 		const struct mfc_cache *mfc = v;
1892 		const struct ipmr_mfc_iter *it = seq->private;
1893 
1894 		seq_printf(seq, "%08lX %08lX %-3hd",
1895 			   (unsigned long) mfc->mfc_mcastgrp,
1896 			   (unsigned long) mfc->mfc_origin,
1897 			   mfc->mfc_parent);
1898 
1899 		if (it->cache != &mfc_unres_queue) {
1900 			seq_printf(seq, " %8lu %8lu %8lu",
1901 				   mfc->mfc_un.res.pkt,
1902 				   mfc->mfc_un.res.bytes,
1903 				   mfc->mfc_un.res.wrong_if);
1904 			for (n = mfc->mfc_un.res.minvif;
1905 			     n < mfc->mfc_un.res.maxvif; n++ ) {
1906 				if (VIF_EXISTS(net, n) &&
1907 				    mfc->mfc_un.res.ttls[n] < 255)
1908 					seq_printf(seq,
1909 					   " %2d:%-3d",
1910 					   n, mfc->mfc_un.res.ttls[n]);
1911 			}
1912 		} else {
1913 			/* unresolved mfc_caches don't contain
1914 			 * pkt, bytes and wrong_if values
1915 			 */
1916 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1917 		}
1918 		seq_putc(seq, '\n');
1919 	}
1920 	return 0;
1921 }
1922 
1923 static const struct seq_operations ipmr_mfc_seq_ops = {
1924 	.start = ipmr_mfc_seq_start,
1925 	.next  = ipmr_mfc_seq_next,
1926 	.stop  = ipmr_mfc_seq_stop,
1927 	.show  = ipmr_mfc_seq_show,
1928 };
1929 
1930 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1931 {
1932 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1933 			    sizeof(struct ipmr_mfc_iter));
1934 }
1935 
1936 static const struct file_operations ipmr_mfc_fops = {
1937 	.owner	 = THIS_MODULE,
1938 	.open    = ipmr_mfc_open,
1939 	.read    = seq_read,
1940 	.llseek  = seq_lseek,
1941 	.release = seq_release_net,
1942 };
1943 #endif
1944 
1945 #ifdef CONFIG_IP_PIMSM_V2
1946 static const struct net_protocol pim_protocol = {
1947 	.handler	=	pim_rcv,
1948 	.netns_ok	=	1,
1949 };
1950 #endif
1951 
1952 
1953 /*
1954  *	Setup for IP multicast routing
1955  */
1956 static int __net_init ipmr_net_init(struct net *net)
1957 {
1958 	int err = 0;
1959 
1960 	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1961 				      GFP_KERNEL);
1962 	if (!net->ipv4.vif_table) {
1963 		err = -ENOMEM;
1964 		goto fail;
1965 	}
1966 
1967 	/* Forwarding cache */
1968 	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1969 					    sizeof(struct mfc_cache *),
1970 					    GFP_KERNEL);
1971 	if (!net->ipv4.mfc_cache_array) {
1972 		err = -ENOMEM;
1973 		goto fail_mfc_cache;
1974 	}
1975 
1976 #ifdef CONFIG_IP_PIMSM
1977 	net->ipv4.mroute_reg_vif_num = -1;
1978 #endif
1979 
1980 #ifdef CONFIG_PROC_FS
1981 	err = -ENOMEM;
1982 	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1983 		goto proc_vif_fail;
1984 	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1985 		goto proc_cache_fail;
1986 #endif
1987 	return 0;
1988 
1989 #ifdef CONFIG_PROC_FS
1990 proc_cache_fail:
1991 	proc_net_remove(net, "ip_mr_vif");
1992 proc_vif_fail:
1993 	kfree(net->ipv4.mfc_cache_array);
1994 #endif
1995 fail_mfc_cache:
1996 	kfree(net->ipv4.vif_table);
1997 fail:
1998 	return err;
1999 }
2000 
2001 static void __net_exit ipmr_net_exit(struct net *net)
2002 {
2003 #ifdef CONFIG_PROC_FS
2004 	proc_net_remove(net, "ip_mr_cache");
2005 	proc_net_remove(net, "ip_mr_vif");
2006 #endif
2007 	kfree(net->ipv4.mfc_cache_array);
2008 	kfree(net->ipv4.vif_table);
2009 }
2010 
2011 static struct pernet_operations ipmr_net_ops = {
2012 	.init = ipmr_net_init,
2013 	.exit = ipmr_net_exit,
2014 };
2015 
2016 int __init ip_mr_init(void)
2017 {
2018 	int err;
2019 
2020 	mrt_cachep = kmem_cache_create("ip_mrt_cache",
2021 				       sizeof(struct mfc_cache),
2022 				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2023 				       NULL);
2024 	if (!mrt_cachep)
2025 		return -ENOMEM;
2026 
2027 	err = register_pernet_subsys(&ipmr_net_ops);
2028 	if (err)
2029 		goto reg_pernet_fail;
2030 
2031 	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
2032 	err = register_netdevice_notifier(&ip_mr_notifier);
2033 	if (err)
2034 		goto reg_notif_fail;
2035 #ifdef CONFIG_IP_PIMSM_V2
2036 	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2037 		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2038 		err = -EAGAIN;
2039 		goto add_proto_fail;
2040 	}
2041 #endif
2042 	return 0;
2043 
2044 #ifdef CONFIG_IP_PIMSM_V2
2045 add_proto_fail:
2046 	unregister_netdevice_notifier(&ip_mr_notifier);
2047 #endif
2048 reg_notif_fail:
2049 	del_timer(&ipmr_expire_timer);
2050 	unregister_pernet_subsys(&ipmr_net_ops);
2051 reg_pernet_fail:
2052 	kmem_cache_destroy(mrt_cachep);
2053 	return err;
2054 }
2055