/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)

static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct net *net,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device  *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device  *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

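/*
 * The PIM register VIF is a pseudo-device ("pimreg"): packets transmitted
 * on it are not sent anywhere but handed up whole to the user-space daemon
 * via an IGMPMSG_WHOLEPKT upcall, and the daemon performs the actual PIM
 * Register encapsulation towards the rendezvous point.
 */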
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
			  IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net)
{
	struct net_device *dev;
	struct in_device *in_dev;

	dev = alloc_netdev(0, "pimreg", reg_vif_setup);

	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 *	@notify: Set to 1, if the caller is a notifier_call
 */

static int vif_delete(struct net *net, int vifi, int notify,
		      struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= net->ipv4.maxvif)
		return -EADDRNOTAVAIL;

	v = &net->ipv4.vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == net->ipv4.mroute_reg_vif_num)
		net->ipv4.mroute_reg_vif_num = -1;
#endif

	if (vifi+1 == net->ipv4.maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(net, tmp))
				break;
		}
		net->ipv4.maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
	release_net(mfc_net(c));
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;
	struct net *net = mfc_net(c);

	atomic_dec(&net->ipv4.cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}


/* Single timer process for all the unresolved queue. */

static void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (mfc_unres_queue == NULL)
		goto out;

	now = jiffies;
	expires = 10*HZ;
	cp = &mfc_unres_queue;

	while ((c=*cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (mfc_unres_queue != NULL)
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill the oif list. Called with mrt_lock write-locked. */

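/*
 * In @ttls, a value of 0 or 255 for a VIF means "do not forward on it";
 * 1..254 is the minimum inner TTL required for forwarding.  minvif/maxvif
 * record the populated range so that ip_mr_forward() only scans live slots.
 */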
410 {
411 	int vifi;
412 	struct net *net = mfc_net(cache);
413 
414 	cache->mfc_un.res.minvif = MAXVIFS;
415 	cache->mfc_un.res.maxvif = 0;
416 	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
417 
418 	for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
419 		if (VIF_EXISTS(net, vifi) &&
420 		    ttls[vifi] && ttls[vifi] < 255) {
421 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
422 			if (cache->mfc_un.res.minvif > vifi)
423 				cache->mfc_un.res.minvif = vifi;
424 			if (cache->mfc_un.res.maxvif <= vifi)
425 				cache->mfc_un.res.maxvif = vifi + 1;
426 		}
427 	}
428 }
429 
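/*
 * Add a virtual interface.  Depending on vifc_flags this is the PIM
 * register VIF, an IPIP (DVMRP) tunnel, or a plain physical interface
 * (selected by ifindex with VIFF_USE_IFINDEX, or by local address).
 */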
static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &net->ipv4.vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(net, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (net->ipv4.mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;

	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && dev->ip_ptr == NULL) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		net->ipv4.mroute_reg_vif_num = vifi;
#endif
	if (vifi+1 > net->ipv4.maxvif)
		net->ipv4.maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc_cache *ipmr_cache_find(struct net *net,
					 __be32 origin,
					 __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	mfc_net_set(c, net);
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	mfc_net_set(c, net);
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

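/*
 * The upcall is queued on the mroute raw socket as a pseudo IGMP packet:
 * struct igmpmsg deliberately overlays struct iphdr, so im_msgtype, im_mbz
 * and im_vif alias the ttl, protocol and checksum bytes of the copied
 * header.  im_mbz (the protocol byte) must be zero so the daemon can tell
 * upcalls apart from real IGMP traffic.
 */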
static int ipmr_cache_report(struct net *net,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = net->ipv4.mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;			/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb_dst_set(skb, dst_clone(skb_dst(pkt)));

	/*
	 *	Add our header
	 */

	igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	= 0;
	ip_hdr(skb)->tot_len = htons(skb->len);			/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	if (net->ipv4.mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet on an unresolved (S,G) entry, creating the entry if
 *	needed.  The unresolved queue is protected by mfc_unres_lock.
 */

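/*
 * At most 10 unresolved entries may exist per namespace, and each holds no
 * more than 3 queued skbs; anything beyond that is dropped with -ENOBUFS
 * to bound memory while mrouted resolves the route.
 */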
static int
ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	for (c=mfc_unres_queue; c; c=c->next) {
		if (net_eq(mfc_net(c), net) &&
		    c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres(net)) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&net->ipv4.cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen>3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &net->ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &net->ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc(net);
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = net->ipv4.mfc_cache_array[line];
	net->ipv4.mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
	     cp = &uc->next) {
		if (net_eq(mfc_net(uc), net) &&
		    uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			atomic_dec(&net->ipv4.cache_resolve_queue_len);
			break;
		}
	}
	if (mfc_unres_queue == NULL)
		del_timer(&ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ipmr_cache_resolve(uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct net *net)
{
	int i;
	LIST_HEAD(list);

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < net->ipv4.maxvif; i++) {
		if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
			vif_delete(net, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i=0; i<MFC_LINES; i++) {
		struct mfc_cache *c, **cp;

		cp = &net->ipv4.mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
		struct mfc_cache *c, **cp;

		spin_lock_bh(&mfc_unres_lock);
		cp = &mfc_unres_queue;
		while ((c = *cp) != NULL) {
			if (!net_eq(mfc_net(c), net)) {
				cp = &c->next;
				continue;
			}
			*cp = c->next;

			ipmr_destroy_unres(c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);

	rtnl_lock();
	if (sk == net->ipv4.mroute_sk) {
		IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

		write_lock_bh(&mrt_lock);
		net->ipv4.mroute_sk = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(net);
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

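/*
 * For illustration only (hypothetical user-space sketch, not kernel code):
 * a routing daemon typically drives this interface roughly as
 *
 *	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int on = 1;
 *	setsockopt(fd, IPPROTO_IP, MRT_INIT, &on, sizeof(on));
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc)); // struct vifctl vc
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc)); // struct mfcctl mc
 *
 * and then reads IGMPMSG_* upcalls from fd.  MRT_DONE (or closing the
 * socket) tears everything down via mrtsock_destruct().
 */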
int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);

	if (optname != MRT_INIT) {
		if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (net->ipv4.mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			net->ipv4.mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != net->ipv4.mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
		} else {
			ret = vif_delete(net, vif.vifc_vifi, 0, NULL);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(net, &mfc);
		else
			ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		net->ipv4.mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != net->ipv4.mroute_do_pim) {
			net->ipv4.mroute_do_pim = v;
			net->ipv4.mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsockopt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	if (olr < 0)
		return -EINVAL;
	olr = min_t(unsigned int, olr, sizeof(int));

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = net->ipv4.mroute_do_pim;
#endif
	else
		val = net->ipv4.mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= net->ipv4.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &net->ipv4.vif_table[vr.vifi];
		if (VIF_EXISTS(net, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}


static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct vif_device *v;
	int ct;
	LIST_HEAD(list);

	if (!net_eq(dev_net(dev), net))
		return NOTIFY_DONE;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
	v = &net->ipv4.vif_table[0];
	for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
		if (v->dev == dev)
			vif_delete(net, ct, 1, &list);
	}
	unregister_netdevice_many(&list);
	return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 * 	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	=	4;
	iph->tos	=	old_iph->tos;
	iph->ttl	=	old_iph->ttl;
	iph->frag_off	=	0;
	iph->daddr	=	daddr;
	iph->saddr	=	saddr;
	iph->protocol	=	IPPROTO_IPIP;
	iph->ihl	=	5;
	iph->tot_len	=	htons(skb->len);
	ip_select_ident(iph, skb_dst(skb), NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 *	Processing handlers for ipmr_forward
 */

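/*
 * Transmit skb on one outgoing VIF: route towards the destination (or the
 * tunnel endpoint), refuse to fragment, decrement the TTL, IPIP-encapsulate
 * for tunnel VIFs, and hand the result to the NF_INET_FORWARD hook.
 */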
static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	struct net *net = mfc_net(c);
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &net->ipv4.vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int    encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow us to send ICMP here, so oversized packets
		   silently disappear into a black hole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding on
	 * all output interfaces: if the mrouter runs a multicast
	 * application, it should receive packets regardless of which
	 * interface the application joined on.  Without this, the program
	 * would have to join on all interfaces; yet a multihomed host (or
	 * router, but not mrouter) cannot join on more than one interface
	 * without receiving duplicate packets.
	 */
	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}

static int ipmr_find_vif(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	int ct;
	for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
		if (net->ipv4.vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
	int psend = -1;
	int vif, ct;
	struct net *net = mfc_net(cache);

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (net->ipv4.vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb_rtable(skb)->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons are
			   fixed is not to redistribute a packet if it was
			   sent through the wrong interface. This means that
			   multicast applications WILL NOT work for (S,G)
			   whose default multicast route points to the wrong
			   oif. In any case, running multicast applications
			   on a router is not a good idea.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(skb->dev);

		if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
		    /* pimsm uses asserts when switching from RPT to SPT,
		       so we cannot check that the packet arrived on an oif.
		       It is bad, but otherwise we would need to move a
		       pretty large chunk of pimd into the kernel. Ough... --ANK
		     */
		    (net->ipv4.mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	net->ipv4.vif_table[vif].pkt_in++;
	net->ipv4.vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(skb2, cache, psend);
		} else {
			ipmr_queue_xmit(skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		    if (IPCB(skb)->opt.router_alert) {
			    if (ip_call_ra_chain(skb))
				    return 0;
		    } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			    /* IGMPv1 (and broken IGMPv2 implementations such as
			       Cisco IOS <= 11.2(8)) do not put the router alert
			       option into IGMP packets destined to routable
			       groups. It is very bad, because it means
			       that we can forward NO IGMP messages.
			     */
			    read_lock(&mrt_lock);
			    if (net->ipv4.mroute_sk) {
				    nf_reset(skb);
				    raw_rcv(net->ipv4.mroute_sk, skb);
				    read_unlock(&mrt_lock);
				    return 0;
			    }
			    read_unlock(&mrt_lock);
		    }
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(net, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM
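/*
 * Common receive path for PIM Register messages (v1 and v2): validate the
 * encapsulated multicast packet, then decapsulate it and feed it back into
 * the stack via netif_rx() as if it had arrived on the "pimreg" device.
 */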
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;
	struct net *net = dev_net(skb->dev);

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (net->ipv4.mroute_reg_vif_num >= 0)
		reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	skb_dst_drop(skb);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (!net->ipv4.mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

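/*
 * Fill an rtnetlink reply for a resolved cache entry: RTA_IIF for the
 * parent VIF plus one RTA_MULTIPATH nexthop per active oif (rtnh_hops
 * carries the TTL threshold).  Returns 1 on success, -EMSGSIZE if the
 * skb runs out of room.
 */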
static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net *net = mfc_net(c);
	struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(net, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to the multicast routing tables:
 *	/proc/net/ip_mr_cache and /proc/net/ip_mr_vif
 */
struct ipmr_vif_iter {
	struct seq_net_private p;
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		if (pos-- == 0)
			return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct net *net = seq_file_net(seq);

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < net->ipv4.maxvif) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

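/*
 * Illustrative /proc/net/ip_mr_vif output (values made up for the example):
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *	 0 eth0         144000    1200    144000    1200 00000 0100000A 00000000
 */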
static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - net->ipv4.vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mfc_cache **cache;
	int ct;
};


static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc_cache *mfc;

	it->cache = net->ipv4.mfc_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
		for (mfc = net->ipv4.mfc_cache_array[it->ct];
		     mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (net_eq(mfc_net(mfc), net) &&
		    pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}


static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != net->ipv4.mfc_cache_array);

	while (++it->ct < MFC_LINES) {
		mfc = net->ipv4.mfc_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	while (mfc && !net_eq(mfc_net(mfc), net))
		mfc = mfc->next;
	if (mfc)
		return mfc;

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == net->ipv4.mfc_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%08lX %08lX %-3hd",
			   (unsigned long) mfc->mfc_mcastgrp,
			   (unsigned long) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(net, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
					   " %2d:%-3d",
					   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
	.netns_ok	=	1,
};
#endif


/*
 *	Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err = 0;

	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
				      GFP_KERNEL);
	if (!net->ipv4.vif_table) {
		err = -ENOMEM;
		goto fail;
	}

	/* Forwarding cache */
	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
					    sizeof(struct mfc_cache *),
					    GFP_KERNEL);
	if (!net->ipv4.mfc_cache_array) {
		err = -ENOMEM;
		goto fail_mfc_cache;
	}

#ifdef CONFIG_IP_PIMSM
	net->ipv4.mroute_reg_vif_num = -1;
#endif

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	kfree(net->ipv4.mfc_cache_array);
#endif
fail_mfc_cache:
	kfree(net->ipv4.vif_table);
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	kfree(net->ipv4.mfc_cache_array);
	kfree(net->ipv4.vif_table);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}