xref: /openbmc/linux/net/ipv6/ip6mr.c (revision 10f0fc17)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <asm/system.h>
20 #include <asm/uaccess.h>
21 #include <linux/types.h>
22 #include <linux/sched.h>
23 #include <linux/errno.h>
24 #include <linux/timer.h>
25 #include <linux/mm.h>
26 #include <linux/kernel.h>
27 #include <linux/fcntl.h>
28 #include <linux/stat.h>
29 #include <linux/socket.h>
30 #include <linux/inet.h>
31 #include <linux/netdevice.h>
32 #include <linux/inetdevice.h>
33 #include <linux/proc_fs.h>
34 #include <linux/seq_file.h>
35 #include <linux/init.h>
36 #include <net/protocol.h>
37 #include <linux/skbuff.h>
38 #include <net/sock.h>
39 #include <net/raw.h>
40 #include <linux/notifier.h>
41 #include <linux/if_arp.h>
42 #include <net/checksum.h>
43 #include <net/netlink.h>
44 
45 #include <net/ipv6.h>
46 #include <net/ip6_route.h>
47 #include <linux/mroute6.h>
48 #include <linux/pim.h>
49 #include <net/addrconf.h>
50 #include <linux/netfilter_ipv6.h>
51 
52 /* Big lock, protecting the vif table, the MRT cache and mroute socket state.
53    Note that changes are serialized via rtnl_lock.
54  */
55 
56 static DEFINE_RWLOCK(mrt_lock);
57 
58 /*
59  *	Multicast router control variables
60  */
61 
62 #define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)
63 
64 static struct mfc6_cache *mfc_unres_queue;		/* Queue of unresolved entries; global across namespaces, entries carry their owning net (mfc6_net()) */
65 
66 /* Special spinlock for queue of unresolved entries */
67 static DEFINE_SPINLOCK(mfc_unres_lock);
68 
69 /* We return to Alan Cox's original scheme. The hash table of resolved
70    entries is changed only in process context and is protected by the
71    weak rwlock mrt_lock. The queue of unresolved entries is protected
72    by the strong spinlock mfc_unres_lock.
73 
74    This way the data path takes no exclusive locks at all.
75  */
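/*
 * A minimal sketch of the resulting discipline (illustrative only):
 *
 *	read_lock(&mrt_lock);                   readers: data path,
 *	c = ip6mr_cache_find(net, &src, &grp);  resolved cache
 *	read_unlock(&mrt_lock);
 *
 *	spin_lock_bh(&mfc_unres_lock);          unresolved queue only
 *	skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
 *	spin_unlock_bh(&mfc_unres_lock);
 *
 * Writers (mif6_add(), ip6mr_mfc_add(), ...) take write_lock_bh(&mrt_lock)
 * and are additionally serialized by rtnl_lock.
 */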
76 
77 static struct kmem_cache *mrt_cachep __read_mostly;
78 
79 static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
80 static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt,
81 			      mifi_t mifi, int assert);
82 static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
83 static void mroute_clean_tables(struct net *net);
84 
85 #ifdef CONFIG_IPV6_PIMSM_V2
86 static struct inet6_protocol pim6_protocol;
87 #endif
88 
89 static struct timer_list ipmr_expire_timer;
90 
91 
92 #ifdef CONFIG_PROC_FS
93 
94 struct ipmr_mfc_iter {
95 	struct seq_net_private p;
96 	struct mfc6_cache **cache;
97 	int ct;
98 };
99 
100 
101 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
102 					   struct ipmr_mfc_iter *it, loff_t pos)
103 {
104 	struct mfc6_cache *mfc;
105 
106 	it->cache = net->ipv6.mfc6_cache_array;
107 	read_lock(&mrt_lock);
108 	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++)
109 		for (mfc = net->ipv6.mfc6_cache_array[it->ct];
110 		     mfc; mfc = mfc->next)
111 			if (pos-- == 0)
112 				return mfc;
113 	read_unlock(&mrt_lock);
114 
115 	it->cache = &mfc_unres_queue;
116 	spin_lock_bh(&mfc_unres_lock);
117 	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
118 		if (net_eq(mfc6_net(mfc), net) &&
119 		    pos-- == 0)
120 			return mfc;
121 	spin_unlock_bh(&mfc_unres_lock);
122 
123 	it->cache = NULL;
124 	return NULL;
125 }
126 
127 
128 
129 
130 /*
131  *	The /proc interfaces to multicast routing: /proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
132  */
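/*
 * Sample of /proc/net/ip6_mr_vif output, per ip6mr_vif_seq_show() below
 * (values invented for illustration):
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0            1040      10      1040      10 00000
 */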
133 
134 struct ipmr_vif_iter {
135 	struct seq_net_private p;
136 	int ct;
137 };
138 
139 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
140 					    struct ipmr_vif_iter *iter,
141 					    loff_t pos)
142 {
143 	for (iter->ct = 0; iter->ct < net->ipv6.maxvif; ++iter->ct) {
144 		if (!MIF_EXISTS(net, iter->ct))
145 			continue;
146 		if (pos-- == 0)
147 			return &net->ipv6.vif6_table[iter->ct];
148 	}
149 	return NULL;
150 }
151 
152 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
153 	__acquires(mrt_lock)
154 {
155 	struct net *net = seq_file_net(seq);
156 
157 	read_lock(&mrt_lock);
158 	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
159 		: SEQ_START_TOKEN;
160 }
161 
162 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
163 {
164 	struct ipmr_vif_iter *iter = seq->private;
165 	struct net *net = seq_file_net(seq);
166 
167 	++*pos;
168 	if (v == SEQ_START_TOKEN)
169 		return ip6mr_vif_seq_idx(net, iter, 0);
170 
171 	while (++iter->ct < net->ipv6.maxvif) {
172 		if (!MIF_EXISTS(net, iter->ct))
173 			continue;
174 		return &net->ipv6.vif6_table[iter->ct];
175 	}
176 	return NULL;
177 }
178 
179 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
180 	__releases(mrt_lock)
181 {
182 	read_unlock(&mrt_lock);
183 }
184 
185 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
186 {
187 	struct net *net = seq_file_net(seq);
188 
189 	if (v == SEQ_START_TOKEN) {
190 		seq_puts(seq,
191 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
192 	} else {
193 		const struct mif_device *vif = v;
194 		const char *name = vif->dev ? vif->dev->name : "none";
195 
196 		seq_printf(seq,
197 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
198 			   vif - net->ipv6.vif6_table,
199 			   name, vif->bytes_in, vif->pkt_in,
200 			   vif->bytes_out, vif->pkt_out,
201 			   vif->flags);
202 	}
203 	return 0;
204 }
205 
206 static struct seq_operations ip6mr_vif_seq_ops = {
207 	.start = ip6mr_vif_seq_start,
208 	.next  = ip6mr_vif_seq_next,
209 	.stop  = ip6mr_vif_seq_stop,
210 	.show  = ip6mr_vif_seq_show,
211 };
212 
213 static int ip6mr_vif_open(struct inode *inode, struct file *file)
214 {
215 	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
216 			    sizeof(struct ipmr_vif_iter));
217 }
218 
219 static struct file_operations ip6mr_vif_fops = {
220 	.owner	 = THIS_MODULE,
221 	.open    = ip6mr_vif_open,
222 	.read    = seq_read,
223 	.llseek  = seq_lseek,
224 	.release = seq_release_net,
225 };
226 
227 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
228 {
229 	struct net *net = seq_file_net(seq);
230 
231 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
232 		: SEQ_START_TOKEN;
233 }
234 
235 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
236 {
237 	struct mfc6_cache *mfc = v;
238 	struct ipmr_mfc_iter *it = seq->private;
239 	struct net *net = seq_file_net(seq);
240 
241 	++*pos;
242 
243 	if (v == SEQ_START_TOKEN)
244 		return ipmr_mfc_seq_idx(net, seq->private, 0);
245 
246 	if (mfc->next)
247 		return mfc->next;
248 
249 	if (it->cache == &mfc_unres_queue)
250 		goto end_of_list;
251 
252 	BUG_ON(it->cache != net->ipv6.mfc6_cache_array);
253 
254 	while (++it->ct < MFC6_LINES) {
255 		mfc = net->ipv6.mfc6_cache_array[it->ct];
256 		if (mfc)
257 			return mfc;
258 	}
259 
260 	/* exhausted cache_array, show unresolved */
261 	read_unlock(&mrt_lock);
262 	it->cache = &mfc_unres_queue;
263 	it->ct = 0;
264 
265 	spin_lock_bh(&mfc_unres_lock);
266 	mfc = mfc_unres_queue;
267 	if (mfc)
268 		return mfc;
269 
270  end_of_list:
271 	spin_unlock_bh(&mfc_unres_lock);
272 	it->cache = NULL;
273 
274 	return NULL;
275 }
276 
277 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
278 {
279 	struct ipmr_mfc_iter *it = seq->private;
280 	struct net *net = seq_file_net(seq);
281 
282 	if (it->cache == &mfc_unres_queue)
283 		spin_unlock_bh(&mfc_unres_lock);
284 	else if (it->cache == net->ipv6.mfc6_cache_array)
285 		read_unlock(&mrt_lock);
286 }
287 
288 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
289 {
290 	int n;
291 	struct net *net = seq_file_net(seq);
292 
293 	if (v == SEQ_START_TOKEN) {
294 		seq_puts(seq,
295 			 "Group                            "
296 			 "Origin                           "
297 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
298 	} else {
299 		const struct mfc6_cache *mfc = v;
300 		const struct ipmr_mfc_iter *it = seq->private;
301 
302 		seq_printf(seq, "%pI6 %pI6 %-3hd",
303 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
304 			   mfc->mf6c_parent);
305 
306 		if (it->cache != &mfc_unres_queue) {
307 			seq_printf(seq, " %8lu %8lu %8lu",
308 				   mfc->mfc_un.res.pkt,
309 				   mfc->mfc_un.res.bytes,
310 				   mfc->mfc_un.res.wrong_if);
311 			for (n = mfc->mfc_un.res.minvif;
312 			     n < mfc->mfc_un.res.maxvif; n++) {
313 				if (MIF_EXISTS(net, n) &&
314 				    mfc->mfc_un.res.ttls[n] < 255)
315 					seq_printf(seq,
316 						   " %2d:%-3d",
317 						   n, mfc->mfc_un.res.ttls[n]);
318 			}
319 		} else {
320 			/* unresolved mfc_caches don't contain
321 			 * pkt, bytes and wrong_if values
322 			 */
323 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
324 		}
325 		seq_putc(seq, '\n');
326 	}
327 	return 0;
328 }
329 
330 static struct seq_operations ipmr_mfc_seq_ops = {
331 	.start = ipmr_mfc_seq_start,
332 	.next  = ipmr_mfc_seq_next,
333 	.stop  = ipmr_mfc_seq_stop,
334 	.show  = ipmr_mfc_seq_show,
335 };
336 
337 static int ipmr_mfc_open(struct inode *inode, struct file *file)
338 {
339 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
340 			    sizeof(struct ipmr_mfc_iter));
341 }
342 
343 static struct file_operations ip6mr_mfc_fops = {
344 	.owner	 = THIS_MODULE,
345 	.open    = ipmr_mfc_open,
346 	.read    = seq_read,
347 	.llseek  = seq_lseek,
348 	.release = seq_release_net,
349 };
350 #endif
351 
352 #ifdef CONFIG_IPV6_PIMSM_V2
353 
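/*
 * On-the-wire layout handled below (sketch; see struct pimreghdr in
 * linux/pim.h):
 *
 *	struct pimreghdr {
 *		__u8   type;      PIM version (high nibble) | PIM_REGISTER
 *		__u8   reserved;
 *		__be16 csum;      header-only or whole-packet checksum,
 *		                  both forms are accepted below
 *		__be32 flags;     PIM_NULL_REGISTER bit, etc.
 *	};
 *	...followed immediately by the encapsulated inner IPv6 packet.
 */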
354 static int pim6_rcv(struct sk_buff *skb)
355 {
356 	struct pimreghdr *pim;
357 	struct ipv6hdr   *encap;
358 	struct net_device  *reg_dev = NULL;
359 	struct net *net = dev_net(skb->dev);
360 	int reg_vif_num;
361 
362 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
363 		goto drop;
364 
365 	pim = (struct pimreghdr *)skb_transport_header(skb);
366 	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
367 	    (pim->flags & PIM_NULL_REGISTER) ||
368 	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
369 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
370 		goto drop;
371 
372 	/* check if the inner packet is destined to mcast group */
373 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
374 				   sizeof(*pim));
375 
376 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
377 	    encap->payload_len == 0 ||
378 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
379 		goto drop;
380 
381 	read_lock(&mrt_lock);
382 	if ((reg_vif_num = net->ipv6.mroute_reg_vif_num) >= 0)	/* read under mrt_lock */
383 		reg_dev = net->ipv6.vif6_table[reg_vif_num].dev;
384 	if (reg_dev)
385 		dev_hold(reg_dev);
386 	read_unlock(&mrt_lock);
387 
388 	if (reg_dev == NULL)
389 		goto drop;
390 
391 	skb->mac_header = skb->network_header;
392 	skb_pull(skb, (u8 *)encap - skb->data);
393 	skb_reset_network_header(skb);
394 	skb->dev = reg_dev;
395 	skb->protocol = htons(ETH_P_IPV6);	/* the decapsulated packet is IPv6, not IPv4 */
396 	skb->ip_summed = CHECKSUM_NONE;
397 	skb->pkt_type = PACKET_HOST;
398 	dst_release(skb->dst);
399 	reg_dev->stats.rx_bytes += skb->len;
400 	reg_dev->stats.rx_packets++;
401 	skb->dst = NULL;
402 	nf_reset(skb);
403 	netif_rx(skb);
404 	dev_put(reg_dev);
405 	return 0;
406  drop:
407 	kfree_skb(skb);
408 	return 0;
409 }
410 
411 static struct inet6_protocol pim6_protocol = {
412 	.handler	=	pim6_rcv,
413 };
414 
415 /* Service routines creating virtual interfaces: PIMREG */
416 
417 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
418 {
419 	struct net *net = dev_net(dev);
420 
421 	read_lock(&mrt_lock);
422 	dev->stats.tx_bytes += skb->len;
423 	dev->stats.tx_packets++;
424 	ip6mr_cache_report(net, skb, net->ipv6.mroute_reg_vif_num,
425 			   MRT6MSG_WHOLEPKT);
426 	read_unlock(&mrt_lock);
427 	kfree_skb(skb);
428 	return 0;
429 }
430 
431 static const struct net_device_ops reg_vif_netdev_ops = {
432 	.ndo_start_xmit	= reg_vif_xmit,
433 };
434 
435 static void reg_vif_setup(struct net_device *dev)
436 {
437 	dev->type		= ARPHRD_PIMREG;
438 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8; /* outer IPv6 hdr + 8-byte PIM register hdr */
439 	dev->flags		= IFF_NOARP;
440 	dev->netdev_ops		= &reg_vif_netdev_ops;
441 	dev->destructor		= free_netdev;
442 }
443 
444 static struct net_device *ip6mr_reg_vif(struct net *net)
445 {
446 	struct net_device *dev;
447 
448 	dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
449 	if (dev == NULL)
450 		return NULL;
451 
452 	dev_net_set(dev, net);
453 
454 	if (register_netdevice(dev)) {
455 		free_netdev(dev);
456 		return NULL;
457 	}
458 	dev->iflink = 0;
459 
460 	if (dev_open(dev))
461 		goto failure;
462 
463 	dev_hold(dev);
464 	return dev;
465 
466 failure:
467 	/* allow the register to be completed before unregistering. */
468 	rtnl_unlock();
469 	rtnl_lock();
470 
471 	unregister_netdevice(dev);
472 	return NULL;
473 }
474 #endif
475 
476 /*
477  *	Delete a VIF entry
478  */
479 
480 static int mif6_delete(struct net *net, int vifi)
481 {
482 	struct mif_device *v;
483 	struct net_device *dev;
484 	if (vifi < 0 || vifi >= net->ipv6.maxvif)
485 		return -EADDRNOTAVAIL;
486 
487 	v = &net->ipv6.vif6_table[vifi];
488 
489 	write_lock_bh(&mrt_lock);
490 	dev = v->dev;
491 	v->dev = NULL;
492 
493 	if (!dev) {
494 		write_unlock_bh(&mrt_lock);
495 		return -EADDRNOTAVAIL;
496 	}
497 
498 #ifdef CONFIG_IPV6_PIMSM_V2
499 	if (vifi == net->ipv6.mroute_reg_vif_num)
500 		net->ipv6.mroute_reg_vif_num = -1;
501 #endif
502 
503 	if (vifi + 1 == net->ipv6.maxvif) {
504 		int tmp;
505 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
506 			if (MIF_EXISTS(net, tmp))
507 				break;
508 		}
509 		net->ipv6.maxvif = tmp + 1;
510 	}
511 
512 	write_unlock_bh(&mrt_lock);
513 
514 	dev_set_allmulti(dev, -1);
515 
516 	if (v->flags & MIFF_REGISTER)
517 		unregister_netdevice(dev);
518 
519 	dev_put(dev);
520 	return 0;
521 }
522 
523 static inline void ip6mr_cache_free(struct mfc6_cache *c)
524 {
525 	release_net(mfc6_net(c));
526 	kmem_cache_free(mrt_cachep, c);
527 }
528 
529 /* Destroy an unresolved cache entry, killing queued skbs
530    and reporting error to netlink readers.
531  */
532 
533 static void ip6mr_destroy_unres(struct mfc6_cache *c)
534 {
535 	struct sk_buff *skb;
536 	struct net *net = mfc6_net(c);
537 
538 	atomic_dec(&net->ipv6.cache_resolve_queue_len);
539 
540 	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
541 		if (ipv6_hdr(skb)->version == 0) {	/* queued netlink request, not a data packet */
542 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
543 			nlh->nlmsg_type = NLMSG_ERROR;
544 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
545 			skb_trim(skb, nlh->nlmsg_len);
546 			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
547 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
548 		} else
549 			kfree_skb(skb);
550 	}
551 
552 	ip6mr_cache_free(c);
553 }
554 
555 
556 /* A single expiry timer services the whole unresolved queue. */
557 
558 static void ipmr_do_expire_process(unsigned long dummy)
559 {
560 	unsigned long now = jiffies;
561 	unsigned long expires = 10 * HZ;
562 	struct mfc6_cache *c, **cp;
563 
564 	cp = &mfc_unres_queue;
565 
566 	while ((c = *cp) != NULL) {
567 		if (time_after(c->mfc_un.unres.expires, now)) {
568 			/* not yet... */
569 			unsigned long interval = c->mfc_un.unres.expires - now;
570 			if (interval < expires)
571 				expires = interval;
572 			cp = &c->next;
573 			continue;
574 		}
575 
576 		*cp = c->next;
577 		ip6mr_destroy_unres(c);
578 	}
579 
580 	if (mfc_unres_queue != NULL)
581 		mod_timer(&ipmr_expire_timer, jiffies + expires);
582 }
583 
584 static void ipmr_expire_process(unsigned long dummy)
585 {
586 	if (!spin_trylock(&mfc_unres_lock)) {
587 		mod_timer(&ipmr_expire_timer, jiffies + 1);
588 		return;
589 	}
590 
591 	if (mfc_unres_queue != NULL)
592 		ipmr_do_expire_process(dummy);
593 
594 	spin_unlock(&mfc_unres_lock);
595 }
596 
597 /* Fill the oif list. Called with mrt_lock held for writing. */
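/*
 * Worked example (values assumed for illustration): with
 * ttls[] = { 255, 1, 255, 3, 255, 255, ... } the loop below yields
 * res.minvif = 1 and res.maxvif = 4, so the forwarding loop in
 * ip6_mr_forward() scans only vifs 1..3.
 */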
598 
599 static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
600 {
601 	int vifi;
602 	struct net *net = mfc6_net(cache);
603 
604 	cache->mfc_un.res.minvif = MAXMIFS;
605 	cache->mfc_un.res.maxvif = 0;
606 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
607 
608 	for (vifi = 0; vifi < net->ipv6.maxvif; vifi++) {
609 		if (MIF_EXISTS(net, vifi) &&
610 		    ttls[vifi] && ttls[vifi] < 255) {
611 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
612 			if (cache->mfc_un.res.minvif > vifi)
613 				cache->mfc_un.res.minvif = vifi;
614 			if (cache->mfc_un.res.maxvif <= vifi)
615 				cache->mfc_un.res.maxvif = vifi + 1;
616 		}
617 	}
618 }
619 
620 static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock)
621 {
622 	int vifi = vifc->mif6c_mifi;
623 	struct mif_device *v = &net->ipv6.vif6_table[vifi];
624 	struct net_device *dev;
625 	int err;
626 
627 	/* Is vif busy ? */
628 	if (MIF_EXISTS(net, vifi))
629 		return -EADDRINUSE;
630 
631 	switch (vifc->mif6c_flags) {
632 #ifdef CONFIG_IPV6_PIMSM_V2
633 	case MIFF_REGISTER:
634 		/*
635 		 * Special Purpose VIF in PIM
636 		 * All the packets will be sent to the daemon
637 		 */
638 		if (net->ipv6.mroute_reg_vif_num >= 0)
639 			return -EADDRINUSE;
640 		dev = ip6mr_reg_vif(net);
641 		if (!dev)
642 			return -ENOBUFS;
643 		err = dev_set_allmulti(dev, 1);
644 		if (err) {
645 			unregister_netdevice(dev);
646 			dev_put(dev);
647 			return err;
648 		}
649 		break;
650 #endif
651 	case 0:
652 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
653 		if (!dev)
654 			return -EADDRNOTAVAIL;
655 		err = dev_set_allmulti(dev, 1);
656 		if (err) {
657 			dev_put(dev);
658 			return err;
659 		}
660 		break;
661 	default:
662 		return -EINVAL;
663 	}
664 
665 	/*
666 	 *	Fill in the VIF structures
667 	 */
668 	v->rate_limit = vifc->vifc_rate_limit;
669 	v->flags = vifc->mif6c_flags;
670 	if (!mrtsock)
671 		v->flags |= VIFF_STATIC;
672 	v->threshold = vifc->vifc_threshold;
673 	v->bytes_in = 0;
674 	v->bytes_out = 0;
675 	v->pkt_in = 0;
676 	v->pkt_out = 0;
677 	v->link = dev->ifindex;
678 	if (v->flags & MIFF_REGISTER)
679 		v->link = dev->iflink;
680 
681 	/* And finish update writing critical data */
682 	write_lock_bh(&mrt_lock);
683 	v->dev = dev;
684 #ifdef CONFIG_IPV6_PIMSM_V2
685 	if (v->flags & MIFF_REGISTER)
686 		net->ipv6.mroute_reg_vif_num = vifi;
687 #endif
688 	if (vifi + 1 > net->ipv6.maxvif)
689 		net->ipv6.maxvif = vifi + 1;
690 	write_unlock_bh(&mrt_lock);
691 	return 0;
692 }
693 
694 static struct mfc6_cache *ip6mr_cache_find(struct net *net,
695 					   struct in6_addr *origin,
696 					   struct in6_addr *mcastgrp)
697 {
698 	int line = MFC6_HASH(mcastgrp, origin);
699 	struct mfc6_cache *c;
700 
701 	for (c = net->ipv6.mfc6_cache_array[line]; c; c = c->next) {
702 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
703 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
704 			break;
705 	}
706 	return c;
707 }
708 
709 /*
710  *	Allocate a multicast cache entry
711  */
712 static struct mfc6_cache *ip6mr_cache_alloc(struct net *net)
713 {
714 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
715 	if (c == NULL)
716 		return NULL;
717 	c->mfc_un.res.minvif = MAXMIFS;
718 	mfc6_net_set(c, net);
719 	return c;
720 }
721 
722 static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
723 {
724 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
725 	if (c == NULL)
726 		return NULL;
727 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
728 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
729 	mfc6_net_set(c, net);
730 	return c;
731 }
732 
733 /*
734  *	A cache entry has gone into a resolved state from queued
735  */
736 
737 static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
738 {
739 	struct sk_buff *skb;
740 
741 	/*
742 	 *	Play the pending entries through our router
743 	 */
744 
745 	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
746 		if (ipv6_hdr(skb)->version == 0) {	/* queued netlink request */
747 			int err;
748 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
749 
750 			if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
751 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
752 			} else {
753 				nlh->nlmsg_type = NLMSG_ERROR;
754 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
755 				skb_trim(skb, nlh->nlmsg_len);
756 				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
757 			}
758 			err = rtnl_unicast(skb, mfc6_net(uc), NETLINK_CB(skb).pid);
759 		} else
760 			ip6_mr_forward(skb, c);
761 	}
762 }
763 
764 /*
765  *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
766  *	expects the following bizarre scheme.
767  *
768  *	Called under mrt_lock.
769  */
770 
771 static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
772 			      int assert)
773 {
774 	struct sk_buff *skb;
775 	struct mrt6msg *msg;
776 	int ret;
777 
778 #ifdef CONFIG_IPV6_PIMSM_V2
779 	if (assert == MRT6MSG_WHOLEPKT)
780 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
781 						+sizeof(*msg));
782 	else
783 #endif
784 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
785 
786 	if (!skb)
787 		return -ENOBUFS;
788 
789 	/* I suppose that internal messages
790 	 * do not require checksums */
791 
792 	skb->ip_summed = CHECKSUM_UNNECESSARY;
793 
794 #ifdef CONFIG_IPV6_PIMSM_V2
795 	if (assert == MRT6MSG_WHOLEPKT) {
796 		/* Ugly, but we have no choice with this interface.
797 		   Duplicate old header, fix length etc.
798 		   And all this only to mangle msg->im6_msgtype and
799 		   to set msg->im6_mbz to "mbz" :-)
800 		 */
801 		skb_push(skb, -skb_network_offset(pkt));
802 
803 		skb_push(skb, sizeof(*msg));
804 		skb_reset_transport_header(skb);
805 		msg = (struct mrt6msg *)skb_transport_header(skb);
806 		msg->im6_mbz = 0;
807 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
808 		msg->im6_mif = net->ipv6.mroute_reg_vif_num;
809 		msg->im6_pad = 0;
810 		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
811 		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
812 
813 		skb->ip_summed = CHECKSUM_UNNECESSARY;
814 	} else
815 #endif
816 	{
817 	/*
818 	 *	Copy the IP header
819 	 */
820 
821 	skb_put(skb, sizeof(struct ipv6hdr));
822 	skb_reset_network_header(skb);
823 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
824 
825 	/*
826 	 *	Add our header
827 	 */
828 	skb_put(skb, sizeof(*msg));
829 	skb_reset_transport_header(skb);
830 	msg = (struct mrt6msg *)skb_transport_header(skb);
831 
832 	msg->im6_mbz = 0;
833 	msg->im6_msgtype = assert;
834 	msg->im6_mif = mifi;
835 	msg->im6_pad = 0;
836 	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
837 	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
838 
839 	skb->dst = dst_clone(pkt->dst);
840 	skb->ip_summed = CHECKSUM_UNNECESSARY;
841 
842 	skb_pull(skb, sizeof(struct ipv6hdr));
843 	}
844 
845 	if (net->ipv6.mroute6_sk == NULL) {
846 		kfree_skb(skb);
847 		return -EINVAL;
848 	}
849 
850 	/*
851 	 *	Deliver to user space multicast routing algorithms
852 	 */
853 	ret = sock_queue_rcv_skb(net->ipv6.mroute6_sk, skb);
854 	if (ret < 0) {
855 		if (net_ratelimit())
856 			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
857 		kfree_skb(skb);
858 	}
859 
860 	return ret;
861 }
862 
863 /*
864  *	Queue a packet for resolution, creating the unresolved cache entry if needed; runs under mfc_unres_lock.
865  */
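/*
 * Lifecycle sketch: a packet with no MFC entry is queued here and a
 * MRT6MSG_NOCACHE report is sent to the daemon (see above); when the
 * daemon answers with setsockopt(MRT6_ADD_MFC), ip6mr_mfc_add() pulls
 * the matching entry off mfc_unres_queue and ip6mr_cache_resolve()
 * replays the queued skbs. Entries not resolved within 10 seconds are
 * reaped by ipmr_expire_process().
 */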
866 
867 static int
868 ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
869 {
870 	int err;
871 	struct mfc6_cache *c;
872 
873 	spin_lock_bh(&mfc_unres_lock);
874 	for (c = mfc_unres_queue; c; c = c->next) {
875 		if (net_eq(mfc6_net(c), net) &&
876 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
877 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
878 			break;
879 	}
880 
881 	if (c == NULL) {
882 		/*
883 		 *	Create a new entry if allowable
884 		 */
885 
886 		if (atomic_read(&net->ipv6.cache_resolve_queue_len) >= 10 ||
887 		    (c = ip6mr_cache_alloc_unres(net)) == NULL) {
888 			spin_unlock_bh(&mfc_unres_lock);
889 
890 			kfree_skb(skb);
891 			return -ENOBUFS;
892 		}
893 
894 		/*
895 		 *	Fill in the new cache entry
896 		 */
897 		c->mf6c_parent = -1;
898 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
899 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
900 
901 		/*
902 		 *	Reflect first query at pim6sd
903 		 */
904 		err = ip6mr_cache_report(net, skb, mifi, MRT6MSG_NOCACHE);
905 		if (err < 0) {
906 			/* If the report failed throw the cache entry
907 			   out - Brad Parker
908 			 */
909 			spin_unlock_bh(&mfc_unres_lock);
910 
911 			ip6mr_cache_free(c);
912 			kfree_skb(skb);
913 			return err;
914 		}
915 
916 		atomic_inc(&net->ipv6.cache_resolve_queue_len);
917 		c->next = mfc_unres_queue;
918 		mfc_unres_queue = c;
919 
920 		ipmr_do_expire_process(1);
921 	}
922 
923 	/*
924 	 *	See if we can append the packet
925 	 */
926 	if (c->mfc_un.unres.unresolved.qlen > 3) {
927 		kfree_skb(skb);
928 		err = -ENOBUFS;
929 	} else {
930 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
931 		err = 0;
932 	}
933 
934 	spin_unlock_bh(&mfc_unres_lock);
935 	return err;
936 }
937 
938 /*
939  *	MFC6 cache manipulation by user space
940  */
941 
942 static int ip6mr_mfc_delete(struct net *net, struct mf6cctl *mfc)
943 {
944 	int line;
945 	struct mfc6_cache *c, **cp;
946 
947 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
948 
949 	for (cp = &net->ipv6.mfc6_cache_array[line];
950 	     (c = *cp) != NULL; cp = &c->next) {
951 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
952 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
953 			write_lock_bh(&mrt_lock);
954 			*cp = c->next;
955 			write_unlock_bh(&mrt_lock);
956 
957 			ip6mr_cache_free(c);
958 			return 0;
959 		}
960 	}
961 	return -ENOENT;
962 }
963 
964 static int ip6mr_device_event(struct notifier_block *this,
965 			      unsigned long event, void *ptr)
966 {
967 	struct net_device *dev = ptr;
968 	struct net *net = dev_net(dev);
969 	struct mif_device *v;
970 	int ct;
971 
972 	if (event != NETDEV_UNREGISTER)
973 		return NOTIFY_DONE;
974 
975 	v = &net->ipv6.vif6_table[0];
976 	for (ct = 0; ct < net->ipv6.maxvif; ct++, v++) {
977 		if (v->dev == dev)
978 			mif6_delete(net, ct);
979 	}
980 	return NOTIFY_DONE;
981 }
982 
983 static struct notifier_block ip6_mr_notifier = {
984 	.notifier_call = ip6mr_device_event
985 };
986 
987 /*
988  *	Setup for IP multicast routing
989  */
990 
991 static int __net_init ip6mr_net_init(struct net *net)
992 {
993 	int err = 0;
994 	net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
995 				       GFP_KERNEL);
996 	if (!net->ipv6.vif6_table) {
997 		err = -ENOMEM;
998 		goto fail;
999 	}
1000 
1001 	/* Forwarding cache */
1002 	net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
1003 					     sizeof(struct mfc6_cache *),
1004 					     GFP_KERNEL);
1005 	if (!net->ipv6.mfc6_cache_array) {
1006 		err = -ENOMEM;
1007 		goto fail_mfc6_cache;
1008 	}
1009 
1010 #ifdef CONFIG_IPV6_PIMSM_V2
1011 	net->ipv6.mroute_reg_vif_num = -1;
1012 #endif
1013 
1014 #ifdef CONFIG_PROC_FS
1015 	err = -ENOMEM;
1016 	if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1017 		goto proc_vif_fail;
1018 	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1019 		goto proc_cache_fail;
1020 #endif
1021 	return 0;
1022 
1023 #ifdef CONFIG_PROC_FS
1024 proc_cache_fail:
1025 	proc_net_remove(net, "ip6_mr_vif");
1026 proc_vif_fail:
1027 	kfree(net->ipv6.mfc6_cache_array);
1028 #endif
1029 fail_mfc6_cache:
1030 	kfree(net->ipv6.vif6_table);
1031 fail:
1032 	return err;
1033 }
1034 
1035 static void __net_exit ip6mr_net_exit(struct net *net)
1036 {
1037 #ifdef CONFIG_PROC_FS
1038 	proc_net_remove(net, "ip6_mr_cache");
1039 	proc_net_remove(net, "ip6_mr_vif");
1040 #endif
1041 	mroute_clean_tables(net);
1042 	kfree(net->ipv6.mfc6_cache_array);
1043 	kfree(net->ipv6.vif6_table);
1044 }
1045 
1046 static struct pernet_operations ip6mr_net_ops = {
1047 	.init = ip6mr_net_init,
1048 	.exit = ip6mr_net_exit,
1049 };
1050 
1051 int __init ip6_mr_init(void)
1052 {
1053 	int err;
1054 
1055 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1056 				       sizeof(struct mfc6_cache),
1057 				       0, SLAB_HWCACHE_ALIGN,
1058 				       NULL);
1059 	if (!mrt_cachep)
1060 		return -ENOMEM;
1061 
1062 	err = register_pernet_subsys(&ip6mr_net_ops);
1063 	if (err)
1064 		goto reg_pernet_fail;
1065 
1066 	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
1067 	err = register_netdevice_notifier(&ip6_mr_notifier);
1068 	if (err)
1069 		goto reg_notif_fail;
1070 	return 0;
1071 reg_notif_fail:
1072 	del_timer(&ipmr_expire_timer);
1073 	unregister_pernet_subsys(&ip6mr_net_ops);
1074 reg_pernet_fail:
1075 	kmem_cache_destroy(mrt_cachep);
1076 	return err;
1077 }
1078 
1079 void ip6_mr_cleanup(void)
1080 {
1081 	unregister_netdevice_notifier(&ip6_mr_notifier);
1082 	del_timer(&ipmr_expire_timer);
1083 	unregister_pernet_subsys(&ip6mr_net_ops);
1084 	kmem_cache_destroy(mrt_cachep);
1085 }
1086 
1087 static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
1088 {
1089 	int line;
1090 	struct mfc6_cache *uc, *c, **cp;
1091 	unsigned char ttls[MAXMIFS];
1092 	int i;
1093 
1094 	memset(ttls, 255, MAXMIFS);
1095 	for (i = 0; i < MAXMIFS; i++) {
1096 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1097 			ttls[i] = 1;
1098 
1099 	}
1100 
1101 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1102 
1103 	for (cp = &net->ipv6.mfc6_cache_array[line];
1104 	     (c = *cp) != NULL; cp = &c->next) {
1105 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1106 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
1107 			break;
1108 	}
1109 
1110 	if (c != NULL) {
1111 		write_lock_bh(&mrt_lock);
1112 		c->mf6c_parent = mfc->mf6cc_parent;
1113 		ip6mr_update_thresholds(c, ttls);
1114 		if (!mrtsock)
1115 			c->mfc_flags |= MFC_STATIC;
1116 		write_unlock_bh(&mrt_lock);
1117 		return 0;
1118 	}
1119 
1120 	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1121 		return -EINVAL;
1122 
1123 	c = ip6mr_cache_alloc(net);
1124 	if (c == NULL)
1125 		return -ENOMEM;
1126 
1127 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1128 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1129 	c->mf6c_parent = mfc->mf6cc_parent;
1130 	ip6mr_update_thresholds(c, ttls);
1131 	if (!mrtsock)
1132 		c->mfc_flags |= MFC_STATIC;
1133 
1134 	write_lock_bh(&mrt_lock);
1135 	c->next = net->ipv6.mfc6_cache_array[line];
1136 	net->ipv6.mfc6_cache_array[line] = c;
1137 	write_unlock_bh(&mrt_lock);
1138 
1139 	/*
1140 	 *	Check to see if we resolved a queued list. If so we
1141 	 *	need to send on the frames and tidy up.
1142 	 */
1143 	spin_lock_bh(&mfc_unres_lock);
1144 	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
1145 	     cp = &uc->next) {
1146 		if (net_eq(mfc6_net(uc), net) &&
1147 		    ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1148 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1149 			*cp = uc->next;
1150 			atomic_dec(&net->ipv6.cache_resolve_queue_len);
1151 			break;
1152 		}
1153 	}
1154 	if (mfc_unres_queue == NULL)
1155 		del_timer(&ipmr_expire_timer);
1156 	spin_unlock_bh(&mfc_unres_lock);
1157 
1158 	if (uc) {
1159 		ip6mr_cache_resolve(uc, c);
1160 		ip6mr_cache_free(uc);
1161 	}
1162 	return 0;
1163 }
1164 
1165 /*
1166  *	Close the multicast socket, and clear the vif tables etc
1167  */
1168 
1169 static void mroute_clean_tables(struct net *net)
1170 {
1171 	int i;
1172 
1173 	/*
1174 	 *	Shut down all active vif entries
1175 	 */
1176 	for (i = 0; i < net->ipv6.maxvif; i++) {
1177 		if (!(net->ipv6.vif6_table[i].flags & VIFF_STATIC))
1178 			mif6_delete(net, i);
1179 	}
1180 
1181 	/*
1182 	 *	Wipe the cache
1183 	 */
1184 	for (i = 0; i < MFC6_LINES; i++) {
1185 		struct mfc6_cache *c, **cp;
1186 
1187 		cp = &net->ipv6.mfc6_cache_array[i];
1188 		while ((c = *cp) != NULL) {
1189 			if (c->mfc_flags & MFC_STATIC) {
1190 				cp = &c->next;
1191 				continue;
1192 			}
1193 			write_lock_bh(&mrt_lock);
1194 			*cp = c->next;
1195 			write_unlock_bh(&mrt_lock);
1196 
1197 			ip6mr_cache_free(c);
1198 		}
1199 	}
1200 
1201 	if (atomic_read(&net->ipv6.cache_resolve_queue_len) != 0) {
1202 		struct mfc6_cache *c, **cp;
1203 
1204 		spin_lock_bh(&mfc_unres_lock);
1205 		cp = &mfc_unres_queue;
1206 		while ((c = *cp) != NULL) {
1207 			if (!net_eq(mfc6_net(c), net)) {
1208 				cp = &c->next;
1209 				continue;
1210 			}
1211 			*cp = c->next;
1212 			ip6mr_destroy_unres(c);
1213 		}
1214 		spin_unlock_bh(&mfc_unres_lock);
1215 	}
1216 }
1217 
1218 static int ip6mr_sk_init(struct sock *sk)
1219 {
1220 	int err = 0;
1221 	struct net *net = sock_net(sk);
1222 
1223 	rtnl_lock();
1224 	write_lock_bh(&mrt_lock);
1225 	if (likely(net->ipv6.mroute6_sk == NULL))
1226 		net->ipv6.mroute6_sk = sk;
1227 	else
1228 		err = -EADDRINUSE;
1229 	write_unlock_bh(&mrt_lock);
1230 
1231 	rtnl_unlock();
1232 
1233 	return err;
1234 }
1235 
1236 int ip6mr_sk_done(struct sock *sk)
1237 {
1238 	int err = 0;
1239 	struct net *net = sock_net(sk);
1240 
1241 	rtnl_lock();
1242 	if (sk == net->ipv6.mroute6_sk) {
1243 		write_lock_bh(&mrt_lock);
1244 		net->ipv6.mroute6_sk = NULL;
1245 		write_unlock_bh(&mrt_lock);
1246 
1247 		mroute_clean_tables(net);
1248 	} else
1249 		err = -EACCES;
1250 	rtnl_unlock();
1251 
1252 	return err;
1253 }
1254 
1255 /*
1256  *	Socket options and virtual interface manipulation. The whole
1257  *	virtual interface system is a complete heap, but unfortunately
1258  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1259  *	MOSPF/PIM router set up we can clean this up.
1260  */
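/*
 * Minimal userspace sketch (hypothetical daemon code, names assumed):
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int on = 1;
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &on, sizeof(on));
 *
 *	struct mif6ctl mc = { .mif6c_mifi = 0,
 *			      .mif6c_pifi = if_nametoindex("eth0") };
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mc, sizeof(mc));
 *
 *	struct mf6cctl fc = { .mf6cc_parent = 0 };
 *	(fill mf6cc_origin/mf6cc_mcastgrp and IF_SET() each output mif
 *	 in fc.mf6cc_ifset, then:)
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MFC, &fc, sizeof(fc));
 */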
1261 
1262 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
1263 {
1264 	int ret;
1265 	struct mif6ctl vif;
1266 	struct mf6cctl mfc;
1267 	mifi_t mifi;
1268 	struct net *net = sock_net(sk);
1269 
1270 	if (optname != MRT6_INIT) {
1271 		if (sk != net->ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
1272 			return -EACCES;
1273 	}
1274 
1275 	switch (optname) {
1276 	case MRT6_INIT:
1277 		if (sk->sk_type != SOCK_RAW ||
1278 		    inet_sk(sk)->num != IPPROTO_ICMPV6)
1279 			return -EOPNOTSUPP;
1280 		if (optlen < sizeof(int))
1281 			return -EINVAL;
1282 
1283 		return ip6mr_sk_init(sk);
1284 
1285 	case MRT6_DONE:
1286 		return ip6mr_sk_done(sk);
1287 
1288 	case MRT6_ADD_MIF:
1289 		if (optlen < sizeof(vif))
1290 			return -EINVAL;
1291 		if (copy_from_user(&vif, optval, sizeof(vif)))
1292 			return -EFAULT;
1293 		if (vif.mif6c_mifi >= MAXMIFS)
1294 			return -ENFILE;
1295 		rtnl_lock();
1296 		ret = mif6_add(net, &vif, sk == net->ipv6.mroute6_sk);
1297 		rtnl_unlock();
1298 		return ret;
1299 
1300 	case MRT6_DEL_MIF:
1301 		if (optlen < sizeof(mifi_t))
1302 			return -EINVAL;
1303 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1304 			return -EFAULT;
1305 		rtnl_lock();
1306 		ret = mif6_delete(net, mifi);
1307 		rtnl_unlock();
1308 		return ret;
1309 
1310 	/*
1311 	 *	Manipulate the forwarding caches. These live
1312 	 *	in a sort of kernel/user symbiosis.
1313 	 */
1314 	case MRT6_ADD_MFC:
1315 	case MRT6_DEL_MFC:
1316 		if (optlen < sizeof(mfc))
1317 			return -EINVAL;
1318 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1319 			return -EFAULT;
1320 		rtnl_lock();
1321 		if (optname == MRT6_DEL_MFC)
1322 			ret = ip6mr_mfc_delete(net, &mfc);
1323 		else
1324 			ret = ip6mr_mfc_add(net, &mfc,
1325 					    sk == net->ipv6.mroute6_sk);
1326 		rtnl_unlock();
1327 		return ret;
1328 
1329 	/*
1330 	 *	Control PIM assert (to activate pim will activate assert)
1331 	 */
1332 	case MRT6_ASSERT:
1333 	{
1334 		int v;
1335 		if (get_user(v, (int __user *)optval))
1336 			return -EFAULT;
1337 		net->ipv6.mroute_do_assert = !!v;
1338 		return 0;
1339 	}
1340 
1341 #ifdef CONFIG_IPV6_PIMSM_V2
1342 	case MRT6_PIM:
1343 	{
1344 		int v;
1345 		if (get_user(v, (int __user *)optval))
1346 			return -EFAULT;
1347 		v = !!v;
1348 		rtnl_lock();
1349 		ret = 0;
1350 		if (v != net->ipv6.mroute_do_pim) {
1351 			net->ipv6.mroute_do_pim = v;
1352 			net->ipv6.mroute_do_assert = v;
1353 			if (net->ipv6.mroute_do_pim)
1354 				ret = inet6_add_protocol(&pim6_protocol,
1355 							 IPPROTO_PIM);
1356 			else
1357 				ret = inet6_del_protocol(&pim6_protocol,
1358 							 IPPROTO_PIM);
1359 			if (ret < 0)
1360 				ret = -EAGAIN;
1361 		}
1362 		rtnl_unlock();
1363 		return ret;
1364 	}
1365 
1366 #endif
1367 	/*
1368 	 *	Spurious command, or MRT6_VERSION which you cannot
1369 	 *	set.
1370 	 */
1371 	default:
1372 		return -ENOPROTOOPT;
1373 	}
1374 }
1375 
1376 /*
1377  *	Getsock opt support for the multicast routing system.
1378  */
1379 
1380 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1381 			  int __user *optlen)
1382 {
1383 	int olr;
1384 	int val;
1385 	struct net *net = sock_net(sk);
1386 
1387 	switch (optname) {
1388 	case MRT6_VERSION:
1389 		val = 0x0305;
1390 		break;
1391 #ifdef CONFIG_IPV6_PIMSM_V2
1392 	case MRT6_PIM:
1393 		val = net->ipv6.mroute_do_pim;
1394 		break;
1395 #endif
1396 	case MRT6_ASSERT:
1397 		val = net->ipv6.mroute_do_assert;
1398 		break;
1399 	default:
1400 		return -ENOPROTOOPT;
1401 	}
1402 
1403 	if (get_user(olr, optlen))
1404 		return -EFAULT;
1405 
1406 	olr = min_t(int, olr, sizeof(int));
1407 	if (olr < 0)
1408 		return -EINVAL;
1409 
1410 	if (put_user(olr, optlen))
1411 		return -EFAULT;
1412 	if (copy_to_user(optval, &val, olr))
1413 		return -EFAULT;
1414 	return 0;
1415 }
1416 
1417 /*
1418  *	The IP multicast ioctl support routines.
1419  */
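/*
 * Illustrative userspace call (sketch):
 *
 *	struct sioc_mif_req6 vr = { .mifi = 0 };
 *	if (ioctl(mrt_sock, SIOCGETMIFCNT_IN6, &vr) == 0)
 *		printf("mif0: %lu in, %lu out\n", vr.icount, vr.ocount);
 */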
1420 
1421 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1422 {
1423 	struct sioc_sg_req6 sr;
1424 	struct sioc_mif_req6 vr;
1425 	struct mif_device *vif;
1426 	struct mfc6_cache *c;
1427 	struct net *net = sock_net(sk);
1428 
1429 	switch (cmd) {
1430 	case SIOCGETMIFCNT_IN6:
1431 		if (copy_from_user(&vr, arg, sizeof(vr)))
1432 			return -EFAULT;
1433 		if (vr.mifi >= net->ipv6.maxvif)
1434 			return -EINVAL;
1435 		read_lock(&mrt_lock);
1436 		vif = &net->ipv6.vif6_table[vr.mifi];
1437 		if (MIF_EXISTS(net, vr.mifi)) {
1438 			vr.icount = vif->pkt_in;
1439 			vr.ocount = vif->pkt_out;
1440 			vr.ibytes = vif->bytes_in;
1441 			vr.obytes = vif->bytes_out;
1442 			read_unlock(&mrt_lock);
1443 
1444 			if (copy_to_user(arg, &vr, sizeof(vr)))
1445 				return -EFAULT;
1446 			return 0;
1447 		}
1448 		read_unlock(&mrt_lock);
1449 		return -EADDRNOTAVAIL;
1450 	case SIOCGETSGCNT_IN6:
1451 		if (copy_from_user(&sr, arg, sizeof(sr)))
1452 			return -EFAULT;
1453 
1454 		read_lock(&mrt_lock);
1455 		c = ip6mr_cache_find(net, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1456 		if (c) {
1457 			sr.pktcnt = c->mfc_un.res.pkt;
1458 			sr.bytecnt = c->mfc_un.res.bytes;
1459 			sr.wrong_if = c->mfc_un.res.wrong_if;
1460 			read_unlock(&mrt_lock);
1461 
1462 			if (copy_to_user(arg, &sr, sizeof(sr)))
1463 				return -EFAULT;
1464 			return 0;
1465 		}
1466 		read_unlock(&mrt_lock);
1467 		return -EADDRNOTAVAIL;
1468 	default:
1469 		return -ENOIOCTLCMD;
1470 	}
1471 }
1472 
1473 
1474 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1475 {
1476 	IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
1477 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1478 	return dst_output(skb);
1479 }
1480 
1481 /*
1482  *	Processing handlers for ip6mr_forward
1483  */
1484 
1485 static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1486 {
1487 	struct ipv6hdr *ipv6h;
1488 	struct net *net = mfc6_net(c);
1489 	struct mif_device *vif = &net->ipv6.vif6_table[vifi];
1490 	struct net_device *dev;
1491 	struct dst_entry *dst;
1492 	struct flowi fl;
1493 
1494 	if (vif->dev == NULL)
1495 		goto out_free;
1496 
1497 #ifdef CONFIG_IPV6_PIMSM_V2
1498 	if (vif->flags & MIFF_REGISTER) {
1499 		vif->pkt_out++;
1500 		vif->bytes_out += skb->len;
1501 		vif->dev->stats.tx_bytes += skb->len;
1502 		vif->dev->stats.tx_packets++;
1503 		ip6mr_cache_report(net, skb, vifi, MRT6MSG_WHOLEPKT);
1504 		goto out_free;
1505 	}
1506 #endif
1507 
1508 	ipv6h = ipv6_hdr(skb);
1509 
1510 	fl = (struct flowi) {
1511 		.oif = vif->link,
1512 		.nl_u = { .ip6_u =
1513 				{ .daddr = ipv6h->daddr, }
1514 		}
1515 	};
1516 
1517 	dst = ip6_route_output(net, NULL, &fl);
1518 	if (!dst)
1519 		goto out_free;
1520 
1521 	dst_release(skb->dst);
1522 	skb->dst = dst;
1523 
1524 	/*
1525 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1526 	 * not only before forwarding, but also after forwarding on all output
1527 	 * interfaces. Clearly, if the mrouter runs a multicast program, the
1528 	 * program should receive packets regardless of the interface on which
1529 	 * it joined.
1530 	 * If we did not do this, the program would have to join on all
1531 	 * interfaces. On the other hand, a multihomed host (or a router, but
1532 	 * not an mrouter) cannot join on more than one interface, as that
1533 	 * would result in receiving duplicate packets.
1534 	 */
1535 	dev = vif->dev;
1536 	skb->dev = dev;
1537 	vif->pkt_out++;
1538 	vif->bytes_out += skb->len;
1539 
1540 	/* We are about to write */
1541 	/* XXX: extension headers? */
1542 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1543 		goto out_free;
1544 
1545 	ipv6h = ipv6_hdr(skb);
1546 	ipv6h->hop_limit--;
1547 
1548 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1549 
1550 	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
1551 		       ip6mr_forward2_finish);
1552 
1553 out_free:
1554 	kfree_skb(skb);
1555 	return 0;
1556 }
1557 
1558 static int ip6mr_find_vif(struct net_device *dev)
1559 {
1560 	struct net *net = dev_net(dev);
1561 	int ct;
1562 	for (ct = net->ipv6.maxvif - 1; ct >= 0; ct--) {
1563 		if (net->ipv6.vif6_table[ct].dev == dev)
1564 			break;
1565 	}
1566 	return ct;
1567 }
1568 
1569 static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
1570 {
1571 	int psend = -1;
1572 	int vif, ct;
1573 	struct net *net = mfc6_net(cache);
1574 
1575 	vif = cache->mf6c_parent;
1576 	cache->mfc_un.res.pkt++;
1577 	cache->mfc_un.res.bytes += skb->len;
1578 
1579 	/*
1580 	 * Wrong interface: drop packet and (maybe) send PIM assert.
1581 	 */
1582 	if (net->ipv6.vif6_table[vif].dev != skb->dev) {
1583 		int true_vifi;
1584 
1585 		cache->mfc_un.res.wrong_if++;
1586 		true_vifi = ip6mr_find_vif(skb->dev);
1587 
1588 		if (true_vifi >= 0 && net->ipv6.mroute_do_assert &&
1589 		    /* PIM-SM uses asserts when switching from the RPT to the SPT,
1590 		       so we cannot check that the packet arrived on an oif.
1591 		       That is bad, but otherwise we would have to move a pretty
1592 		       large chunk of pimd into the kernel. Ough... --ANK
1593 		     */
1594 		    (net->ipv6.mroute_do_pim ||
1595 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
1596 		    time_after(jiffies,
1597 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1598 			cache->mfc_un.res.last_assert = jiffies;
1599 			ip6mr_cache_report(net, skb, true_vifi, MRT6MSG_WRONGMIF);
1600 		}
1601 		goto dont_forward;
1602 	}
1603 
1604 	net->ipv6.vif6_table[vif].pkt_in++;
1605 	net->ipv6.vif6_table[vif].bytes_in += skb->len;
1606 
1607 	/*
1608 	 *	Forward the frame
1609 	 */
1610 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
1611 		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
1612 			if (psend != -1) {
1613 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1614 				if (skb2)
1615 					ip6mr_forward2(skb2, cache, psend);
1616 			}
1617 			psend = ct;
1618 		}
1619 	}
1620 	if (psend != -1) {
1621 		ip6mr_forward2(skb, cache, psend);
1622 		return 0;
1623 	}
1624 
1625 dont_forward:
1626 	kfree_skb(skb);
1627 	return 0;
1628 }
1629 
1630 
1631 /*
1632  *	Multicast packets for forwarding arrive here
1633  */
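/*
 * Reached from the IPv6 multicast receive path (ip6_mc_input()) when
 * multicast forwarding is enabled; ip6mr_get_route() below answers the
 * corresponding RTM_GETROUTE queries for (S,G) state.
 */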
1634 
1635 int ip6_mr_input(struct sk_buff *skb)
1636 {
1637 	struct mfc6_cache *cache;
1638 	struct net *net = dev_net(skb->dev);
1639 
1640 	read_lock(&mrt_lock);
1641 	cache = ip6mr_cache_find(net,
1642 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1643 
1644 	/*
1645 	 *	No usable cache entry
1646 	 */
1647 	if (cache == NULL) {
1648 		int vif;
1649 
1650 		vif = ip6mr_find_vif(skb->dev);
1651 		if (vif >= 0) {
1652 			int err = ip6mr_cache_unresolved(net, vif, skb);
1653 			read_unlock(&mrt_lock);
1654 
1655 			return err;
1656 		}
1657 		read_unlock(&mrt_lock);
1658 		kfree_skb(skb);
1659 		return -ENODEV;
1660 	}
1661 
1662 	ip6_mr_forward(skb, cache);
1663 
1664 	read_unlock(&mrt_lock);
1665 
1666 	return 0;
1667 }
1668 
1669 
1670 static int
1671 ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
1672 {
1673 	int ct;
1674 	struct rtnexthop *nhp;
1675 	struct net *net = mfc6_net(c);
1676 	struct net_device *dev = net->ipv6.vif6_table[c->mf6c_parent].dev;
1677 	u8 *b = skb_tail_pointer(skb);
1678 	struct rtattr *mp_head;
1679 
1680 	if (dev)
1681 		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1682 
1683 	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1684 
1685 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1686 		if (MIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) {	/* guard against a vif torn down under us */
1687 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1688 				goto rtattr_failure;
1689 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1690 			nhp->rtnh_flags = 0;
1691 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1692 			nhp->rtnh_ifindex = net->ipv6.vif6_table[ct].dev->ifindex;
1693 			nhp->rtnh_len = sizeof(*nhp);
1694 		}
1695 	}
1696 	mp_head->rta_type = RTA_MULTIPATH;
1697 	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1698 	rtm->rtm_type = RTN_MULTICAST;
1699 	return 1;
1700 
1701 rtattr_failure:
1702 	nlmsg_trim(skb, b);
1703 	return -EMSGSIZE;
1704 }
1705 
1706 int ip6mr_get_route(struct net *net,
1707 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1708 {
1709 	int err;
1710 	struct mfc6_cache *cache;
1711 	struct rt6_info *rt = (struct rt6_info *)skb->dst;
1712 
1713 	read_lock(&mrt_lock);
1714 	cache = ip6mr_cache_find(net, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
1715 
1716 	if (!cache) {
1717 		struct sk_buff *skb2;
1718 		struct ipv6hdr *iph;
1719 		struct net_device *dev;
1720 		int vif;
1721 
1722 		if (nowait) {
1723 			read_unlock(&mrt_lock);
1724 			return -EAGAIN;
1725 		}
1726 
1727 		dev = skb->dev;
1728 		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
1729 			read_unlock(&mrt_lock);
1730 			return -ENODEV;
1731 		}
1732 
1733 		/* really correct? */
1734 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
1735 		if (!skb2) {
1736 			read_unlock(&mrt_lock);
1737 			return -ENOMEM;
1738 		}
1739 
1740 		skb_reset_transport_header(skb2);
1741 
1742 		skb_put(skb2, sizeof(struct ipv6hdr));
1743 		skb_reset_network_header(skb2);
1744 
1745 		iph = ipv6_hdr(skb2);
1746 		iph->version = 0;	/* marks the skb as a queued netlink request */
1747 		iph->priority = 0;
1748 		iph->flow_lbl[0] = 0;
1749 		iph->flow_lbl[1] = 0;
1750 		iph->flow_lbl[2] = 0;
1751 		iph->payload_len = 0;
1752 		iph->nexthdr = IPPROTO_NONE;
1753 		iph->hop_limit = 0;
1754 		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
1755 		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
1756 
1757 		err = ip6mr_cache_unresolved(net, vif, skb2);
1758 		read_unlock(&mrt_lock);
1759 
1760 		return err;
1761 	}
1762 
1763 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1764 		cache->mfc_flags |= MFC_NOTIFY;
1765 
1766 	err = ip6mr_fill_mroute(skb, cache, rtm);
1767 	read_unlock(&mrt_lock);
1768 	return err;
1769 }
1770 
1771