/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
#include <net/ip6_checksum.h>

/* Big lock, protecting the vif table, mrt cache and mroute socket state.
   Note that changes are serialized via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)

static struct mfc6_cache *mfc_unres_queue;		/* Queue of unresolved entries */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We revert to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and protected
   with the weak lock mrt_lock. The queue of unresolved entries
   is protected with the strong spinlock mfc_unres_lock.

   This way the data path is entirely free of exclusive locks.
 */
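
/* Lock pairing summary (an illustrative recap of the rules above, not a
 * new discipline):
 *
 *	read_lock(&mrt_lock)		- data-path and /proc lookups
 *	write_lock_bh(&mrt_lock)	- vif table and resolved-cache updates
 *					  (process context, under rtnl_lock)
 *	spin_lock_bh(&mfc_unres_lock)	- unresolved queue and its expire timer
 */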

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
static void mroute_clean_tables(struct net *net);

#ifdef CONFIG_IPV6_PIMSM_V2
static struct inet6_protocol pim6_protocol;
#endif

static struct timer_list ipmr_expire_timer;

#ifdef CONFIG_PROC_FS

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mfc6_cache **cache;
	int ct;
};

static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
					   struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc6_cache *mfc;

	it->cache = net->ipv6.mfc6_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++)
		for (mfc = net->ipv6.mfc6_cache_array[it->ct];
		     mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (net_eq(mfc6_net(mfc), net) &&
		    pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

/*
 *	The /proc interfaces to multicast routing:
 *	/proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
 */
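
/* Sample /proc/net/ip6_mr_vif output (illustrative only; the exact
 * columns are produced by ip6mr_vif_seq_show() below):
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0              0       0         0       0 00000
 */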

struct ipmr_vif_iter {
	struct seq_net_private p;
	int ct;
};

static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
					    struct ipmr_vif_iter *iter,
					    loff_t pos)
{
	for (iter->ct = 0; iter->ct < net->ipv6.maxvif; ++iter->ct) {
		if (!MIF_EXISTS(net, iter->ct))
			continue;
		if (pos-- == 0)
			return &net->ipv6.vif6_table[iter->ct];
	}
	return NULL;
}

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct net *net = seq_file_net(seq);

	read_lock(&mrt_lock);
	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip6mr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < net->ipv6.maxvif) {
		if (!MIF_EXISTS(net, iter->ct))
			continue;
		return &net->ipv6.vif6_table[iter->ct];
	}
	return NULL;
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - net->ipv6.vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);

	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != net->ipv6.mfc6_cache_array);

	while (++it->ct < MFC6_LINES) {
		mfc = net->ipv6.mfc6_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	if (mfc)
		return mfc;

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == net->ipv6.mfc6_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->mf6c_parent);

		if (it->cache != &mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(net, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IPV6_PIMSM_V2

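/* Shape of the PIM Register message that pim6_rcv() validates (a sketch
 * following RFC 4601, not a definition taken from this file):
 *
 *	outer IPv6 header, nexthdr = IPPROTO_PIM
 *	struct pimreghdr: type = (PIM_VERSION << 4) | PIM_REGISTER,
 *			  a checksum over the header, and flags carrying
 *			  the Null-Register bit (PIM_NULL_REGISTER)
 *	inner (encapsulated) IPv6 header, daddr = the multicast group
 */
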
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	int reg_vif_num = net->ipv6.mroute_reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to a multicast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = net->ipv6.vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	skb->dst = NULL;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}

static struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(net, skb, net->ipv6.mroute_reg_vif_num,
			   MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
}

static struct net_device *ip6mr_reg_vif(struct net *net)
{
	struct net_device *dev;

	dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	/* allow registration to complete before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 */

static int mif6_delete(struct net *net, int vifi)
{
	struct mif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= net->ipv6.maxvif)
		return -EADDRNOTAVAIL;

	v = &net->ipv6.vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == net->ipv6.mroute_reg_vif_num)
		net->ipv6.mroute_reg_vif_num = -1;
#endif

	if (vifi + 1 == net->ipv6.maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(net, tmp))
				break;
		}
		net->ipv6.maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev)
		in6_dev->cnf.mc_forwarding--;

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	release_net(mfc6_net(c));
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting an error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mfc6_cache *c)
{
	struct sk_buff *skb;
	struct net *net = mfc6_net(c);

	atomic_dec(&net->ipv6.cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}

/* A single timer handles expiry for the entire unresolved queue. */

static void ipmr_do_expire_process(unsigned long dummy)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mfc6_cache *c, **cp;

	cp = &mfc_unres_queue;

	while ((c = *cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;
		ip6mr_destroy_unres(c);
	}

	if (mfc_unres_queue != NULL)
		mod_timer(&ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(unsigned long dummy)
{
	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (mfc_unres_queue != NULL)
		ipmr_do_expire_process(dummy);

	spin_unlock(&mfc_unres_lock);
}

606 

static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
{
	int vifi;
	struct net *net = mfc6_net(cache);

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < net->ipv6.maxvif; vifi++) {
		if (MIF_EXISTS(net, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &net->ipv6.vif6_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is the vif busy? */
	if (MIF_EXISTS(net, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (net->ipv6.mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev)
		in6_dev->cnf.mc_forwarding++;

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		net->ipv6.mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > net->ipv6.maxvif)
		net->ipv6.maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc6_cache *ip6mr_cache_find(struct net *net,
					   struct in6_addr *origin,
					   struct in6_addr *mcastgrp)
{
	int line = MFC6_HASH(mcastgrp, origin);
	struct mfc6_cache *c;

	for (c = net->ipv6.mfc6_cache_array[line]; c; c = c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc6_cache *ip6mr_cache_alloc(struct net *net)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXMIFS;
	mfc6_net_set(c, net);
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10 * HZ;
	mfc6_net_set(c, net);
	return c;
}

/*
 *	A cache entry has gone from the unresolved queue to a resolved state
 */

static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			int err;
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
			}
			err = rtnl_unicast(skb, mfc6_net(uc), NETLINK_CB(skb).pid);
		} else
			ip6_mr_forward(skb, c);
	}
}

/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */
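
/* The receiving side, sketched as userspace code (illustrative, not part
 * of this file): pim6sd reads upcalls from the socket it passed to
 * MRT6_INIT and locates the struct mrt6msg as laid out by
 * ip6mr_cache_report() below.
 */
#if 0
	char buf[8192];
	ssize_t n = recv(mrt_fd, buf, sizeof(buf), 0);	/* mrt_fd: hypothetical */

	if (n > 0) {
		/* For MRT6MSG_NOCACHE the mrt6msg follows a copy of the
		 * triggering IPv6 header; for MRT6MSG_WHOLEPKT it precedes
		 * the captured packet. */
	}
#endif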

static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
			      int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = net->ipv6.mroute_reg_vif_num;
		msg->im6_pad = 0;
		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

	skb->dst = dst_clone(pkt->dst);
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	if (net->ipv6.mroute6_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(net->ipv6.mroute6_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution, creating an unresolved cache entry if needed
 */

static int
ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
{
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	for (c = mfc_unres_queue; c; c = c->next) {
		if (net_eq(mfc6_net(c), net) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&net->ipv6.cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres(net)) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Report the first packet to pim6sd
		 */
		err = ip6mr_cache_report(net, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed, throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&net->ipv6.cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		ipmr_do_expire_process(1);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */
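
/* Illustrative userspace counterpart (a sketch under assumptions: mrt_fd
 * is an MRT6_INIT'ed raw ICMPv6 socket, source_addr/group_addr are
 * hypothetical struct in6_addr values): install an (S,G) entry arriving
 * on mif 0 and forwarded to mif 1.
 */
#if 0
	struct mf6cctl mc;

	memset(&mc, 0, sizeof(mc));
	mc.mf6cc_origin.sin6_addr   = source_addr;
	mc.mf6cc_mcastgrp.sin6_addr = group_addr;
	mc.mf6cc_parent = 0;			/* incoming mif */
	IF_SET(1, &mc.mf6cc_ifset);		/* forward out of mif 1 */
	setsockopt(mrt_fd, IPPROTO_IPV6, MRT6_ADD_MFC, &mc, sizeof(mc));
#endif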

static int ip6mr_mfc_delete(struct net *net, struct mf6cctl *mfc)
{
	int line;
	struct mfc6_cache *c, **cp;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &net->ipv6.mfc6_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ip6mr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct mif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	v = &net->ipv6.vif6_table[0];
	for (ct = 0; ct < net->ipv6.maxvif; ct++, v++) {
		if (v->dev == dev)
			mif6_delete(net, ct);
	}
	return NOTIFY_DONE;
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

/*
 *	Setup for IP multicast routing
 */

static int __net_init ip6mr_net_init(struct net *net)
{
	int err = 0;

	net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
				       GFP_KERNEL);
	if (!net->ipv6.vif6_table) {
		err = -ENOMEM;
		goto fail;
	}

	/* Forwarding cache */
	net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
					     sizeof(struct mfc6_cache *),
					     GFP_KERNEL);
	if (!net->ipv6.mfc6_cache_array) {
		err = -ENOMEM;
		goto fail_mfc6_cache;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	net->ipv6.mroute_reg_vif_num = -1;
#endif

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip6_mr_vif");
proc_vif_fail:
	kfree(net->ipv6.mfc6_cache_array);
#endif
fail_mfc6_cache:
	kfree(net->ipv6.vif6_table);
fail:
	return err;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip6_mr_cache");
	proc_net_remove(net, "ip6_mr_vif");
#endif
	mroute_clean_tables(net);
	kfree(net->ipv6.mfc6_cache_array);
	kfree(net->ipv6.vif6_table);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};

int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
	return 0;
reg_notif_fail:
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}

void ip6_mr_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_mr_notifier);
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}

static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
{
	int line;
	struct mfc6_cache *uc, *c, **cp;
	unsigned char ttls[MAXMIFS];
	int i;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &net->ipv6.mfc6_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc(net);
	if (c == NULL)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = net->ipv6.mfc6_cache_array[line];
	net->ipv6.mfc6_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check whether we just resolved a queued entry. If so,
	 *	send the queued frames on their way and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (net_eq(mfc6_net(uc), net) &&
		    ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			*cp = uc->next;
			atomic_dec(&net->ipv6.cache_resolve_queue_len);
			break;
		}
	}
	if (mfc_unres_queue == NULL)
		del_timer(&ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ip6mr_cache_resolve(uc, c);
		ip6mr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc.
 */

static void mroute_clean_tables(struct net *net)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < net->ipv6.maxvif; i++) {
		if (!(net->ipv6.vif6_table[i].flags & VIFF_STATIC))
			mif6_delete(net, i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC6_LINES; i++) {
		struct mfc6_cache *c, **cp;

		cp = &net->ipv6.mfc6_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags & MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ip6mr_cache_free(c);
		}
	}

	if (atomic_read(&net->ipv6.cache_resolve_queue_len) != 0) {
		struct mfc6_cache *c, **cp;

		spin_lock_bh(&mfc_unres_lock);
		cp = &mfc_unres_queue;
		while ((c = *cp) != NULL) {
			if (!net_eq(mfc6_net(c), net)) {
				cp = &c->next;
				continue;
			}
			*cp = c->next;
			ip6mr_destroy_unres(c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static int ip6mr_sk_init(struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(net->ipv6.mroute6_sk == NULL)) {
		net->ipv6.mroute6_sk = sk;
		net->ipv6.devconf_all->mc_forwarding++;
	} else
		err = -EADDRINUSE;
	write_unlock_bh(&mrt_lock);

	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	if (sk == net->ipv6.mroute6_sk) {
		write_lock_bh(&mrt_lock);
		net->ipv6.mroute6_sk = NULL;
		net->ipv6.devconf_all->mc_forwarding--;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(net);
	} else
		err = -EACCES;
	rtnl_unlock();

	return err;
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */
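
/* A minimal pim6sd-style control sequence, sketched as userspace code
 * (illustrative; error handling omitted, "eth0" is an assumed interface):
 */
#if 0
	int mrt_fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
	int one = 1;
	struct mif6ctl mif;

	setsockopt(mrt_fd, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));

	memset(&mif, 0, sizeof(mif));
	mif.mif6c_mifi = 0;
	mif.mif6c_pifi = if_nametoindex("eth0");
	setsockopt(mrt_fd, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));

	/* ... MRT6_ADD_MFC entries as in the earlier example ... */

	setsockopt(mrt_fd, IPPROTO_IPV6, MRT6_DONE, NULL, 0);
#endif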

int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);

	if (optname != MRT6_INIT) {
		if (sk != net->ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, &vif, sk == net->ipv6.mroute6_sk);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(net, mifi);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC)
			ret = ip6mr_mfc_delete(net, &mfc);
		else
			ret = ip6mr_mfc_add(net, &mfc,
					    sk == net->ipv6.mroute6_sk);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (activating PIM also activates assert)
	 */
	case MRT6_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		net->ipv6.mroute_do_assert = !!v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != net->ipv6.mroute_do_pim) {
			net->ipv6.mroute_do_pim = v;
			net->ipv6.mroute_do_assert = v;
			if (net->ipv6.mroute_do_pim)
				ret = inet6_add_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			else
				ret = inet6_del_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
		}
		rtnl_unlock();
		return ret;
	}

#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsockopt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = net->ipv6.mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = net->ipv6.mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */
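
/* Illustrative usage (a sketch; mrt_fd and the addresses are assumed, as
 * in the earlier examples): read the per-(S,G) counters kept by the
 * forwarding path.
 */
#if 0
	struct sioc_sg_req6 sr;

	memset(&sr, 0, sizeof(sr));
	sr.src.sin6_addr = source_addr;
	sr.grp.sin6_addr = group_addr;
	if (ioctl(mrt_fd, SIOCGETSGCNT_IN6, &sr) == 0)
		printf("pkts %lu bytes %lu wrong_if %lu\n",
		       sr.pktcnt, sr.bytecnt, sr.wrong_if);
#endif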

int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= net->ipv6.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &net->ipv6.vif6_table[vr.mifi];
		if (MIF_EXISTS(net, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(net, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

static inline int ip6mr_forward2_finish(struct sk_buff *skb)
{
	IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
			 IPSTATS_MIB_OUTFORWDATAGRAMS);
	return dst_output(skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct net *net = mfc6_net(c);
	struct mif_device *vif = &net->ipv6.vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi fl;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(net, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl = (struct flowi) {
		.oif = vif->link,
		.nl_u = { .ip6_u =
				{ .daddr = ipv6h->daddr, }
		}
	};

	dst = ip6_route_output(net, NULL, &fl);
	if (!dst)
		goto out_free;

	dst_release(skb->dst);
	skb->dst = dst;

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding on
	 * all output interfaces. Clearly, if the mrouter runs a multicast
	 * program, that program should receive packets regardless of the
	 * interface it joined on. If we did not do this, the program would
	 * have to join on all interfaces. On the other hand, a multihomed
	 * host (or router, but not mrouter) cannot join on more than one
	 * interface - that would result in receiving duplicate packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

static int ip6mr_find_vif(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	int ct;

	for (ct = net->ipv6.maxvif - 1; ct >= 0; ct--) {
		if (net->ipv6.vif6_table[ct].dev == dev)
			break;
	}
	return ct;
}

static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;
	struct net *net = mfc6_net(cache);

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (net->ipv6.vif6_table[vif].dev != skb->dev) {
		int true_vifi;

		cache->mfc_un.res.wrong_if++;
		true_vifi = ip6mr_find_vif(skb->dev);

		if (true_vifi >= 0 && net->ipv6.mroute_do_assert &&
		    /* PIM-SM uses asserts when switching from RPT to SPT,
		       so we cannot check that the packet arrived on an oif.
		       That is bad, but otherwise we would need to move a
		       pretty large chunk of pimd into the kernel. Ough... --ANK
		     */
		    (net->ipv6.mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(net, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

	net->ipv6.vif6_table[vif].pkt_in++;
	net->ipv6.vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		ip6mr_forward2(skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}

/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(net,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		vif = ip6mr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(net, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}

static int
ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net *net = mfc6_net(c);
	struct net_device *dev = net->ipv6.vif6_table[c->mf6c_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = net->ipv6.vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

int ip6mr_get_route(struct net *net,
		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(net, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);

		err = ip6mr_cache_unresolved(net, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = ip6mr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}