xref: /openbmc/linux/net/ipv6/ip6mr.c (revision 9d56dd3b083a3bec56e9da35ce07baca81030b03)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <asm/system.h>
20 #include <asm/uaccess.h>
21 #include <linux/types.h>
22 #include <linux/sched.h>
23 #include <linux/errno.h>
24 #include <linux/timer.h>
25 #include <linux/mm.h>
26 #include <linux/kernel.h>
27 #include <linux/fcntl.h>
28 #include <linux/stat.h>
29 #include <linux/socket.h>
30 #include <linux/inet.h>
31 #include <linux/netdevice.h>
32 #include <linux/inetdevice.h>
33 #include <linux/proc_fs.h>
34 #include <linux/seq_file.h>
35 #include <linux/init.h>
36 #include <net/protocol.h>
37 #include <linux/skbuff.h>
38 #include <net/sock.h>
39 #include <net/raw.h>
40 #include <linux/notifier.h>
41 #include <linux/if_arp.h>
42 #include <net/checksum.h>
43 #include <net/netlink.h>
44 
45 #include <net/ipv6.h>
46 #include <net/ip6_route.h>
47 #include <linux/mroute6.h>
48 #include <linux/pim.h>
49 #include <net/addrconf.h>
50 #include <linux/netfilter_ipv6.h>
51 #include <net/ip6_checksum.h>
52 
53 /* Big lock, protecting the vif table, the mrt cache and the mroute
54    socket state. Note that changes are serialized via rtnl_lock.
55  */
56 
57 static DEFINE_RWLOCK(mrt_lock);
58 
59 /*
60  *	Multicast router control variables
61  */
62 
63 #define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)
64 
65 static struct mfc6_cache *mfc_unres_queue;		/* Queue of unresolved entries */
66 
67 /* Special spinlock for queue of unresolved entries */
68 static DEFINE_SPINLOCK(mfc_unres_lock);
69 
70 /* We return to Alan's original scheme. The hash table of resolved
71    entries is changed only in process context and is protected by the
72    weak lock mrt_lock. The queue of unresolved entries is protected
73    by the strong spinlock mfc_unres_lock.
74 
75    This way the data path is entirely free of exclusive locks.
76  */
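/*
 * Illustrative sketch (not part of the original file): the resulting
 * locking discipline. The data path only ever takes mrt_lock for
 * reading; reconfiguration takes it for writing from process context;
 * the unresolved queue always requires its own spinlock:
 *
 *	read_lock(&mrt_lock);		// fast path: look up resolved entry
 *	c = ip6mr_cache_find(net, &src, &grp);
 *	read_unlock(&mrt_lock);
 *
 *	spin_lock_bh(&mfc_unres_lock);	// unresolved queue: strong lock
 *	... walk or modify mfc_unres_queue ...
 *	spin_unlock_bh(&mfc_unres_lock);
 */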
77 
78 static struct kmem_cache *mrt_cachep __read_mostly;
79 
80 static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
81 static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt,
82 			      mifi_t mifi, int assert);
83 static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
84 static void mroute_clean_tables(struct net *net);
85 
86 static struct timer_list ipmr_expire_timer;
87 
88 
89 #ifdef CONFIG_PROC_FS
90 
91 struct ipmr_mfc_iter {
92 	struct seq_net_private p;
93 	struct mfc6_cache **cache;
94 	int ct;
95 };
96 
97 
98 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
99 					   struct ipmr_mfc_iter *it, loff_t pos)
100 {
101 	struct mfc6_cache *mfc;
102 
103 	it->cache = net->ipv6.mfc6_cache_array;
104 	read_lock(&mrt_lock);
105 	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++)
106 		for (mfc = net->ipv6.mfc6_cache_array[it->ct];
107 		     mfc; mfc = mfc->next)
108 			if (pos-- == 0)
109 				return mfc;
110 	read_unlock(&mrt_lock);
111 
112 	it->cache = &mfc_unres_queue;
113 	spin_lock_bh(&mfc_unres_lock);
114 	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
115 		if (net_eq(mfc6_net(mfc), net) &&
116 		    pos-- == 0)
117 			return mfc;
118 	spin_unlock_bh(&mfc_unres_lock);
119 
120 	it->cache = NULL;
121 	return NULL;
122 }
123 
124 
125 
126 
127 /*
128  *	The /proc interfaces to multicast routing: /proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
129  */
130 
131 struct ipmr_vif_iter {
132 	struct seq_net_private p;
133 	int ct;
134 };
135 
136 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
137 					    struct ipmr_vif_iter *iter,
138 					    loff_t pos)
139 {
140 	for (iter->ct = 0; iter->ct < net->ipv6.maxvif; ++iter->ct) {
141 		if (!MIF_EXISTS(net, iter->ct))
142 			continue;
143 		if (pos-- == 0)
144 			return &net->ipv6.vif6_table[iter->ct];
145 	}
146 	return NULL;
147 }
148 
149 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
150 	__acquires(mrt_lock)
151 {
152 	struct net *net = seq_file_net(seq);
153 
154 	read_lock(&mrt_lock);
155 	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
156 		: SEQ_START_TOKEN;
157 }
158 
159 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
160 {
161 	struct ipmr_vif_iter *iter = seq->private;
162 	struct net *net = seq_file_net(seq);
163 
164 	++*pos;
165 	if (v == SEQ_START_TOKEN)
166 		return ip6mr_vif_seq_idx(net, iter, 0);
167 
168 	while (++iter->ct < net->ipv6.maxvif) {
169 		if (!MIF_EXISTS(net, iter->ct))
170 			continue;
171 		return &net->ipv6.vif6_table[iter->ct];
172 	}
173 	return NULL;
174 }
175 
176 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
177 	__releases(mrt_lock)
178 {
179 	read_unlock(&mrt_lock);
180 }
181 
182 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
183 {
184 	struct net *net = seq_file_net(seq);
185 
186 	if (v == SEQ_START_TOKEN) {
187 		seq_puts(seq,
188 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
189 	} else {
190 		const struct mif_device *vif = v;
191 		const char *name = vif->dev ? vif->dev->name : "none";
192 
193 		seq_printf(seq,
194 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
195 			   vif - net->ipv6.vif6_table,
196 			   name, vif->bytes_in, vif->pkt_in,
197 			   vif->bytes_out, vif->pkt_out,
198 			   vif->flags);
199 	}
200 	return 0;
201 }
202 
203 static const struct seq_operations ip6mr_vif_seq_ops = {
204 	.start = ip6mr_vif_seq_start,
205 	.next  = ip6mr_vif_seq_next,
206 	.stop  = ip6mr_vif_seq_stop,
207 	.show  = ip6mr_vif_seq_show,
208 };
209 
210 static int ip6mr_vif_open(struct inode *inode, struct file *file)
211 {
212 	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
213 			    sizeof(struct ipmr_vif_iter));
214 }
215 
216 static const struct file_operations ip6mr_vif_fops = {
217 	.owner	 = THIS_MODULE,
218 	.open    = ip6mr_vif_open,
219 	.read    = seq_read,
220 	.llseek  = seq_lseek,
221 	.release = seq_release_net,
222 };
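/*
 * For reference, a read of /proc/net/ip6_mr_vif rendered by the seq
 * handlers above looks like this (hypothetical counters, columns
 * approximate):
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0           9540      62      1290      14 00000
 *	 1 pim6reg           0       0         0       0 00001
 */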
223 
224 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
225 {
226 	struct net *net = seq_file_net(seq);
227 
228 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
229 		: SEQ_START_TOKEN;
230 }
231 
232 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
233 {
234 	struct mfc6_cache *mfc = v;
235 	struct ipmr_mfc_iter *it = seq->private;
236 	struct net *net = seq_file_net(seq);
237 
238 	++*pos;
239 
240 	if (v == SEQ_START_TOKEN)
241 		return ipmr_mfc_seq_idx(net, seq->private, 0);
242 
243 	if (mfc->next)
244 		return mfc->next;
245 
246 	if (it->cache == &mfc_unres_queue)
247 		goto end_of_list;
248 
249 	BUG_ON(it->cache != net->ipv6.mfc6_cache_array);
250 
251 	while (++it->ct < MFC6_LINES) {
252 		mfc = net->ipv6.mfc6_cache_array[it->ct];
253 		if (mfc)
254 			return mfc;
255 	}
256 
257 	/* exhausted cache_array, show unresolved */
258 	read_unlock(&mrt_lock);
259 	it->cache = &mfc_unres_queue;
260 	it->ct = 0;
261 
262 	spin_lock_bh(&mfc_unres_lock);
263 	mfc = mfc_unres_queue;
264 	if (mfc)
265 		return mfc;
266 
267  end_of_list:
268 	spin_unlock_bh(&mfc_unres_lock);
269 	it->cache = NULL;
270 
271 	return NULL;
272 }
273 
274 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
275 {
276 	struct ipmr_mfc_iter *it = seq->private;
277 	struct net *net = seq_file_net(seq);
278 
279 	if (it->cache == &mfc_unres_queue)
280 		spin_unlock_bh(&mfc_unres_lock);
281 	else if (it->cache == net->ipv6.mfc6_cache_array)
282 		read_unlock(&mrt_lock);
283 }
284 
285 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
286 {
287 	int n;
288 	struct net *net = seq_file_net(seq);
289 
290 	if (v == SEQ_START_TOKEN) {
291 		seq_puts(seq,
292 			 "Group                            "
293 			 "Origin                           "
294 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
295 	} else {
296 		const struct mfc6_cache *mfc = v;
297 		const struct ipmr_mfc_iter *it = seq->private;
298 
299 		seq_printf(seq, "%pI6 %pI6 %-3hd",
300 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
301 			   mfc->mf6c_parent);
302 
303 		if (it->cache != &mfc_unres_queue) {
304 			seq_printf(seq, " %8lu %8lu %8lu",
305 				   mfc->mfc_un.res.pkt,
306 				   mfc->mfc_un.res.bytes,
307 				   mfc->mfc_un.res.wrong_if);
308 			for (n = mfc->mfc_un.res.minvif;
309 			     n < mfc->mfc_un.res.maxvif; n++) {
310 				if (MIF_EXISTS(net, n) &&
311 				    mfc->mfc_un.res.ttls[n] < 255)
312 					seq_printf(seq,
313 						   " %2d:%-3d",
314 						   n, mfc->mfc_un.res.ttls[n]);
315 			}
316 		} else {
317 			/* unresolved mfc_caches don't contain
318 			 * pkt, bytes and wrong_if values
319 			 */
320 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
321 		}
322 		seq_putc(seq, '\n');
323 	}
324 	return 0;
325 }
326 
327 static const struct seq_operations ipmr_mfc_seq_ops = {
328 	.start = ipmr_mfc_seq_start,
329 	.next  = ipmr_mfc_seq_next,
330 	.stop  = ipmr_mfc_seq_stop,
331 	.show  = ipmr_mfc_seq_show,
332 };
333 
334 static int ipmr_mfc_open(struct inode *inode, struct file *file)
335 {
336 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
337 			    sizeof(struct ipmr_mfc_iter));
338 }
339 
340 static const struct file_operations ip6mr_mfc_fops = {
341 	.owner	 = THIS_MODULE,
342 	.open    = ipmr_mfc_open,
343 	.read    = seq_read,
344 	.llseek  = seq_lseek,
345 	.release = seq_release_net,
346 };
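/*
 * For reference, /proc/net/ip6_mr_cache rendered by the handlers above
 * (hypothetical addresses and counters); unresolved entries show zero
 * for Pkts/Bytes/Wrong:
 *
 *	Group                            Origin                           Iif      Pkts  Bytes     Wrong  Oifs
 *	ff3e::1234                       2001:db8::1                      0         120    96000        0  1:1
 */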
347 #endif
348 
349 #ifdef CONFIG_IPV6_PIMSM_V2
350 
351 static int pim6_rcv(struct sk_buff *skb)
352 {
353 	struct pimreghdr *pim;
354 	struct ipv6hdr   *encap;
355 	struct net_device  *reg_dev = NULL;
356 	struct net *net = dev_net(skb->dev);
357 	int reg_vif_num = net->ipv6.mroute_reg_vif_num;
358 
359 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
360 		goto drop;
361 
362 	pim = (struct pimreghdr *)skb_transport_header(skb);
363 	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
364 	    (pim->flags & PIM_NULL_REGISTER) ||
365 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
366 			     sizeof(*pim), IPPROTO_PIM,
367 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
368 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
369 		goto drop;
370 
371 	/* check that the inner packet is destined to a multicast group */
372 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
373 				   sizeof(*pim));
374 
375 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
376 	    encap->payload_len == 0 ||
377 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
378 		goto drop;
379 
380 	read_lock(&mrt_lock);
381 	if (reg_vif_num >= 0)
382 		reg_dev = net->ipv6.vif6_table[reg_vif_num].dev;
383 	if (reg_dev)
384 		dev_hold(reg_dev);
385 	read_unlock(&mrt_lock);
386 
387 	if (reg_dev == NULL)
388 		goto drop;
389 
390 	skb->mac_header = skb->network_header;
391 	skb_pull(skb, (u8 *)encap - skb->data);
392 	skb_reset_network_header(skb);
393 	skb->dev = reg_dev;
394 	skb->protocol = htons(ETH_P_IPV6);
395 	skb->ip_summed = CHECKSUM_NONE;
396 	skb->pkt_type = PACKET_HOST;
397 	skb_dst_drop(skb);
398 	reg_dev->stats.rx_bytes += skb->len;
399 	reg_dev->stats.rx_packets++;
400 	nf_reset(skb);
401 	netif_rx(skb);
402 	dev_put(reg_dev);
403 	return 0;
404  drop:
405 	kfree_skb(skb);
406 	return 0;
407 }
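/*
 * Illustrative note: the register header validated above is the RFC 4601
 * PIM Register message, as declared in <linux/pim.h>:
 *
 *	struct pimreghdr {
 *		__u8	type;		// (PIM_VERSION << 4) | PIM_REGISTER
 *		__u8	reserved;
 *		__be16	csum;		// over the header, or the whole packet
 *		__be32	flags;		// top bits: Border, Null-Register
 *	};
 *
 * pim6_rcv() accepts the packet if either checksum form verifies, then
 * strips the outer headers and re-injects the inner IPv6 packet through
 * netif_rx() as if it had arrived on the pim6reg device.
 */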
408 
409 static const struct inet6_protocol pim6_protocol = {
410 	.handler	=	pim6_rcv,
411 };
412 
413 /* Service routines creating virtual interfaces: PIMREG */
414 
415 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
416 				      struct net_device *dev)
417 {
418 	struct net *net = dev_net(dev);
419 
420 	read_lock(&mrt_lock);
421 	dev->stats.tx_bytes += skb->len;
422 	dev->stats.tx_packets++;
423 	ip6mr_cache_report(net, skb, net->ipv6.mroute_reg_vif_num,
424 			   MRT6MSG_WHOLEPKT);
425 	read_unlock(&mrt_lock);
426 	kfree_skb(skb);
427 	return NETDEV_TX_OK;
428 }
429 
430 static const struct net_device_ops reg_vif_netdev_ops = {
431 	.ndo_start_xmit	= reg_vif_xmit,
432 };
433 
434 static void reg_vif_setup(struct net_device *dev)
435 {
436 	dev->type		= ARPHRD_PIMREG;
437 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
438 	dev->flags		= IFF_NOARP;
439 	dev->netdev_ops		= &reg_vif_netdev_ops;
440 	dev->destructor		= free_netdev;
441 	dev->features		|= NETIF_F_NETNS_LOCAL;
442 }
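/*
 * The MTU arithmetic above: 1500 (Ethernet) - 40 (outer IPv6 header)
 * - 8 (PIM register header) = 1452 bytes of encapsulated payload.
 */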
443 
444 static struct net_device *ip6mr_reg_vif(struct net *net)
445 {
446 	struct net_device *dev;
447 
448 	dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
449 	if (dev == NULL)
450 		return NULL;
451 
452 	dev_net_set(dev, net);
453 
454 	if (register_netdevice(dev)) {
455 		free_netdev(dev);
456 		return NULL;
457 	}
458 	dev->iflink = 0;
459 
460 	if (dev_open(dev))
461 		goto failure;
462 
463 	dev_hold(dev);
464 	return dev;
465 
466 failure:
467 	/* allow the register to be completed before unregistering. */
468 	rtnl_unlock();
469 	rtnl_lock();
470 
471 	unregister_netdevice(dev);
472 	return NULL;
473 }
474 #endif
475 
476 /*
477  *	Delete a VIF entry
478  */
479 
480 static int mif6_delete(struct net *net, int vifi, struct list_head *head)
481 {
482 	struct mif_device *v;
483 	struct net_device *dev;
484 	struct inet6_dev *in6_dev;
485 	if (vifi < 0 || vifi >= net->ipv6.maxvif)
486 		return -EADDRNOTAVAIL;
487 
488 	v = &net->ipv6.vif6_table[vifi];
489 
490 	write_lock_bh(&mrt_lock);
491 	dev = v->dev;
492 	v->dev = NULL;
493 
494 	if (!dev) {
495 		write_unlock_bh(&mrt_lock);
496 		return -EADDRNOTAVAIL;
497 	}
498 
499 #ifdef CONFIG_IPV6_PIMSM_V2
500 	if (vifi == net->ipv6.mroute_reg_vif_num)
501 		net->ipv6.mroute_reg_vif_num = -1;
502 #endif
503 
504 	if (vifi + 1 == net->ipv6.maxvif) {
505 		int tmp;
506 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
507 			if (MIF_EXISTS(net, tmp))
508 				break;
509 		}
510 		net->ipv6.maxvif = tmp + 1;
511 	}
512 
513 	write_unlock_bh(&mrt_lock);
514 
515 	dev_set_allmulti(dev, -1);
516 
517 	in6_dev = __in6_dev_get(dev);
518 	if (in6_dev)
519 		in6_dev->cnf.mc_forwarding--;
520 
521 	if (v->flags & MIFF_REGISTER)
522 		unregister_netdevice_queue(dev, head);
523 
524 	dev_put(dev);
525 	return 0;
526 }
527 
528 static inline void ip6mr_cache_free(struct mfc6_cache *c)
529 {
530 	release_net(mfc6_net(c));
531 	kmem_cache_free(mrt_cachep, c);
532 }
533 
534 /* Destroy an unresolved cache entry, freeing queued skbs and reporting
535    errors to netlink readers. skbs queued on behalf of a netlink request
536    are marked by a zero IPv6 version field and answered with NLMSG_ERROR. */
537 
538 static void ip6mr_destroy_unres(struct mfc6_cache *c)
539 {
540 	struct sk_buff *skb;
541 	struct net *net = mfc6_net(c);
542 
543 	atomic_dec(&net->ipv6.cache_resolve_queue_len);
544 
545 	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
546 		if (ipv6_hdr(skb)->version == 0) {
547 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
548 			nlh->nlmsg_type = NLMSG_ERROR;
549 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
550 			skb_trim(skb, nlh->nlmsg_len);
551 			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
552 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
553 		} else
554 			kfree_skb(skb);
555 	}
556 
557 	ip6mr_cache_free(c);
558 }
559 
560 
561 /* A single expiry timer services the entire unresolved queue. */
562 
563 static void ipmr_do_expire_process(unsigned long dummy)
564 {
565 	unsigned long now = jiffies;
566 	unsigned long expires = 10 * HZ;
567 	struct mfc6_cache *c, **cp;
568 
569 	cp = &mfc_unres_queue;
570 
571 	while ((c = *cp) != NULL) {
572 		if (time_after(c->mfc_un.unres.expires, now)) {
573 			/* not yet... */
574 			unsigned long interval = c->mfc_un.unres.expires - now;
575 			if (interval < expires)
576 				expires = interval;
577 			cp = &c->next;
578 			continue;
579 		}
580 
581 		*cp = c->next;
582 		ip6mr_destroy_unres(c);
583 	}
584 
585 	if (mfc_unres_queue != NULL)
586 		mod_timer(&ipmr_expire_timer, jiffies + expires);
587 }
588 
589 static void ipmr_expire_process(unsigned long dummy)
590 {
591 	if (!spin_trylock(&mfc_unres_lock)) {
592 		mod_timer(&ipmr_expire_timer, jiffies + 1);
593 		return;
594 	}
595 
596 	if (mfc_unres_queue != NULL)
597 		ipmr_do_expire_process(dummy);
598 
599 	spin_unlock(&mfc_unres_lock);
600 }
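/*
 * Note on the pair above: ipmr_do_expire_process() destroys entries whose
 * ~10 s resolution window has passed and re-arms the timer for the soonest
 * remaining expiry. ipmr_expire_process() wraps it in spin_trylock()
 * because timer context must never spin on a lock that process context
 * may already hold; on contention it simply retries one jiffy later.
 */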
601 
602 /* Fill the oif list. Called with mrt_lock held for writing. */
603 
604 static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
605 {
606 	int vifi;
607 	struct net *net = mfc6_net(cache);
608 
609 	cache->mfc_un.res.minvif = MAXMIFS;
610 	cache->mfc_un.res.maxvif = 0;
611 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
612 
613 	for (vifi = 0; vifi < net->ipv6.maxvif; vifi++) {
614 		if (MIF_EXISTS(net, vifi) &&
615 		    ttls[vifi] && ttls[vifi] < 255) {
616 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
617 			if (cache->mfc_un.res.minvif > vifi)
618 				cache->mfc_un.res.minvif = vifi;
619 			if (cache->mfc_un.res.maxvif <= vifi)
620 				cache->mfc_un.res.maxvif = vifi + 1;
621 		}
622 	}
623 }
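/*
 * Worked example (hypothetical, assuming mifs 0-3 all exist): with
 * ttls = {255, 2, 0, 1}, only mifs 1 and 3 qualify (nonzero and < 255),
 * so the routine leaves res.ttls = {255, 2, 255, 1}, minvif = 1 and
 * maxvif = 4; the forwarding loop will later scan mifs 1..3 inclusive.
 */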
624 
625 static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock)
626 {
627 	int vifi = vifc->mif6c_mifi;
628 	struct mif_device *v = &net->ipv6.vif6_table[vifi];
629 	struct net_device *dev;
630 	struct inet6_dev *in6_dev;
631 	int err;
632 
633 	/* Is the vif busy? */
634 	if (MIF_EXISTS(net, vifi))
635 		return -EADDRINUSE;
636 
637 	switch (vifc->mif6c_flags) {
638 #ifdef CONFIG_IPV6_PIMSM_V2
639 	case MIFF_REGISTER:
640 		/*
641 		 * Special-purpose VIF used by PIM;
642 		 * all packets on it are sent to the daemon
643 		 */
644 		if (net->ipv6.mroute_reg_vif_num >= 0)
645 			return -EADDRINUSE;
646 		dev = ip6mr_reg_vif(net);
647 		if (!dev)
648 			return -ENOBUFS;
649 		err = dev_set_allmulti(dev, 1);
650 		if (err) {
651 			unregister_netdevice(dev);
652 			dev_put(dev);
653 			return err;
654 		}
655 		break;
656 #endif
657 	case 0:
658 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
659 		if (!dev)
660 			return -EADDRNOTAVAIL;
661 		err = dev_set_allmulti(dev, 1);
662 		if (err) {
663 			dev_put(dev);
664 			return err;
665 		}
666 		break;
667 	default:
668 		return -EINVAL;
669 	}
670 
671 	in6_dev = __in6_dev_get(dev);
672 	if (in6_dev)
673 		in6_dev->cnf.mc_forwarding++;
674 
675 	/*
676 	 *	Fill in the VIF structures
677 	 */
678 	v->rate_limit = vifc->vifc_rate_limit;
679 	v->flags = vifc->mif6c_flags;
680 	if (!mrtsock)
681 		v->flags |= VIFF_STATIC;
682 	v->threshold = vifc->vifc_threshold;
683 	v->bytes_in = 0;
684 	v->bytes_out = 0;
685 	v->pkt_in = 0;
686 	v->pkt_out = 0;
687 	v->link = dev->ifindex;
688 	if (v->flags & MIFF_REGISTER)
689 		v->link = dev->iflink;
690 
691 	/* Finish the update by writing the critical data under the lock */
692 	write_lock_bh(&mrt_lock);
693 	v->dev = dev;
694 #ifdef CONFIG_IPV6_PIMSM_V2
695 	if (v->flags & MIFF_REGISTER)
696 		net->ipv6.mroute_reg_vif_num = vifi;
697 #endif
698 	if (vifi + 1 > net->ipv6.maxvif)
699 		net->ipv6.maxvif = vifi + 1;
700 	write_unlock_bh(&mrt_lock);
701 	return 0;
702 }
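/*
 * Userspace sketch (assumed daemon-side code, not from this file): this
 * is how a routing daemon reaches mif6_add() via the setsockopt interface
 * handled further below:
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int on = 1;
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &on, sizeof(on));
 *
 *	struct mif6ctl vc = {
 *		.mif6c_mifi  = 1,			// slot in vif6_table
 *		.mif6c_flags = 0,			// ordinary interface
 *		.mif6c_pifi  = if_nametoindex("eth0"),	// physical ifindex
 *	};
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &vc, sizeof(vc));
 */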
703 
704 static struct mfc6_cache *ip6mr_cache_find(struct net *net,
705 					   struct in6_addr *origin,
706 					   struct in6_addr *mcastgrp)
707 {
708 	int line = MFC6_HASH(mcastgrp, origin);
709 	struct mfc6_cache *c;
710 
711 	for (c = net->ipv6.mfc6_cache_array[line]; c; c = c->next) {
712 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
713 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
714 			break;
715 	}
716 	return c;
717 }
718 
719 /*
720  *	Allocate a multicast cache entry
721  */
722 static struct mfc6_cache *ip6mr_cache_alloc(struct net *net)
723 {
724 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
725 	if (c == NULL)
726 		return NULL;
727 	c->mfc_un.res.minvif = MAXMIFS;
728 	mfc6_net_set(c, net);
729 	return c;
730 }
731 
732 static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
733 {
734 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
735 	if (c == NULL)
736 		return NULL;
737 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
738 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
739 	mfc6_net_set(c, net);
740 	return c;
741 }
742 
743 /*
744  *	A cache entry has moved from the unresolved queue into a resolved state
745  */
746 
747 static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
748 {
749 	struct sk_buff *skb;
750 
751 	/*
752 	 *	Play the pending entries through our router
753 	 */
754 
755 	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
756 		if (ipv6_hdr(skb)->version == 0) {
757 			int err;
758 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
759 
760 			if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
761 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
762 			} else {
763 				nlh->nlmsg_type = NLMSG_ERROR;
764 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
765 				skb_trim(skb, nlh->nlmsg_len);
766 				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
767 			}
768 			err = rtnl_unicast(skb, mfc6_net(uc), NETLINK_CB(skb).pid);
769 		} else
770 			ip6_mr_forward(skb, c);
771 	}
772 }
773 
774 /*
775  *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
776  *	expects the following bizarre scheme.
777  *
778  *	Called under mrt_lock.
779  */
780 
781 static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
782 			      int assert)
783 {
784 	struct sk_buff *skb;
785 	struct mrt6msg *msg;
786 	int ret;
787 
788 #ifdef CONFIG_IPV6_PIMSM_V2
789 	if (assert == MRT6MSG_WHOLEPKT)
790 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
791 						+sizeof(*msg));
792 	else
793 #endif
794 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
795 
796 	if (!skb)
797 		return -ENOBUFS;
798 
799 	/* I suppose that internal messages
800 	 * do not require checksums */
801 
802 	skb->ip_summed = CHECKSUM_UNNECESSARY;
803 
804 #ifdef CONFIG_IPV6_PIMSM_V2
805 	if (assert == MRT6MSG_WHOLEPKT) {
806 		/* Ugly, but we have no choice with this interface.
807 		   Duplicate old header, fix length etc.
808 		   And all this only to mangle msg->im6_msgtype and
809 		   to set msg->im6_mbz to "mbz" :-)
810 		 */
811 		skb_push(skb, -skb_network_offset(pkt));
812 
813 		skb_push(skb, sizeof(*msg));
814 		skb_reset_transport_header(skb);
815 		msg = (struct mrt6msg *)skb_transport_header(skb);
816 		msg->im6_mbz = 0;
817 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
818 		msg->im6_mif = net->ipv6.mroute_reg_vif_num;
819 		msg->im6_pad = 0;
820 		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
821 		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
822 
823 		skb->ip_summed = CHECKSUM_UNNECESSARY;
824 	} else
825 #endif
826 	{
827 	/*
828 	 *	Copy the IP header
829 	 */
830 
831 	skb_put(skb, sizeof(struct ipv6hdr));
832 	skb_reset_network_header(skb);
833 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
834 
835 	/*
836 	 *	Add our header
837 	 */
838 	skb_put(skb, sizeof(*msg));
839 	skb_reset_transport_header(skb);
840 	msg = (struct mrt6msg *)skb_transport_header(skb);
841 
842 	msg->im6_mbz = 0;
843 	msg->im6_msgtype = assert;
844 	msg->im6_mif = mifi;
845 	msg->im6_pad = 0;
846 	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
847 	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
848 
849 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
850 	skb->ip_summed = CHECKSUM_UNNECESSARY;
851 	}
852 
853 	if (net->ipv6.mroute6_sk == NULL) {
854 		kfree_skb(skb);
855 		return -EINVAL;
856 	}
857 
858 	/*
859 	 *	Deliver to user space multicast routing algorithms
860 	 */
861 	ret = sock_queue_rcv_skb(net->ipv6.mroute6_sk, skb);
862 	if (ret < 0) {
863 		if (net_ratelimit())
864 			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
865 		kfree_skb(skb);
866 	}
867 
868 	return ret;
869 }
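/*
 * Daemon-side sketch (assumed, for illustration): pim6sd receives these
 * reports on the very socket that did MRT6_INIT. Each report begins with
 * a struct mrt6msg in place of a real IPv6 header:
 *
 *	char buf[8192];
 *	int n = read(mrt_sock, buf, sizeof(buf));
 *	struct mrt6msg *m = (struct mrt6msg *)buf;
 *
 *	switch (m->im6_msgtype) {
 *	case MRT6MSG_NOCACHE:	// resolve (im6_src, im6_dst), install MFC
 *	case MRT6MSG_WRONGMIF:	// possible PIM assert condition
 *	case MRT6MSG_WHOLEPKT:	// PIM register candidate, packet follows
 *		break;
 *	}
 */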
870 
871 /*
872  *	Queue a packet for resolution and attach it to a (locked) unresolved cache entry
873  */
874 
875 static int
876 ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
877 {
878 	int err;
879 	struct mfc6_cache *c;
880 
881 	spin_lock_bh(&mfc_unres_lock);
882 	for (c = mfc_unres_queue; c; c = c->next) {
883 		if (net_eq(mfc6_net(c), net) &&
884 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
885 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
886 			break;
887 	}
888 
889 	if (c == NULL) {
890 		/*
891 		 *	Create a new entry if allowable
892 		 */
893 
894 		if (atomic_read(&net->ipv6.cache_resolve_queue_len) >= 10 ||
895 		    (c = ip6mr_cache_alloc_unres(net)) == NULL) {
896 			spin_unlock_bh(&mfc_unres_lock);
897 
898 			kfree_skb(skb);
899 			return -ENOBUFS;
900 		}
901 
902 		/*
903 		 *	Fill in the new cache entry
904 		 */
905 		c->mf6c_parent = -1;
906 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
907 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
908 
909 		/*
910 		 *	Reflect first query at pim6sd
911 		 */
912 		err = ip6mr_cache_report(net, skb, mifi, MRT6MSG_NOCACHE);
913 		if (err < 0) {
914 			/* If the report failed throw the cache entry
915 			   out - Brad Parker
916 			 */
917 			spin_unlock_bh(&mfc_unres_lock);
918 
919 			ip6mr_cache_free(c);
920 			kfree_skb(skb);
921 			return err;
922 		}
923 
924 		atomic_inc(&net->ipv6.cache_resolve_queue_len);
925 		c->next = mfc_unres_queue;
926 		mfc_unres_queue = c;
927 
928 		ipmr_do_expire_process(1);
929 	}
930 
931 	/*
932 	 *	See if we can append the packet
933 	 */
934 	if (c->mfc_un.unres.unresolved.qlen > 3) {
935 		kfree_skb(skb);
936 		err = -ENOBUFS;
937 	} else {
938 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
939 		err = 0;
940 	}
941 
942 	spin_unlock_bh(&mfc_unres_lock);
943 	return err;
944 }
945 
946 /*
947  *	MFC6 cache manipulation by user space
948  */
949 
950 static int ip6mr_mfc_delete(struct net *net, struct mf6cctl *mfc)
951 {
952 	int line;
953 	struct mfc6_cache *c, **cp;
954 
955 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
956 
957 	for (cp = &net->ipv6.mfc6_cache_array[line];
958 	     (c = *cp) != NULL; cp = &c->next) {
959 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
960 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
961 			write_lock_bh(&mrt_lock);
962 			*cp = c->next;
963 			write_unlock_bh(&mrt_lock);
964 
965 			ip6mr_cache_free(c);
966 			return 0;
967 		}
968 	}
969 	return -ENOENT;
970 }
971 
972 static int ip6mr_device_event(struct notifier_block *this,
973 			      unsigned long event, void *ptr)
974 {
975 	struct net_device *dev = ptr;
976 	struct net *net = dev_net(dev);
977 	struct mif_device *v;
978 	int ct;
979 	LIST_HEAD(list);
980 
981 	if (event != NETDEV_UNREGISTER)
982 		return NOTIFY_DONE;
983 
984 	v = &net->ipv6.vif6_table[0];
985 	for (ct = 0; ct < net->ipv6.maxvif; ct++, v++) {
986 		if (v->dev == dev)
987 			mif6_delete(net, ct, &list);
988 	}
989 	unregister_netdevice_many(&list);
990 
991 	return NOTIFY_DONE;
992 }
993 
994 static struct notifier_block ip6_mr_notifier = {
995 	.notifier_call = ip6mr_device_event
996 };
997 
998 /*
999  *	Setup for IP multicast routing
1000  */
1001 
1002 static int __net_init ip6mr_net_init(struct net *net)
1003 {
1004 	int err = 0;
1005 	net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
1006 				       GFP_KERNEL);
1007 	if (!net->ipv6.vif6_table) {
1008 		err = -ENOMEM;
1009 		goto fail;
1010 	}
1011 
1012 	/* Forwarding cache */
1013 	net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
1014 					     sizeof(struct mfc6_cache *),
1015 					     GFP_KERNEL);
1016 	if (!net->ipv6.mfc6_cache_array) {
1017 		err = -ENOMEM;
1018 		goto fail_mfc6_cache;
1019 	}
1020 
1021 #ifdef CONFIG_IPV6_PIMSM_V2
1022 	net->ipv6.mroute_reg_vif_num = -1;
1023 #endif
1024 
1025 #ifdef CONFIG_PROC_FS
1026 	err = -ENOMEM;
1027 	if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1028 		goto proc_vif_fail;
1029 	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1030 		goto proc_cache_fail;
1031 #endif
1032 	return 0;
1033 
1034 #ifdef CONFIG_PROC_FS
1035 proc_cache_fail:
1036 	proc_net_remove(net, "ip6_mr_vif");
1037 proc_vif_fail:
1038 	kfree(net->ipv6.mfc6_cache_array);
1039 #endif
1040 fail_mfc6_cache:
1041 	kfree(net->ipv6.vif6_table);
1042 fail:
1043 	return err;
1044 }
1045 
1046 static void __net_exit ip6mr_net_exit(struct net *net)
1047 {
1048 #ifdef CONFIG_PROC_FS
1049 	proc_net_remove(net, "ip6_mr_cache");
1050 	proc_net_remove(net, "ip6_mr_vif");
1051 #endif
1052 	mroute_clean_tables(net);
1053 	kfree(net->ipv6.mfc6_cache_array);
1054 	kfree(net->ipv6.vif6_table);
1055 }
1056 
1057 static struct pernet_operations ip6mr_net_ops = {
1058 	.init = ip6mr_net_init,
1059 	.exit = ip6mr_net_exit,
1060 };
1061 
1062 int __init ip6_mr_init(void)
1063 {
1064 	int err;
1065 
1066 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1067 				       sizeof(struct mfc6_cache),
1068 				       0, SLAB_HWCACHE_ALIGN,
1069 				       NULL);
1070 	if (!mrt_cachep)
1071 		return -ENOMEM;
1072 
1073 	err = register_pernet_subsys(&ip6mr_net_ops);
1074 	if (err)
1075 		goto reg_pernet_fail;
1076 
1077 	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
1078 	err = register_netdevice_notifier(&ip6_mr_notifier);
1079 	if (err)
1080 		goto reg_notif_fail;
1081 #ifdef CONFIG_IPV6_PIMSM_V2
1082 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1083 		printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
1084 		err = -EAGAIN;
1085 		goto add_proto_fail;
1086 	}
1087 #endif
1088 	return 0;
1089 #ifdef CONFIG_IPV6_PIMSM_V2
1090 add_proto_fail:
1091 	unregister_netdevice_notifier(&ip6_mr_notifier);
1092 #endif
1093 reg_notif_fail:
1094 	del_timer(&ipmr_expire_timer);
1095 	unregister_pernet_subsys(&ip6mr_net_ops);
1096 reg_pernet_fail:
1097 	kmem_cache_destroy(mrt_cachep);
1098 	return err;
1099 }
1100 
1101 void ip6_mr_cleanup(void)
1102 {
1103 	unregister_netdevice_notifier(&ip6_mr_notifier);
1104 	del_timer(&ipmr_expire_timer);
1105 	unregister_pernet_subsys(&ip6mr_net_ops);
1106 	kmem_cache_destroy(mrt_cachep);
1107 }
1108 
1109 static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
1110 {
1111 	int line;
1112 	struct mfc6_cache *uc, *c, **cp;
1113 	unsigned char ttls[MAXMIFS];
1114 	int i;
1115 
1116 	memset(ttls, 255, MAXMIFS);
1117 	for (i = 0; i < MAXMIFS; i++) {
1118 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1119 			ttls[i] = 1;
1120 
1121 	}
1122 
1123 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1124 
1125 	for (cp = &net->ipv6.mfc6_cache_array[line];
1126 	     (c = *cp) != NULL; cp = &c->next) {
1127 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1128 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
1129 			break;
1130 	}
1131 
1132 	if (c != NULL) {
1133 		write_lock_bh(&mrt_lock);
1134 		c->mf6c_parent = mfc->mf6cc_parent;
1135 		ip6mr_update_thresholds(c, ttls);
1136 		if (!mrtsock)
1137 			c->mfc_flags |= MFC_STATIC;
1138 		write_unlock_bh(&mrt_lock);
1139 		return 0;
1140 	}
1141 
1142 	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1143 		return -EINVAL;
1144 
1145 	c = ip6mr_cache_alloc(net);
1146 	if (c == NULL)
1147 		return -ENOMEM;
1148 
1149 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1150 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1151 	c->mf6c_parent = mfc->mf6cc_parent;
1152 	ip6mr_update_thresholds(c, ttls);
1153 	if (!mrtsock)
1154 		c->mfc_flags |= MFC_STATIC;
1155 
1156 	write_lock_bh(&mrt_lock);
1157 	c->next = net->ipv6.mfc6_cache_array[line];
1158 	net->ipv6.mfc6_cache_array[line] = c;
1159 	write_unlock_bh(&mrt_lock);
1160 
1161 	/*
1162 	 *	Check whether this entry resolves a queued unresolved entry.
1163 	 *	If so, play out the pending frames and tidy up.
1164 	 */
1165 	spin_lock_bh(&mfc_unres_lock);
1166 	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
1167 	     cp = &uc->next) {
1168 		if (net_eq(mfc6_net(uc), net) &&
1169 		    ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1170 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1171 			*cp = uc->next;
1172 			atomic_dec(&net->ipv6.cache_resolve_queue_len);
1173 			break;
1174 		}
1175 	}
1176 	if (mfc_unres_queue == NULL)
1177 		del_timer(&ipmr_expire_timer);
1178 	spin_unlock_bh(&mfc_unres_lock);
1179 
1180 	if (uc) {
1181 		ip6mr_cache_resolve(uc, c);
1182 		ip6mr_cache_free(uc);
1183 	}
1184 	return 0;
1185 }
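/*
 * Userspace sketch (assumed daemon-side code): installing the (S,G)
 * entry that ip6mr_mfc_add() processes above:
 *
 *	struct mf6cctl mc;
 *	memset(&mc, 0, sizeof(mc));
 *	mc.mf6cc_origin.sin6_addr   = src;	// S
 *	mc.mf6cc_mcastgrp.sin6_addr = grp;	// G
 *	mc.mf6cc_parent = iif_mifi;		// expected incoming mif
 *	IF_SET(oif_mifi, &mc.mf6cc_ifset);	// forward out of this mif
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MFC, &mc, sizeof(mc));
 */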
1186 
1187 /*
1188  *	Close the multicast socket, and clear the vif tables etc
1189  */
1190 
1191 static void mroute_clean_tables(struct net *net)
1192 {
1193 	int i;
1194 	LIST_HEAD(list);
1195 
1196 	/*
1197 	 *	Shut down all active vif entries
1198 	 */
1199 	for (i = 0; i < net->ipv6.maxvif; i++) {
1200 		if (!(net->ipv6.vif6_table[i].flags & VIFF_STATIC))
1201 			mif6_delete(net, i, &list);
1202 	}
1203 	unregister_netdevice_many(&list);
1204 
1205 	/*
1206 	 *	Wipe the cache
1207 	 */
1208 	for (i = 0; i < MFC6_LINES; i++) {
1209 		struct mfc6_cache *c, **cp;
1210 
1211 		cp = &net->ipv6.mfc6_cache_array[i];
1212 		while ((c = *cp) != NULL) {
1213 			if (c->mfc_flags & MFC_STATIC) {
1214 				cp = &c->next;
1215 				continue;
1216 			}
1217 			write_lock_bh(&mrt_lock);
1218 			*cp = c->next;
1219 			write_unlock_bh(&mrt_lock);
1220 
1221 			ip6mr_cache_free(c);
1222 		}
1223 	}
1224 
1225 	if (atomic_read(&net->ipv6.cache_resolve_queue_len) != 0) {
1226 		struct mfc6_cache *c, **cp;
1227 
1228 		spin_lock_bh(&mfc_unres_lock);
1229 		cp = &mfc_unres_queue;
1230 		while ((c = *cp) != NULL) {
1231 			if (!net_eq(mfc6_net(c), net)) {
1232 				cp = &c->next;
1233 				continue;
1234 			}
1235 			*cp = c->next;
1236 			ip6mr_destroy_unres(c);
1237 		}
1238 		spin_unlock_bh(&mfc_unres_lock);
1239 	}
1240 }
1241 
1242 static int ip6mr_sk_init(struct sock *sk)
1243 {
1244 	int err = 0;
1245 	struct net *net = sock_net(sk);
1246 
1247 	rtnl_lock();
1248 	write_lock_bh(&mrt_lock);
1249 	if (likely(net->ipv6.mroute6_sk == NULL)) {
1250 		net->ipv6.mroute6_sk = sk;
1251 		net->ipv6.devconf_all->mc_forwarding++;
1252 	}
1253 	else
1254 		err = -EADDRINUSE;
1255 	write_unlock_bh(&mrt_lock);
1256 
1257 	rtnl_unlock();
1258 
1259 	return err;
1260 }
1261 
1262 int ip6mr_sk_done(struct sock *sk)
1263 {
1264 	int err = 0;
1265 	struct net *net = sock_net(sk);
1266 
1267 	rtnl_lock();
1268 	if (sk == net->ipv6.mroute6_sk) {
1269 		write_lock_bh(&mrt_lock);
1270 		net->ipv6.mroute6_sk = NULL;
1271 		net->ipv6.devconf_all->mc_forwarding--;
1272 		write_unlock_bh(&mrt_lock);
1273 
1274 		mroute_clean_tables(net);
1275 	} else
1276 		err = -EACCES;
1277 	rtnl_unlock();
1278 
1279 	return err;
1280 }
1281 
1282 /*
1283  *	Socket options and virtual interface manipulation. The whole
1284  *	virtual interface system is a complete heap, but unfortunately
1285  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1286  *	MOSPF/PIM router set up we can clean this up.
1287  */
1288 
1289 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1290 {
1291 	int ret;
1292 	struct mif6ctl vif;
1293 	struct mf6cctl mfc;
1294 	mifi_t mifi;
1295 	struct net *net = sock_net(sk);
1296 
1297 	if (optname != MRT6_INIT) {
1298 		if (sk != net->ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
1299 			return -EACCES;
1300 	}
1301 
1302 	switch (optname) {
1303 	case MRT6_INIT:
1304 		if (sk->sk_type != SOCK_RAW ||
1305 		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1306 			return -EOPNOTSUPP;
1307 		if (optlen < sizeof(int))
1308 			return -EINVAL;
1309 
1310 		return ip6mr_sk_init(sk);
1311 
1312 	case MRT6_DONE:
1313 		return ip6mr_sk_done(sk);
1314 
1315 	case MRT6_ADD_MIF:
1316 		if (optlen < sizeof(vif))
1317 			return -EINVAL;
1318 		if (copy_from_user(&vif, optval, sizeof(vif)))
1319 			return -EFAULT;
1320 		if (vif.mif6c_mifi >= MAXMIFS)
1321 			return -ENFILE;
1322 		rtnl_lock();
1323 		ret = mif6_add(net, &vif, sk == net->ipv6.mroute6_sk);
1324 		rtnl_unlock();
1325 		return ret;
1326 
1327 	case MRT6_DEL_MIF:
1328 		if (optlen < sizeof(mifi_t))
1329 			return -EINVAL;
1330 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1331 			return -EFAULT;
1332 		rtnl_lock();
1333 		ret = mif6_delete(net, mifi, NULL);
1334 		rtnl_unlock();
1335 		return ret;
1336 
1337 	/*
1338 	 *	Manipulate the forwarding caches. These live
1339 	 *	in a sort of kernel/user symbiosis.
1340 	 */
1341 	case MRT6_ADD_MFC:
1342 	case MRT6_DEL_MFC:
1343 		if (optlen < sizeof(mfc))
1344 			return -EINVAL;
1345 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1346 			return -EFAULT;
1347 		rtnl_lock();
1348 		if (optname == MRT6_DEL_MFC)
1349 			ret = ip6mr_mfc_delete(net, &mfc);
1350 		else
1351 			ret = ip6mr_mfc_add(net, &mfc,
1352 					    sk == net->ipv6.mroute6_sk);
1353 		rtnl_unlock();
1354 		return ret;
1355 
1356 	/*
1357 	 *	Control PIM assert (enabling PIM also enables assert)
1358 	 */
1359 	case MRT6_ASSERT:
1360 	{
1361 		int v;
1362 		if (get_user(v, (int __user *)optval))
1363 			return -EFAULT;
1364 		net->ipv6.mroute_do_assert = !!v;
1365 		return 0;
1366 	}
1367 
1368 #ifdef CONFIG_IPV6_PIMSM_V2
1369 	case MRT6_PIM:
1370 	{
1371 		int v;
1372 		if (get_user(v, (int __user *)optval))
1373 			return -EFAULT;
1374 		v = !!v;
1375 		rtnl_lock();
1376 		ret = 0;
1377 		if (v != net->ipv6.mroute_do_pim) {
1378 			net->ipv6.mroute_do_pim = v;
1379 			net->ipv6.mroute_do_assert = v;
1380 		}
1381 		rtnl_unlock();
1382 		return ret;
1383 	}
1384 
1385 #endif
1386 	/*
1387 	 *	Spurious command, or MRT6_VERSION which you cannot
1388 	 *	set.
1389 	 */
1390 	default:
1391 		return -ENOPROTOOPT;
1392 	}
1393 }
1394 
1395 /*
1396  *	Getsockopt support for the multicast routing system.
1397  */
1398 
1399 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1400 			  int __user *optlen)
1401 {
1402 	int olr;
1403 	int val;
1404 	struct net *net = sock_net(sk);
1405 
1406 	switch (optname) {
1407 	case MRT6_VERSION:
1408 		val = 0x0305;
1409 		break;
1410 #ifdef CONFIG_IPV6_PIMSM_V2
1411 	case MRT6_PIM:
1412 		val = net->ipv6.mroute_do_pim;
1413 		break;
1414 #endif
1415 	case MRT6_ASSERT:
1416 		val = net->ipv6.mroute_do_assert;
1417 		break;
1418 	default:
1419 		return -ENOPROTOOPT;
1420 	}
1421 
1422 	if (get_user(olr, optlen))
1423 		return -EFAULT;
1424 
1425 	olr = min_t(int, olr, sizeof(int));
1426 	if (olr < 0)
1427 		return -EINVAL;
1428 
1429 	if (put_user(olr, optlen))
1430 		return -EFAULT;
1431 	if (copy_to_user(optval, &val, olr))
1432 		return -EFAULT;
1433 	return 0;
1434 }
1435 
1436 /*
1437  *	The IP multicast ioctl support routines.
1438  */
1439 
1440 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1441 {
1442 	struct sioc_sg_req6 sr;
1443 	struct sioc_mif_req6 vr;
1444 	struct mif_device *vif;
1445 	struct mfc6_cache *c;
1446 	struct net *net = sock_net(sk);
1447 
1448 	switch (cmd) {
1449 	case SIOCGETMIFCNT_IN6:
1450 		if (copy_from_user(&vr, arg, sizeof(vr)))
1451 			return -EFAULT;
1452 		if (vr.mifi >= net->ipv6.maxvif)
1453 			return -EINVAL;
1454 		read_lock(&mrt_lock);
1455 		vif = &net->ipv6.vif6_table[vr.mifi];
1456 		if (MIF_EXISTS(net, vr.mifi)) {
1457 			vr.icount = vif->pkt_in;
1458 			vr.ocount = vif->pkt_out;
1459 			vr.ibytes = vif->bytes_in;
1460 			vr.obytes = vif->bytes_out;
1461 			read_unlock(&mrt_lock);
1462 
1463 			if (copy_to_user(arg, &vr, sizeof(vr)))
1464 				return -EFAULT;
1465 			return 0;
1466 		}
1467 		read_unlock(&mrt_lock);
1468 		return -EADDRNOTAVAIL;
1469 	case SIOCGETSGCNT_IN6:
1470 		if (copy_from_user(&sr, arg, sizeof(sr)))
1471 			return -EFAULT;
1472 
1473 		read_lock(&mrt_lock);
1474 		c = ip6mr_cache_find(net, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1475 		if (c) {
1476 			sr.pktcnt = c->mfc_un.res.pkt;
1477 			sr.bytecnt = c->mfc_un.res.bytes;
1478 			sr.wrong_if = c->mfc_un.res.wrong_if;
1479 			read_unlock(&mrt_lock);
1480 
1481 			if (copy_to_user(arg, &sr, sizeof(sr)))
1482 				return -EFAULT;
1483 			return 0;
1484 		}
1485 		read_unlock(&mrt_lock);
1486 		return -EADDRNOTAVAIL;
1487 	default:
1488 		return -ENOIOCTLCMD;
1489 	}
1490 }
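/*
 * Usage sketch (assumed): querying the per-flow counters served by the
 * ioctl handler above:
 *
 *	struct sioc_sg_req6 sr;
 *	memset(&sr, 0, sizeof(sr));
 *	sr.src.sin6_addr = src;
 *	sr.grp.sin6_addr = grp;
 *	if (ioctl(s, SIOCGETSGCNT_IN6, &sr) == 0)
 *		printf("pkts=%lu bytes=%lu wrong_if=%lu\n",
 *		       sr.pktcnt, sr.bytecnt, sr.wrong_if);
 */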
1491 
1492 
1493 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1494 {
1495 	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1496 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1497 	return dst_output(skb);
1498 }
1499 
1500 /*
1501  *	Processing handlers for ip6mr_forward
1502  */
1503 
1504 static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1505 {
1506 	struct ipv6hdr *ipv6h;
1507 	struct net *net = mfc6_net(c);
1508 	struct mif_device *vif = &net->ipv6.vif6_table[vifi];
1509 	struct net_device *dev;
1510 	struct dst_entry *dst;
1511 	struct flowi fl;
1512 
1513 	if (vif->dev == NULL)
1514 		goto out_free;
1515 
1516 #ifdef CONFIG_IPV6_PIMSM_V2
1517 	if (vif->flags & MIFF_REGISTER) {
1518 		vif->pkt_out++;
1519 		vif->bytes_out += skb->len;
1520 		vif->dev->stats.tx_bytes += skb->len;
1521 		vif->dev->stats.tx_packets++;
1522 		ip6mr_cache_report(net, skb, vifi, MRT6MSG_WHOLEPKT);
1523 		goto out_free;
1524 	}
1525 #endif
1526 
1527 	ipv6h = ipv6_hdr(skb);
1528 
1529 	fl = (struct flowi) {
1530 		.oif = vif->link,
1531 		.nl_u = { .ip6_u =
1532 				{ .daddr = ipv6h->daddr, }
1533 		}
1534 	};
1535 
1536 	dst = ip6_route_output(net, NULL, &fl);
1537 	if (!dst)
1538 		goto out_free;
1539 
1540 	skb_dst_drop(skb);
1541 	skb_dst_set(skb, dst);
1542 
1543 	/*
1544 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
1545 	 * locally not only before forwarding, but also after forwarding on
1546 	 * all output interfaces. Clearly, if an mrouter runs a multicast
1547 	 * program, that program should receive packets regardless of which
1548 	 * interface it joined on.
1549 	 * If we did not do this, the program would have to join on all
1550 	 * interfaces. On the other hand, a multihomed host (or router, but
1551 	 * not an mrouter) cannot join on more than one interface - that
1552 	 * would result in receiving duplicate packets.
1553 	 */
1554 	dev = vif->dev;
1555 	skb->dev = dev;
1556 	vif->pkt_out++;
1557 	vif->bytes_out += skb->len;
1558 
1559 	/* We are about to write */
1560 	/* XXX: extension headers? */
1561 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1562 		goto out_free;
1563 
1564 	ipv6h = ipv6_hdr(skb);
1565 	ipv6h->hop_limit--;
1566 
1567 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1568 
1569 	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
1570 		       ip6mr_forward2_finish);
1571 
1572 out_free:
1573 	kfree_skb(skb);
1574 	return 0;
1575 }
1576 
1577 static int ip6mr_find_vif(struct net_device *dev)
1578 {
1579 	struct net *net = dev_net(dev);
1580 	int ct;
1581 	for (ct = net->ipv6.maxvif - 1; ct >= 0; ct--) {
1582 		if (net->ipv6.vif6_table[ct].dev == dev)
1583 			break;
1584 	}
1585 	return ct;
1586 }
1587 
1588 static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
1589 {
1590 	int psend = -1;
1591 	int vif, ct;
1592 	struct net *net = mfc6_net(cache);
1593 
1594 	vif = cache->mf6c_parent;
1595 	cache->mfc_un.res.pkt++;
1596 	cache->mfc_un.res.bytes += skb->len;
1597 
1598 	/*
1599 	 * Wrong interface: drop packet and (maybe) send PIM assert.
1600 	 */
1601 	if (net->ipv6.vif6_table[vif].dev != skb->dev) {
1602 		int true_vifi;
1603 
1604 		cache->mfc_un.res.wrong_if++;
1605 		true_vifi = ip6mr_find_vif(skb->dev);
1606 
1607 		if (true_vifi >= 0 && net->ipv6.mroute_do_assert &&
1608 		    /* PIM-SM uses asserts when switching from the RPT to the
1609 		       SPT, so we cannot check that the packet arrived on an oif.
1610 		       That is bad, but otherwise we would need to move a pretty
1611 		       large chunk of pimd into the kernel. Ough... --ANK
1612 		     */
1613 		    (net->ipv6.mroute_do_pim ||
1614 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
1615 		    time_after(jiffies,
1616 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1617 			cache->mfc_un.res.last_assert = jiffies;
1618 			ip6mr_cache_report(net, skb, true_vifi, MRT6MSG_WRONGMIF);
1619 		}
1620 		goto dont_forward;
1621 	}
1622 
1623 	net->ipv6.vif6_table[vif].pkt_in++;
1624 	net->ipv6.vif6_table[vif].bytes_in += skb->len;
1625 
1626 	/*
1627 	 *	Forward the frame (cloning it for all but the last eligible oif)
1628 	 */
1629 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
1630 		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
1631 			if (psend != -1) {
1632 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1633 				if (skb2)
1634 					ip6mr_forward2(skb2, cache, psend);
1635 			}
1636 			psend = ct;
1637 		}
1638 	}
1639 	if (psend != -1) {
1640 		ip6mr_forward2(skb, cache, psend);
1641 		return 0;
1642 	}
1643 
1644 dont_forward:
1645 	kfree_skb(skb);
1646 	return 0;
1647 }
1648 
1649 
1650 /*
1651  *	Multicast packets for forwarding arrive here
1652  */
1653 
1654 int ip6_mr_input(struct sk_buff *skb)
1655 {
1656 	struct mfc6_cache *cache;
1657 	struct net *net = dev_net(skb->dev);
1658 
1659 	read_lock(&mrt_lock);
1660 	cache = ip6mr_cache_find(net,
1661 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1662 
1663 	/*
1664 	 *	No usable cache entry
1665 	 */
1666 	if (cache == NULL) {
1667 		int vif;
1668 
1669 		vif = ip6mr_find_vif(skb->dev);
1670 		if (vif >= 0) {
1671 			int err = ip6mr_cache_unresolved(net, vif, skb);
1672 			read_unlock(&mrt_lock);
1673 
1674 			return err;
1675 		}
1676 		read_unlock(&mrt_lock);
1677 		kfree_skb(skb);
1678 		return -ENODEV;
1679 	}
1680 
1681 	ip6_mr_forward(skb, cache);
1682 
1683 	read_unlock(&mrt_lock);
1684 
1685 	return 0;
1686 }
1687 
1688 
1689 static int
1690 ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
1691 {
1692 	int ct;
1693 	struct rtnexthop *nhp;
1694 	struct net *net = mfc6_net(c);
1695 	struct net_device *dev = net->ipv6.vif6_table[c->mf6c_parent].dev;
1696 	u8 *b = skb_tail_pointer(skb);
1697 	struct rtattr *mp_head;
1698 
1699 	if (dev)
1700 		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1701 
1702 	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1703 
1704 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1705 		if (c->mfc_un.res.ttls[ct] < 255) {
1706 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1707 				goto rtattr_failure;
1708 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1709 			nhp->rtnh_flags = 0;
1710 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1711 			nhp->rtnh_ifindex = net->ipv6.vif6_table[ct].dev->ifindex;
1712 			nhp->rtnh_len = sizeof(*nhp);
1713 		}
1714 	}
1715 	mp_head->rta_type = RTA_MULTIPATH;
1716 	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1717 	rtm->rtm_type = RTN_MULTICAST;
1718 	return 1;
1719 
1720 rtattr_failure:
1721 	nlmsg_trim(skb, b);
1722 	return -EMSGSIZE;
1723 }
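/*
 * Illustrative note: the netlink attributes built above mimic a unicast
 * multipath route. RTA_IIF carries the parent device's ifindex and
 * RTA_MULTIPATH holds one struct rtnexthop per forwarding mif, with the
 * mif's TTL threshold stored in rtnh_hops; userspace walks the list with
 * the standard RTNH_OK()/RTNH_NEXT() iterators.
 */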
1724 
1725 int ip6mr_get_route(struct net *net,
1726 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1727 {
1728 	int err;
1729 	struct mfc6_cache *cache;
1730 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1731 
1732 	read_lock(&mrt_lock);
1733 	cache = ip6mr_cache_find(net, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
1734 
1735 	if (!cache) {
1736 		struct sk_buff *skb2;
1737 		struct ipv6hdr *iph;
1738 		struct net_device *dev;
1739 		int vif;
1740 
1741 		if (nowait) {
1742 			read_unlock(&mrt_lock);
1743 			return -EAGAIN;
1744 		}
1745 
1746 		dev = skb->dev;
1747 		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
1748 			read_unlock(&mrt_lock);
1749 			return -ENODEV;
1750 		}
1751 
1752 		/* really correct? */
1753 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
1754 		if (!skb2) {
1755 			read_unlock(&mrt_lock);
1756 			return -ENOMEM;
1757 		}
1758 
1759 		skb_reset_transport_header(skb2);
1760 
1761 		skb_put(skb2, sizeof(struct ipv6hdr));
1762 		skb_reset_network_header(skb2);
1763 
1764 		iph = ipv6_hdr(skb2);
1765 		iph->version = 0;
1766 		iph->priority = 0;
1767 		iph->flow_lbl[0] = 0;
1768 		iph->flow_lbl[1] = 0;
1769 		iph->flow_lbl[2] = 0;
1770 		iph->payload_len = 0;
1771 		iph->nexthdr = IPPROTO_NONE;
1772 		iph->hop_limit = 0;
1773 		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
1774 		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
1775 
1776 		err = ip6mr_cache_unresolved(net, vif, skb2);
1777 		read_unlock(&mrt_lock);
1778 
1779 		return err;
1780 	}
1781 
1782 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1783 		cache->mfc_flags |= MFC_NOTIFY;
1784 
1785 	err = ip6mr_fill_mroute(skb, cache, rtm);
1786 	read_unlock(&mrt_lock);
1787 	return err;
1788 }
1789 
1790