xref: /openbmc/linux/net/ipv6/ip6mr.c (revision f42b3800)
/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C) 2007, 2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>

struct sock *mroute6_socket;

/* Big lock, protecting the vif table, mrt cache and mroute socket state.
   Note that changes are serialized under rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

static struct mif_device vif6_table[MAXMIFS];		/* Devices		*/
static int maxvif;

#define MIF_EXISTS(idx) (vif6_table[idx].dev != NULL)

static int mroute_do_assert;				/* Set in PIM assert	*/
#ifdef CONFIG_IPV6_PIMSM_V2
static int mroute_do_pim;
#else
#define mroute_do_pim 0
#endif

static struct mfc6_cache *mfc6_cache_array[MFC6_LINES];	/* Forwarding cache	*/

static struct mfc6_cache *mfc_unres_queue;		/* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;		/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We revert to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and protected
   with the weak lock mrt_lock. The queue of unresolved entries is
   protected with the strong spinlock mfc_unres_lock.

   This way the data path is entirely free of exclusive locks.
 */
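
/*
 * Editor's illustrative summary of the locking discipline described
 * above, as it is used at the call sites in this file (a sketch for
 * orientation, not extra kernel code):
 *
 *	read_lock(&mrt_lock);			data path, /proc dumps
 *	write_lock_bh(&mrt_lock);		vif/MFC updates (process context)
 *	spin_lock_bh(&mfc_unres_lock);		unresolved queue, also from timer
 */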

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IPV6_PIMSM_V2
static struct inet6_protocol pim6_protocol;
#endif

static struct timer_list ipmr_expire_timer;


#ifdef CONFIG_PROC_FS

struct ipmr_mfc_iter {
	struct mfc6_cache **cache;
	int ct;
};


static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc6_cache *mfc;

	it->cache = mfc6_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < ARRAY_SIZE(mfc6_cache_array); it->ct++)
		for (mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

/*
 *	The /proc interfaces to the multicast routing tables:
 *	/proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
 */

struct ipmr_vif_iter {
	int ct;
};

static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
					    loff_t pos)
{
	for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
		if (!MIF_EXISTS(iter->ct))
			continue;
		if (pos-- == 0)
			return &vif6_table[iter->ct];
	}
	return NULL;
}

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	read_lock(&mrt_lock);
	return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN);
}

static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip6mr_vif_seq_idx(iter, 0);

	while (++iter->ct < maxvif) {
		if (!MIF_EXISTS(iter->ct))
			continue;
		return &vif6_table[iter->ct];
	}
	return NULL;
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ip6mr_vif_seq_ops,
				sizeof(struct ipmr_vif_iter));
}

static struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN);
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != mfc6_cache_array);

	while (++it->ct < ARRAY_SIZE(mfc6_cache_array)) {
		mfc = mfc6_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	if (mfc)
		return mfc;

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == mfc6_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq,
			   NIP6_FMT " " NIP6_FMT " %-3d %8ld %8ld %8ld",
			   NIP6(mfc->mf6c_mcastgrp), NIP6(mfc->mf6c_origin),
			   mfc->mf6c_parent,
			   mfc->mfc_un.res.pkt,
			   mfc->mfc_un.res.bytes,
			   mfc->mfc_un.res.wrong_if);

		if (it->cache != &mfc_unres_queue) {
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_mfc_seq_ops,
				sizeof(struct ipmr_mfc_iter));
}

static struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
#endif
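
/*
 * For reference, /proc/net/ip6_mr_vif as rendered by
 * ip6mr_vif_seq_show() above looks roughly like this; the header line
 * is taken from the code, the row values are invented for illustration:
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0           9540      62      4770      31 00000
 *	 1 pim6reg           0       0         0       0 00008
 */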

#ifdef CONFIG_IPV6_PIMSM_V2
static int reg_vif_num = -1;

static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IPV6);	/* decapsulated packet is IPv6, not IPv4 */
	skb->ip_summed = CHECKSUM_NONE;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	((struct net_device_stats *)netdev_priv(reg_dev))->rx_bytes += skb->len;
	((struct net_device_stats *)netdev_priv(reg_dev))->rx_packets++;
	skb->dst = NULL;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}

static struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	((struct net_device_stats *)netdev_priv(dev))->tx_bytes += skb->len;
	((struct net_device_stats *)netdev_priv(dev))->tx_packets++;
	ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
{
	return (struct net_device_stats *)netdev_priv(dev);
}

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->hard_start_xmit	= reg_vif_xmit;
	dev->get_stats		= reg_vif_get_stats;
	dev->destructor		= free_netdev;
}

static struct net_device *ip6mr_reg_vif(void)
{
	struct net_device *dev;

	dev = alloc_netdev(sizeof(struct net_device_stats), "pim6reg",
			   reg_vif_setup);

	if (dev == NULL)
		return NULL;

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	if (dev_open(dev))
		goto failure;

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 */

static int mif6_delete(int vifi)
{
	struct mif_device *v;
	struct net_device *dev;

	if (vifi < 0 || vifi >= maxvif)
		return -EADDRNOTAVAIL;

	v = &vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	if (vifi + 1 == maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(tmp))
				break;
		}
		maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mfc6_cache *c)
{
	struct sk_buff *skb;

	atomic_dec(&cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	kmem_cache_free(mrt_cachep, c);
}


/* Single timer process for all the unresolved queue. */

static void ipmr_do_expire_process(unsigned long dummy)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mfc6_cache *c, **cp;

	cp = &mfc_unres_queue;

	while ((c = *cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;
		ip6mr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(unsigned long dummy)
{
	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len))
		ipmr_do_expire_process(dummy);

	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < maxvif; vifi++) {
		if (MIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

static int mif6_add(struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &vif6_table[vifi];
	struct net_device *dev;

	/* Is vif busy? */
	if (MIF_EXISTS(vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		break;
#endif
	case 0:
		dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		dev_put(dev);
		break;
	default:
		return -EINVAL;
	}

	dev_set_allmulti(dev, 1);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	dev_hold(dev);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi + 1 > maxvif)
		maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
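
/*
 * Userspace view of mif6_add() (an editor's sketch, not part of this
 * file; "sock" and the interface name are hypothetical): a pim6sd-like
 * daemon fills a struct mif6ctl and issues MRT6_ADD_MIF.
 *
 *	struct mif6ctl vc;
 *
 *	memset(&vc, 0, sizeof(vc));
 *	vc.mif6c_mifi = 0;			mif index to allocate
 *	vc.mif6c_flags = 0;			or MIFF_REGISTER for pim6reg
 *	vc.mif6c_pifi = if_nametoindex("eth0");	physical interface
 *	setsockopt(sock, IPPROTO_IPV6, MRT6_ADD_MIF, &vc, sizeof(vc));
 */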

static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
{
	int line = MFC6_HASH(mcastgrp, origin);
	struct mfc6_cache *c;

	for (c = mfc6_cache_array[line]; c; c = c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	memset(c, 0, sizeof(*c));
	c->mfc_un.res.minvif = MAXMIFS;
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	memset(c, 0, sizeof(*c));
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			int err;
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
			}
			err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			ip6_mr_forward(skb, c);
	}
}

/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+ sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = reg_vif_num;
		msg->im6_pad = 0;
		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

	skb->dst = dst_clone(pkt->dst);
	skb->ip_summed = CHECKSUM_UNNECESSARY;

	skb_pull(skb, sizeof(struct ipv6hdr));
	}

	if (mroute6_socket == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	if ((ret = sock_queue_rcv_skb(mroute6_socket, skb)) < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
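
/*
 * The daemon end of this upcall (editor's sketch; error handling
 * omitted): MRT6MSG_NOCACHE and friends arrive as plain reads on the
 * mroute socket, carrying a struct mrt6msg where the ICMPv6 payload
 * would normally be.
 *
 *	unsigned char buf[8192];
 *	struct mrt6msg *m;
 *	ssize_t len = read(sock, buf, sizeof(buf));
 *
 *	m = (struct mrt6msg *)buf;
 *	switch (m->im6_msgtype) {
 *	case MRT6MSG_NOCACHE:
 *		resolve (m->im6_src, m->im6_dst), which arrived on mif
 *		m->im6_mif, then install an MFC entry via MRT6_ADD_MFC
 *		break;
 *	case MRT6MSG_WRONGMIF:
 *	case MRT6MSG_WHOLEPKT:
 *		...
 *	}
 */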

/*
 *	Queue a packet for resolution. It creates a locked cache entry.
 */

static int
ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
{
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	for (c = mfc_unres_queue; c; c = c->next) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			kmem_cache_free(mrt_cachep, c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		ipmr_do_expire_process(1);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct mf6cctl *mfc)
{
	int line;
	struct mfc6_cache *c, **cp;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct mif_device *v;
	int ct;

	if (dev_net(dev) != &init_net)
		return NOTIFY_DONE;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	v = &vif6_table[0];
	for (ct = 0; ct < maxvif; ct++, v++) {
		if (v->dev == dev)
			mif6_delete(ct);
	}
	return NOTIFY_DONE;
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

/*
 *	Setup for IP multicast routing
 */

void __init ip6_mr_init(void)
{
	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		panic("cannot allocate ip6_mrt_cache");

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	register_netdevice_notifier(&ip6_mr_notifier);
#ifdef CONFIG_PROC_FS
	proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops);
	proc_net_fops_create(&init_net, "ip6_mr_cache", 0, &ip6mr_mfc_fops);
#endif
}


static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
{
	int line;
	struct mfc6_cache *uc, *c, **cp;
	unsigned char ttls[MAXMIFS];
	int i;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = mfc6_cache_array[line];
	mfc6_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			*cp = uc->next;
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ip6mr_cache_resolve(uc, c);
		kmem_cache_free(mrt_cachep, uc);
	}
	return 0;
}
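
/*
 * Matching userspace call for ip6mr_mfc_add() (editor's sketch;
 * "sock", the addresses and the mif numbers are hypothetical):
 *
 *	struct mf6cctl mc;
 *
 *	memset(&mc, 0, sizeof(mc));
 *	inet_pton(AF_INET6, "2001:db8::1", &mc.mf6cc_origin.sin6_addr);
 *	inet_pton(AF_INET6, "ff3e::4321", &mc.mf6cc_mcastgrp.sin6_addr);
 *	mc.mf6cc_parent = 0;			incoming (iif) mif
 *	IF_SET(1, &mc.mf6cc_ifset);		forward out of mif #1
 *	setsockopt(sock, IPPROTO_IPV6, MRT6_ADD_MFC, &mc, sizeof(mc));
 */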

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < maxvif; i++) {
		if (!(vif6_table[i].flags & VIFF_STATIC))
			mif6_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < ARRAY_SIZE(mfc6_cache_array); i++) {
		struct mfc6_cache *c, **cp;

		cp = &mfc6_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags & MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc6_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static int ip6mr_sk_init(struct sock *sk)
{
	int err = 0;

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(mroute6_socket == NULL))
		mroute6_socket = sk;
	else
		err = -EADDRINUSE;
	write_unlock_bh(&mrt_lock);

	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	int err = 0;

	rtnl_lock();
	if (sk == mroute6_socket) {
		write_lock_bh(&mrt_lock);
		mroute6_socket = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	} else
		err = -EACCES;
	rtnl_unlock();

	return err;
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */
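
/*
 * End-to-end setup sequence from the daemon side (editor's sketch,
 * error handling omitted). MRT6_INIT must come first and only works on
 * an ICMPv6 raw socket, mirroring the checks below:
 *
 *	int one = 1;
 *	int sock = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *
 *	setsockopt(sock, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *	... MRT6_ADD_MIF / MRT6_ADD_MFC as sketched earlier ...
 *	setsockopt(sock, IPPROTO_IPV6, MRT6_DONE, NULL, 0);
 */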

int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;

	if (optname != MRT6_INIT) {
		if (sk != mroute6_socket && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(&vif, sk == mroute6_socket);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mifi);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC)
			ret = ip6mr_mfc_delete(&mfc);
		else
			ret = ip6mr_mfc_add(&mfc, sk == mroute6_socket);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (activating PIM also activates assert)
	 */
	case MRT6_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mroute_do_assert = !!v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mroute_do_pim) {
			mroute_do_pim = v;
			mroute_do_assert = v;
			if (mroute_do_pim)
				ret = inet6_add_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			else
				ret = inet6_del_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
		}
		rtnl_unlock();
		return ret;
	}

#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsockopt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}
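
/*
 * Example query (illustrative): reading the API version, which the
 * code above reports as 0x0305.
 *
 *	int ver;
 *	socklen_t len = sizeof(ver);
 *
 *	getsockopt(sock, IPPROTO_IPV6, MRT6_VERSION, &ver, &len);
 *	ver now holds 0x0305
 */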

/*
 *	The IP multicast ioctl support routines.
 */

int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &vif6_table[vr.mifi];
		if (MIF_EXISTS(vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
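
/*
 * Fetching the per-(S,G) counters kept in struct mfc6_cache through
 * the ioctl above (editor's sketch; the addresses are hypothetical):
 *
 *	struct sioc_sg_req6 sr;
 *
 *	memset(&sr, 0, sizeof(sr));
 *	inet_pton(AF_INET6, "2001:db8::1", &sr.src.sin6_addr);
 *	inet_pton(AF_INET6, "ff3e::4321", &sr.grp.sin6_addr);
 *	ioctl(sock, SIOCGETSGCNT_IN6, &sr);
 *	sr.pktcnt, sr.bytecnt and sr.wrong_if are now filled in
 */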


static inline int ip6mr_forward2_finish(struct sk_buff *skb)
{
	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return dst_output(skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi fl;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		((struct net_device_stats *)netdev_priv(vif->dev))->tx_bytes += skb->len;
		((struct net_device_stats *)netdev_priv(vif->dev))->tx_packets++;
		ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
		kfree_skb(skb);
		return 0;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl = (struct flowi) {
		.oif = vif->link,
		.nl_u = { .ip6_u =
				{ .daddr = ipv6h->daddr, }
		}
	};

	dst = ip6_route_output(&init_net, NULL, &fl);
	if (!dst)
		goto out_free;

	dst_release(skb->dst);
	skb->dst = dst;

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding on
	 * all output interfaces. Clearly, if an mrouter runs a multicast
	 * program, it should receive packets regardless of which interface
	 * the program has joined on. Otherwise the program would have to
	 * join on all interfaces. On the other hand, a multihomed host
	 * (or a router, but not an mrouter) cannot join on more than one
	 * interface, as that would result in receiving multiple copies.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

static int ip6mr_find_vif(struct net_device *dev)
{
	int ct;
	for (ct = maxvif - 1; ct >= 0; ct--) {
		if (vif6_table[ct].dev == dev)
			break;
	}
	return ct;
}

static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (vif6_table[vif].dev != skb->dev) {
		int true_vifi;

		cache->mfc_un.res.wrong_if++;
		true_vifi = ip6mr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* PIM-SM uses asserts when switching from the RPT to
		       the SPT, so we cannot check that the packet arrived
		       on an oif. That is bad, but otherwise we would need
		       to move a pretty large chunk of pimd into the
		       kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

	vif6_table[vif].pkt_in++;
	vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		ip6mr_forward2(skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		vif = ip6mr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}


static int
ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = vif6_table[c->mf6c_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);

		err = ip6mr_cache_unresolved(vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = ip6mr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}