xref: /openbmc/linux/net/ipv6/ip6mr.c (revision 5d4a2e29)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <asm/system.h>
20 #include <asm/uaccess.h>
21 #include <linux/types.h>
22 #include <linux/sched.h>
23 #include <linux/errno.h>
24 #include <linux/timer.h>
25 #include <linux/mm.h>
26 #include <linux/kernel.h>
27 #include <linux/fcntl.h>
28 #include <linux/stat.h>
29 #include <linux/socket.h>
30 #include <linux/inet.h>
31 #include <linux/netdevice.h>
32 #include <linux/inetdevice.h>
33 #include <linux/proc_fs.h>
34 #include <linux/seq_file.h>
35 #include <linux/init.h>
36 #include <linux/slab.h>
37 #include <net/protocol.h>
38 #include <linux/skbuff.h>
39 #include <net/sock.h>
40 #include <net/raw.h>
41 #include <linux/notifier.h>
42 #include <linux/if_arp.h>
43 #include <net/checksum.h>
44 #include <net/netlink.h>
45 #include <net/fib_rules.h>
46 
47 #include <net/ipv6.h>
48 #include <net/ip6_route.h>
49 #include <linux/mroute6.h>
50 #include <linux/pim.h>
51 #include <net/addrconf.h>
52 #include <linux/netfilter_ipv6.h>
53 #include <net/ip6_checksum.h>
54 
55 struct mr6_table {
56 	struct list_head	list;
57 #ifdef CONFIG_NET_NS
58 	struct net		*net;
59 #endif
60 	u32			id;
61 	struct sock		*mroute6_sk;
62 	struct timer_list	ipmr_expire_timer;
63 	struct list_head	mfc6_unres_queue;
64 	struct list_head	mfc6_cache_array[MFC6_LINES];
65 	struct mif_device	vif6_table[MAXMIFS];
66 	int			maxvif;
67 	atomic_t		cache_resolve_queue_len;
68 	int			mroute_do_assert;
69 	int			mroute_do_pim;
70 #ifdef CONFIG_IPV6_PIMSM_V2
71 	int			mroute_reg_vif_num;
72 #endif
73 };
74 
75 struct ip6mr_rule {
76 	struct fib_rule		common;
77 };
78 
79 struct ip6mr_result {
80 	struct mr6_table	*mrt;
81 };
82 
/* Big lock, protecting the vif table, mrt cache and mroute socket state.
   Note that changes are serialized via rtnl_lock.
 */
86 
87 static DEFINE_RWLOCK(mrt_lock);
88 
89 /*
90  *	Multicast router control variables
91  */
92 
93 #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
94 
95 /* Special spinlock for queue of unresolved entries */
96 static DEFINE_SPINLOCK(mfc_unres_lock);
97 
/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and is protected
   by the weak lock mrt_lock. The queue of unresolved entries is
   protected by the strong spinlock mfc_unres_lock.

   This way the data path is entirely free of exclusive locks.
 */
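
/* For illustration, the pattern used throughout this file:
 * ip6mr_mfc_add() inserts resolved entries under the write lock while the
 * data path, e.g. ip6_mr_input(), looks them up under the read lock:
 *
 *	write_lock_bh(&mrt_lock);
 *	list_add(&c->list, &mrt->mfc6_cache_array[line]);
 *	write_unlock_bh(&mrt_lock);
 *
 *	read_lock(&mrt_lock);
 *	cache = ip6mr_cache_find(mrt, &saddr, &daddr);
 *	read_unlock(&mrt_lock);
 */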
105 
106 static struct kmem_cache *mrt_cachep __read_mostly;
107 
108 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
109 static void ip6mr_free_table(struct mr6_table *mrt);
110 
111 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
112 			  struct sk_buff *skb, struct mfc6_cache *cache);
113 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
114 			      mifi_t mifi, int assert);
115 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
116 			       struct mfc6_cache *c, struct rtmsg *rtm);
117 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
118 			       struct netlink_callback *cb);
119 static void mroute_clean_tables(struct mr6_table *mrt);
120 static void ipmr_expire_process(unsigned long arg);
121 
122 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
123 #define ip6mr_for_each_table(mrt, net) \
124 	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
125 
126 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
127 {
128 	struct mr6_table *mrt;
129 
130 	ip6mr_for_each_table(mrt, net) {
131 		if (mrt->id == id)
132 			return mrt;
133 	}
134 	return NULL;
135 }
136 
137 static int ip6mr_fib_lookup(struct net *net, struct flowi *flp,
138 			    struct mr6_table **mrt)
139 {
140 	struct ip6mr_result res;
141 	struct fib_lookup_arg arg = { .result = &res, };
142 	int err;
143 
144 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flp, 0, &arg);
145 	if (err < 0)
146 		return err;
147 	*mrt = res.mrt;
148 	return 0;
149 }
150 
151 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
152 			     int flags, struct fib_lookup_arg *arg)
153 {
154 	struct ip6mr_result *res = arg->result;
155 	struct mr6_table *mrt;
156 
157 	switch (rule->action) {
158 	case FR_ACT_TO_TBL:
159 		break;
160 	case FR_ACT_UNREACHABLE:
161 		return -ENETUNREACH;
162 	case FR_ACT_PROHIBIT:
163 		return -EACCES;
164 	case FR_ACT_BLACKHOLE:
165 	default:
166 		return -EINVAL;
167 	}
168 
169 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
170 	if (mrt == NULL)
171 		return -EAGAIN;
172 	res->mrt = mrt;
173 	return 0;
174 }
175 
176 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
177 {
178 	return 1;
179 }
180 
181 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
182 	FRA_GENERIC_POLICY,
183 };
184 
185 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
186 				struct fib_rule_hdr *frh, struct nlattr **tb)
187 {
188 	return 0;
189 }
190 
191 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
192 			      struct nlattr **tb)
193 {
194 	return 1;
195 }
196 
197 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
198 			   struct fib_rule_hdr *frh)
199 {
200 	frh->dst_len = 0;
201 	frh->src_len = 0;
202 	frh->tos     = 0;
203 	return 0;
204 }
205 
206 static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
207 	.family		= RTNL_FAMILY_IP6MR,
208 	.rule_size	= sizeof(struct ip6mr_rule),
209 	.addr_size	= sizeof(struct in6_addr),
210 	.action		= ip6mr_rule_action,
211 	.match		= ip6mr_rule_match,
212 	.configure	= ip6mr_rule_configure,
213 	.compare	= ip6mr_rule_compare,
214 	.default_pref	= fib_default_rule_pref,
215 	.fill		= ip6mr_rule_fill,
216 	.nlgroup	= RTNLGRP_IPV6_RULE,
217 	.policy		= ip6mr_rule_policy,
218 	.owner		= THIS_MODULE,
219 };
220 
221 static int __net_init ip6mr_rules_init(struct net *net)
222 {
223 	struct fib_rules_ops *ops;
224 	struct mr6_table *mrt;
225 	int err;
226 
227 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
228 	if (IS_ERR(ops))
229 		return PTR_ERR(ops);
230 
231 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
232 
233 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
234 	if (mrt == NULL) {
235 		err = -ENOMEM;
236 		goto err1;
237 	}
238 
239 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
240 	if (err < 0)
241 		goto err2;
242 
243 	net->ipv6.mr6_rules_ops = ops;
244 	return 0;
245 
246 err2:
	ip6mr_free_table(mrt);
248 err1:
249 	fib_rules_unregister(ops);
250 	return err;
251 }
252 
253 static void __net_exit ip6mr_rules_exit(struct net *net)
254 {
255 	struct mr6_table *mrt, *next;
256 
257 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
258 		list_del(&mrt->list);
259 		ip6mr_free_table(mrt);
260 	}
261 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
262 }
263 #else
264 #define ip6mr_for_each_table(mrt, net) \
265 	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
266 
267 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
268 {
269 	return net->ipv6.mrt6;
270 }
271 
272 static int ip6mr_fib_lookup(struct net *net, struct flowi *flp,
273 			    struct mr6_table **mrt)
274 {
275 	*mrt = net->ipv6.mrt6;
276 	return 0;
277 }
278 
279 static int __net_init ip6mr_rules_init(struct net *net)
280 {
281 	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
282 	return net->ipv6.mrt6 ? 0 : -ENOMEM;
283 }
284 
285 static void __net_exit ip6mr_rules_exit(struct net *net)
286 {
287 	ip6mr_free_table(net->ipv6.mrt6);
288 }
289 #endif
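
/*
 * With CONFIG_IPV6_MROUTE_MULTIPLE_TABLES, a routing daemon selects its
 * table by issuing MRT6_TABLE before MRT6_INIT; ip6_mroute_setsockopt()
 * below returns -EBUSY once the socket already owns a table. A userspace
 * sketch (assumed usage), with s being the raw ICMPv6 socket:
 *
 *	u32 table = 42;
 *	int on = 1;
 *	setsockopt(s, IPPROTO_IPV6, MRT6_TABLE, &table, sizeof(table));
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &on, sizeof(on));
 */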
290 
291 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
292 {
293 	struct mr6_table *mrt;
294 	unsigned int i;
295 
296 	mrt = ip6mr_get_table(net, id);
297 	if (mrt != NULL)
298 		return mrt;
299 
300 	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
301 	if (mrt == NULL)
302 		return NULL;
303 	mrt->id = id;
304 	write_pnet(&mrt->net, net);
305 
306 	/* Forwarding cache */
307 	for (i = 0; i < MFC6_LINES; i++)
308 		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
309 
310 	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
311 
312 	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
313 		    (unsigned long)mrt);
314 
315 #ifdef CONFIG_IPV6_PIMSM_V2
316 	mrt->mroute_reg_vif_num = -1;
317 #endif
318 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
319 	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
320 #endif
321 	return mrt;
322 }
323 
324 static void ip6mr_free_table(struct mr6_table *mrt)
325 {
326 	del_timer(&mrt->ipmr_expire_timer);
327 	mroute_clean_tables(mrt);
328 	kfree(mrt);
329 }
330 
331 #ifdef CONFIG_PROC_FS
332 
333 struct ipmr_mfc_iter {
334 	struct seq_net_private p;
335 	struct mr6_table *mrt;
336 	struct list_head *cache;
337 	int ct;
338 };
339 
340 
341 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
342 					   struct ipmr_mfc_iter *it, loff_t pos)
343 {
344 	struct mr6_table *mrt = it->mrt;
345 	struct mfc6_cache *mfc;
346 
347 	read_lock(&mrt_lock);
348 	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
349 		it->cache = &mrt->mfc6_cache_array[it->ct];
350 		list_for_each_entry(mfc, it->cache, list)
351 			if (pos-- == 0)
352 				return mfc;
353 	}
354 	read_unlock(&mrt_lock);
355 
356 	spin_lock_bh(&mfc_unres_lock);
357 	it->cache = &mrt->mfc6_unres_queue;
358 	list_for_each_entry(mfc, it->cache, list)
359 		if (pos-- == 0)
360 			return mfc;
361 	spin_unlock_bh(&mfc_unres_lock);
362 
363 	it->cache = NULL;
364 	return NULL;
365 }
366 
367 /*
 *	The /proc interfaces to multicast routing:
 *	/proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
369  */
370 
371 struct ipmr_vif_iter {
372 	struct seq_net_private p;
373 	struct mr6_table *mrt;
374 	int ct;
375 };
376 
377 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
378 					    struct ipmr_vif_iter *iter,
379 					    loff_t pos)
380 {
381 	struct mr6_table *mrt = iter->mrt;
382 
383 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
384 		if (!MIF_EXISTS(mrt, iter->ct))
385 			continue;
386 		if (pos-- == 0)
387 			return &mrt->vif6_table[iter->ct];
388 	}
389 	return NULL;
390 }
391 
392 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
393 	__acquires(mrt_lock)
394 {
395 	struct ipmr_vif_iter *iter = seq->private;
396 	struct net *net = seq_file_net(seq);
397 	struct mr6_table *mrt;
398 
399 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
400 	if (mrt == NULL)
401 		return ERR_PTR(-ENOENT);
402 
403 	iter->mrt = mrt;
404 
405 	read_lock(&mrt_lock);
	return *pos ? ip6mr_vif_seq_idx(net, iter, *pos - 1)
407 		: SEQ_START_TOKEN;
408 }
409 
410 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
411 {
412 	struct ipmr_vif_iter *iter = seq->private;
413 	struct net *net = seq_file_net(seq);
414 	struct mr6_table *mrt = iter->mrt;
415 
416 	++*pos;
417 	if (v == SEQ_START_TOKEN)
418 		return ip6mr_vif_seq_idx(net, iter, 0);
419 
420 	while (++iter->ct < mrt->maxvif) {
421 		if (!MIF_EXISTS(mrt, iter->ct))
422 			continue;
423 		return &mrt->vif6_table[iter->ct];
424 	}
425 	return NULL;
426 }
427 
428 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
429 	__releases(mrt_lock)
430 {
431 	read_unlock(&mrt_lock);
432 }
433 
434 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
435 {
436 	struct ipmr_vif_iter *iter = seq->private;
437 	struct mr6_table *mrt = iter->mrt;
438 
439 	if (v == SEQ_START_TOKEN) {
440 		seq_puts(seq,
441 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
442 	} else {
443 		const struct mif_device *vif = v;
444 		const char *name = vif->dev ? vif->dev->name : "none";
445 
446 		seq_printf(seq,
447 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
448 			   vif - mrt->vif6_table,
449 			   name, vif->bytes_in, vif->pkt_in,
450 			   vif->bytes_out, vif->pkt_out,
451 			   vif->flags);
452 	}
453 	return 0;
454 }
455 
456 static const struct seq_operations ip6mr_vif_seq_ops = {
457 	.start = ip6mr_vif_seq_start,
458 	.next  = ip6mr_vif_seq_next,
459 	.stop  = ip6mr_vif_seq_stop,
460 	.show  = ip6mr_vif_seq_show,
461 };
462 
463 static int ip6mr_vif_open(struct inode *inode, struct file *file)
464 {
465 	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
466 			    sizeof(struct ipmr_vif_iter));
467 }
468 
469 static const struct file_operations ip6mr_vif_fops = {
470 	.owner	 = THIS_MODULE,
471 	.open    = ip6mr_vif_open,
472 	.read    = seq_read,
473 	.llseek  = seq_lseek,
474 	.release = seq_release_net,
475 };
476 
477 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
478 {
479 	struct ipmr_mfc_iter *it = seq->private;
480 	struct net *net = seq_file_net(seq);
481 	struct mr6_table *mrt;
482 
483 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
484 	if (mrt == NULL)
485 		return ERR_PTR(-ENOENT);
486 
487 	it->mrt = mrt;
	return *pos ? ipmr_mfc_seq_idx(net, it, *pos - 1)
489 		: SEQ_START_TOKEN;
490 }
491 
492 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
493 {
494 	struct mfc6_cache *mfc = v;
495 	struct ipmr_mfc_iter *it = seq->private;
496 	struct net *net = seq_file_net(seq);
497 	struct mr6_table *mrt = it->mrt;
498 
499 	++*pos;
500 
501 	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, it, 0);
503 
504 	if (mfc->list.next != it->cache)
505 		return list_entry(mfc->list.next, struct mfc6_cache, list);
506 
507 	if (it->cache == &mrt->mfc6_unres_queue)
508 		goto end_of_list;
509 
510 	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
511 
512 	while (++it->ct < MFC6_LINES) {
513 		it->cache = &mrt->mfc6_cache_array[it->ct];
514 		if (list_empty(it->cache))
515 			continue;
516 		return list_first_entry(it->cache, struct mfc6_cache, list);
517 	}
518 
519 	/* exhausted cache_array, show unresolved */
520 	read_unlock(&mrt_lock);
521 	it->cache = &mrt->mfc6_unres_queue;
522 	it->ct = 0;
523 
524 	spin_lock_bh(&mfc_unres_lock);
525 	if (!list_empty(it->cache))
526 		return list_first_entry(it->cache, struct mfc6_cache, list);
527 
528  end_of_list:
529 	spin_unlock_bh(&mfc_unres_lock);
530 	it->cache = NULL;
531 
532 	return NULL;
533 }
534 
535 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
536 {
537 	struct ipmr_mfc_iter *it = seq->private;
538 	struct mr6_table *mrt = it->mrt;
539 
540 	if (it->cache == &mrt->mfc6_unres_queue)
541 		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == &mrt->mfc6_cache_array[it->ct])
543 		read_unlock(&mrt_lock);
544 }
545 
546 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
547 {
548 	int n;
549 
550 	if (v == SEQ_START_TOKEN) {
551 		seq_puts(seq,
552 			 "Group                            "
553 			 "Origin                           "
554 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
555 	} else {
556 		const struct mfc6_cache *mfc = v;
557 		const struct ipmr_mfc_iter *it = seq->private;
558 		struct mr6_table *mrt = it->mrt;
559 
560 		seq_printf(seq, "%pI6 %pI6 %-3hd",
561 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
562 			   mfc->mf6c_parent);
563 
564 		if (it->cache != &mrt->mfc6_unres_queue) {
565 			seq_printf(seq, " %8lu %8lu %8lu",
566 				   mfc->mfc_un.res.pkt,
567 				   mfc->mfc_un.res.bytes,
568 				   mfc->mfc_un.res.wrong_if);
569 			for (n = mfc->mfc_un.res.minvif;
570 			     n < mfc->mfc_un.res.maxvif; n++) {
571 				if (MIF_EXISTS(mrt, n) &&
572 				    mfc->mfc_un.res.ttls[n] < 255)
573 					seq_printf(seq,
574 						   " %2d:%-3d",
575 						   n, mfc->mfc_un.res.ttls[n]);
576 			}
577 		} else {
578 			/* unresolved mfc_caches don't contain
579 			 * pkt, bytes and wrong_if values
580 			 */
581 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
582 		}
583 		seq_putc(seq, '\n');
584 	}
585 	return 0;
586 }
587 
588 static const struct seq_operations ipmr_mfc_seq_ops = {
589 	.start = ipmr_mfc_seq_start,
590 	.next  = ipmr_mfc_seq_next,
591 	.stop  = ipmr_mfc_seq_stop,
592 	.show  = ipmr_mfc_seq_show,
593 };
594 
595 static int ipmr_mfc_open(struct inode *inode, struct file *file)
596 {
597 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
598 			    sizeof(struct ipmr_mfc_iter));
599 }
600 
601 static const struct file_operations ip6mr_mfc_fops = {
602 	.owner	 = THIS_MODULE,
603 	.open    = ipmr_mfc_open,
604 	.read    = seq_read,
605 	.llseek  = seq_lseek,
606 	.release = seq_release_net,
607 };
608 #endif
609 
610 #ifdef CONFIG_IPV6_PIMSM_V2
611 
612 static int pim6_rcv(struct sk_buff *skb)
613 {
614 	struct pimreghdr *pim;
615 	struct ipv6hdr   *encap;
616 	struct net_device  *reg_dev = NULL;
617 	struct net *net = dev_net(skb->dev);
618 	struct mr6_table *mrt;
619 	struct flowi fl = {
620 		.iif	= skb->dev->ifindex,
621 		.mark	= skb->mark,
622 	};
623 	int reg_vif_num;
624 
625 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
626 		goto drop;
627 
628 	pim = (struct pimreghdr *)skb_transport_header(skb);
629 	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
630 	    (pim->flags & PIM_NULL_REGISTER) ||
631 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
632 			     sizeof(*pim), IPPROTO_PIM,
633 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
634 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
635 		goto drop;
636 
	/* check if the inner packet is destined to a multicast group */
638 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
639 				   sizeof(*pim));
640 
641 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
642 	    encap->payload_len == 0 ||
643 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
644 		goto drop;
645 
646 	if (ip6mr_fib_lookup(net, &fl, &mrt) < 0)
647 		goto drop;
648 	reg_vif_num = mrt->mroute_reg_vif_num;
649 
650 	read_lock(&mrt_lock);
651 	if (reg_vif_num >= 0)
652 		reg_dev = mrt->vif6_table[reg_vif_num].dev;
653 	if (reg_dev)
654 		dev_hold(reg_dev);
655 	read_unlock(&mrt_lock);
656 
657 	if (reg_dev == NULL)
658 		goto drop;
659 
660 	skb->mac_header = skb->network_header;
661 	skb_pull(skb, (u8 *)encap - skb->data);
662 	skb_reset_network_header(skb);
663 	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;
665 	skb->pkt_type = PACKET_HOST;
666 
667 	skb_tunnel_rx(skb, reg_dev);
668 
669 	netif_rx(skb);
670 	dev_put(reg_dev);
671 	return 0;
672  drop:
673 	kfree_skb(skb);
674 	return 0;
675 }
676 
677 static const struct inet6_protocol pim6_protocol = {
678 	.handler	=	pim6_rcv,
679 };
680 
681 /* Service routines creating virtual interfaces: PIMREG */
682 
683 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
684 				      struct net_device *dev)
685 {
686 	struct net *net = dev_net(dev);
687 	struct mr6_table *mrt;
688 	struct flowi fl = {
689 		.oif		= dev->ifindex,
690 		.iif		= skb->skb_iif,
691 		.mark		= skb->mark,
692 	};
693 	int err;
694 
695 	err = ip6mr_fib_lookup(net, &fl, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}
698 
699 	read_lock(&mrt_lock);
700 	dev->stats.tx_bytes += skb->len;
701 	dev->stats.tx_packets++;
702 	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
703 	read_unlock(&mrt_lock);
704 	kfree_skb(skb);
705 	return NETDEV_TX_OK;
706 }
707 
708 static const struct net_device_ops reg_vif_netdev_ops = {
709 	.ndo_start_xmit	= reg_vif_xmit,
710 };
711 
712 static void reg_vif_setup(struct net_device *dev)
713 {
714 	dev->type		= ARPHRD_PIMREG;
715 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
716 	dev->flags		= IFF_NOARP;
717 	dev->netdev_ops		= &reg_vif_netdev_ops;
718 	dev->destructor		= free_netdev;
719 	dev->features		|= NETIF_F_NETNS_LOCAL;
720 }
721 
722 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
723 {
724 	struct net_device *dev;
725 	char name[IFNAMSIZ];
726 
	if (mrt->id == RT6_TABLE_DFLT)
		snprintf(name, sizeof(name), "pim6reg");
	else
		snprintf(name, sizeof(name), "pim6reg%u", mrt->id);
731 
732 	dev = alloc_netdev(0, name, reg_vif_setup);
733 	if (dev == NULL)
734 		return NULL;
735 
736 	dev_net_set(dev, net);
737 
738 	if (register_netdevice(dev)) {
739 		free_netdev(dev);
740 		return NULL;
741 	}
742 	dev->iflink = 0;
743 
744 	if (dev_open(dev))
745 		goto failure;
746 
747 	dev_hold(dev);
748 	return dev;
749 
750 failure:
	/* allow register_netdevice() to complete before unregistering. */
752 	rtnl_unlock();
753 	rtnl_lock();
754 
755 	unregister_netdevice(dev);
756 	return NULL;
757 }
758 #endif
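
/*
 * Once mif6_add() registers a MIFF_REGISTER vif, the "pim6reg" (or
 * "pim6reg<table-id>") device created above is visible to ordinary tools;
 * expect something like the following (illustrative output, the mtu being
 * the one set in reg_vif_setup()):
 *
 *	$ ip link show pim6reg
 *	4: pim6reg: <NOARP,UP,LOWER_UP> mtu 1452 ...
 */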
759 
760 /*
761  *	Delete a VIF entry
762  */
763 
764 static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
765 {
766 	struct mif_device *v;
767 	struct net_device *dev;
768 	struct inet6_dev *in6_dev;
769 
770 	if (vifi < 0 || vifi >= mrt->maxvif)
771 		return -EADDRNOTAVAIL;
772 
773 	v = &mrt->vif6_table[vifi];
774 
775 	write_lock_bh(&mrt_lock);
776 	dev = v->dev;
777 	v->dev = NULL;
778 
779 	if (!dev) {
780 		write_unlock_bh(&mrt_lock);
781 		return -EADDRNOTAVAIL;
782 	}
783 
784 #ifdef CONFIG_IPV6_PIMSM_V2
785 	if (vifi == mrt->mroute_reg_vif_num)
786 		mrt->mroute_reg_vif_num = -1;
787 #endif
788 
789 	if (vifi + 1 == mrt->maxvif) {
790 		int tmp;
791 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
792 			if (MIF_EXISTS(mrt, tmp))
793 				break;
794 		}
795 		mrt->maxvif = tmp + 1;
796 	}
797 
798 	write_unlock_bh(&mrt_lock);
799 
800 	dev_set_allmulti(dev, -1);
801 
802 	in6_dev = __in6_dev_get(dev);
803 	if (in6_dev)
804 		in6_dev->cnf.mc_forwarding--;
805 
806 	if (v->flags & MIFF_REGISTER)
807 		unregister_netdevice_queue(dev, head);
808 
809 	dev_put(dev);
810 	return 0;
811 }
812 
813 static inline void ip6mr_cache_free(struct mfc6_cache *c)
814 {
815 	kmem_cache_free(mrt_cachep, c);
816 }
817 
818 /* Destroy an unresolved cache entry, killing queued skbs
819    and reporting error to netlink readers.
820  */
821 
822 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
823 {
824 	struct net *net = read_pnet(&mrt->net);
825 	struct sk_buff *skb;
826 
827 	atomic_dec(&mrt->cache_resolve_queue_len);
828 
	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
830 		if (ipv6_hdr(skb)->version == 0) {
831 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
832 			nlh->nlmsg_type = NLMSG_ERROR;
833 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
834 			skb_trim(skb, nlh->nlmsg_len);
835 			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
836 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
837 		} else
838 			kfree_skb(skb);
839 	}
840 
841 	ip6mr_cache_free(c);
842 }
843 
844 
/* Timer handler for the unresolved queue. */
846 
847 static void ipmr_do_expire_process(struct mr6_table *mrt)
848 {
849 	unsigned long now = jiffies;
850 	unsigned long expires = 10 * HZ;
851 	struct mfc6_cache *c, *next;
852 
853 	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
854 		if (time_after(c->mfc_un.unres.expires, now)) {
855 			/* not yet... */
856 			unsigned long interval = c->mfc_un.unres.expires - now;
857 			if (interval < expires)
858 				expires = interval;
859 			continue;
860 		}
861 
862 		list_del(&c->list);
863 		ip6mr_destroy_unres(mrt, c);
864 	}
865 
866 	if (!list_empty(&mrt->mfc6_unres_queue))
867 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
868 }
869 
870 static void ipmr_expire_process(unsigned long arg)
871 {
872 	struct mr6_table *mrt = (struct mr6_table *)arg;
873 
874 	if (!spin_trylock(&mfc_unres_lock)) {
875 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
876 		return;
877 	}
878 
879 	if (!list_empty(&mrt->mfc6_unres_queue))
880 		ipmr_do_expire_process(mrt);
881 
882 	spin_unlock(&mfc_unres_lock);
883 }
884 
885 /* Fill oifs list. It is called under write locked mrt_lock. */
886 
887 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
888 				    unsigned char *ttls)
889 {
890 	int vifi;
891 
892 	cache->mfc_un.res.minvif = MAXMIFS;
893 	cache->mfc_un.res.maxvif = 0;
894 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
895 
896 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
897 		if (MIF_EXISTS(mrt, vifi) &&
898 		    ttls[vifi] && ttls[vifi] < 255) {
899 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
900 			if (cache->mfc_un.res.minvif > vifi)
901 				cache->mfc_un.res.minvif = vifi;
902 			if (cache->mfc_un.res.maxvif <= vifi)
903 				cache->mfc_un.res.maxvif = vifi + 1;
904 		}
905 	}
906 }
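
/*
 * A worked example of the above: ip6mr_mfc_add() passes ttls[i] == 1 for
 * every mif set in mf6cc_ifset and 255 otherwise. With only ttls[1] and
 * ttls[3] set to 1, this yields minvif == 1 and maxvif == 4, so the
 * forwarding loop in ip6_mr_forward() scans mifs 1..3 only.
 */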
907 
908 static int mif6_add(struct net *net, struct mr6_table *mrt,
909 		    struct mif6ctl *vifc, int mrtsock)
910 {
911 	int vifi = vifc->mif6c_mifi;
912 	struct mif_device *v = &mrt->vif6_table[vifi];
913 	struct net_device *dev;
914 	struct inet6_dev *in6_dev;
915 	int err;
916 
	/* Is the vif busy? */
918 	if (MIF_EXISTS(mrt, vifi))
919 		return -EADDRINUSE;
920 
921 	switch (vifc->mif6c_flags) {
922 #ifdef CONFIG_IPV6_PIMSM_V2
923 	case MIFF_REGISTER:
924 		/*
925 		 * Special Purpose VIF in PIM
926 		 * All the packets will be sent to the daemon
927 		 */
928 		if (mrt->mroute_reg_vif_num >= 0)
929 			return -EADDRINUSE;
930 		dev = ip6mr_reg_vif(net, mrt);
931 		if (!dev)
932 			return -ENOBUFS;
933 		err = dev_set_allmulti(dev, 1);
934 		if (err) {
935 			unregister_netdevice(dev);
936 			dev_put(dev);
937 			return err;
938 		}
939 		break;
940 #endif
941 	case 0:
942 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
943 		if (!dev)
944 			return -EADDRNOTAVAIL;
945 		err = dev_set_allmulti(dev, 1);
946 		if (err) {
947 			dev_put(dev);
948 			return err;
949 		}
950 		break;
951 	default:
952 		return -EINVAL;
953 	}
954 
955 	in6_dev = __in6_dev_get(dev);
956 	if (in6_dev)
957 		in6_dev->cnf.mc_forwarding++;
958 
959 	/*
960 	 *	Fill in the VIF structures
961 	 */
962 	v->rate_limit = vifc->vifc_rate_limit;
963 	v->flags = vifc->mif6c_flags;
964 	if (!mrtsock)
965 		v->flags |= VIFF_STATIC;
966 	v->threshold = vifc->vifc_threshold;
967 	v->bytes_in = 0;
968 	v->bytes_out = 0;
969 	v->pkt_in = 0;
970 	v->pkt_out = 0;
971 	v->link = dev->ifindex;
972 	if (v->flags & MIFF_REGISTER)
973 		v->link = dev->iflink;
974 
975 	/* And finish update writing critical data */
976 	write_lock_bh(&mrt_lock);
977 	v->dev = dev;
978 #ifdef CONFIG_IPV6_PIMSM_V2
979 	if (v->flags & MIFF_REGISTER)
980 		mrt->mroute_reg_vif_num = vifi;
981 #endif
982 	if (vifi + 1 > mrt->maxvif)
983 		mrt->maxvif = vifi + 1;
984 	write_unlock_bh(&mrt_lock);
985 	return 0;
986 }
987 
988 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
989 					   struct in6_addr *origin,
990 					   struct in6_addr *mcastgrp)
991 {
992 	int line = MFC6_HASH(mcastgrp, origin);
993 	struct mfc6_cache *c;
994 
995 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
996 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
997 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
998 			return c;
999 	}
1000 	return NULL;
1001 }
1002 
1003 /*
1004  *	Allocate a multicast cache entry
1005  */
1006 static struct mfc6_cache *ip6mr_cache_alloc(void)
1007 {
1008 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1009 	if (c == NULL)
1010 		return NULL;
1011 	c->mfc_un.res.minvif = MAXMIFS;
1012 	return c;
1013 }
1014 
1015 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1016 {
1017 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1018 	if (c == NULL)
1019 		return NULL;
1020 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
1021 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
1022 	return c;
1023 }
1024 
1025 /*
 *	A cache entry has gone from the unresolved queue to the resolved state
1027  */
1028 
1029 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1030 				struct mfc6_cache *uc, struct mfc6_cache *c)
1031 {
1032 	struct sk_buff *skb;
1033 
1034 	/*
1035 	 *	Play the pending entries through our router
1036 	 */
1037 
	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1039 		if (ipv6_hdr(skb)->version == 0) {
1040 			int err;
1041 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1042 
1043 			if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
1044 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1045 			} else {
1046 				nlh->nlmsg_type = NLMSG_ERROR;
1047 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
1048 				skb_trim(skb, nlh->nlmsg_len);
1049 				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
1050 			}
1051 			err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
1052 		} else
1053 			ip6_mr_forward(net, mrt, skb, c);
1054 	}
1055 }
1056 
1057 /*
1058  *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1059  *	expects the following bizarre scheme.
1060  *
1061  *	Called under mrt_lock.
1062  */
1063 
1064 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1065 			      mifi_t mifi, int assert)
1066 {
1067 	struct sk_buff *skb;
1068 	struct mrt6msg *msg;
1069 	int ret;
1070 
1071 #ifdef CONFIG_IPV6_PIMSM_V2
1072 	if (assert == MRT6MSG_WHOLEPKT)
1073 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1074 						+sizeof(*msg));
1075 	else
1076 #endif
1077 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1078 
1079 	if (!skb)
1080 		return -ENOBUFS;
1081 
1082 	/* I suppose that internal messages
1083 	 * do not require checksums */
1084 
1085 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1086 
1087 #ifdef CONFIG_IPV6_PIMSM_V2
1088 	if (assert == MRT6MSG_WHOLEPKT) {
1089 		/* Ugly, but we have no choice with this interface.
1090 		   Duplicate old header, fix length etc.
1091 		   And all this only to mangle msg->im6_msgtype and
1092 		   to set msg->im6_mbz to "mbz" :-)
1093 		 */
1094 		skb_push(skb, -skb_network_offset(pkt));
1095 
1096 		skb_push(skb, sizeof(*msg));
1097 		skb_reset_transport_header(skb);
1098 		msg = (struct mrt6msg *)skb_transport_header(skb);
1099 		msg->im6_mbz = 0;
1100 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1101 		msg->im6_mif = mrt->mroute_reg_vif_num;
1102 		msg->im6_pad = 0;
1103 		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
1104 		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
1105 
1106 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1107 	} else
1108 #endif
1109 	{
1110 	/*
1111 	 *	Copy the IP header
1112 	 */
1113 
1114 	skb_put(skb, sizeof(struct ipv6hdr));
1115 	skb_reset_network_header(skb);
1116 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1117 
1118 	/*
1119 	 *	Add our header
1120 	 */
1121 	skb_put(skb, sizeof(*msg));
1122 	skb_reset_transport_header(skb);
1123 	msg = (struct mrt6msg *)skb_transport_header(skb);
1124 
1125 	msg->im6_mbz = 0;
1126 	msg->im6_msgtype = assert;
1127 	msg->im6_mif = mifi;
1128 	msg->im6_pad = 0;
1129 	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
1130 	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
1131 
1132 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1133 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1134 	}
1135 
1136 	if (mrt->mroute6_sk == NULL) {
1137 		kfree_skb(skb);
1138 		return -EINVAL;
1139 	}
1140 
1141 	/*
1142 	 *	Deliver to user space multicast routing algorithms
1143 	 */
1144 	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1145 	if (ret < 0) {
1146 		if (net_ratelimit())
1147 			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
1148 		kfree_skb(skb);
1149 	}
1150 
1151 	return ret;
1152 }
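
/*
 * On the userspace side, the daemon tells these upcalls apart from
 * ordinary ICMPv6 traffic by the zero im6_mbz byte and dispatches on
 * im6_msgtype. A rough sketch of such a reader (an assumption, not code
 * taken from pim6sd):
 *
 *	struct mrt6msg *m = (struct mrt6msg *)buf;
 *	if (m->im6_mbz == 0 && m->im6_msgtype == MRT6MSG_NOCACHE)
 *		resolve_and_add_mfc(&m->im6_src, &m->im6_dst, m->im6_mif);
 *
 * where resolve_and_add_mfc() is a hypothetical helper that ends in an
 * MRT6_ADD_MFC setsockopt.
 */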
1153 
1154 /*
 *	Queue a packet for resolution. This creates a new unresolved cache
 *	entry if needed, or appends to an existing one.
1156  */
1157 
1158 static int
1159 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1160 {
1161 	bool found = false;
1162 	int err;
1163 	struct mfc6_cache *c;
1164 
1165 	spin_lock_bh(&mfc_unres_lock);
1166 	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1167 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1168 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1169 			found = true;
1170 			break;
1171 		}
1172 	}
1173 
1174 	if (!found) {
1175 		/*
1176 		 *	Create a new entry if allowable
1177 		 */
1178 
1179 		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1180 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
1181 			spin_unlock_bh(&mfc_unres_lock);
1182 
1183 			kfree_skb(skb);
1184 			return -ENOBUFS;
1185 		}
1186 
1187 		/*
1188 		 *	Fill in the new cache entry
1189 		 */
1190 		c->mf6c_parent = -1;
1191 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1192 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1193 
1194 		/*
1195 		 *	Reflect first query at pim6sd
1196 		 */
1197 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1198 		if (err < 0) {
1199 			/* If the report failed throw the cache entry
1200 			   out - Brad Parker
1201 			 */
1202 			spin_unlock_bh(&mfc_unres_lock);
1203 
1204 			ip6mr_cache_free(c);
1205 			kfree_skb(skb);
1206 			return err;
1207 		}
1208 
1209 		atomic_inc(&mrt->cache_resolve_queue_len);
1210 		list_add(&c->list, &mrt->mfc6_unres_queue);
1211 
1212 		ipmr_do_expire_process(mrt);
1213 	}
1214 
1215 	/*
1216 	 *	See if we can append the packet
1217 	 */
1218 	if (c->mfc_un.unres.unresolved.qlen > 3) {
1219 		kfree_skb(skb);
1220 		err = -ENOBUFS;
1221 	} else {
1222 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1223 		err = 0;
1224 	}
1225 
1226 	spin_unlock_bh(&mfc_unres_lock);
1227 	return err;
1228 }
1229 
1230 /*
1231  *	MFC6 cache manipulation by user space
1232  */
1233 
1234 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
1235 {
1236 	int line;
1237 	struct mfc6_cache *c, *next;
1238 
1239 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1240 
1241 	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1242 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1243 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1244 			write_lock_bh(&mrt_lock);
1245 			list_del(&c->list);
1246 			write_unlock_bh(&mrt_lock);
1247 
1248 			ip6mr_cache_free(c);
1249 			return 0;
1250 		}
1251 	}
1252 	return -ENOENT;
1253 }
1254 
1255 static int ip6mr_device_event(struct notifier_block *this,
1256 			      unsigned long event, void *ptr)
1257 {
1258 	struct net_device *dev = ptr;
1259 	struct net *net = dev_net(dev);
1260 	struct mr6_table *mrt;
1261 	struct mif_device *v;
1262 	int ct;
1263 	LIST_HEAD(list);
1264 
1265 	if (event != NETDEV_UNREGISTER)
1266 		return NOTIFY_DONE;
1267 
1268 	ip6mr_for_each_table(mrt, net) {
1269 		v = &mrt->vif6_table[0];
1270 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1271 			if (v->dev == dev)
1272 				mif6_delete(mrt, ct, &list);
1273 		}
1274 	}
1275 	unregister_netdevice_many(&list);
1276 
1277 	return NOTIFY_DONE;
1278 }
1279 
1280 static struct notifier_block ip6_mr_notifier = {
1281 	.notifier_call = ip6mr_device_event
1282 };
1283 
1284 /*
1285  *	Setup for IP multicast routing
1286  */
1287 
1288 static int __net_init ip6mr_net_init(struct net *net)
1289 {
1290 	int err;
1291 
1292 	err = ip6mr_rules_init(net);
1293 	if (err < 0)
1294 		goto fail;
1295 
1296 #ifdef CONFIG_PROC_FS
1297 	err = -ENOMEM;
1298 	if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1299 		goto proc_vif_fail;
1300 	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1301 		goto proc_cache_fail;
1302 #endif
1303 
1304 	return 0;
1305 
1306 #ifdef CONFIG_PROC_FS
1307 proc_cache_fail:
1308 	proc_net_remove(net, "ip6_mr_vif");
1309 proc_vif_fail:
1310 	ip6mr_rules_exit(net);
1311 #endif
1312 fail:
1313 	return err;
1314 }
1315 
1316 static void __net_exit ip6mr_net_exit(struct net *net)
1317 {
1318 #ifdef CONFIG_PROC_FS
1319 	proc_net_remove(net, "ip6_mr_cache");
1320 	proc_net_remove(net, "ip6_mr_vif");
1321 #endif
1322 	ip6mr_rules_exit(net);
1323 }
1324 
1325 static struct pernet_operations ip6mr_net_ops = {
1326 	.init = ip6mr_net_init,
1327 	.exit = ip6mr_net_exit,
1328 };
1329 
1330 int __init ip6_mr_init(void)
1331 {
1332 	int err;
1333 
1334 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1335 				       sizeof(struct mfc6_cache),
1336 				       0, SLAB_HWCACHE_ALIGN,
1337 				       NULL);
1338 	if (!mrt_cachep)
1339 		return -ENOMEM;
1340 
1341 	err = register_pernet_subsys(&ip6mr_net_ops);
1342 	if (err)
1343 		goto reg_pernet_fail;
1344 
1345 	err = register_netdevice_notifier(&ip6_mr_notifier);
1346 	if (err)
1347 		goto reg_notif_fail;
1348 #ifdef CONFIG_IPV6_PIMSM_V2
1349 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1350 		printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
1351 		err = -EAGAIN;
1352 		goto add_proto_fail;
1353 	}
1354 #endif
1355 	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, ip6mr_rtm_dumproute);
1356 	return 0;
1357 #ifdef CONFIG_IPV6_PIMSM_V2
1358 add_proto_fail:
1359 	unregister_netdevice_notifier(&ip6_mr_notifier);
1360 #endif
1361 reg_notif_fail:
1362 	unregister_pernet_subsys(&ip6mr_net_ops);
1363 reg_pernet_fail:
1364 	kmem_cache_destroy(mrt_cachep);
1365 	return err;
1366 }
1367 
1368 void ip6_mr_cleanup(void)
1369 {
1370 	unregister_netdevice_notifier(&ip6_mr_notifier);
1371 	unregister_pernet_subsys(&ip6mr_net_ops);
1372 	kmem_cache_destroy(mrt_cachep);
1373 }
1374 
1375 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1376 			 struct mf6cctl *mfc, int mrtsock)
1377 {
1378 	bool found = false;
1379 	int line;
1380 	struct mfc6_cache *uc, *c;
1381 	unsigned char ttls[MAXMIFS];
1382 	int i;
1383 
1384 	if (mfc->mf6cc_parent >= MAXMIFS)
1385 		return -ENFILE;
1386 
1387 	memset(ttls, 255, MAXMIFS);
1388 	for (i = 0; i < MAXMIFS; i++) {
1389 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1390 			ttls[i] = 1;
1391 
1392 	}
1393 
1394 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1395 
1396 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1397 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1398 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1399 			found = true;
1400 			break;
1401 		}
1402 	}
1403 
1404 	if (found) {
1405 		write_lock_bh(&mrt_lock);
1406 		c->mf6c_parent = mfc->mf6cc_parent;
1407 		ip6mr_update_thresholds(mrt, c, ttls);
1408 		if (!mrtsock)
1409 			c->mfc_flags |= MFC_STATIC;
1410 		write_unlock_bh(&mrt_lock);
1411 		return 0;
1412 	}
1413 
1414 	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1415 		return -EINVAL;
1416 
1417 	c = ip6mr_cache_alloc();
1418 	if (c == NULL)
1419 		return -ENOMEM;
1420 
1421 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1422 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1423 	c->mf6c_parent = mfc->mf6cc_parent;
1424 	ip6mr_update_thresholds(mrt, c, ttls);
1425 	if (!mrtsock)
1426 		c->mfc_flags |= MFC_STATIC;
1427 
1428 	write_lock_bh(&mrt_lock);
1429 	list_add(&c->list, &mrt->mfc6_cache_array[line]);
1430 	write_unlock_bh(&mrt_lock);
1431 
1432 	/*
	 *	Check to see if this resolves a queued entry. If so we
	 *	need to send the queued frames on and tidy up.
1435 	 */
1436 	found = false;
1437 	spin_lock_bh(&mfc_unres_lock);
1438 	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1439 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1440 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1441 			list_del(&uc->list);
1442 			atomic_dec(&mrt->cache_resolve_queue_len);
1443 			found = true;
1444 			break;
1445 		}
1446 	}
1447 	if (list_empty(&mrt->mfc6_unres_queue))
1448 		del_timer(&mrt->ipmr_expire_timer);
1449 	spin_unlock_bh(&mfc_unres_lock);
1450 
1451 	if (found) {
1452 		ip6mr_cache_resolve(net, mrt, uc, c);
1453 		ip6mr_cache_free(uc);
1454 	}
1455 	return 0;
1456 }
1457 
1458 /*
1459  *	Close the multicast socket, and clear the vif tables etc
1460  */
1461 
1462 static void mroute_clean_tables(struct mr6_table *mrt)
1463 {
1464 	int i;
1465 	LIST_HEAD(list);
1466 	struct mfc6_cache *c, *next;
1467 
1468 	/*
1469 	 *	Shut down all active vif entries
1470 	 */
1471 	for (i = 0; i < mrt->maxvif; i++) {
1472 		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1473 			mif6_delete(mrt, i, &list);
1474 	}
1475 	unregister_netdevice_many(&list);
1476 
1477 	/*
1478 	 *	Wipe the cache
1479 	 */
1480 	for (i = 0; i < MFC6_LINES; i++) {
1481 		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1482 			if (c->mfc_flags & MFC_STATIC)
1483 				continue;
1484 			write_lock_bh(&mrt_lock);
1485 			list_del(&c->list);
1486 			write_unlock_bh(&mrt_lock);
1487 
1488 			ip6mr_cache_free(c);
1489 		}
1490 	}
1491 
1492 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1493 		spin_lock_bh(&mfc_unres_lock);
1494 		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1495 			list_del(&c->list);
1496 			ip6mr_destroy_unres(mrt, c);
1497 		}
1498 		spin_unlock_bh(&mfc_unres_lock);
1499 	}
1500 }
1501 
1502 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1503 {
1504 	int err = 0;
1505 	struct net *net = sock_net(sk);
1506 
1507 	rtnl_lock();
1508 	write_lock_bh(&mrt_lock);
1509 	if (likely(mrt->mroute6_sk == NULL)) {
1510 		mrt->mroute6_sk = sk;
1511 		net->ipv6.devconf_all->mc_forwarding++;
	} else
		err = -EADDRINUSE;
1515 	write_unlock_bh(&mrt_lock);
1516 
1517 	rtnl_unlock();
1518 
1519 	return err;
1520 }
1521 
1522 int ip6mr_sk_done(struct sock *sk)
1523 {
1524 	int err = -EACCES;
1525 	struct net *net = sock_net(sk);
1526 	struct mr6_table *mrt;
1527 
1528 	rtnl_lock();
1529 	ip6mr_for_each_table(mrt, net) {
1530 		if (sk == mrt->mroute6_sk) {
1531 			write_lock_bh(&mrt_lock);
1532 			mrt->mroute6_sk = NULL;
1533 			net->ipv6.devconf_all->mc_forwarding--;
1534 			write_unlock_bh(&mrt_lock);
1535 
1536 			mroute_clean_tables(mrt);
1537 			err = 0;
1538 			break;
1539 		}
1540 	}
1541 	rtnl_unlock();
1542 
1543 	return err;
1544 }
1545 
1546 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1547 {
1548 	struct mr6_table *mrt;
1549 	struct flowi fl = {
1550 		.iif	= skb->skb_iif,
1551 		.oif	= skb->dev->ifindex,
1552 		.mark	= skb->mark,
1553 	};
1554 
1555 	if (ip6mr_fib_lookup(net, &fl, &mrt) < 0)
1556 		return NULL;
1557 
1558 	return mrt->mroute6_sk;
1559 }
1560 
1561 /*
1562  *	Socket options and virtual interface manipulation. The whole
1563  *	virtual interface system is a complete heap, but unfortunately
1564  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1565  *	MOSPF/PIM router set up we can clean this up.
1566  */
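
/*
 * A minimal MRT6_INIT/MRT6_ADD_MIF/MRT6_ADD_MFC sequence, sketched from
 * the userspace side (assumed usage; the structures and the IF_SET()
 * helper come from linux/mroute6.h):
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int on = 1;
 *	struct mif6ctl mc = { .mif6c_mifi = 0, .mif6c_pifi = ifindex };
 *	struct mf6cctl fc = { .mf6cc_parent = 0 };
 *
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &on, sizeof(on));
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mc, sizeof(mc));
 *	(add further mifs, fill fc.mf6cc_origin and fc.mf6cc_mcastgrp...)
 *	IF_SET(1, &fc.mf6cc_ifset);
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MFC, &fc, sizeof(fc));
 */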
1567 
1568 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1569 {
1570 	int ret;
1571 	struct mif6ctl vif;
1572 	struct mf6cctl mfc;
1573 	mifi_t mifi;
1574 	struct net *net = sock_net(sk);
1575 	struct mr6_table *mrt;
1576 
1577 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1578 	if (mrt == NULL)
1579 		return -ENOENT;
1580 
1581 	if (optname != MRT6_INIT) {
1582 		if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
1583 			return -EACCES;
1584 	}
1585 
1586 	switch (optname) {
1587 	case MRT6_INIT:
1588 		if (sk->sk_type != SOCK_RAW ||
1589 		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1590 			return -EOPNOTSUPP;
1591 		if (optlen < sizeof(int))
1592 			return -EINVAL;
1593 
1594 		return ip6mr_sk_init(mrt, sk);
1595 
1596 	case MRT6_DONE:
1597 		return ip6mr_sk_done(sk);
1598 
1599 	case MRT6_ADD_MIF:
1600 		if (optlen < sizeof(vif))
1601 			return -EINVAL;
1602 		if (copy_from_user(&vif, optval, sizeof(vif)))
1603 			return -EFAULT;
1604 		if (vif.mif6c_mifi >= MAXMIFS)
1605 			return -ENFILE;
1606 		rtnl_lock();
1607 		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1608 		rtnl_unlock();
1609 		return ret;
1610 
1611 	case MRT6_DEL_MIF:
1612 		if (optlen < sizeof(mifi_t))
1613 			return -EINVAL;
1614 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1615 			return -EFAULT;
1616 		rtnl_lock();
1617 		ret = mif6_delete(mrt, mifi, NULL);
1618 		rtnl_unlock();
1619 		return ret;
1620 
1621 	/*
1622 	 *	Manipulate the forwarding caches. These live
1623 	 *	in a sort of kernel/user symbiosis.
1624 	 */
1625 	case MRT6_ADD_MFC:
1626 	case MRT6_DEL_MFC:
1627 		if (optlen < sizeof(mfc))
1628 			return -EINVAL;
1629 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1630 			return -EFAULT;
1631 		rtnl_lock();
1632 		if (optname == MRT6_DEL_MFC)
1633 			ret = ip6mr_mfc_delete(mrt, &mfc);
1634 		else
1635 			ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
1636 		rtnl_unlock();
1637 		return ret;
1638 
1639 	/*
	 *	Control PIM assert (activating PIM also activates assert)
1641 	 */
1642 	case MRT6_ASSERT:
1643 	{
1644 		int v;
1645 		if (get_user(v, (int __user *)optval))
1646 			return -EFAULT;
1647 		mrt->mroute_do_assert = !!v;
1648 		return 0;
1649 	}
1650 
1651 #ifdef CONFIG_IPV6_PIMSM_V2
1652 	case MRT6_PIM:
1653 	{
1654 		int v;
1655 		if (get_user(v, (int __user *)optval))
1656 			return -EFAULT;
1657 		v = !!v;
1658 		rtnl_lock();
1659 		ret = 0;
1660 		if (v != mrt->mroute_do_pim) {
1661 			mrt->mroute_do_pim = v;
1662 			mrt->mroute_do_assert = v;
1663 		}
1664 		rtnl_unlock();
1665 		return ret;
1666 	}
1667 
1668 #endif
1669 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1670 	case MRT6_TABLE:
1671 	{
1672 		u32 v;
1673 
1674 		if (optlen != sizeof(u32))
1675 			return -EINVAL;
1676 		if (get_user(v, (u32 __user *)optval))
1677 			return -EFAULT;
1678 		if (sk == mrt->mroute6_sk)
1679 			return -EBUSY;
1680 
1681 		rtnl_lock();
1682 		ret = 0;
		if (!ip6mr_new_table(net, v))
			ret = -ENOMEM;
		else
			raw6_sk(sk)->ip6mr_table = v;
1686 		rtnl_unlock();
1687 		return ret;
1688 	}
1689 #endif
1690 	/*
1691 	 *	Spurious command, or MRT6_VERSION which you cannot
1692 	 *	set.
1693 	 */
1694 	default:
1695 		return -ENOPROTOOPT;
1696 	}
1697 }
1698 
1699 /*
 *	Getsockopt support for the multicast routing system.
1701  */
1702 
1703 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1704 			  int __user *optlen)
1705 {
1706 	int olr;
1707 	int val;
1708 	struct net *net = sock_net(sk);
1709 	struct mr6_table *mrt;
1710 
1711 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1712 	if (mrt == NULL)
1713 		return -ENOENT;
1714 
1715 	switch (optname) {
1716 	case MRT6_VERSION:
1717 		val = 0x0305;
1718 		break;
1719 #ifdef CONFIG_IPV6_PIMSM_V2
1720 	case MRT6_PIM:
1721 		val = mrt->mroute_do_pim;
1722 		break;
1723 #endif
1724 	case MRT6_ASSERT:
1725 		val = mrt->mroute_do_assert;
1726 		break;
1727 	default:
1728 		return -ENOPROTOOPT;
1729 	}
1730 
1731 	if (get_user(olr, optlen))
1732 		return -EFAULT;
1733 
1734 	olr = min_t(int, olr, sizeof(int));
1735 	if (olr < 0)
1736 		return -EINVAL;
1737 
1738 	if (put_user(olr, optlen))
1739 		return -EFAULT;
1740 	if (copy_to_user(optval, &val, olr))
1741 		return -EFAULT;
1742 	return 0;
1743 }
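
/*
 * For example, querying the API version (0x0305 above) from userspace
 * would look like this (sketch of assumed usage):
 *
 *	int ver = 0;
 *	socklen_t len = sizeof(ver);
 *	getsockopt(s, IPPROTO_IPV6, MRT6_VERSION, &ver, &len);
 */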
1744 
1745 /*
 *	The IPv6 multicast ioctl support routines.
1747  */
1748 
1749 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1750 {
1751 	struct sioc_sg_req6 sr;
1752 	struct sioc_mif_req6 vr;
1753 	struct mif_device *vif;
1754 	struct mfc6_cache *c;
1755 	struct net *net = sock_net(sk);
1756 	struct mr6_table *mrt;
1757 
1758 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1759 	if (mrt == NULL)
1760 		return -ENOENT;
1761 
1762 	switch (cmd) {
1763 	case SIOCGETMIFCNT_IN6:
1764 		if (copy_from_user(&vr, arg, sizeof(vr)))
1765 			return -EFAULT;
1766 		if (vr.mifi >= mrt->maxvif)
1767 			return -EINVAL;
1768 		read_lock(&mrt_lock);
1769 		vif = &mrt->vif6_table[vr.mifi];
1770 		if (MIF_EXISTS(mrt, vr.mifi)) {
1771 			vr.icount = vif->pkt_in;
1772 			vr.ocount = vif->pkt_out;
1773 			vr.ibytes = vif->bytes_in;
1774 			vr.obytes = vif->bytes_out;
1775 			read_unlock(&mrt_lock);
1776 
1777 			if (copy_to_user(arg, &vr, sizeof(vr)))
1778 				return -EFAULT;
1779 			return 0;
1780 		}
1781 		read_unlock(&mrt_lock);
1782 		return -EADDRNOTAVAIL;
1783 	case SIOCGETSGCNT_IN6:
1784 		if (copy_from_user(&sr, arg, sizeof(sr)))
1785 			return -EFAULT;
1786 
1787 		read_lock(&mrt_lock);
1788 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1789 		if (c) {
1790 			sr.pktcnt = c->mfc_un.res.pkt;
1791 			sr.bytecnt = c->mfc_un.res.bytes;
1792 			sr.wrong_if = c->mfc_un.res.wrong_if;
1793 			read_unlock(&mrt_lock);
1794 
1795 			if (copy_to_user(arg, &sr, sizeof(sr)))
1796 				return -EFAULT;
1797 			return 0;
1798 		}
1799 		read_unlock(&mrt_lock);
1800 		return -EADDRNOTAVAIL;
1801 	default:
1802 		return -ENOIOCTLCMD;
1803 	}
1804 }
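
/*
 * Per-mif and per-flow counters are thus available without /proc; a
 * userspace sketch (assumed usage) of SIOCGETMIFCNT_IN6:
 *
 *	struct sioc_mif_req6 vr = { .mifi = 0 };
 *	if (ioctl(s, SIOCGETMIFCNT_IN6, &vr) == 0)
 *		printf("%lu pkts in, %lu pkts out\n", vr.icount, vr.ocount);
 */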
1805 
1806 
1807 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1808 {
1809 	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1810 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1811 	return dst_output(skb);
1812 }
1813 
1814 /*
1815  *	Processing handlers for ip6mr_forward
1816  */
1817 
1818 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1819 			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1820 {
1821 	struct ipv6hdr *ipv6h;
1822 	struct mif_device *vif = &mrt->vif6_table[vifi];
1823 	struct net_device *dev;
1824 	struct dst_entry *dst;
1825 	struct flowi fl;
1826 
1827 	if (vif->dev == NULL)
1828 		goto out_free;
1829 
1830 #ifdef CONFIG_IPV6_PIMSM_V2
1831 	if (vif->flags & MIFF_REGISTER) {
1832 		vif->pkt_out++;
1833 		vif->bytes_out += skb->len;
1834 		vif->dev->stats.tx_bytes += skb->len;
1835 		vif->dev->stats.tx_packets++;
1836 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1837 		goto out_free;
1838 	}
1839 #endif
1840 
1841 	ipv6h = ipv6_hdr(skb);
1842 
1843 	fl = (struct flowi) {
1844 		.oif = vif->link,
1845 		.nl_u = { .ip6_u =
1846 				{ .daddr = ipv6h->daddr, }
1847 		}
1848 	};
1849 
1850 	dst = ip6_route_output(net, NULL, &fl);
1851 	if (!dst)
1852 		goto out_free;
1853 
1854 	skb_dst_drop(skb);
1855 	skb_dst_set(skb, dst);
1856 
1857 	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding on
	 * all output interfaces. Clearly, if the mrouter runs a multicast
	 * program, that program should receive packets regardless of the
	 * interface it joined on.
	 * If we did not do this, the program would have to join on all
	 * interfaces. On the other hand, a multihomed host (or router, but
	 * not an mrouter) cannot join on more than one interface - it would
	 * result in receiving multiple copies of each packet.
1867 	 */
1868 	dev = vif->dev;
1869 	skb->dev = dev;
1870 	vif->pkt_out++;
1871 	vif->bytes_out += skb->len;
1872 
1873 	/* We are about to write */
1874 	/* XXX: extension headers? */
1875 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1876 		goto out_free;
1877 
1878 	ipv6h = ipv6_hdr(skb);
1879 	ipv6h->hop_limit--;
1880 
1881 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1882 
1883 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
1884 		       ip6mr_forward2_finish);
1885 
1886 out_free:
1887 	kfree_skb(skb);
1888 	return 0;
1889 }
1890 
1891 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
1892 {
1893 	int ct;
1894 
1895 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
1896 		if (mrt->vif6_table[ct].dev == dev)
1897 			break;
1898 	}
1899 	return ct;
1900 }
1901 
1902 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
1903 			  struct sk_buff *skb, struct mfc6_cache *cache)
1904 {
1905 	int psend = -1;
1906 	int vif, ct;
1907 
1908 	vif = cache->mf6c_parent;
1909 	cache->mfc_un.res.pkt++;
1910 	cache->mfc_un.res.bytes += skb->len;
1911 
1912 	/*
1913 	 * Wrong interface: drop packet and (maybe) send PIM assert.
1914 	 */
1915 	if (mrt->vif6_table[vif].dev != skb->dev) {
1916 		int true_vifi;
1917 
1918 		cache->mfc_un.res.wrong_if++;
1919 		true_vifi = ip6mr_find_vif(mrt, skb->dev);
1920 
1921 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* PIM-SM uses asserts when switching from the RPT to
		       the SPT, so we cannot check that the packet arrived
		       on an oif. That is bad, but otherwise we would need
		       to move a pretty large chunk of pimd into the
		       kernel. Ouch... --ANK
		     */
1927 		    (mrt->mroute_do_pim ||
1928 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
1929 		    time_after(jiffies,
1930 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1931 			cache->mfc_un.res.last_assert = jiffies;
1932 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
1933 		}
1934 		goto dont_forward;
1935 	}
1936 
1937 	mrt->vif6_table[vif].pkt_in++;
1938 	mrt->vif6_table[vif].bytes_in += skb->len;
1939 
1940 	/*
1941 	 *	Forward the frame
1942 	 */
1943 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
1944 		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
1945 			if (psend != -1) {
1946 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1947 				if (skb2)
1948 					ip6mr_forward2(net, mrt, skb2, cache, psend);
1949 			}
1950 			psend = ct;
1951 		}
1952 	}
1953 	if (psend != -1) {
1954 		ip6mr_forward2(net, mrt, skb, cache, psend);
1955 		return 0;
1956 	}
1957 
1958 dont_forward:
1959 	kfree_skb(skb);
1960 	return 0;
1961 }
1962 
1963 
1964 /*
1965  *	Multicast packets for forwarding arrive here
1966  */
1967 
1968 int ip6_mr_input(struct sk_buff *skb)
1969 {
1970 	struct mfc6_cache *cache;
1971 	struct net *net = dev_net(skb->dev);
1972 	struct mr6_table *mrt;
1973 	struct flowi fl = {
1974 		.iif	= skb->dev->ifindex,
1975 		.mark	= skb->mark,
1976 	};
1977 	int err;
1978 
1979 	err = ip6mr_fib_lookup(net, &fl, &mrt);
1980 	if (err < 0)
1981 		return err;
1982 
1983 	read_lock(&mrt_lock);
1984 	cache = ip6mr_cache_find(mrt,
1985 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1986 
1987 	/*
1988 	 *	No usable cache entry
1989 	 */
1990 	if (cache == NULL) {
1991 		int vif;
1992 
1993 		vif = ip6mr_find_vif(mrt, skb->dev);
1994 		if (vif >= 0) {
1995 			int err = ip6mr_cache_unresolved(mrt, vif, skb);
1996 			read_unlock(&mrt_lock);
1997 
1998 			return err;
1999 		}
2000 		read_unlock(&mrt_lock);
2001 		kfree_skb(skb);
2002 		return -ENODEV;
2003 	}
2004 
2005 	ip6_mr_forward(net, mrt, skb, cache);
2006 
2007 	read_unlock(&mrt_lock);
2008 
2009 	return 0;
2010 }
2011 
2012 
2013 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2014 			       struct mfc6_cache *c, struct rtmsg *rtm)
2015 {
2016 	int ct;
2017 	struct rtnexthop *nhp;
2018 	u8 *b = skb_tail_pointer(skb);
2019 	struct rtattr *mp_head;
2020 
2021 	/* If cache is unresolved, don't try to parse IIF and OIF */
2022 	if (c->mf6c_parent >= MAXMIFS)
2023 		return -ENOENT;
2024 
2025 	if (MIF_EXISTS(mrt, c->mf6c_parent))
2026 		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);
2027 
2028 	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
2029 
2030 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2031 		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2032 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
2033 				goto rtattr_failure;
2034 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
2035 			nhp->rtnh_flags = 0;
2036 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2037 			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2038 			nhp->rtnh_len = sizeof(*nhp);
2039 		}
2040 	}
2041 	mp_head->rta_type = RTA_MULTIPATH;
2042 	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
2043 	rtm->rtm_type = RTN_MULTICAST;
2044 	return 1;
2045 
2046 rtattr_failure:
2047 	nlmsg_trim(skb, b);
2048 	return -EMSGSIZE;
2049 }
2050 
2051 int ip6mr_get_route(struct net *net,
2052 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2053 {
2054 	int err;
2055 	struct mr6_table *mrt;
2056 	struct mfc6_cache *cache;
2057 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2058 
2059 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2060 	if (mrt == NULL)
2061 		return -ENOENT;
2062 
2063 	read_lock(&mrt_lock);
2064 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2065 
2066 	if (!cache) {
2067 		struct sk_buff *skb2;
2068 		struct ipv6hdr *iph;
2069 		struct net_device *dev;
2070 		int vif;
2071 
2072 		if (nowait) {
2073 			read_unlock(&mrt_lock);
2074 			return -EAGAIN;
2075 		}
2076 
2077 		dev = skb->dev;
2078 		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2079 			read_unlock(&mrt_lock);
2080 			return -ENODEV;
2081 		}
2082 
2083 		/* really correct? */
2084 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2085 		if (!skb2) {
2086 			read_unlock(&mrt_lock);
2087 			return -ENOMEM;
2088 		}
2089 
2090 		skb_reset_transport_header(skb2);
2091 
2092 		skb_put(skb2, sizeof(struct ipv6hdr));
2093 		skb_reset_network_header(skb2);
2094 
2095 		iph = ipv6_hdr(skb2);
2096 		iph->version = 0;
2097 		iph->priority = 0;
2098 		iph->flow_lbl[0] = 0;
2099 		iph->flow_lbl[1] = 0;
2100 		iph->flow_lbl[2] = 0;
2101 		iph->payload_len = 0;
2102 		iph->nexthdr = IPPROTO_NONE;
2103 		iph->hop_limit = 0;
2104 		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
2105 		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
2106 
2107 		err = ip6mr_cache_unresolved(mrt, vif, skb2);
2108 		read_unlock(&mrt_lock);
2109 
2110 		return err;
2111 	}
2112 
2113 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2114 		cache->mfc_flags |= MFC_NOTIFY;
2115 
2116 	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2117 	read_unlock(&mrt_lock);
2118 	return err;
2119 }
2120 
2121 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2122 			     u32 pid, u32 seq, struct mfc6_cache *c)
2123 {
2124 	struct nlmsghdr *nlh;
2125 	struct rtmsg *rtm;
2126 
2127 	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2128 	if (nlh == NULL)
2129 		return -EMSGSIZE;
2130 
2131 	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2133 	rtm->rtm_dst_len  = 128;
2134 	rtm->rtm_src_len  = 128;
2135 	rtm->rtm_tos      = 0;
2136 	rtm->rtm_table    = mrt->id;
2137 	NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2138 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2139 	rtm->rtm_protocol = RTPROT_UNSPEC;
2140 	rtm->rtm_flags    = 0;
2141 
2142 	NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin);
2143 	NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp);
2144 
2145 	if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
2146 		goto nla_put_failure;
2147 
2148 	return nlmsg_end(skb, nlh);
2149 
2150 nla_put_failure:
2151 	nlmsg_cancel(skb, nlh);
2152 	return -EMSGSIZE;
2153 }
2154 
2155 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2156 {
2157 	struct net *net = sock_net(skb->sk);
2158 	struct mr6_table *mrt;
2159 	struct mfc6_cache *mfc;
2160 	unsigned int t = 0, s_t;
2161 	unsigned int h = 0, s_h;
2162 	unsigned int e = 0, s_e;
2163 
2164 	s_t = cb->args[0];
2165 	s_h = cb->args[1];
2166 	s_e = cb->args[2];
2167 
2168 	read_lock(&mrt_lock);
2169 	ip6mr_for_each_table(mrt, net) {
2170 		if (t < s_t)
2171 			goto next_table;
2172 		if (t > s_t)
2173 			s_h = 0;
2174 		for (h = s_h; h < MFC6_LINES; h++) {
2175 			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2176 				if (e < s_e)
2177 					goto next_entry;
2178 				if (ip6mr_fill_mroute(mrt, skb,
2179 						      NETLINK_CB(cb->skb).pid,
2180 						      cb->nlh->nlmsg_seq,
2181 						      mfc) < 0)
2182 					goto done;
2183 next_entry:
2184 				e++;
2185 			}
2186 			e = s_e = 0;
2187 		}
2188 		s_h = 0;
2189 next_table:
2190 		t++;
2191 	}
2192 done:
2193 	read_unlock(&mrt_lock);
2194 
2195 	cb->args[2] = e;
2196 	cb->args[1] = h;
2197 	cb->args[0] = t;
2198 
2199 	return skb->len;
2200 }
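
/*
 * These dump handlers back the RTM_GETROUTE dump registered in
 * ip6_mr_init(), so the MFC can be walked over rtnetlink instead of
 * /proc/net/ip6_mr_cache: build a struct rtmsg with rtm_family set to
 * RTNL_FAMILY_IP6MR and send it as RTM_GETROUTE with NLM_F_DUMP over a
 * NETLINK_ROUTE socket (sketch of assumed usage).
 */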
2201