xref: /openbmc/linux/net/ipv6/ip6mr.c (revision 6a108a14)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <asm/system.h>
20 #include <asm/uaccess.h>
21 #include <linux/types.h>
22 #include <linux/sched.h>
23 #include <linux/errno.h>
24 #include <linux/timer.h>
25 #include <linux/mm.h>
26 #include <linux/kernel.h>
27 #include <linux/fcntl.h>
28 #include <linux/stat.h>
29 #include <linux/socket.h>
30 #include <linux/inet.h>
31 #include <linux/netdevice.h>
32 #include <linux/inetdevice.h>
33 #include <linux/proc_fs.h>
34 #include <linux/seq_file.h>
35 #include <linux/init.h>
36 #include <linux/slab.h>
37 #include <net/protocol.h>
38 #include <linux/skbuff.h>
39 #include <net/sock.h>
40 #include <net/raw.h>
41 #include <linux/notifier.h>
42 #include <linux/if_arp.h>
43 #include <net/checksum.h>
44 #include <net/netlink.h>
45 #include <net/fib_rules.h>
46 
47 #include <net/ipv6.h>
48 #include <net/ip6_route.h>
49 #include <linux/mroute6.h>
50 #include <linux/pim.h>
51 #include <net/addrconf.h>
52 #include <linux/netfilter_ipv6.h>
53 #include <net/ip6_checksum.h>
54 
55 struct mr6_table {
56 	struct list_head	list;
57 #ifdef CONFIG_NET_NS
58 	struct net		*net;
59 #endif
60 	u32			id;
61 	struct sock		*mroute6_sk;
62 	struct timer_list	ipmr_expire_timer;
63 	struct list_head	mfc6_unres_queue;
64 	struct list_head	mfc6_cache_array[MFC6_LINES];
65 	struct mif_device	vif6_table[MAXMIFS];
66 	int			maxvif;
67 	atomic_t		cache_resolve_queue_len;
68 	int			mroute_do_assert;
69 	int			mroute_do_pim;
70 #ifdef CONFIG_IPV6_PIMSM_V2
71 	int			mroute_reg_vif_num;
72 #endif
73 };
74 
75 struct ip6mr_rule {
76 	struct fib_rule		common;
77 };
78 
79 struct ip6mr_result {
80 	struct mr6_table	*mrt;
81 };
82 
83 /* Big lock, protecting the vif table, mrt cache and mroute socket state.
84    Note that all changes are also serialized via rtnl_lock.
85  */
86 
87 static DEFINE_RWLOCK(mrt_lock);
88 
89 /*
90  *	Multicast router control variables
91  */
92 
93 #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
94 
95 /* Special spinlock for queue of unresolved entries */
96 static DEFINE_SPINLOCK(mfc_unres_lock);
97 
98 /* We return to Alan's original scheme. The hash table of resolved
99    entries is changed only in process context and is protected by the
100    weak read/write lock mrt_lock. The queue of unresolved entries is
101    protected by the strong spinlock mfc_unres_lock.
102 
103    This way the data path needs no exclusive locks at all.
104  */
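
/* Editor's illustration (not part of the original file): the pattern the
   comment above describes. Data-path readers take mrt_lock shared; the
   control path takes it exclusively with BHs disabled, since the data
   path runs in softirq context:

	read_lock(&mrt_lock);				(data path, softirq)
	c = ip6mr_cache_find(mrt, &src, &grp);
	...
	read_unlock(&mrt_lock);

	write_lock_bh(&mrt_lock);			(process context)
	list_add(&c->list, &mrt->mfc6_cache_array[line]);
	write_unlock_bh(&mrt_lock);
 */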
105 
106 static struct kmem_cache *mrt_cachep __read_mostly;
107 
108 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
109 static void ip6mr_free_table(struct mr6_table *mrt);
110 
111 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
112 			  struct sk_buff *skb, struct mfc6_cache *cache);
113 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
114 			      mifi_t mifi, int assert);
115 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
116 			       struct mfc6_cache *c, struct rtmsg *rtm);
117 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
118 			       struct netlink_callback *cb);
119 static void mroute_clean_tables(struct mr6_table *mrt);
120 static void ipmr_expire_process(unsigned long arg);
121 
122 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
123 #define ip6mr_for_each_table(mrt, net) \
124 	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
125 
126 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
127 {
128 	struct mr6_table *mrt;
129 
130 	ip6mr_for_each_table(mrt, net) {
131 		if (mrt->id == id)
132 			return mrt;
133 	}
134 	return NULL;
135 }
136 
137 static int ip6mr_fib_lookup(struct net *net, struct flowi *flp,
138 			    struct mr6_table **mrt)
139 {
140 	struct ip6mr_result res;
141 	struct fib_lookup_arg arg = { .result = &res, };
142 	int err;
143 
144 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flp, 0, &arg);
145 	if (err < 0)
146 		return err;
147 	*mrt = res.mrt;
148 	return 0;
149 }
150 
151 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
152 			     int flags, struct fib_lookup_arg *arg)
153 {
154 	struct ip6mr_result *res = arg->result;
155 	struct mr6_table *mrt;
156 
157 	switch (rule->action) {
158 	case FR_ACT_TO_TBL:
159 		break;
160 	case FR_ACT_UNREACHABLE:
161 		return -ENETUNREACH;
162 	case FR_ACT_PROHIBIT:
163 		return -EACCES;
164 	case FR_ACT_BLACKHOLE:
165 	default:
166 		return -EINVAL;
167 	}
168 
169 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
170 	if (mrt == NULL)
171 		return -EAGAIN;
172 	res->mrt = mrt;
173 	return 0;
174 }
175 
176 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
177 {
178 	return 1;
179 }
180 
181 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
182 	FRA_GENERIC_POLICY,
183 };
184 
185 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
186 				struct fib_rule_hdr *frh, struct nlattr **tb)
187 {
188 	return 0;
189 }
190 
191 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
192 			      struct nlattr **tb)
193 {
194 	return 1;
195 }
196 
197 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
198 			   struct fib_rule_hdr *frh)
199 {
200 	frh->dst_len = 0;
201 	frh->src_len = 0;
202 	frh->tos     = 0;
203 	return 0;
204 }
205 
206 static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
207 	.family		= RTNL_FAMILY_IP6MR,
208 	.rule_size	= sizeof(struct ip6mr_rule),
209 	.addr_size	= sizeof(struct in6_addr),
210 	.action		= ip6mr_rule_action,
211 	.match		= ip6mr_rule_match,
212 	.configure	= ip6mr_rule_configure,
213 	.compare	= ip6mr_rule_compare,
214 	.default_pref	= fib_default_rule_pref,
215 	.fill		= ip6mr_rule_fill,
216 	.nlgroup	= RTNLGRP_IPV6_RULE,
217 	.policy		= ip6mr_rule_policy,
218 	.owner		= THIS_MODULE,
219 };
220 
221 static int __net_init ip6mr_rules_init(struct net *net)
222 {
223 	struct fib_rules_ops *ops;
224 	struct mr6_table *mrt;
225 	int err;
226 
227 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
228 	if (IS_ERR(ops))
229 		return PTR_ERR(ops);
230 
231 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
232 
233 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
234 	if (mrt == NULL) {
235 		err = -ENOMEM;
236 		goto err1;
237 	}
238 
239 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
240 	if (err < 0)
241 		goto err2;
242 
243 	net->ipv6.mr6_rules_ops = ops;
244 	return 0;
245 
246 err2:
247 	ip6mr_free_table(mrt);	/* undo ip6mr_new_table(): stop timer, free */
248 err1:
249 	fib_rules_unregister(ops);
250 	return err;
251 }
252 
253 static void __net_exit ip6mr_rules_exit(struct net *net)
254 {
255 	struct mr6_table *mrt, *next;
256 
257 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
258 		list_del(&mrt->list);
259 		ip6mr_free_table(mrt);
260 	}
261 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
262 }
263 #else
264 #define ip6mr_for_each_table(mrt, net) \
265 	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
266 
267 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
268 {
269 	return net->ipv6.mrt6;
270 }
271 
272 static int ip6mr_fib_lookup(struct net *net, struct flowi *flp,
273 			    struct mr6_table **mrt)
274 {
275 	*mrt = net->ipv6.mrt6;
276 	return 0;
277 }
278 
279 static int __net_init ip6mr_rules_init(struct net *net)
280 {
281 	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
282 	return net->ipv6.mrt6 ? 0 : -ENOMEM;
283 }
284 
285 static void __net_exit ip6mr_rules_exit(struct net *net)
286 {
287 	ip6mr_free_table(net->ipv6.mrt6);
288 }
289 #endif
290 
291 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
292 {
293 	struct mr6_table *mrt;
294 	unsigned int i;
295 
296 	mrt = ip6mr_get_table(net, id);
297 	if (mrt != NULL)
298 		return mrt;
299 
300 	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
301 	if (mrt == NULL)
302 		return NULL;
303 	mrt->id = id;
304 	write_pnet(&mrt->net, net);
305 
306 	/* Forwarding cache */
307 	for (i = 0; i < MFC6_LINES; i++)
308 		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
309 
310 	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
311 
312 	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
313 		    (unsigned long)mrt);
314 
315 #ifdef CONFIG_IPV6_PIMSM_V2
316 	mrt->mroute_reg_vif_num = -1;
317 #endif
318 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
319 	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
320 #endif
321 	return mrt;
322 }
323 
324 static void ip6mr_free_table(struct mr6_table *mrt)
325 {
326 	del_timer(&mrt->ipmr_expire_timer);
327 	mroute_clean_tables(mrt);
328 	kfree(mrt);
329 }
330 
331 #ifdef CONFIG_PROC_FS
332 
333 struct ipmr_mfc_iter {
334 	struct seq_net_private p;
335 	struct mr6_table *mrt;
336 	struct list_head *cache;
337 	int ct;
338 };
339 
340 
341 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
342 					   struct ipmr_mfc_iter *it, loff_t pos)
343 {
344 	struct mr6_table *mrt = it->mrt;
345 	struct mfc6_cache *mfc;
346 
347 	read_lock(&mrt_lock);
348 	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
349 		it->cache = &mrt->mfc6_cache_array[it->ct];
350 		list_for_each_entry(mfc, it->cache, list)
351 			if (pos-- == 0)
352 				return mfc;
353 	}
354 	read_unlock(&mrt_lock);
355 
356 	spin_lock_bh(&mfc_unres_lock);
357 	it->cache = &mrt->mfc6_unres_queue;
358 	list_for_each_entry(mfc, it->cache, list)
359 		if (pos-- == 0)
360 			return mfc;
361 	spin_unlock_bh(&mfc_unres_lock);
362 
363 	it->cache = NULL;
364 	return NULL;
365 }
366 
367 /*
368  *	The /proc interfaces to multicast routing: /proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
369  */
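
/* Editor's sketch (not part of the original file): with one mif and one
   resolved entry, the two files look roughly like this (spacing and
   addresses illustrative only; %pI6 actually prints full uncompressed
   addresses):

	# cat /proc/net/ip6_mr_vif
	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
	 0 eth0              0       0         0       0 00000

	# cat /proc/net/ip6_mr_cache
	Group          Origin         Iif      Pkts  Bytes     Wrong  Oifs
	ff0e::1        2001:db8::1    0           0        0        0  1:1

   The exact field layout is produced by ip6mr_vif_seq_show() and
   ipmr_mfc_seq_show() below.
 */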
370 
371 struct ipmr_vif_iter {
372 	struct seq_net_private p;
373 	struct mr6_table *mrt;
374 	int ct;
375 };
376 
377 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
378 					    struct ipmr_vif_iter *iter,
379 					    loff_t pos)
380 {
381 	struct mr6_table *mrt = iter->mrt;
382 
383 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
384 		if (!MIF_EXISTS(mrt, iter->ct))
385 			continue;
386 		if (pos-- == 0)
387 			return &mrt->vif6_table[iter->ct];
388 	}
389 	return NULL;
390 }
391 
392 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
393 	__acquires(mrt_lock)
394 {
395 	struct ipmr_vif_iter *iter = seq->private;
396 	struct net *net = seq_file_net(seq);
397 	struct mr6_table *mrt;
398 
399 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
400 	if (mrt == NULL)
401 		return ERR_PTR(-ENOENT);
402 
403 	iter->mrt = mrt;
404 
405 	read_lock(&mrt_lock);
406 	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
407 		: SEQ_START_TOKEN;
408 }
409 
410 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
411 {
412 	struct ipmr_vif_iter *iter = seq->private;
413 	struct net *net = seq_file_net(seq);
414 	struct mr6_table *mrt = iter->mrt;
415 
416 	++*pos;
417 	if (v == SEQ_START_TOKEN)
418 		return ip6mr_vif_seq_idx(net, iter, 0);
419 
420 	while (++iter->ct < mrt->maxvif) {
421 		if (!MIF_EXISTS(mrt, iter->ct))
422 			continue;
423 		return &mrt->vif6_table[iter->ct];
424 	}
425 	return NULL;
426 }
427 
428 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
429 	__releases(mrt_lock)
430 {
431 	read_unlock(&mrt_lock);
432 }
433 
434 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
435 {
436 	struct ipmr_vif_iter *iter = seq->private;
437 	struct mr6_table *mrt = iter->mrt;
438 
439 	if (v == SEQ_START_TOKEN) {
440 		seq_puts(seq,
441 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
442 	} else {
443 		const struct mif_device *vif = v;
444 		const char *name = vif->dev ? vif->dev->name : "none";
445 
446 		seq_printf(seq,
447 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
448 			   vif - mrt->vif6_table,
449 			   name, vif->bytes_in, vif->pkt_in,
450 			   vif->bytes_out, vif->pkt_out,
451 			   vif->flags);
452 	}
453 	return 0;
454 }
455 
456 static const struct seq_operations ip6mr_vif_seq_ops = {
457 	.start = ip6mr_vif_seq_start,
458 	.next  = ip6mr_vif_seq_next,
459 	.stop  = ip6mr_vif_seq_stop,
460 	.show  = ip6mr_vif_seq_show,
461 };
462 
463 static int ip6mr_vif_open(struct inode *inode, struct file *file)
464 {
465 	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
466 			    sizeof(struct ipmr_vif_iter));
467 }
468 
469 static const struct file_operations ip6mr_vif_fops = {
470 	.owner	 = THIS_MODULE,
471 	.open    = ip6mr_vif_open,
472 	.read    = seq_read,
473 	.llseek  = seq_lseek,
474 	.release = seq_release_net,
475 };
476 
477 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
478 {
479 	struct ipmr_mfc_iter *it = seq->private;
480 	struct net *net = seq_file_net(seq);
481 	struct mr6_table *mrt;
482 
483 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
484 	if (mrt == NULL)
485 		return ERR_PTR(-ENOENT);
486 
487 	it->mrt = mrt;
488 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
489 		: SEQ_START_TOKEN;
490 }
491 
492 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
493 {
494 	struct mfc6_cache *mfc = v;
495 	struct ipmr_mfc_iter *it = seq->private;
496 	struct net *net = seq_file_net(seq);
497 	struct mr6_table *mrt = it->mrt;
498 
499 	++*pos;
500 
501 	if (v == SEQ_START_TOKEN)
502 		return ipmr_mfc_seq_idx(net, seq->private, 0);
503 
504 	if (mfc->list.next != it->cache)
505 		return list_entry(mfc->list.next, struct mfc6_cache, list);
506 
507 	if (it->cache == &mrt->mfc6_unres_queue)
508 		goto end_of_list;
509 
510 	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
511 
512 	while (++it->ct < MFC6_LINES) {
513 		it->cache = &mrt->mfc6_cache_array[it->ct];
514 		if (list_empty(it->cache))
515 			continue;
516 		return list_first_entry(it->cache, struct mfc6_cache, list);
517 	}
518 
519 	/* exhausted cache_array, show unresolved */
520 	read_unlock(&mrt_lock);
521 	it->cache = &mrt->mfc6_unres_queue;
522 	it->ct = 0;
523 
524 	spin_lock_bh(&mfc_unres_lock);
525 	if (!list_empty(it->cache))
526 		return list_first_entry(it->cache, struct mfc6_cache, list);
527 
528  end_of_list:
529 	spin_unlock_bh(&mfc_unres_lock);
530 	it->cache = NULL;
531 
532 	return NULL;
533 }
534 
535 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
536 {
537 	struct ipmr_mfc_iter *it = seq->private;
538 	struct mr6_table *mrt = it->mrt;
539 
540 	if (it->cache == &mrt->mfc6_unres_queue)
541 		spin_unlock_bh(&mfc_unres_lock);
542 	else if (it->cache == &mrt->mfc6_cache_array[it->ct])
543 		read_unlock(&mrt_lock);
544 }
545 
546 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
547 {
548 	int n;
549 
550 	if (v == SEQ_START_TOKEN) {
551 		seq_puts(seq,
552 			 "Group                            "
553 			 "Origin                           "
554 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
555 	} else {
556 		const struct mfc6_cache *mfc = v;
557 		const struct ipmr_mfc_iter *it = seq->private;
558 		struct mr6_table *mrt = it->mrt;
559 
560 		seq_printf(seq, "%pI6 %pI6 %-3hd",
561 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
562 			   mfc->mf6c_parent);
563 
564 		if (it->cache != &mrt->mfc6_unres_queue) {
565 			seq_printf(seq, " %8lu %8lu %8lu",
566 				   mfc->mfc_un.res.pkt,
567 				   mfc->mfc_un.res.bytes,
568 				   mfc->mfc_un.res.wrong_if);
569 			for (n = mfc->mfc_un.res.minvif;
570 			     n < mfc->mfc_un.res.maxvif; n++) {
571 				if (MIF_EXISTS(mrt, n) &&
572 				    mfc->mfc_un.res.ttls[n] < 255)
573 					seq_printf(seq,
574 						   " %2d:%-3d",
575 						   n, mfc->mfc_un.res.ttls[n]);
576 			}
577 		} else {
578 			/* unresolved mfc_caches don't contain
579 			 * pkt, bytes and wrong_if values
580 			 */
581 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
582 		}
583 		seq_putc(seq, '\n');
584 	}
585 	return 0;
586 }
587 
588 static const struct seq_operations ipmr_mfc_seq_ops = {
589 	.start = ipmr_mfc_seq_start,
590 	.next  = ipmr_mfc_seq_next,
591 	.stop  = ipmr_mfc_seq_stop,
592 	.show  = ipmr_mfc_seq_show,
593 };
594 
595 static int ipmr_mfc_open(struct inode *inode, struct file *file)
596 {
597 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
598 			    sizeof(struct ipmr_mfc_iter));
599 }
600 
601 static const struct file_operations ip6mr_mfc_fops = {
602 	.owner	 = THIS_MODULE,
603 	.open    = ipmr_mfc_open,
604 	.read    = seq_read,
605 	.llseek  = seq_lseek,
606 	.release = seq_release_net,
607 };
608 #endif
609 
610 #ifdef CONFIG_IPV6_PIMSM_V2
611 
612 static int pim6_rcv(struct sk_buff *skb)
613 {
614 	struct pimreghdr *pim;
615 	struct ipv6hdr   *encap;
616 	struct net_device  *reg_dev = NULL;
617 	struct net *net = dev_net(skb->dev);
618 	struct mr6_table *mrt;
619 	struct flowi fl = {
620 		.iif	= skb->dev->ifindex,
621 		.mark	= skb->mark,
622 	};
623 	int reg_vif_num;
624 
625 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
626 		goto drop;
627 
628 	pim = (struct pimreghdr *)skb_transport_header(skb);
629 	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
630 	    (pim->flags & PIM_NULL_REGISTER) ||
631 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
632 			     sizeof(*pim), IPPROTO_PIM,
633 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
634 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
635 		goto drop;
636 
637 	/* check if the inner packet is destined to a multicast group */
638 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
639 				   sizeof(*pim));
640 
641 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
642 	    encap->payload_len == 0 ||
643 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
644 		goto drop;
645 
646 	if (ip6mr_fib_lookup(net, &fl, &mrt) < 0)
647 		goto drop;
648 
649 	read_lock(&mrt_lock);
650 	reg_vif_num = mrt->mroute_reg_vif_num;	/* written under mrt_lock */
651 	if (reg_vif_num >= 0)
652 		reg_dev = mrt->vif6_table[reg_vif_num].dev;
653 	if (reg_dev)
654 		dev_hold(reg_dev);
655 	read_unlock(&mrt_lock);
656 
657 	if (reg_dev == NULL)
658 		goto drop;
659 
660 	skb->mac_header = skb->network_header;
661 	skb_pull(skb, (u8 *)encap - skb->data);
662 	skb_reset_network_header(skb);
663 	skb->protocol = htons(ETH_P_IPV6);
664 	skb->ip_summed = CHECKSUM_NONE;
665 	skb->pkt_type = PACKET_HOST;
666 
667 	skb_tunnel_rx(skb, reg_dev);
668 
669 	netif_rx(skb);
670 
671 	dev_put(reg_dev);
672 	return 0;
673  drop:
674 	kfree_skb(skb);
675 	return 0;
676 }
677 
678 static const struct inet6_protocol pim6_protocol = {
679 	.handler	=	pim6_rcv,
680 };
681 
682 /* Service routines creating virtual interfaces: PIMREG */
683 
684 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
685 				      struct net_device *dev)
686 {
687 	struct net *net = dev_net(dev);
688 	struct mr6_table *mrt;
689 	struct flowi fl = {
690 		.oif		= dev->ifindex,
691 		.iif		= skb->skb_iif,
692 		.mark		= skb->mark,
693 	};
694 	/* a negative errno is not a valid netdev_tx_t return; drop instead */
695 	if (ip6mr_fib_lookup(net, &fl, &mrt) < 0) {
696 		kfree_skb(skb);
697 		return NETDEV_TX_OK;
698 	}
699 
700 	read_lock(&mrt_lock);
701 	dev->stats.tx_bytes += skb->len;
702 	dev->stats.tx_packets++;
703 	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
704 	read_unlock(&mrt_lock);
705 	kfree_skb(skb);
706 	return NETDEV_TX_OK;
707 }
708 
709 static const struct net_device_ops reg_vif_netdev_ops = {
710 	.ndo_start_xmit	= reg_vif_xmit,
711 };
712 
713 static void reg_vif_setup(struct net_device *dev)
714 {
715 	dev->type		= ARPHRD_PIMREG;
716 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
717 	dev->flags		= IFF_NOARP;
718 	dev->netdev_ops		= &reg_vif_netdev_ops;
719 	dev->destructor		= free_netdev;
720 	dev->features		|= NETIF_F_NETNS_LOCAL;
721 }
722 
723 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
724 {
725 	struct net_device *dev;
726 	char name[IFNAMSIZ];
727 
728 	if (mrt->id == RT6_TABLE_DFLT)
729 		sprintf(name, "pim6reg");
730 	else
731 		sprintf(name, "pim6reg%u", mrt->id);
732 
733 	dev = alloc_netdev(0, name, reg_vif_setup);
734 	if (dev == NULL)
735 		return NULL;
736 
737 	dev_net_set(dev, net);
738 
739 	if (register_netdevice(dev)) {
740 		free_netdev(dev);
741 		return NULL;
742 	}
743 	dev->iflink = 0;
744 
745 	if (dev_open(dev))
746 		goto failure;
747 
748 	dev_hold(dev);
749 	return dev;
750 
751 failure:
752 	/* allow the register to be completed before unregistering. */
753 	rtnl_unlock();
754 	rtnl_lock();
755 
756 	unregister_netdevice(dev);
757 	return NULL;
758 }
759 #endif
760 
761 /*
762  *	Delete a VIF entry
763  */
764 
765 static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
766 {
767 	struct mif_device *v;
768 	struct net_device *dev;
769 	struct inet6_dev *in6_dev;
770 
771 	if (vifi < 0 || vifi >= mrt->maxvif)
772 		return -EADDRNOTAVAIL;
773 
774 	v = &mrt->vif6_table[vifi];
775 
776 	write_lock_bh(&mrt_lock);
777 	dev = v->dev;
778 	v->dev = NULL;
779 
780 	if (!dev) {
781 		write_unlock_bh(&mrt_lock);
782 		return -EADDRNOTAVAIL;
783 	}
784 
785 #ifdef CONFIG_IPV6_PIMSM_V2
786 	if (vifi == mrt->mroute_reg_vif_num)
787 		mrt->mroute_reg_vif_num = -1;
788 #endif
789 
790 	if (vifi + 1 == mrt->maxvif) {
791 		int tmp;
792 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
793 			if (MIF_EXISTS(mrt, tmp))
794 				break;
795 		}
796 		mrt->maxvif = tmp + 1;
797 	}
798 
799 	write_unlock_bh(&mrt_lock);
800 
801 	dev_set_allmulti(dev, -1);
802 
803 	in6_dev = __in6_dev_get(dev);
804 	if (in6_dev)
805 		in6_dev->cnf.mc_forwarding--;
806 
807 	if (v->flags & MIFF_REGISTER)
808 		unregister_netdevice_queue(dev, head);
809 
810 	dev_put(dev);
811 	return 0;
812 }
813 
814 static inline void ip6mr_cache_free(struct mfc6_cache *c)
815 {
816 	kmem_cache_free(mrt_cachep, c);
817 }
818 
819 /* Destroy an unresolved cache entry, killing queued skbs
820    and reporting error to netlink readers.
821  */
822 
823 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
824 {
825 	struct net *net = read_pnet(&mrt->net);
826 	struct sk_buff *skb;
827 
828 	atomic_dec(&mrt->cache_resolve_queue_len);
829 
830 	while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
831 		if (ipv6_hdr(skb)->version == 0) {
832 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
833 			nlh->nlmsg_type = NLMSG_ERROR;
834 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
835 			skb_trim(skb, nlh->nlmsg_len);
836 			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
837 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
838 		} else
839 			kfree_skb(skb);
840 	}
841 
842 	ip6mr_cache_free(c);
843 }
844 
845 
846 /* Timer process for all the unresolved queue. */
847 
848 static void ipmr_do_expire_process(struct mr6_table *mrt)
849 {
850 	unsigned long now = jiffies;
851 	unsigned long expires = 10 * HZ;
852 	struct mfc6_cache *c, *next;
853 
854 	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
855 		if (time_after(c->mfc_un.unres.expires, now)) {
856 			/* not yet... */
857 			unsigned long interval = c->mfc_un.unres.expires - now;
858 			if (interval < expires)
859 				expires = interval;
860 			continue;
861 		}
862 
863 		list_del(&c->list);
864 		ip6mr_destroy_unres(mrt, c);
865 	}
866 
867 	if (!list_empty(&mrt->mfc6_unres_queue))
868 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
869 }
870 
871 static void ipmr_expire_process(unsigned long arg)
872 {
873 	struct mr6_table *mrt = (struct mr6_table *)arg;
874 
875 	if (!spin_trylock(&mfc_unres_lock)) {
876 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
877 		return;
878 	}
879 
880 	if (!list_empty(&mrt->mfc6_unres_queue))
881 		ipmr_do_expire_process(mrt);
882 
883 	spin_unlock(&mfc_unres_lock);
884 }
885 
886 /* Fill oifs list. It is called under write locked mrt_lock. */
887 
888 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
889 				    unsigned char *ttls)
890 {
891 	int vifi;
892 
893 	cache->mfc_un.res.minvif = MAXMIFS;
894 	cache->mfc_un.res.maxvif = 0;
895 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
896 
897 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
898 		if (MIF_EXISTS(mrt, vifi) &&
899 		    ttls[vifi] && ttls[vifi] < 255) {
900 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
901 			if (cache->mfc_un.res.minvif > vifi)
902 				cache->mfc_un.res.minvif = vifi;
903 			if (cache->mfc_un.res.maxvif <= vifi)
904 				cache->mfc_un.res.maxvif = vifi + 1;
905 		}
906 	}
907 }
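
/* Editor's worked example (not part of the original file): with
   maxvif == 3 and ttls == { 0, 2, 255 }, only vif 1 passes the
   "ttls[vifi] && ttls[vifi] < 255" test above, so the result is

	cache->mfc_un.res.ttls   == { 255, 2, 255, ... }
	cache->mfc_un.res.minvif == 1
	cache->mfc_un.res.maxvif == 2	(one past the last valid vif)

   and the forwarding loop in ip6_mr_forward() scans [minvif, maxvif).
 */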
908 
909 static int mif6_add(struct net *net, struct mr6_table *mrt,
910 		    struct mif6ctl *vifc, int mrtsock)
911 {
912 	int vifi = vifc->mif6c_mifi;
913 	struct mif_device *v = &mrt->vif6_table[vifi];
914 	struct net_device *dev;
915 	struct inet6_dev *in6_dev;
916 	int err;
917 
918 	/* Is the vif busy? */
919 	if (MIF_EXISTS(mrt, vifi))
920 		return -EADDRINUSE;
921 
922 	switch (vifc->mif6c_flags) {
923 #ifdef CONFIG_IPV6_PIMSM_V2
924 	case MIFF_REGISTER:
925 		/*
926 		 * Special Purpose VIF in PIM
927 		 * All the packets will be sent to the daemon
928 		 */
929 		if (mrt->mroute_reg_vif_num >= 0)
930 			return -EADDRINUSE;
931 		dev = ip6mr_reg_vif(net, mrt);
932 		if (!dev)
933 			return -ENOBUFS;
934 		err = dev_set_allmulti(dev, 1);
935 		if (err) {
936 			unregister_netdevice(dev);
937 			dev_put(dev);
938 			return err;
939 		}
940 		break;
941 #endif
942 	case 0:
943 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
944 		if (!dev)
945 			return -EADDRNOTAVAIL;
946 		err = dev_set_allmulti(dev, 1);
947 		if (err) {
948 			dev_put(dev);
949 			return err;
950 		}
951 		break;
952 	default:
953 		return -EINVAL;
954 	}
955 
956 	in6_dev = __in6_dev_get(dev);
957 	if (in6_dev)
958 		in6_dev->cnf.mc_forwarding++;
959 
960 	/*
961 	 *	Fill in the VIF structures
962 	 */
963 	v->rate_limit = vifc->vifc_rate_limit;
964 	v->flags = vifc->mif6c_flags;
965 	if (!mrtsock)
966 		v->flags |= VIFF_STATIC;
967 	v->threshold = vifc->vifc_threshold;
968 	v->bytes_in = 0;
969 	v->bytes_out = 0;
970 	v->pkt_in = 0;
971 	v->pkt_out = 0;
972 	v->link = dev->ifindex;
973 	if (v->flags & MIFF_REGISTER)
974 		v->link = dev->iflink;
975 
976 	/* And finish update writing critical data */
977 	write_lock_bh(&mrt_lock);
978 	v->dev = dev;
979 #ifdef CONFIG_IPV6_PIMSM_V2
980 	if (v->flags & MIFF_REGISTER)
981 		mrt->mroute_reg_vif_num = vifi;
982 #endif
983 	if (vifi + 1 > mrt->maxvif)
984 		mrt->maxvif = vifi + 1;
985 	write_unlock_bh(&mrt_lock);
986 	return 0;
987 }
988 
989 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
990 					   struct in6_addr *origin,
991 					   struct in6_addr *mcastgrp)
992 {
993 	int line = MFC6_HASH(mcastgrp, origin);
994 	struct mfc6_cache *c;
995 
996 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
997 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
998 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
999 			return c;
1000 	}
1001 	return NULL;
1002 }
1003 
1004 /*
1005  *	Allocate a multicast cache entry
1006  */
1007 static struct mfc6_cache *ip6mr_cache_alloc(void)
1008 {
1009 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1010 	if (c == NULL)
1011 		return NULL;
1012 	c->mfc_un.res.minvif = MAXMIFS;
1013 	return c;
1014 }
1015 
1016 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1017 {
1018 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1019 	if (c == NULL)
1020 		return NULL;
1021 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
1022 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
1023 	return c;
1024 }
1025 
1026 /*
1027  *	A cache entry has moved from the unresolved queue into a resolved state
1028  */
1029 
1030 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1031 				struct mfc6_cache *uc, struct mfc6_cache *c)
1032 {
1033 	struct sk_buff *skb;
1034 
1035 	/*
1036 	 *	Play the pending entries through our router
1037 	 */
1038 
1039 	while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1040 		if (ipv6_hdr(skb)->version == 0) {
1041 			int err;
1042 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1043 
1044 			if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
1045 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1046 			} else {
1047 				nlh->nlmsg_type = NLMSG_ERROR;
1048 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
1049 				skb_trim(skb, nlh->nlmsg_len);
1050 				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
1051 			}
1052 			err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
1053 		} else
1054 			ip6_mr_forward(net, mrt, skb, c);
1055 	}
1056 }
1057 
1058 /*
1059  *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1060  *	expects the following bizarre scheme.
1061  *
1062  *	Called under mrt_lock.
1063  */
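
/* Editor's sketch of the userspace side (not part of the original file).
   As built below, a MRT6MSG_NOCACHE/MRT6MSG_WRONGMIF upcall is a copy of
   the offending packet's IPv6 header followed by a struct mrt6msg, while
   MRT6MSG_WHOLEPKT puts the struct mrt6msg first, followed by the whole
   packet.  The daemon that issued MRT6_INIT reads the upcalls from its
   ICMPv6 raw socket; for the NOCACHE case, roughly:

	unsigned char buf[8192];
	ssize_t n = recv(mrt_sock, buf, sizeof(buf), 0);
	struct mrt6msg *msg = (struct mrt6msg *)(buf + 40);  (40 = IPv6 hdr)

	if (n >= 40 + (ssize_t)sizeof(*msg) &&
	    msg->im6_msgtype == MRT6MSG_NOCACHE)
		add_route_for(&msg->im6_src, &msg->im6_dst, msg->im6_mif);

   add_route_for() is a hypothetical stand-in for the daemon's
   MRT6_ADD_MFC logic; mrt_sock is the MRT6_INIT socket.
 */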
1064 
1065 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1066 			      mifi_t mifi, int assert)
1067 {
1068 	struct sk_buff *skb;
1069 	struct mrt6msg *msg;
1070 	int ret;
1071 
1072 #ifdef CONFIG_IPV6_PIMSM_V2
1073 	if (assert == MRT6MSG_WHOLEPKT)
1074 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1075 						+sizeof(*msg));
1076 	else
1077 #endif
1078 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1079 
1080 	if (!skb)
1081 		return -ENOBUFS;
1082 
1083 	/* I suppose that internal messages
1084 	 * do not require checksums */
1085 
1086 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1087 
1088 #ifdef CONFIG_IPV6_PIMSM_V2
1089 	if (assert == MRT6MSG_WHOLEPKT) {
1090 		/* Ugly, but we have no choice with this interface.
1091 		   Duplicate old header, fix length etc.
1092 		   And all this only to mangle msg->im6_msgtype and
1093 		   to set msg->im6_mbz to "mbz" :-)
1094 		 */
1095 		skb_push(skb, -skb_network_offset(pkt));
1096 
1097 		skb_push(skb, sizeof(*msg));
1098 		skb_reset_transport_header(skb);
1099 		msg = (struct mrt6msg *)skb_transport_header(skb);
1100 		msg->im6_mbz = 0;
1101 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1102 		msg->im6_mif = mrt->mroute_reg_vif_num;
1103 		msg->im6_pad = 0;
1104 		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
1105 		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
1106 
1107 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1108 	} else
1109 #endif
1110 	{
1111 	/*
1112 	 *	Copy the IP header
1113 	 */
1114 
1115 	skb_put(skb, sizeof(struct ipv6hdr));
1116 	skb_reset_network_header(skb);
1117 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1118 
1119 	/*
1120 	 *	Add our header
1121 	 */
1122 	skb_put(skb, sizeof(*msg));
1123 	skb_reset_transport_header(skb);
1124 	msg = (struct mrt6msg *)skb_transport_header(skb);
1125 
1126 	msg->im6_mbz = 0;
1127 	msg->im6_msgtype = assert;
1128 	msg->im6_mif = mifi;
1129 	msg->im6_pad = 0;
1130 	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
1131 	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
1132 
1133 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1134 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1135 	}
1136 
1137 	if (mrt->mroute6_sk == NULL) {
1138 		kfree_skb(skb);
1139 		return -EINVAL;
1140 	}
1141 
1142 	/*
1143 	 *	Deliver to user space multicast routing algorithms
1144 	 */
1145 	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1146 	if (ret < 0) {
1147 		if (net_ratelimit())
1148 			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
1149 		kfree_skb(skb);
1150 	}
1151 
1152 	return ret;
1153 }
1154 
1155 /*
1156  *	Queue a packet for resolution, creating an unresolved cache entry if needed.
1157  */
1158 
1159 static int
1160 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1161 {
1162 	bool found = false;
1163 	int err;
1164 	struct mfc6_cache *c;
1165 
1166 	spin_lock_bh(&mfc_unres_lock);
1167 	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1168 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1169 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1170 			found = true;
1171 			break;
1172 		}
1173 	}
1174 
1175 	if (!found) {
1176 		/*
1177 		 *	Create a new entry if allowable
1178 		 */
1179 
1180 		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1181 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
1182 			spin_unlock_bh(&mfc_unres_lock);
1183 
1184 			kfree_skb(skb);
1185 			return -ENOBUFS;
1186 		}
1187 
1188 		/*
1189 		 *	Fill in the new cache entry
1190 		 */
1191 		c->mf6c_parent = -1;
1192 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1193 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1194 
1195 		/*
1196 		 *	Reflect first query at pim6sd
1197 		 */
1198 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1199 		if (err < 0) {
1200 			/* If the report failed throw the cache entry
1201 			   out - Brad Parker
1202 			 */
1203 			spin_unlock_bh(&mfc_unres_lock);
1204 
1205 			ip6mr_cache_free(c);
1206 			kfree_skb(skb);
1207 			return err;
1208 		}
1209 
1210 		atomic_inc(&mrt->cache_resolve_queue_len);
1211 		list_add(&c->list, &mrt->mfc6_unres_queue);
1212 
1213 		ipmr_do_expire_process(mrt);
1214 	}
1215 
1216 	/*
1217 	 *	See if we can append the packet
1218 	 */
1219 	if (c->mfc_un.unres.unresolved.qlen > 3) {
1220 		kfree_skb(skb);
1221 		err = -ENOBUFS;
1222 	} else {
1223 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1224 		err = 0;
1225 	}
1226 
1227 	spin_unlock_bh(&mfc_unres_lock);
1228 	return err;
1229 }
1230 
1231 /*
1232  *	MFC6 cache manipulation by user space
1233  */
1234 
1235 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
1236 {
1237 	int line;
1238 	struct mfc6_cache *c, *next;
1239 
1240 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1241 
1242 	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1243 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1244 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1245 			write_lock_bh(&mrt_lock);
1246 			list_del(&c->list);
1247 			write_unlock_bh(&mrt_lock);
1248 
1249 			ip6mr_cache_free(c);
1250 			return 0;
1251 		}
1252 	}
1253 	return -ENOENT;
1254 }
1255 
1256 static int ip6mr_device_event(struct notifier_block *this,
1257 			      unsigned long event, void *ptr)
1258 {
1259 	struct net_device *dev = ptr;
1260 	struct net *net = dev_net(dev);
1261 	struct mr6_table *mrt;
1262 	struct mif_device *v;
1263 	int ct;
1264 	LIST_HEAD(list);
1265 
1266 	if (event != NETDEV_UNREGISTER)
1267 		return NOTIFY_DONE;
1268 
1269 	ip6mr_for_each_table(mrt, net) {
1270 		v = &mrt->vif6_table[0];
1271 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1272 			if (v->dev == dev)
1273 				mif6_delete(mrt, ct, &list);
1274 		}
1275 	}
1276 	unregister_netdevice_many(&list);
1277 
1278 	return NOTIFY_DONE;
1279 }
1280 
1281 static struct notifier_block ip6_mr_notifier = {
1282 	.notifier_call = ip6mr_device_event
1283 };
1284 
1285 /*
1286  *	Setup for IP multicast routing
1287  */
1288 
1289 static int __net_init ip6mr_net_init(struct net *net)
1290 {
1291 	int err;
1292 
1293 	err = ip6mr_rules_init(net);
1294 	if (err < 0)
1295 		goto fail;
1296 
1297 #ifdef CONFIG_PROC_FS
1298 	err = -ENOMEM;
1299 	if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1300 		goto proc_vif_fail;
1301 	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1302 		goto proc_cache_fail;
1303 #endif
1304 
1305 	return 0;
1306 
1307 #ifdef CONFIG_PROC_FS
1308 proc_cache_fail:
1309 	proc_net_remove(net, "ip6_mr_vif");
1310 proc_vif_fail:
1311 	ip6mr_rules_exit(net);
1312 #endif
1313 fail:
1314 	return err;
1315 }
1316 
1317 static void __net_exit ip6mr_net_exit(struct net *net)
1318 {
1319 #ifdef CONFIG_PROC_FS
1320 	proc_net_remove(net, "ip6_mr_cache");
1321 	proc_net_remove(net, "ip6_mr_vif");
1322 #endif
1323 	ip6mr_rules_exit(net);
1324 }
1325 
1326 static struct pernet_operations ip6mr_net_ops = {
1327 	.init = ip6mr_net_init,
1328 	.exit = ip6mr_net_exit,
1329 };
1330 
1331 int __init ip6_mr_init(void)
1332 {
1333 	int err;
1334 
1335 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1336 				       sizeof(struct mfc6_cache),
1337 				       0, SLAB_HWCACHE_ALIGN,
1338 				       NULL);
1339 	if (!mrt_cachep)
1340 		return -ENOMEM;
1341 
1342 	err = register_pernet_subsys(&ip6mr_net_ops);
1343 	if (err)
1344 		goto reg_pernet_fail;
1345 
1346 	err = register_netdevice_notifier(&ip6_mr_notifier);
1347 	if (err)
1348 		goto reg_notif_fail;
1349 #ifdef CONFIG_IPV6_PIMSM_V2
1350 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1351 		printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
1352 		err = -EAGAIN;
1353 		goto add_proto_fail;
1354 	}
1355 #endif
1356 	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, ip6mr_rtm_dumproute);
1357 	return 0;
1358 #ifdef CONFIG_IPV6_PIMSM_V2
1359 add_proto_fail:
1360 	unregister_netdevice_notifier(&ip6_mr_notifier);
1361 #endif
1362 reg_notif_fail:
1363 	unregister_pernet_subsys(&ip6mr_net_ops);
1364 reg_pernet_fail:
1365 	kmem_cache_destroy(mrt_cachep);
1366 	return err;
1367 }
1368 
1369 void ip6_mr_cleanup(void)
1370 {
1371 	unregister_netdevice_notifier(&ip6_mr_notifier);
1372 	unregister_pernet_subsys(&ip6mr_net_ops);
1373 	kmem_cache_destroy(mrt_cachep);
1374 }
1375 
1376 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1377 			 struct mf6cctl *mfc, int mrtsock)
1378 {
1379 	bool found = false;
1380 	int line;
1381 	struct mfc6_cache *uc, *c;
1382 	unsigned char ttls[MAXMIFS];
1383 	int i;
1384 
1385 	if (mfc->mf6cc_parent >= MAXMIFS)
1386 		return -ENFILE;
1387 
1388 	memset(ttls, 255, MAXMIFS);
1389 	for (i = 0; i < MAXMIFS; i++) {
1390 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1391 			ttls[i] = 1;
1392 	}
1393 
1395 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1396 
1397 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1398 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1399 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1400 			found = true;
1401 			break;
1402 		}
1403 	}
1404 
1405 	if (found) {
1406 		write_lock_bh(&mrt_lock);
1407 		c->mf6c_parent = mfc->mf6cc_parent;
1408 		ip6mr_update_thresholds(mrt, c, ttls);
1409 		if (!mrtsock)
1410 			c->mfc_flags |= MFC_STATIC;
1411 		write_unlock_bh(&mrt_lock);
1412 		return 0;
1413 	}
1414 
1415 	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1416 		return -EINVAL;
1417 
1418 	c = ip6mr_cache_alloc();
1419 	if (c == NULL)
1420 		return -ENOMEM;
1421 
1422 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1423 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1424 	c->mf6c_parent = mfc->mf6cc_parent;
1425 	ip6mr_update_thresholds(mrt, c, ttls);
1426 	if (!mrtsock)
1427 		c->mfc_flags |= MFC_STATIC;
1428 
1429 	write_lock_bh(&mrt_lock);
1430 	list_add(&c->list, &mrt->mfc6_cache_array[line]);
1431 	write_unlock_bh(&mrt_lock);
1432 
1433 	/*
1434 	 *	Check to see if we resolved a queued (unresolved) entry. If so
1435 	 *	we need to send the queued frames on and tidy up.
1436 	 */
1437 	found = false;
1438 	spin_lock_bh(&mfc_unres_lock);
1439 	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1440 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1441 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1442 			list_del(&uc->list);
1443 			atomic_dec(&mrt->cache_resolve_queue_len);
1444 			found = true;
1445 			break;
1446 		}
1447 	}
1448 	if (list_empty(&mrt->mfc6_unres_queue))
1449 		del_timer(&mrt->ipmr_expire_timer);
1450 	spin_unlock_bh(&mfc_unres_lock);
1451 
1452 	if (found) {
1453 		ip6mr_cache_resolve(net, mrt, uc, c);
1454 		ip6mr_cache_free(uc);
1455 	}
1456 	return 0;
1457 }
1458 
1459 /*
1460  *	Close the multicast socket, and clear the vif tables etc
1461  */
1462 
1463 static void mroute_clean_tables(struct mr6_table *mrt)
1464 {
1465 	int i;
1466 	LIST_HEAD(list);
1467 	struct mfc6_cache *c, *next;
1468 
1469 	/*
1470 	 *	Shut down all active vif entries
1471 	 */
1472 	for (i = 0; i < mrt->maxvif; i++) {
1473 		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1474 			mif6_delete(mrt, i, &list);
1475 	}
1476 	unregister_netdevice_many(&list);
1477 
1478 	/*
1479 	 *	Wipe the cache
1480 	 */
1481 	for (i = 0; i < MFC6_LINES; i++) {
1482 		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1483 			if (c->mfc_flags & MFC_STATIC)
1484 				continue;
1485 			write_lock_bh(&mrt_lock);
1486 			list_del(&c->list);
1487 			write_unlock_bh(&mrt_lock);
1488 
1489 			ip6mr_cache_free(c);
1490 		}
1491 	}
1492 
1493 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1494 		spin_lock_bh(&mfc_unres_lock);
1495 		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1496 			list_del(&c->list);
1497 			ip6mr_destroy_unres(mrt, c);
1498 		}
1499 		spin_unlock_bh(&mfc_unres_lock);
1500 	}
1501 }
1502 
1503 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1504 {
1505 	int err = 0;
1506 	struct net *net = sock_net(sk);
1507 
1508 	rtnl_lock();
1509 	write_lock_bh(&mrt_lock);
1510 	if (likely(mrt->mroute6_sk == NULL)) {
1511 		mrt->mroute6_sk = sk;
1512 		net->ipv6.devconf_all->mc_forwarding++;
1513 	} else
1514 		err = -EADDRINUSE;
1516 	write_unlock_bh(&mrt_lock);
1517 
1518 	rtnl_unlock();
1519 
1520 	return err;
1521 }
1522 
1523 int ip6mr_sk_done(struct sock *sk)
1524 {
1525 	int err = -EACCES;
1526 	struct net *net = sock_net(sk);
1527 	struct mr6_table *mrt;
1528 
1529 	rtnl_lock();
1530 	ip6mr_for_each_table(mrt, net) {
1531 		if (sk == mrt->mroute6_sk) {
1532 			write_lock_bh(&mrt_lock);
1533 			mrt->mroute6_sk = NULL;
1534 			net->ipv6.devconf_all->mc_forwarding--;
1535 			write_unlock_bh(&mrt_lock);
1536 
1537 			mroute_clean_tables(mrt);
1538 			err = 0;
1539 			break;
1540 		}
1541 	}
1542 	rtnl_unlock();
1543 
1544 	return err;
1545 }
1546 
1547 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1548 {
1549 	struct mr6_table *mrt;
1550 	struct flowi fl = {
1551 		.iif	= skb->skb_iif,
1552 		.oif	= skb->dev->ifindex,
1553 		.mark	= skb->mark,
1554 	};
1555 
1556 	if (ip6mr_fib_lookup(net, &fl, &mrt) < 0)
1557 		return NULL;
1558 
1559 	return mrt->mroute6_sk;
1560 }
1561 
1562 /*
1563  *	Socket options and virtual interface manipulation. The whole
1564  *	virtual interface system is a complete heap, but unfortunately
1565  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1566  *	MOSPF/PIM router set up we can clean this up.
1567  */
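
/* Editor's sketch of the userspace sequence (not part of the original
   file); error handling omitted, and if_index is assumed to come from
   if_nametoindex():

	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
	int one = 1;
	struct mif6ctl mc = { .mif6c_mifi = 0, .mif6c_pifi = if_index };

	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mc, sizeof(mc));
	...
	setsockopt(s, IPPROTO_IPV6, MRT6_DONE, NULL, 0);
 */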
1568 
1569 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1570 {
1571 	int ret;
1572 	struct mif6ctl vif;
1573 	struct mf6cctl mfc;
1574 	mifi_t mifi;
1575 	struct net *net = sock_net(sk);
1576 	struct mr6_table *mrt;
1577 
1578 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1579 	if (mrt == NULL)
1580 		return -ENOENT;
1581 
1582 	if (optname != MRT6_INIT) {
1583 		if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
1584 			return -EACCES;
1585 	}
1586 
1587 	switch (optname) {
1588 	case MRT6_INIT:
1589 		if (sk->sk_type != SOCK_RAW ||
1590 		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1591 			return -EOPNOTSUPP;
1592 		if (optlen < sizeof(int))
1593 			return -EINVAL;
1594 
1595 		return ip6mr_sk_init(mrt, sk);
1596 
1597 	case MRT6_DONE:
1598 		return ip6mr_sk_done(sk);
1599 
1600 	case MRT6_ADD_MIF:
1601 		if (optlen < sizeof(vif))
1602 			return -EINVAL;
1603 		if (copy_from_user(&vif, optval, sizeof(vif)))
1604 			return -EFAULT;
1605 		if (vif.mif6c_mifi >= MAXMIFS)
1606 			return -ENFILE;
1607 		rtnl_lock();
1608 		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1609 		rtnl_unlock();
1610 		return ret;
1611 
1612 	case MRT6_DEL_MIF:
1613 		if (optlen < sizeof(mifi_t))
1614 			return -EINVAL;
1615 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1616 			return -EFAULT;
1617 		rtnl_lock();
1618 		ret = mif6_delete(mrt, mifi, NULL);
1619 		rtnl_unlock();
1620 		return ret;
1621 
1622 	/*
1623 	 *	Manipulate the forwarding caches. These live
1624 	 *	in a sort of kernel/user symbiosis.
1625 	 */
1626 	case MRT6_ADD_MFC:
1627 	case MRT6_DEL_MFC:
1628 		if (optlen < sizeof(mfc))
1629 			return -EINVAL;
1630 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1631 			return -EFAULT;
1632 		rtnl_lock();
1633 		if (optname == MRT6_DEL_MFC)
1634 			ret = ip6mr_mfc_delete(mrt, &mfc);
1635 		else
1636 			ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
1637 		rtnl_unlock();
1638 		return ret;
1639 
1640 	/*
1641 	 *	Control PIM assert (activating PIM also activates assert)
1642 	 */
1643 	case MRT6_ASSERT:
1644 	{
1645 		int v;
1646 		if (get_user(v, (int __user *)optval))
1647 			return -EFAULT;
1648 		mrt->mroute_do_assert = !!v;
1649 		return 0;
1650 	}
1651 
1652 #ifdef CONFIG_IPV6_PIMSM_V2
1653 	case MRT6_PIM:
1654 	{
1655 		int v;
1656 		if (get_user(v, (int __user *)optval))
1657 			return -EFAULT;
1658 		v = !!v;
1659 		rtnl_lock();
1660 		ret = 0;
1661 		if (v != mrt->mroute_do_pim) {
1662 			mrt->mroute_do_pim = v;
1663 			mrt->mroute_do_assert = v;
1664 		}
1665 		rtnl_unlock();
1666 		return ret;
1667 	}
1668 
1669 #endif
1670 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1671 	case MRT6_TABLE:
1672 	{
1673 		u32 v;
1674 
1675 		if (optlen != sizeof(u32))
1676 			return -EINVAL;
1677 		if (get_user(v, (u32 __user *)optval))
1678 			return -EFAULT;
1679 		if (sk == mrt->mroute6_sk)
1680 			return -EBUSY;
1681 
1682 		rtnl_lock();
1683 		ret = 0;
1684 		if (!ip6mr_new_table(net, v))
1685 			ret = -ENOMEM;	/* don't bind the socket to a missing table */
1686 		else raw6_sk(sk)->ip6mr_table = v;
1687 		rtnl_unlock();
1688 		return ret;
1689 	}
1690 #endif
1691 	/*
1692 	 *	Spurious command, or MRT6_VERSION which you cannot
1693 	 *	set.
1694 	 */
1695 	default:
1696 		return -ENOPROTOOPT;
1697 	}
1698 }
1699 
1700 /*
1701  *	Getsockopt support for the multicast routing system.
1702  */
1703 
1704 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1705 			  int __user *optlen)
1706 {
1707 	int olr;
1708 	int val;
1709 	struct net *net = sock_net(sk);
1710 	struct mr6_table *mrt;
1711 
1712 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1713 	if (mrt == NULL)
1714 		return -ENOENT;
1715 
1716 	switch (optname) {
1717 	case MRT6_VERSION:
1718 		val = 0x0305;
1719 		break;
1720 #ifdef CONFIG_IPV6_PIMSM_V2
1721 	case MRT6_PIM:
1722 		val = mrt->mroute_do_pim;
1723 		break;
1724 #endif
1725 	case MRT6_ASSERT:
1726 		val = mrt->mroute_do_assert;
1727 		break;
1728 	default:
1729 		return -ENOPROTOOPT;
1730 	}
1731 
1732 	if (get_user(olr, optlen))
1733 		return -EFAULT;
1734 
1735 	olr = min_t(int, olr, sizeof(int));
1736 	if (olr < 0)
1737 		return -EINVAL;
1738 
1739 	if (put_user(olr, optlen))
1740 		return -EFAULT;
1741 	if (copy_to_user(optval, &val, olr))
1742 		return -EFAULT;
1743 	return 0;
1744 }
1745 
1746 /*
1747  *	The IP multicast ioctl support routines.
1748  */
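
/* Editor's sketch (not part of the original file): reading per-mif
   counters from userspace, using the MRT6_INIT socket s from the
   example above:

	struct sioc_mif_req6 vr = { .mifi = 0 };

	if (ioctl(s, SIOCGETMIFCNT_IN6, &vr) == 0)
		printf("mif0: %lu pkts in, %lu pkts out\n",
		       vr.icount, vr.ocount);
 */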
1749 
1750 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1751 {
1752 	struct sioc_sg_req6 sr;
1753 	struct sioc_mif_req6 vr;
1754 	struct mif_device *vif;
1755 	struct mfc6_cache *c;
1756 	struct net *net = sock_net(sk);
1757 	struct mr6_table *mrt;
1758 
1759 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1760 	if (mrt == NULL)
1761 		return -ENOENT;
1762 
1763 	switch (cmd) {
1764 	case SIOCGETMIFCNT_IN6:
1765 		if (copy_from_user(&vr, arg, sizeof(vr)))
1766 			return -EFAULT;
1767 		if (vr.mifi >= mrt->maxvif)
1768 			return -EINVAL;
1769 		read_lock(&mrt_lock);
1770 		vif = &mrt->vif6_table[vr.mifi];
1771 		if (MIF_EXISTS(mrt, vr.mifi)) {
1772 			vr.icount = vif->pkt_in;
1773 			vr.ocount = vif->pkt_out;
1774 			vr.ibytes = vif->bytes_in;
1775 			vr.obytes = vif->bytes_out;
1776 			read_unlock(&mrt_lock);
1777 
1778 			if (copy_to_user(arg, &vr, sizeof(vr)))
1779 				return -EFAULT;
1780 			return 0;
1781 		}
1782 		read_unlock(&mrt_lock);
1783 		return -EADDRNOTAVAIL;
1784 	case SIOCGETSGCNT_IN6:
1785 		if (copy_from_user(&sr, arg, sizeof(sr)))
1786 			return -EFAULT;
1787 
1788 		read_lock(&mrt_lock);
1789 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1790 		if (c) {
1791 			sr.pktcnt = c->mfc_un.res.pkt;
1792 			sr.bytecnt = c->mfc_un.res.bytes;
1793 			sr.wrong_if = c->mfc_un.res.wrong_if;
1794 			read_unlock(&mrt_lock);
1795 
1796 			if (copy_to_user(arg, &sr, sizeof(sr)))
1797 				return -EFAULT;
1798 			return 0;
1799 		}
1800 		read_unlock(&mrt_lock);
1801 		return -EADDRNOTAVAIL;
1802 	default:
1803 		return -ENOIOCTLCMD;
1804 	}
1805 }
1806 
1807 
1808 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1809 {
1810 	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1811 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1812 	return dst_output(skb);
1813 }
1814 
1815 /*
1816  *	Processing handlers for ip6mr_forward
1817  */
1818 
1819 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1820 			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1821 {
1822 	struct ipv6hdr *ipv6h;
1823 	struct mif_device *vif = &mrt->vif6_table[vifi];
1824 	struct net_device *dev;
1825 	struct dst_entry *dst;
1826 	struct flowi fl;
1827 
1828 	if (vif->dev == NULL)
1829 		goto out_free;
1830 
1831 #ifdef CONFIG_IPV6_PIMSM_V2
1832 	if (vif->flags & MIFF_REGISTER) {
1833 		vif->pkt_out++;
1834 		vif->bytes_out += skb->len;
1835 		vif->dev->stats.tx_bytes += skb->len;
1836 		vif->dev->stats.tx_packets++;
1837 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1838 		goto out_free;
1839 	}
1840 #endif
1841 
1842 	ipv6h = ipv6_hdr(skb);
1843 
1844 	fl = (struct flowi) {
1845 		.oif = vif->link,
1846 		.fl6_dst = ipv6h->daddr,
1847 	};
1848 
1849 	dst = ip6_route_output(net, NULL, &fl);
1850 	if (!dst)
1851 		goto out_free;
1852 
1853 	skb_dst_drop(skb);
1854 	skb_dst_set(skb, dst);
1855 
1856 	/*
1857 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1858 	 * not only before forwarding, but also after forwarding on all output
1859 	 * interfaces. Clearly, if an mrouter runs a multicast routing
1860 	 * program, that program should receive packets regardless of the
1861 	 * interface it is joined on.
1862 	 * If we did not do this, the program would have to join on all
1863 	 * interfaces. On the other hand, a multihomed host (or router, but
1864 	 * not mrouter) cannot join on more than one interface - doing so
1865 	 * would result in receiving multiple packets.
1866 	 */
1867 	dev = vif->dev;
1868 	skb->dev = dev;
1869 	vif->pkt_out++;
1870 	vif->bytes_out += skb->len;
1871 
1872 	/* We are about to write */
1873 	/* XXX: extension headers? */
1874 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1875 		goto out_free;
1876 
1877 	ipv6h = ipv6_hdr(skb);
1878 	ipv6h->hop_limit--;
1879 
1880 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1881 
1882 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
1883 		       ip6mr_forward2_finish);
1884 
1885 out_free:
1886 	kfree_skb(skb);
1887 	return 0;
1888 }
1889 
1890 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
1891 {
1892 	int ct;
1893 
1894 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
1895 		if (mrt->vif6_table[ct].dev == dev)
1896 			break;
1897 	}
1898 	return ct;
1899 }
1900 
1901 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
1902 			  struct sk_buff *skb, struct mfc6_cache *cache)
1903 {
1904 	int psend = -1;
1905 	int vif, ct;
1906 
1907 	vif = cache->mf6c_parent;
1908 	cache->mfc_un.res.pkt++;
1909 	cache->mfc_un.res.bytes += skb->len;
1910 
1911 	/*
1912 	 * Wrong interface: drop packet and (maybe) send PIM assert.
1913 	 */
1914 	if (mrt->vif6_table[vif].dev != skb->dev) {
1915 		int true_vifi;
1916 
1917 		cache->mfc_un.res.wrong_if++;
1918 		true_vifi = ip6mr_find_vif(mrt, skb->dev);
1919 
1920 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
1921 		    /* PIM-SM uses asserts when switching from the RPT to the
1922 		       SPT, so we cannot check that the packet arrived on an
1923 		       oif. That is bad, but otherwise we would need to move a
1924 		       pretty large chunk of pimd into the kernel. Ouch... --ANK
1925 		     */
1926 		    (mrt->mroute_do_pim ||
1927 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
1928 		    time_after(jiffies,
1929 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1930 			cache->mfc_un.res.last_assert = jiffies;
1931 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
1932 		}
1933 		goto dont_forward;
1934 	}
1935 
1936 	mrt->vif6_table[vif].pkt_in++;
1937 	mrt->vif6_table[vif].bytes_in += skb->len;
1938 
1939 	/*
1940 	 *	Forward the frame
1941 	 */
1942 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
1943 		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
1944 			if (psend != -1) {
1945 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1946 				if (skb2)
1947 					ip6mr_forward2(net, mrt, skb2, cache, psend);
1948 			}
1949 			psend = ct;
1950 		}
1951 	}
1952 	if (psend != -1) {
1953 		ip6mr_forward2(net, mrt, skb, cache, psend);
1954 		return 0;
1955 	}
1956 
1957 dont_forward:
1958 	kfree_skb(skb);
1959 	return 0;
1960 }
1961 
1962 
1963 /*
1964  *	Multicast packets for forwarding arrive here
1965  */
1966 
1967 int ip6_mr_input(struct sk_buff *skb)
1968 {
1969 	struct mfc6_cache *cache;
1970 	struct net *net = dev_net(skb->dev);
1971 	struct mr6_table *mrt;
1972 	struct flowi fl = {
1973 		.iif	= skb->dev->ifindex,
1974 		.mark	= skb->mark,
1975 	};
1976 	int err;
1977 
1978 	err = ip6mr_fib_lookup(net, &fl, &mrt);
1979 	if (err < 0)
1980 		return err;
1981 
1982 	read_lock(&mrt_lock);
1983 	cache = ip6mr_cache_find(mrt,
1984 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1985 
1986 	/*
1987 	 *	No usable cache entry
1988 	 */
1989 	if (cache == NULL) {
1990 		int vif;
1991 
1992 		vif = ip6mr_find_vif(mrt, skb->dev);
1993 		if (vif >= 0) {
1994 			int err = ip6mr_cache_unresolved(mrt, vif, skb);
1995 			read_unlock(&mrt_lock);
1996 
1997 			return err;
1998 		}
1999 		read_unlock(&mrt_lock);
2000 		kfree_skb(skb);
2001 		return -ENODEV;
2002 	}
2003 
2004 	ip6_mr_forward(net, mrt, skb, cache);
2005 
2006 	read_unlock(&mrt_lock);
2007 
2008 	return 0;
2009 }
2010 
2011 
2012 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2013 			       struct mfc6_cache *c, struct rtmsg *rtm)
2014 {
2015 	int ct;
2016 	struct rtnexthop *nhp;
2017 	u8 *b = skb_tail_pointer(skb);
2018 	struct rtattr *mp_head;
2019 
2020 	/* If cache is unresolved, don't try to parse IIF and OIF */
2021 	if (c->mf6c_parent >= MAXMIFS)
2022 		return -ENOENT;
2023 
2024 	if (MIF_EXISTS(mrt, c->mf6c_parent))
2025 		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);
2026 
2027 	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
2028 
2029 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2030 		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2031 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
2032 				goto rtattr_failure;
2033 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
2034 			nhp->rtnh_flags = 0;
2035 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2036 			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2037 			nhp->rtnh_len = sizeof(*nhp);
2038 		}
2039 	}
2040 	mp_head->rta_type = RTA_MULTIPATH;
2041 	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
2042 	rtm->rtm_type = RTN_MULTICAST;
2043 	return 1;
2044 
2045 rtattr_failure:
2046 	nlmsg_trim(skb, b);
2047 	return -EMSGSIZE;
2048 }
2049 
2050 int ip6mr_get_route(struct net *net,
2051 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2052 {
2053 	int err;
2054 	struct mr6_table *mrt;
2055 	struct mfc6_cache *cache;
2056 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2057 
2058 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2059 	if (mrt == NULL)
2060 		return -ENOENT;
2061 
2062 	read_lock(&mrt_lock);
2063 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2064 
2065 	if (!cache) {
2066 		struct sk_buff *skb2;
2067 		struct ipv6hdr *iph;
2068 		struct net_device *dev;
2069 		int vif;
2070 
2071 		if (nowait) {
2072 			read_unlock(&mrt_lock);
2073 			return -EAGAIN;
2074 		}
2075 
2076 		dev = skb->dev;
2077 		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2078 			read_unlock(&mrt_lock);
2079 			return -ENODEV;
2080 		}
2081 
2082 		/* really correct? */
2083 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2084 		if (!skb2) {
2085 			read_unlock(&mrt_lock);
2086 			return -ENOMEM;
2087 		}
2088 
2089 		skb_reset_transport_header(skb2);
2090 
2091 		skb_put(skb2, sizeof(struct ipv6hdr));
2092 		skb_reset_network_header(skb2);
2093 
2094 		iph = ipv6_hdr(skb2);
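		/* A version of 0 marks this skb as a parked netlink request
		 * on the unresolved queue; see the ipv6_hdr(skb)->version == 0
		 * checks in ip6mr_cache_resolve() and ip6mr_destroy_unres().
		 */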
2095 		iph->version = 0;
2096 		iph->priority = 0;
2097 		iph->flow_lbl[0] = 0;
2098 		iph->flow_lbl[1] = 0;
2099 		iph->flow_lbl[2] = 0;
2100 		iph->payload_len = 0;
2101 		iph->nexthdr = IPPROTO_NONE;
2102 		iph->hop_limit = 0;
2103 		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
2104 		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
2105 
2106 		err = ip6mr_cache_unresolved(mrt, vif, skb2);
2107 		read_unlock(&mrt_lock);
2108 
2109 		return err;
2110 	}
2111 
2112 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2113 		cache->mfc_flags |= MFC_NOTIFY;
2114 
2115 	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2116 	read_unlock(&mrt_lock);
2117 	return err;
2118 }
2119 
2120 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2121 			     u32 pid, u32 seq, struct mfc6_cache *c)
2122 {
2123 	struct nlmsghdr *nlh;
2124 	struct rtmsg *rtm;
2125 
2126 	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2127 	if (nlh == NULL)
2128 		return -EMSGSIZE;
2129 
2130 	rtm = nlmsg_data(nlh);
2131 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2132 	rtm->rtm_dst_len  = 128;
2133 	rtm->rtm_src_len  = 128;
2134 	rtm->rtm_tos      = 0;
2135 	rtm->rtm_table    = mrt->id;
2136 	NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2137 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2138 	rtm->rtm_protocol = RTPROT_UNSPEC;
2139 	rtm->rtm_flags    = 0;
2140 
2141 	NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin);
2142 	NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp);
2143 
2144 	if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
2145 		goto nla_put_failure;
2146 
2147 	return nlmsg_end(skb, nlh);
2148 
2149 nla_put_failure:
2150 	nlmsg_cancel(skb, nlh);
2151 	return -EMSGSIZE;
2152 }
2153 
2154 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2155 {
2156 	struct net *net = sock_net(skb->sk);
2157 	struct mr6_table *mrt;
2158 	struct mfc6_cache *mfc;
2159 	unsigned int t = 0, s_t;
2160 	unsigned int h = 0, s_h;
2161 	unsigned int e = 0, s_e;
2162 
2163 	s_t = cb->args[0];
2164 	s_h = cb->args[1];
2165 	s_e = cb->args[2];
2166 
2167 	read_lock(&mrt_lock);
2168 	ip6mr_for_each_table(mrt, net) {
2169 		if (t < s_t)
2170 			goto next_table;
2171 		if (t > s_t)
2172 			s_h = 0;
2173 		for (h = s_h; h < MFC6_LINES; h++) {
2174 			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2175 				if (e < s_e)
2176 					goto next_entry;
2177 				if (ip6mr_fill_mroute(mrt, skb,
2178 						      NETLINK_CB(cb->skb).pid,
2179 						      cb->nlh->nlmsg_seq,
2180 						      mfc) < 0)
2181 					goto done;
2182 next_entry:
2183 				e++;
2184 			}
2185 			e = s_e = 0;
2186 		}
2187 		s_h = 0;
2188 next_table:
2189 		t++;
2190 	}
2191 done:
2192 	read_unlock(&mrt_lock);
2193 
2194 	cb->args[2] = e;
2195 	cb->args[1] = h;
2196 	cb->args[0] = t;
2197 
2198 	return skb->len;
2199 }
2200