xref: /openbmc/linux/net/ipv6/ip6mr.c (revision 95e9fd10)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <asm/uaccess.h>
20 #include <linux/types.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
23 #include <linux/timer.h>
24 #include <linux/mm.h>
25 #include <linux/kernel.h>
26 #include <linux/fcntl.h>
27 #include <linux/stat.h>
28 #include <linux/socket.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/inetdevice.h>
32 #include <linux/proc_fs.h>
33 #include <linux/seq_file.h>
34 #include <linux/init.h>
35 #include <linux/slab.h>
36 #include <linux/compat.h>
37 #include <net/protocol.h>
38 #include <linux/skbuff.h>
39 #include <net/sock.h>
40 #include <net/raw.h>
41 #include <linux/notifier.h>
42 #include <linux/if_arp.h>
43 #include <net/checksum.h>
44 #include <net/netlink.h>
45 #include <net/fib_rules.h>
46 
47 #include <net/ipv6.h>
48 #include <net/ip6_route.h>
49 #include <linux/mroute6.h>
50 #include <linux/pim.h>
51 #include <net/addrconf.h>
52 #include <linux/netfilter_ipv6.h>
53 #include <linux/export.h>
54 #include <net/ip6_checksum.h>
55 
/* State of one IPv6 multicast routing table. */
struct mr6_table {
	struct list_head	list;		/* link in net->ipv6.mr6_tables */
#ifdef CONFIG_NET_NS
	struct net		*net;		/* namespace, set with write_pnet() */
#endif
	u32			id;		/* table identifier */
	struct sock		*mroute6_sk;	/* socket that receives cache reports */
	struct timer_list	ipmr_expire_timer;	/* runs ipmr_expire_process() */
	struct list_head	mfc6_unres_queue;	/* unresolved cache entries */
	struct list_head	mfc6_cache_array[MFC6_LINES];	/* resolved entries, hashed */
	struct mif_device	vif6_table[MAXMIFS];	/* multicast interfaces */
	int			maxvif;		/* highest mif index in use, plus one */
	atomic_t		cache_resolve_queue_len;	/* entries on mfc6_unres_queue */
	int			mroute_do_assert;
	int			mroute_do_pim;
#ifdef CONFIG_IPV6_PIMSM_V2
	int			mroute_reg_vif_num;	/* register mif index, -1 if none */
#endif
};
75 
/* fib rule for ip6mr; carries nothing beyond the generic rule. */
struct ip6mr_rule {
	struct fib_rule		common;
};
79 
/* Result slot of a rules lookup: the table picked by ip6mr_rule_action(). */
struct ip6mr_result {
	struct mr6_table	*mrt;
};
83 
/*
 * Big lock, protecting the vif table, the mrt cache and the mroute socket
 * state.  Note that changes are additionally serialized via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

/* True when mif slot _idx of table _mrt has a device attached. */
#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)

/* Special spinlock for the queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/*
 * We return to Alan's original scheme.  The hash table of resolved
 * entries is changed only in process context and is protected by the
 * weak lock mrt_lock.  The queue of unresolved entries is protected by
 * the strong spinlock mfc_unres_lock.
 *
 * In this case the data path is entirely free of exclusive locks.
 */

/* Slab cache backing struct mfc6_cache allocations. */
static struct kmem_cache *mrt_cachep __read_mostly;

/* Forward declarations for functions used before their definition. */
static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr6_table *mrt);

static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr6_table *mrt);
static void ipmr_expire_process(unsigned long arg);
122 
123 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
124 #define ip6mr_for_each_table(mrt, net) \
125 	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
126 
127 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
128 {
129 	struct mr6_table *mrt;
130 
131 	ip6mr_for_each_table(mrt, net) {
132 		if (mrt->id == id)
133 			return mrt;
134 	}
135 	return NULL;
136 }
137 
138 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
139 			    struct mr6_table **mrt)
140 {
141 	struct ip6mr_result res;
142 	struct fib_lookup_arg arg = { .result = &res, };
143 	int err;
144 
145 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
146 			       flowi6_to_flowi(flp6), 0, &arg);
147 	if (err < 0)
148 		return err;
149 	*mrt = res.mrt;
150 	return 0;
151 }
152 
153 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
154 			     int flags, struct fib_lookup_arg *arg)
155 {
156 	struct ip6mr_result *res = arg->result;
157 	struct mr6_table *mrt;
158 
159 	switch (rule->action) {
160 	case FR_ACT_TO_TBL:
161 		break;
162 	case FR_ACT_UNREACHABLE:
163 		return -ENETUNREACH;
164 	case FR_ACT_PROHIBIT:
165 		return -EACCES;
166 	case FR_ACT_BLACKHOLE:
167 	default:
168 		return -EINVAL;
169 	}
170 
171 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
172 	if (mrt == NULL)
173 		return -EAGAIN;
174 	res->mrt = mrt;
175 	return 0;
176 }
177 
/* fib-rules ->match callback: always reports a match (returns 1). */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}
182 
/* Netlink attribute policy: only the generic fib-rule attributes. */
static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};
186 
/* fib-rules ->configure callback: does nothing and returns 0. */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}
192 
/* fib-rules ->compare callback: always returns 1. */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}
198 
199 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
200 			   struct fib_rule_hdr *frh)
201 {
202 	frh->dst_len = 0;
203 	frh->src_len = 0;
204 	frh->tos     = 0;
205 	return 0;
206 }
207 
/*
 * Template handed to fib_rules_register() in ip6mr_rules_init(); wires
 * the ip6mr rule callbacks defined above into the fib-rules framework.
 */
static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};
222 
223 static int __net_init ip6mr_rules_init(struct net *net)
224 {
225 	struct fib_rules_ops *ops;
226 	struct mr6_table *mrt;
227 	int err;
228 
229 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
230 	if (IS_ERR(ops))
231 		return PTR_ERR(ops);
232 
233 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
234 
235 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
236 	if (mrt == NULL) {
237 		err = -ENOMEM;
238 		goto err1;
239 	}
240 
241 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
242 	if (err < 0)
243 		goto err2;
244 
245 	net->ipv6.mr6_rules_ops = ops;
246 	return 0;
247 
248 err2:
249 	kfree(mrt);
250 err1:
251 	fib_rules_unregister(ops);
252 	return err;
253 }
254 
255 static void __net_exit ip6mr_rules_exit(struct net *net)
256 {
257 	struct mr6_table *mrt, *next;
258 
259 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
260 		list_del(&mrt->list);
261 		ip6mr_free_table(mrt);
262 	}
263 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
264 }
265 #else
/*
 * Single-table build: the "loop" visits net->ipv6.mrt6 once (or not at
 * all when it is NULL).
 */
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

/* Single-table build: @id is ignored, the only table is returned. */
static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}
273 
/*
 * Single-table build: no rules to consult, *@mrt is always the one table
 * of @net and the return value is always 0.
 */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}
280 
281 static int __net_init ip6mr_rules_init(struct net *net)
282 {
283 	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
284 	return net->ipv6.mrt6 ? 0 : -ENOMEM;
285 }
286 
/* Per-namespace teardown (single-table build): free the one table. */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	ip6mr_free_table(net->ipv6.mrt6);
}
291 #endif
292 
293 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
294 {
295 	struct mr6_table *mrt;
296 	unsigned int i;
297 
298 	mrt = ip6mr_get_table(net, id);
299 	if (mrt != NULL)
300 		return mrt;
301 
302 	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
303 	if (mrt == NULL)
304 		return NULL;
305 	mrt->id = id;
306 	write_pnet(&mrt->net, net);
307 
308 	/* Forwarding cache */
309 	for (i = 0; i < MFC6_LINES; i++)
310 		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
311 
312 	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
313 
314 	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
315 		    (unsigned long)mrt);
316 
317 #ifdef CONFIG_IPV6_PIMSM_V2
318 	mrt->mroute_reg_vif_num = -1;
319 #endif
320 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
321 	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
322 #endif
323 	return mrt;
324 }
325 
326 static void ip6mr_free_table(struct mr6_table *mrt)
327 {
328 	del_timer(&mrt->ipmr_expire_timer);
329 	mroute_clean_tables(mrt);
330 	kfree(mrt);
331 }
332 
333 #ifdef CONFIG_PROC_FS
334 
/* seq_file cursor for the mfc cache listing. */
struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;		/* table being listed */
	struct list_head *cache;	/* list currently walked; NULL when done */
	int ct;				/* index into mfc6_cache_array */
};
341 
342 
/*
 * Position the iterator on the @pos'th cache entry, counting resolved
 * entries (hash lines in order) first and unresolved entries after them.
 *
 * On success the entry is returned WITH THE MATCHING LOCK STILL HELD:
 * mrt_lock (read) for a resolved entry, mfc_unres_lock for an unresolved
 * one; it->cache records which list, and therefore which lock, is in
 * use.  Returns NULL with no lock held and it->cache == NULL when @pos is
 * past the end.
 */
static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
					   struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr6_table *mrt = it->mrt;
	struct mfc6_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;	/* mrt_lock stays held */
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc6_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;	/* mfc_unres_lock stays held */
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}
368 
/*
 *	The /proc interfaces to multicast routing:
 *	/proc/ip6_mr_cache and /proc/ip6_mr_vif
 */

/* seq_file cursor for the mif listing. */
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;	/* table being listed */
	int ct;			/* current index into vif6_table */
};
378 
379 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
380 					    struct ipmr_vif_iter *iter,
381 					    loff_t pos)
382 {
383 	struct mr6_table *mrt = iter->mrt;
384 
385 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
386 		if (!MIF_EXISTS(mrt, iter->ct))
387 			continue;
388 		if (pos-- == 0)
389 			return &mrt->vif6_table[iter->ct];
390 	}
391 	return NULL;
392 }
393 
394 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
395 	__acquires(mrt_lock)
396 {
397 	struct ipmr_vif_iter *iter = seq->private;
398 	struct net *net = seq_file_net(seq);
399 	struct mr6_table *mrt;
400 
401 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
402 	if (mrt == NULL)
403 		return ERR_PTR(-ENOENT);
404 
405 	iter->mrt = mrt;
406 
407 	read_lock(&mrt_lock);
408 	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
409 		: SEQ_START_TOKEN;
410 }
411 
412 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
413 {
414 	struct ipmr_vif_iter *iter = seq->private;
415 	struct net *net = seq_file_net(seq);
416 	struct mr6_table *mrt = iter->mrt;
417 
418 	++*pos;
419 	if (v == SEQ_START_TOKEN)
420 		return ip6mr_vif_seq_idx(net, iter, 0);
421 
422 	while (++iter->ct < mrt->maxvif) {
423 		if (!MIF_EXISTS(mrt, iter->ct))
424 			continue;
425 		return &mrt->vif6_table[iter->ct];
426 	}
427 	return NULL;
428 }
429 
/* seq_file ->stop for the mif listing: drops the mrt_lock read lock. */
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}
435 
436 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
437 {
438 	struct ipmr_vif_iter *iter = seq->private;
439 	struct mr6_table *mrt = iter->mrt;
440 
441 	if (v == SEQ_START_TOKEN) {
442 		seq_puts(seq,
443 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
444 	} else {
445 		const struct mif_device *vif = v;
446 		const char *name = vif->dev ? vif->dev->name : "none";
447 
448 		seq_printf(seq,
449 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
450 			   vif - mrt->vif6_table,
451 			   name, vif->bytes_in, vif->pkt_in,
452 			   vif->bytes_out, vif->pkt_out,
453 			   vif->flags);
454 	}
455 	return 0;
456 }
457 
/* seq_file iterator callbacks for the mif listing. */
static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};
464 
/*
 * ->open for the mif listing: namespace-aware seq_file open with a
 * struct ipmr_vif_iter as private data.
 */
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}
470 
/* File operations of the mif listing (standard seq_file plumbing). */
static const struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
478 
479 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
480 {
481 	struct ipmr_mfc_iter *it = seq->private;
482 	struct net *net = seq_file_net(seq);
483 	struct mr6_table *mrt;
484 
485 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
486 	if (mrt == NULL)
487 		return ERR_PTR(-ENOENT);
488 
489 	it->mrt = mrt;
490 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
491 		: SEQ_START_TOKEN;
492 }
493 
/*
 * seq_file ->next for the mfc cache listing.
 *
 * Advances *pos and returns the entry following @v: first the rest of
 * the current list, then the next non-empty hash line, and finally the
 * unresolved queue.  When moving from the hash table to the unresolved
 * queue, mrt_lock is dropped and mfc_unres_lock taken instead.  Returns
 * NULL with no lock held and it->cache == NULL at the end.
 */
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	/* more entries on the list currently being walked? */
	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc6_cache, list);

	/* the unresolved queue is the last list */
	if (it->cache == &mrt->mfc6_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);

	/* continue with the next non-empty hash line */
	while (++it->ct < MFC6_LINES) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc6_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc6_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc6_cache, list);

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}
536 
537 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
538 {
539 	struct ipmr_mfc_iter *it = seq->private;
540 	struct mr6_table *mrt = it->mrt;
541 
542 	if (it->cache == &mrt->mfc6_unres_queue)
543 		spin_unlock_bh(&mfc_unres_lock);
544 	else if (it->cache == mrt->mfc6_cache_array)
545 		read_unlock(&mrt_lock);
546 }
547 
548 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
549 {
550 	int n;
551 
552 	if (v == SEQ_START_TOKEN) {
553 		seq_puts(seq,
554 			 "Group                            "
555 			 "Origin                           "
556 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
557 	} else {
558 		const struct mfc6_cache *mfc = v;
559 		const struct ipmr_mfc_iter *it = seq->private;
560 		struct mr6_table *mrt = it->mrt;
561 
562 		seq_printf(seq, "%pI6 %pI6 %-3hd",
563 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
564 			   mfc->mf6c_parent);
565 
566 		if (it->cache != &mrt->mfc6_unres_queue) {
567 			seq_printf(seq, " %8lu %8lu %8lu",
568 				   mfc->mfc_un.res.pkt,
569 				   mfc->mfc_un.res.bytes,
570 				   mfc->mfc_un.res.wrong_if);
571 			for (n = mfc->mfc_un.res.minvif;
572 			     n < mfc->mfc_un.res.maxvif; n++) {
573 				if (MIF_EXISTS(mrt, n) &&
574 				    mfc->mfc_un.res.ttls[n] < 255)
575 					seq_printf(seq,
576 						   " %2d:%-3d",
577 						   n, mfc->mfc_un.res.ttls[n]);
578 			}
579 		} else {
580 			/* unresolved mfc_caches don't contain
581 			 * pkt, bytes and wrong_if values
582 			 */
583 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
584 		}
585 		seq_putc(seq, '\n');
586 	}
587 	return 0;
588 }
589 
/* seq_file iterator callbacks for the mfc cache listing. */
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
596 
/*
 * ->open for the mfc cache listing: namespace-aware seq_file open with a
 * struct ipmr_mfc_iter as private data.
 */
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}
602 
/* File operations of the mfc cache listing (standard seq_file plumbing). */
static const struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
610 #endif
611 
612 #ifdef CONFIG_IPV6_PIMSM_V2
613 
/*
 * Receive handler for IPPROTO_PIM (see pim6_protocol).
 *
 * Accepts PIM register messages that carry an encapsulated IPv6
 * multicast packet, strips the outer headers and re-injects the inner
 * packet through the table's register device.  Anything else is dropped.
 * The skb is consumed in every case and 0 is always returned.
 */
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	/* need the PIM register header plus the inner IPv6 header */
	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	/*
	 * Drop unless this is a (non-null) register message of the
	 * expected PIM version.  The checksum is accepted if it verifies
	 * either over the PIM header alone or over the whole packet.
	 */
	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	/* grab a reference on the register device, if one is configured */
	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	/* make the inner IPv6 packet the start of the skb */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;
	skb->pkt_type = PACKET_HOST;

	skb_tunnel_rx(skb, reg_dev);

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
679 
/* inet6 protocol descriptor whose receive handler is pim6_rcv(). */
static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};
683 
684 /* Service routines creating virtual interfaces: PIMREG */
685 
686 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
687 				      struct net_device *dev)
688 {
689 	struct net *net = dev_net(dev);
690 	struct mr6_table *mrt;
691 	struct flowi6 fl6 = {
692 		.flowi6_oif	= dev->ifindex,
693 		.flowi6_iif	= skb->skb_iif,
694 		.flowi6_mark	= skb->mark,
695 	};
696 	int err;
697 
698 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
699 	if (err < 0) {
700 		kfree_skb(skb);
701 		return err;
702 	}
703 
704 	read_lock(&mrt_lock);
705 	dev->stats.tx_bytes += skb->len;
706 	dev->stats.tx_packets++;
707 	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
708 	read_unlock(&mrt_lock);
709 	kfree_skb(skb);
710 	return NETDEV_TX_OK;
711 }
712 
/* Device operations of the register device: transmit only. */
static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};
716 
717 static void reg_vif_setup(struct net_device *dev)
718 {
719 	dev->type		= ARPHRD_PIMREG;
720 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
721 	dev->flags		= IFF_NOARP;
722 	dev->netdev_ops		= &reg_vif_netdev_ops;
723 	dev->destructor		= free_netdev;
724 	dev->features		|= NETIF_F_NETNS_LOCAL;
725 }
726 
727 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
728 {
729 	struct net_device *dev;
730 	char name[IFNAMSIZ];
731 
732 	if (mrt->id == RT6_TABLE_DFLT)
733 		sprintf(name, "pim6reg");
734 	else
735 		sprintf(name, "pim6reg%u", mrt->id);
736 
737 	dev = alloc_netdev(0, name, reg_vif_setup);
738 	if (dev == NULL)
739 		return NULL;
740 
741 	dev_net_set(dev, net);
742 
743 	if (register_netdevice(dev)) {
744 		free_netdev(dev);
745 		return NULL;
746 	}
747 	dev->iflink = 0;
748 
749 	if (dev_open(dev))
750 		goto failure;
751 
752 	dev_hold(dev);
753 	return dev;
754 
755 failure:
756 	/* allow the register to be completed before unregistering. */
757 	rtnl_unlock();
758 	rtnl_lock();
759 
760 	unregister_netdevice(dev);
761 	return NULL;
762 }
763 #endif
764 
765 /*
766  *	Delete a VIF entry
767  */
768 
769 static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
770 {
771 	struct mif_device *v;
772 	struct net_device *dev;
773 	struct inet6_dev *in6_dev;
774 
775 	if (vifi < 0 || vifi >= mrt->maxvif)
776 		return -EADDRNOTAVAIL;
777 
778 	v = &mrt->vif6_table[vifi];
779 
780 	write_lock_bh(&mrt_lock);
781 	dev = v->dev;
782 	v->dev = NULL;
783 
784 	if (!dev) {
785 		write_unlock_bh(&mrt_lock);
786 		return -EADDRNOTAVAIL;
787 	}
788 
789 #ifdef CONFIG_IPV6_PIMSM_V2
790 	if (vifi == mrt->mroute_reg_vif_num)
791 		mrt->mroute_reg_vif_num = -1;
792 #endif
793 
794 	if (vifi + 1 == mrt->maxvif) {
795 		int tmp;
796 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
797 			if (MIF_EXISTS(mrt, tmp))
798 				break;
799 		}
800 		mrt->maxvif = tmp + 1;
801 	}
802 
803 	write_unlock_bh(&mrt_lock);
804 
805 	dev_set_allmulti(dev, -1);
806 
807 	in6_dev = __in6_dev_get(dev);
808 	if (in6_dev)
809 		in6_dev->cnf.mc_forwarding--;
810 
811 	if (v->flags & MIFF_REGISTER)
812 		unregister_netdevice_queue(dev, head);
813 
814 	dev_put(dev);
815 	return 0;
816 }
817 
/* Return cache entry @c to the mrt_cachep slab cache. */
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}
822 
823 /* Destroy an unresolved cache entry, killing queued skbs
824    and reporting error to netlink readers.
825  */
826 
827 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
828 {
829 	struct net *net = read_pnet(&mrt->net);
830 	struct sk_buff *skb;
831 
832 	atomic_dec(&mrt->cache_resolve_queue_len);
833 
834 	while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
835 		if (ipv6_hdr(skb)->version == 0) {
836 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
837 			nlh->nlmsg_type = NLMSG_ERROR;
838 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
839 			skb_trim(skb, nlh->nlmsg_len);
840 			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
841 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
842 		} else
843 			kfree_skb(skb);
844 	}
845 
846 	ip6mr_cache_free(c);
847 }
848 
849 
850 /* Timer process for all the unresolved queue. */
851 
852 static void ipmr_do_expire_process(struct mr6_table *mrt)
853 {
854 	unsigned long now = jiffies;
855 	unsigned long expires = 10 * HZ;
856 	struct mfc6_cache *c, *next;
857 
858 	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
859 		if (time_after(c->mfc_un.unres.expires, now)) {
860 			/* not yet... */
861 			unsigned long interval = c->mfc_un.unres.expires - now;
862 			if (interval < expires)
863 				expires = interval;
864 			continue;
865 		}
866 
867 		list_del(&c->list);
868 		ip6mr_destroy_unres(mrt, c);
869 	}
870 
871 	if (!list_empty(&mrt->mfc6_unres_queue))
872 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
873 }
874 
875 static void ipmr_expire_process(unsigned long arg)
876 {
877 	struct mr6_table *mrt = (struct mr6_table *)arg;
878 
879 	if (!spin_trylock(&mfc_unres_lock)) {
880 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
881 		return;
882 	}
883 
884 	if (!list_empty(&mrt->mfc6_unres_queue))
885 		ipmr_do_expire_process(mrt);
886 
887 	spin_unlock(&mfc_unres_lock);
888 }
889 
890 /* Fill oifs list. It is called under write locked mrt_lock. */
891 
892 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
893 				    unsigned char *ttls)
894 {
895 	int vifi;
896 
897 	cache->mfc_un.res.minvif = MAXMIFS;
898 	cache->mfc_un.res.maxvif = 0;
899 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
900 
901 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
902 		if (MIF_EXISTS(mrt, vifi) &&
903 		    ttls[vifi] && ttls[vifi] < 255) {
904 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
905 			if (cache->mfc_un.res.minvif > vifi)
906 				cache->mfc_un.res.minvif = vifi;
907 			if (cache->mfc_un.res.maxvif <= vifi)
908 				cache->mfc_un.res.maxvif = vifi + 1;
909 		}
910 	}
911 }
912 
/*
 * Add a multicast interface to table @mrt as described by @vifc.
 *
 * @mrtsock: when zero the mif is additionally flagged VIFF_STATIC.
 *
 * The slot index is taken from @vifc and is not range-checked here
 * (NOTE(review): presumably validated by the caller - confirm).
 *
 * Returns 0 on success, -EADDRINUSE if the slot (or, for a register mif,
 * the register slot) is already taken, -ENOBUFS if the register device
 * could not be created, -EADDRNOTAVAIL if the physical interface does
 * not exist, -EINVAL for unsupported flags, or the error returned by
 * dev_set_allmulti().
 */
static int mif6_add(struct net *net, struct mr6_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (MIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	/* both cases below leave a device reference held in dev */
	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	/* count this mif in the device's multicast forwarding setting */
	in6_dev = __in6_dev_get(dev);
	if (in6_dev)
		in6_dev->cnf.mc_forwarding++;

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
992 
993 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
994 					   const struct in6_addr *origin,
995 					   const struct in6_addr *mcastgrp)
996 {
997 	int line = MFC6_HASH(mcastgrp, origin);
998 	struct mfc6_cache *c;
999 
1000 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1001 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1002 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1003 			return c;
1004 	}
1005 	return NULL;
1006 }
1007 
1008 /*
1009  *	Allocate a multicast cache entry
1010  */
1011 static struct mfc6_cache *ip6mr_cache_alloc(void)
1012 {
1013 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1014 	if (c == NULL)
1015 		return NULL;
1016 	c->mfc_un.res.minvif = MAXMIFS;
1017 	return c;
1018 }
1019 
1020 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1021 {
1022 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1023 	if (c == NULL)
1024 		return NULL;
1025 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
1026 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
1027 	return c;
1028 }
1029 
1030 /*
1031  *	A cache entry has gone into a resolved state from queued
1032  */
1033 
1034 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1035 				struct mfc6_cache *uc, struct mfc6_cache *c)
1036 {
1037 	struct sk_buff *skb;
1038 
1039 	/*
1040 	 *	Play the pending entries through our router
1041 	 */
1042 
1043 	while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1044 		if (ipv6_hdr(skb)->version == 0) {
1045 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1046 
1047 			if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
1048 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1049 			} else {
1050 				nlh->nlmsg_type = NLMSG_ERROR;
1051 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
1052 				skb_trim(skb, nlh->nlmsg_len);
1053 				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
1054 			}
1055 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
1056 		} else
1057 			ip6_mr_forward(net, mrt, skb, c);
1058 	}
1059 }
1060 
/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 *
 *	Builds a struct mrt6msg message about @pkt (type @assert, mif @mifi)
 *	and queues it on the table's mroute socket.  For MRT6MSG_WHOLEPKT the
 *	message is prepended to a copy of the packet; for all other types a
 *	fresh skb carrying only a copy of the IPv6 header plus the message is
 *	used.  @pkt itself is not consumed.
 *
 *	Returns the result of sock_queue_rcv_skb(), -ENOBUFS if no skb could
 *	be allocated, or -EINVAL if no mroute socket is attached.
 */

static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	/* nobody to deliver to: discard the message we just built */
	if (mrt->mroute6_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}
1156 
1157 /*
1158  *	Queue a packet for resolution. It gets locked cache entry!
1159  */
1160 
1161 static int
1162 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1163 {
1164 	bool found = false;
1165 	int err;
1166 	struct mfc6_cache *c;
1167 
1168 	spin_lock_bh(&mfc_unres_lock);
1169 	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1170 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1171 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1172 			found = true;
1173 			break;
1174 		}
1175 	}
1176 
1177 	if (!found) {
1178 		/*
1179 		 *	Create a new entry if allowable
1180 		 */
1181 
1182 		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1183 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
1184 			spin_unlock_bh(&mfc_unres_lock);
1185 
1186 			kfree_skb(skb);
1187 			return -ENOBUFS;
1188 		}
1189 
1190 		/*
1191 		 *	Fill in the new cache entry
1192 		 */
1193 		c->mf6c_parent = -1;
1194 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1195 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1196 
1197 		/*
1198 		 *	Reflect first query at pim6sd
1199 		 */
1200 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1201 		if (err < 0) {
1202 			/* If the report failed throw the cache entry
1203 			   out - Brad Parker
1204 			 */
1205 			spin_unlock_bh(&mfc_unres_lock);
1206 
1207 			ip6mr_cache_free(c);
1208 			kfree_skb(skb);
1209 			return err;
1210 		}
1211 
1212 		atomic_inc(&mrt->cache_resolve_queue_len);
1213 		list_add(&c->list, &mrt->mfc6_unres_queue);
1214 
1215 		ipmr_do_expire_process(mrt);
1216 	}
1217 
1218 	/*
1219 	 *	See if we can append the packet
1220 	 */
1221 	if (c->mfc_un.unres.unresolved.qlen > 3) {
1222 		kfree_skb(skb);
1223 		err = -ENOBUFS;
1224 	} else {
1225 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1226 		err = 0;
1227 	}
1228 
1229 	spin_unlock_bh(&mfc_unres_lock);
1230 	return err;
1231 }
1232 
1233 /*
1234  *	MFC6 cache manipulation by user space
1235  */
1236 
1237 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
1238 {
1239 	int line;
1240 	struct mfc6_cache *c, *next;
1241 
1242 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1243 
1244 	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1245 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1246 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1247 			write_lock_bh(&mrt_lock);
1248 			list_del(&c->list);
1249 			write_unlock_bh(&mrt_lock);
1250 
1251 			ip6mr_cache_free(c);
1252 			return 0;
1253 		}
1254 	}
1255 	return -ENOENT;
1256 }
1257 
1258 static int ip6mr_device_event(struct notifier_block *this,
1259 			      unsigned long event, void *ptr)
1260 {
1261 	struct net_device *dev = ptr;
1262 	struct net *net = dev_net(dev);
1263 	struct mr6_table *mrt;
1264 	struct mif_device *v;
1265 	int ct;
1266 	LIST_HEAD(list);
1267 
1268 	if (event != NETDEV_UNREGISTER)
1269 		return NOTIFY_DONE;
1270 
1271 	ip6mr_for_each_table(mrt, net) {
1272 		v = &mrt->vif6_table[0];
1273 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1274 			if (v->dev == dev)
1275 				mif6_delete(mrt, ct, &list);
1276 		}
1277 	}
1278 	unregister_netdevice_many(&list);
1279 
1280 	return NOTIFY_DONE;
1281 }
1282 
/* Notifier block that routes netdevice events to ip6mr_device_event(). */
static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
1286 
1287 /*
1288  *	Setup for IP multicast routing
1289  */
1290 
1291 static int __net_init ip6mr_net_init(struct net *net)
1292 {
1293 	int err;
1294 
1295 	err = ip6mr_rules_init(net);
1296 	if (err < 0)
1297 		goto fail;
1298 
1299 #ifdef CONFIG_PROC_FS
1300 	err = -ENOMEM;
1301 	if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1302 		goto proc_vif_fail;
1303 	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1304 		goto proc_cache_fail;
1305 #endif
1306 
1307 	return 0;
1308 
1309 #ifdef CONFIG_PROC_FS
1310 proc_cache_fail:
1311 	proc_net_remove(net, "ip6_mr_vif");
1312 proc_vif_fail:
1313 	ip6mr_rules_exit(net);
1314 #endif
1315 fail:
1316 	return err;
1317 }
1318 
1319 static void __net_exit ip6mr_net_exit(struct net *net)
1320 {
1321 #ifdef CONFIG_PROC_FS
1322 	proc_net_remove(net, "ip6_mr_cache");
1323 	proc_net_remove(net, "ip6_mr_vif");
1324 #endif
1325 	ip6mr_rules_exit(net);
1326 }
1327 
/* Per-network-namespace init/exit hooks, registered by ip6_mr_init(). */
static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};
1332 
1333 int __init ip6_mr_init(void)
1334 {
1335 	int err;
1336 
1337 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1338 				       sizeof(struct mfc6_cache),
1339 				       0, SLAB_HWCACHE_ALIGN,
1340 				       NULL);
1341 	if (!mrt_cachep)
1342 		return -ENOMEM;
1343 
1344 	err = register_pernet_subsys(&ip6mr_net_ops);
1345 	if (err)
1346 		goto reg_pernet_fail;
1347 
1348 	err = register_netdevice_notifier(&ip6_mr_notifier);
1349 	if (err)
1350 		goto reg_notif_fail;
1351 #ifdef CONFIG_IPV6_PIMSM_V2
1352 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1353 		pr_err("%s: can't add PIM protocol\n", __func__);
1354 		err = -EAGAIN;
1355 		goto add_proto_fail;
1356 	}
1357 #endif
1358 	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
1359 		      ip6mr_rtm_dumproute, NULL);
1360 	return 0;
1361 #ifdef CONFIG_IPV6_PIMSM_V2
1362 add_proto_fail:
1363 	unregister_netdevice_notifier(&ip6_mr_notifier);
1364 #endif
1365 reg_notif_fail:
1366 	unregister_pernet_subsys(&ip6mr_net_ops);
1367 reg_pernet_fail:
1368 	kmem_cache_destroy(mrt_cachep);
1369 	return err;
1370 }
1371 
1372 void ip6_mr_cleanup(void)
1373 {
1374 	unregister_netdevice_notifier(&ip6_mr_notifier);
1375 	unregister_pernet_subsys(&ip6mr_net_ops);
1376 	kmem_cache_destroy(mrt_cachep);
1377 }
1378 
1379 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1380 			 struct mf6cctl *mfc, int mrtsock)
1381 {
1382 	bool found = false;
1383 	int line;
1384 	struct mfc6_cache *uc, *c;
1385 	unsigned char ttls[MAXMIFS];
1386 	int i;
1387 
1388 	if (mfc->mf6cc_parent >= MAXMIFS)
1389 		return -ENFILE;
1390 
1391 	memset(ttls, 255, MAXMIFS);
1392 	for (i = 0; i < MAXMIFS; i++) {
1393 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1394 			ttls[i] = 1;
1395 
1396 	}
1397 
1398 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1399 
1400 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1401 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1402 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1403 			found = true;
1404 			break;
1405 		}
1406 	}
1407 
1408 	if (found) {
1409 		write_lock_bh(&mrt_lock);
1410 		c->mf6c_parent = mfc->mf6cc_parent;
1411 		ip6mr_update_thresholds(mrt, c, ttls);
1412 		if (!mrtsock)
1413 			c->mfc_flags |= MFC_STATIC;
1414 		write_unlock_bh(&mrt_lock);
1415 		return 0;
1416 	}
1417 
1418 	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1419 		return -EINVAL;
1420 
1421 	c = ip6mr_cache_alloc();
1422 	if (c == NULL)
1423 		return -ENOMEM;
1424 
1425 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1426 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1427 	c->mf6c_parent = mfc->mf6cc_parent;
1428 	ip6mr_update_thresholds(mrt, c, ttls);
1429 	if (!mrtsock)
1430 		c->mfc_flags |= MFC_STATIC;
1431 
1432 	write_lock_bh(&mrt_lock);
1433 	list_add(&c->list, &mrt->mfc6_cache_array[line]);
1434 	write_unlock_bh(&mrt_lock);
1435 
1436 	/*
1437 	 *	Check to see if we resolved a queued list. If so we
1438 	 *	need to send on the frames and tidy up.
1439 	 */
1440 	found = false;
1441 	spin_lock_bh(&mfc_unres_lock);
1442 	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1443 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1444 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1445 			list_del(&uc->list);
1446 			atomic_dec(&mrt->cache_resolve_queue_len);
1447 			found = true;
1448 			break;
1449 		}
1450 	}
1451 	if (list_empty(&mrt->mfc6_unres_queue))
1452 		del_timer(&mrt->ipmr_expire_timer);
1453 	spin_unlock_bh(&mfc_unres_lock);
1454 
1455 	if (found) {
1456 		ip6mr_cache_resolve(net, mrt, uc, c);
1457 		ip6mr_cache_free(uc);
1458 	}
1459 	return 0;
1460 }
1461 
1462 /*
1463  *	Close the multicast socket, and clear the vif tables etc
1464  */
1465 
1466 static void mroute_clean_tables(struct mr6_table *mrt)
1467 {
1468 	int i;
1469 	LIST_HEAD(list);
1470 	struct mfc6_cache *c, *next;
1471 
1472 	/*
1473 	 *	Shut down all active vif entries
1474 	 */
1475 	for (i = 0; i < mrt->maxvif; i++) {
1476 		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1477 			mif6_delete(mrt, i, &list);
1478 	}
1479 	unregister_netdevice_many(&list);
1480 
1481 	/*
1482 	 *	Wipe the cache
1483 	 */
1484 	for (i = 0; i < MFC6_LINES; i++) {
1485 		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1486 			if (c->mfc_flags & MFC_STATIC)
1487 				continue;
1488 			write_lock_bh(&mrt_lock);
1489 			list_del(&c->list);
1490 			write_unlock_bh(&mrt_lock);
1491 
1492 			ip6mr_cache_free(c);
1493 		}
1494 	}
1495 
1496 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1497 		spin_lock_bh(&mfc_unres_lock);
1498 		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1499 			list_del(&c->list);
1500 			ip6mr_destroy_unres(mrt, c);
1501 		}
1502 		spin_unlock_bh(&mfc_unres_lock);
1503 	}
1504 }
1505 
1506 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1507 {
1508 	int err = 0;
1509 	struct net *net = sock_net(sk);
1510 
1511 	rtnl_lock();
1512 	write_lock_bh(&mrt_lock);
1513 	if (likely(mrt->mroute6_sk == NULL)) {
1514 		mrt->mroute6_sk = sk;
1515 		net->ipv6.devconf_all->mc_forwarding++;
1516 	}
1517 	else
1518 		err = -EADDRINUSE;
1519 	write_unlock_bh(&mrt_lock);
1520 
1521 	rtnl_unlock();
1522 
1523 	return err;
1524 }
1525 
1526 int ip6mr_sk_done(struct sock *sk)
1527 {
1528 	int err = -EACCES;
1529 	struct net *net = sock_net(sk);
1530 	struct mr6_table *mrt;
1531 
1532 	rtnl_lock();
1533 	ip6mr_for_each_table(mrt, net) {
1534 		if (sk == mrt->mroute6_sk) {
1535 			write_lock_bh(&mrt_lock);
1536 			mrt->mroute6_sk = NULL;
1537 			net->ipv6.devconf_all->mc_forwarding--;
1538 			write_unlock_bh(&mrt_lock);
1539 
1540 			mroute_clean_tables(mrt);
1541 			err = 0;
1542 			break;
1543 		}
1544 	}
1545 	rtnl_unlock();
1546 
1547 	return err;
1548 }
1549 
1550 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1551 {
1552 	struct mr6_table *mrt;
1553 	struct flowi6 fl6 = {
1554 		.flowi6_iif	= skb->skb_iif,
1555 		.flowi6_oif	= skb->dev->ifindex,
1556 		.flowi6_mark	= skb->mark,
1557 	};
1558 
1559 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1560 		return NULL;
1561 
1562 	return mrt->mroute6_sk;
1563 }
1564 
1565 /*
1566  *	Socket options and virtual interface manipulation. The whole
1567  *	virtual interface system is a complete heap, but unfortunately
1568  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1569  *	MOSPF/PIM router set up we can clean this up.
1570  */
1571 
1572 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1573 {
1574 	int ret;
1575 	struct mif6ctl vif;
1576 	struct mf6cctl mfc;
1577 	mifi_t mifi;
1578 	struct net *net = sock_net(sk);
1579 	struct mr6_table *mrt;
1580 
1581 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1582 	if (mrt == NULL)
1583 		return -ENOENT;
1584 
1585 	if (optname != MRT6_INIT) {
1586 		if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
1587 			return -EACCES;
1588 	}
1589 
1590 	switch (optname) {
1591 	case MRT6_INIT:
1592 		if (sk->sk_type != SOCK_RAW ||
1593 		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1594 			return -EOPNOTSUPP;
1595 		if (optlen < sizeof(int))
1596 			return -EINVAL;
1597 
1598 		return ip6mr_sk_init(mrt, sk);
1599 
1600 	case MRT6_DONE:
1601 		return ip6mr_sk_done(sk);
1602 
1603 	case MRT6_ADD_MIF:
1604 		if (optlen < sizeof(vif))
1605 			return -EINVAL;
1606 		if (copy_from_user(&vif, optval, sizeof(vif)))
1607 			return -EFAULT;
1608 		if (vif.mif6c_mifi >= MAXMIFS)
1609 			return -ENFILE;
1610 		rtnl_lock();
1611 		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1612 		rtnl_unlock();
1613 		return ret;
1614 
1615 	case MRT6_DEL_MIF:
1616 		if (optlen < sizeof(mifi_t))
1617 			return -EINVAL;
1618 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1619 			return -EFAULT;
1620 		rtnl_lock();
1621 		ret = mif6_delete(mrt, mifi, NULL);
1622 		rtnl_unlock();
1623 		return ret;
1624 
1625 	/*
1626 	 *	Manipulate the forwarding caches. These live
1627 	 *	in a sort of kernel/user symbiosis.
1628 	 */
1629 	case MRT6_ADD_MFC:
1630 	case MRT6_DEL_MFC:
1631 		if (optlen < sizeof(mfc))
1632 			return -EINVAL;
1633 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1634 			return -EFAULT;
1635 		rtnl_lock();
1636 		if (optname == MRT6_DEL_MFC)
1637 			ret = ip6mr_mfc_delete(mrt, &mfc);
1638 		else
1639 			ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
1640 		rtnl_unlock();
1641 		return ret;
1642 
1643 	/*
1644 	 *	Control PIM assert (to activate pim will activate assert)
1645 	 */
1646 	case MRT6_ASSERT:
1647 	{
1648 		int v;
1649 		if (get_user(v, (int __user *)optval))
1650 			return -EFAULT;
1651 		mrt->mroute_do_assert = !!v;
1652 		return 0;
1653 	}
1654 
1655 #ifdef CONFIG_IPV6_PIMSM_V2
1656 	case MRT6_PIM:
1657 	{
1658 		int v;
1659 		if (get_user(v, (int __user *)optval))
1660 			return -EFAULT;
1661 		v = !!v;
1662 		rtnl_lock();
1663 		ret = 0;
1664 		if (v != mrt->mroute_do_pim) {
1665 			mrt->mroute_do_pim = v;
1666 			mrt->mroute_do_assert = v;
1667 		}
1668 		rtnl_unlock();
1669 		return ret;
1670 	}
1671 
1672 #endif
1673 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1674 	case MRT6_TABLE:
1675 	{
1676 		u32 v;
1677 
1678 		if (optlen != sizeof(u32))
1679 			return -EINVAL;
1680 		if (get_user(v, (u32 __user *)optval))
1681 			return -EFAULT;
1682 		if (sk == mrt->mroute6_sk)
1683 			return -EBUSY;
1684 
1685 		rtnl_lock();
1686 		ret = 0;
1687 		if (!ip6mr_new_table(net, v))
1688 			ret = -ENOMEM;
1689 		raw6_sk(sk)->ip6mr_table = v;
1690 		rtnl_unlock();
1691 		return ret;
1692 	}
1693 #endif
1694 	/*
1695 	 *	Spurious command, or MRT6_VERSION which you cannot
1696 	 *	set.
1697 	 */
1698 	default:
1699 		return -ENOPROTOOPT;
1700 	}
1701 }
1702 
1703 /*
1704  *	Getsock opt support for the multicast routing system.
1705  */
1706 
1707 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1708 			  int __user *optlen)
1709 {
1710 	int olr;
1711 	int val;
1712 	struct net *net = sock_net(sk);
1713 	struct mr6_table *mrt;
1714 
1715 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1716 	if (mrt == NULL)
1717 		return -ENOENT;
1718 
1719 	switch (optname) {
1720 	case MRT6_VERSION:
1721 		val = 0x0305;
1722 		break;
1723 #ifdef CONFIG_IPV6_PIMSM_V2
1724 	case MRT6_PIM:
1725 		val = mrt->mroute_do_pim;
1726 		break;
1727 #endif
1728 	case MRT6_ASSERT:
1729 		val = mrt->mroute_do_assert;
1730 		break;
1731 	default:
1732 		return -ENOPROTOOPT;
1733 	}
1734 
1735 	if (get_user(olr, optlen))
1736 		return -EFAULT;
1737 
1738 	olr = min_t(int, olr, sizeof(int));
1739 	if (olr < 0)
1740 		return -EINVAL;
1741 
1742 	if (put_user(olr, optlen))
1743 		return -EFAULT;
1744 	if (copy_to_user(optval, &val, olr))
1745 		return -EFAULT;
1746 	return 0;
1747 }
1748 
1749 /*
1750  *	The IP multicast ioctl support routines.
1751  */
1752 
1753 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1754 {
1755 	struct sioc_sg_req6 sr;
1756 	struct sioc_mif_req6 vr;
1757 	struct mif_device *vif;
1758 	struct mfc6_cache *c;
1759 	struct net *net = sock_net(sk);
1760 	struct mr6_table *mrt;
1761 
1762 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1763 	if (mrt == NULL)
1764 		return -ENOENT;
1765 
1766 	switch (cmd) {
1767 	case SIOCGETMIFCNT_IN6:
1768 		if (copy_from_user(&vr, arg, sizeof(vr)))
1769 			return -EFAULT;
1770 		if (vr.mifi >= mrt->maxvif)
1771 			return -EINVAL;
1772 		read_lock(&mrt_lock);
1773 		vif = &mrt->vif6_table[vr.mifi];
1774 		if (MIF_EXISTS(mrt, vr.mifi)) {
1775 			vr.icount = vif->pkt_in;
1776 			vr.ocount = vif->pkt_out;
1777 			vr.ibytes = vif->bytes_in;
1778 			vr.obytes = vif->bytes_out;
1779 			read_unlock(&mrt_lock);
1780 
1781 			if (copy_to_user(arg, &vr, sizeof(vr)))
1782 				return -EFAULT;
1783 			return 0;
1784 		}
1785 		read_unlock(&mrt_lock);
1786 		return -EADDRNOTAVAIL;
1787 	case SIOCGETSGCNT_IN6:
1788 		if (copy_from_user(&sr, arg, sizeof(sr)))
1789 			return -EFAULT;
1790 
1791 		read_lock(&mrt_lock);
1792 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1793 		if (c) {
1794 			sr.pktcnt = c->mfc_un.res.pkt;
1795 			sr.bytecnt = c->mfc_un.res.bytes;
1796 			sr.wrong_if = c->mfc_un.res.wrong_if;
1797 			read_unlock(&mrt_lock);
1798 
1799 			if (copy_to_user(arg, &sr, sizeof(sr)))
1800 				return -EFAULT;
1801 			return 0;
1802 		}
1803 		read_unlock(&mrt_lock);
1804 		return -EADDRNOTAVAIL;
1805 	default:
1806 		return -ENOIOCTLCMD;
1807 	}
1808 }
1809 
1810 #ifdef CONFIG_COMPAT
/*
 * Argument of SIOCGETSGCNT_IN6 as used by ip6mr_compat_ioctl(); the
 * counters are compat_ulong_t (presumably mirroring struct sioc_sg_req6
 * for 32-bit callers -- confirm against the uapi header).
 */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;	/* source address to look up */
	struct sockaddr_in6 grp;	/* group address to look up */
	compat_ulong_t pktcnt;		/* filled from mfc_un.res.pkt */
	compat_ulong_t bytecnt;		/* filled from mfc_un.res.bytes */
	compat_ulong_t wrong_if;	/* filled from mfc_un.res.wrong_if */
};
1818 
/*
 * Argument of SIOCGETMIFCNT_IN6 as used by ip6mr_compat_ioctl(); the
 * counters are compat_ulong_t (presumably mirroring struct sioc_mif_req6
 * for 32-bit callers -- confirm against the uapi header).
 */
struct compat_sioc_mif_req6 {
	mifi_t	mifi;			/* mif index to query */
	compat_ulong_t icount;		/* filled from vif->pkt_in */
	compat_ulong_t ocount;		/* filled from vif->pkt_out */
	compat_ulong_t ibytes;		/* filled from vif->bytes_in */
	compat_ulong_t obytes;		/* filled from vif->bytes_out */
};
1826 
1827 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1828 {
1829 	struct compat_sioc_sg_req6 sr;
1830 	struct compat_sioc_mif_req6 vr;
1831 	struct mif_device *vif;
1832 	struct mfc6_cache *c;
1833 	struct net *net = sock_net(sk);
1834 	struct mr6_table *mrt;
1835 
1836 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1837 	if (mrt == NULL)
1838 		return -ENOENT;
1839 
1840 	switch (cmd) {
1841 	case SIOCGETMIFCNT_IN6:
1842 		if (copy_from_user(&vr, arg, sizeof(vr)))
1843 			return -EFAULT;
1844 		if (vr.mifi >= mrt->maxvif)
1845 			return -EINVAL;
1846 		read_lock(&mrt_lock);
1847 		vif = &mrt->vif6_table[vr.mifi];
1848 		if (MIF_EXISTS(mrt, vr.mifi)) {
1849 			vr.icount = vif->pkt_in;
1850 			vr.ocount = vif->pkt_out;
1851 			vr.ibytes = vif->bytes_in;
1852 			vr.obytes = vif->bytes_out;
1853 			read_unlock(&mrt_lock);
1854 
1855 			if (copy_to_user(arg, &vr, sizeof(vr)))
1856 				return -EFAULT;
1857 			return 0;
1858 		}
1859 		read_unlock(&mrt_lock);
1860 		return -EADDRNOTAVAIL;
1861 	case SIOCGETSGCNT_IN6:
1862 		if (copy_from_user(&sr, arg, sizeof(sr)))
1863 			return -EFAULT;
1864 
1865 		read_lock(&mrt_lock);
1866 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1867 		if (c) {
1868 			sr.pktcnt = c->mfc_un.res.pkt;
1869 			sr.bytecnt = c->mfc_un.res.bytes;
1870 			sr.wrong_if = c->mfc_un.res.wrong_if;
1871 			read_unlock(&mrt_lock);
1872 
1873 			if (copy_to_user(arg, &sr, sizeof(sr)))
1874 				return -EFAULT;
1875 			return 0;
1876 		}
1877 		read_unlock(&mrt_lock);
1878 		return -EADDRNOTAVAIL;
1879 	default:
1880 		return -ENOIOCTLCMD;
1881 	}
1882 }
1883 #endif
1884 
1885 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1886 {
1887 	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1888 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1889 	IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1890 			 IPSTATS_MIB_OUTOCTETS, skb->len);
1891 	return dst_output(skb);
1892 }
1893 
1894 /*
1895  *	Processing handlers for ip6mr_forward
1896  */
1897 
1898 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1899 			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1900 {
1901 	struct ipv6hdr *ipv6h;
1902 	struct mif_device *vif = &mrt->vif6_table[vifi];
1903 	struct net_device *dev;
1904 	struct dst_entry *dst;
1905 	struct flowi6 fl6;
1906 
1907 	if (vif->dev == NULL)
1908 		goto out_free;
1909 
1910 #ifdef CONFIG_IPV6_PIMSM_V2
1911 	if (vif->flags & MIFF_REGISTER) {
1912 		vif->pkt_out++;
1913 		vif->bytes_out += skb->len;
1914 		vif->dev->stats.tx_bytes += skb->len;
1915 		vif->dev->stats.tx_packets++;
1916 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1917 		goto out_free;
1918 	}
1919 #endif
1920 
1921 	ipv6h = ipv6_hdr(skb);
1922 
1923 	fl6 = (struct flowi6) {
1924 		.flowi6_oif = vif->link,
1925 		.daddr = ipv6h->daddr,
1926 	};
1927 
1928 	dst = ip6_route_output(net, NULL, &fl6);
1929 	if (dst->error) {
1930 		dst_release(dst);
1931 		goto out_free;
1932 	}
1933 
1934 	skb_dst_drop(skb);
1935 	skb_dst_set(skb, dst);
1936 
1937 	/*
1938 	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1939 	 * not only before forwarding, but after forwarding on all output
1940 	 * interfaces. It is clear, if mrouter runs a multicasting
1941 	 * program, it should receive packets not depending to what interface
1942 	 * program is joined.
1943 	 * If we will not make it, the program will have to join on all
1944 	 * interfaces. On the other hand, multihoming host (or router, but
1945 	 * not mrouter) cannot join to more than one interface - it will
1946 	 * result in receiving multiple packets.
1947 	 */
1948 	dev = vif->dev;
1949 	skb->dev = dev;
1950 	vif->pkt_out++;
1951 	vif->bytes_out += skb->len;
1952 
1953 	/* We are about to write */
1954 	/* XXX: extension headers? */
1955 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1956 		goto out_free;
1957 
1958 	ipv6h = ipv6_hdr(skb);
1959 	ipv6h->hop_limit--;
1960 
1961 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1962 
1963 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
1964 		       ip6mr_forward2_finish);
1965 
1966 out_free:
1967 	kfree_skb(skb);
1968 	return 0;
1969 }
1970 
1971 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
1972 {
1973 	int ct;
1974 
1975 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
1976 		if (mrt->vif6_table[ct].dev == dev)
1977 			break;
1978 	}
1979 	return ct;
1980 }
1981 
1982 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
1983 			  struct sk_buff *skb, struct mfc6_cache *cache)
1984 {
1985 	int psend = -1;
1986 	int vif, ct;
1987 
1988 	vif = cache->mf6c_parent;
1989 	cache->mfc_un.res.pkt++;
1990 	cache->mfc_un.res.bytes += skb->len;
1991 
1992 	/*
1993 	 * Wrong interface: drop packet and (maybe) send PIM assert.
1994 	 */
1995 	if (mrt->vif6_table[vif].dev != skb->dev) {
1996 		int true_vifi;
1997 
1998 		cache->mfc_un.res.wrong_if++;
1999 		true_vifi = ip6mr_find_vif(mrt, skb->dev);
2000 
2001 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2002 		    /* pimsm uses asserts, when switching from RPT to SPT,
2003 		       so that we cannot check that packet arrived on an oif.
2004 		       It is bad, but otherwise we would need to move pretty
2005 		       large chunk of pimd to kernel. Ough... --ANK
2006 		     */
2007 		    (mrt->mroute_do_pim ||
2008 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
2009 		    time_after(jiffies,
2010 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2011 			cache->mfc_un.res.last_assert = jiffies;
2012 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2013 		}
2014 		goto dont_forward;
2015 	}
2016 
2017 	mrt->vif6_table[vif].pkt_in++;
2018 	mrt->vif6_table[vif].bytes_in += skb->len;
2019 
2020 	/*
2021 	 *	Forward the frame
2022 	 */
2023 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2024 		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2025 			if (psend != -1) {
2026 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2027 				if (skb2)
2028 					ip6mr_forward2(net, mrt, skb2, cache, psend);
2029 			}
2030 			psend = ct;
2031 		}
2032 	}
2033 	if (psend != -1) {
2034 		ip6mr_forward2(net, mrt, skb, cache, psend);
2035 		return 0;
2036 	}
2037 
2038 dont_forward:
2039 	kfree_skb(skb);
2040 	return 0;
2041 }
2042 
2043 
2044 /*
2045  *	Multicast packets for forwarding arrive here
2046  */
2047 
2048 int ip6_mr_input(struct sk_buff *skb)
2049 {
2050 	struct mfc6_cache *cache;
2051 	struct net *net = dev_net(skb->dev);
2052 	struct mr6_table *mrt;
2053 	struct flowi6 fl6 = {
2054 		.flowi6_iif	= skb->dev->ifindex,
2055 		.flowi6_mark	= skb->mark,
2056 	};
2057 	int err;
2058 
2059 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2060 	if (err < 0) {
2061 		kfree_skb(skb);
2062 		return err;
2063 	}
2064 
2065 	read_lock(&mrt_lock);
2066 	cache = ip6mr_cache_find(mrt,
2067 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2068 
2069 	/*
2070 	 *	No usable cache entry
2071 	 */
2072 	if (cache == NULL) {
2073 		int vif;
2074 
2075 		vif = ip6mr_find_vif(mrt, skb->dev);
2076 		if (vif >= 0) {
2077 			int err = ip6mr_cache_unresolved(mrt, vif, skb);
2078 			read_unlock(&mrt_lock);
2079 
2080 			return err;
2081 		}
2082 		read_unlock(&mrt_lock);
2083 		kfree_skb(skb);
2084 		return -ENODEV;
2085 	}
2086 
2087 	ip6_mr_forward(net, mrt, skb, cache);
2088 
2089 	read_unlock(&mrt_lock);
2090 
2091 	return 0;
2092 }
2093 
2094 
2095 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2096 			       struct mfc6_cache *c, struct rtmsg *rtm)
2097 {
2098 	int ct;
2099 	struct rtnexthop *nhp;
2100 	u8 *b = skb_tail_pointer(skb);
2101 	struct rtattr *mp_head;
2102 
2103 	/* If cache is unresolved, don't try to parse IIF and OIF */
2104 	if (c->mf6c_parent >= MAXMIFS)
2105 		return -ENOENT;
2106 
2107 	if (MIF_EXISTS(mrt, c->mf6c_parent) &&
2108 	    nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
2109 		return -EMSGSIZE;
2110 
2111 	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
2112 
2113 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2114 		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2115 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
2116 				goto rtattr_failure;
2117 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
2118 			nhp->rtnh_flags = 0;
2119 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2120 			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2121 			nhp->rtnh_len = sizeof(*nhp);
2122 		}
2123 	}
2124 	mp_head->rta_type = RTA_MULTIPATH;
2125 	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
2126 	rtm->rtm_type = RTN_MULTICAST;
2127 	return 1;
2128 
2129 rtattr_failure:
2130 	nlmsg_trim(skb, b);
2131 	return -EMSGSIZE;
2132 }
2133 
2134 int ip6mr_get_route(struct net *net,
2135 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2136 {
2137 	int err;
2138 	struct mr6_table *mrt;
2139 	struct mfc6_cache *cache;
2140 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2141 
2142 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2143 	if (mrt == NULL)
2144 		return -ENOENT;
2145 
2146 	read_lock(&mrt_lock);
2147 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2148 
2149 	if (!cache) {
2150 		struct sk_buff *skb2;
2151 		struct ipv6hdr *iph;
2152 		struct net_device *dev;
2153 		int vif;
2154 
2155 		if (nowait) {
2156 			read_unlock(&mrt_lock);
2157 			return -EAGAIN;
2158 		}
2159 
2160 		dev = skb->dev;
2161 		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2162 			read_unlock(&mrt_lock);
2163 			return -ENODEV;
2164 		}
2165 
2166 		/* really correct? */
2167 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2168 		if (!skb2) {
2169 			read_unlock(&mrt_lock);
2170 			return -ENOMEM;
2171 		}
2172 
2173 		skb_reset_transport_header(skb2);
2174 
2175 		skb_put(skb2, sizeof(struct ipv6hdr));
2176 		skb_reset_network_header(skb2);
2177 
2178 		iph = ipv6_hdr(skb2);
2179 		iph->version = 0;
2180 		iph->priority = 0;
2181 		iph->flow_lbl[0] = 0;
2182 		iph->flow_lbl[1] = 0;
2183 		iph->flow_lbl[2] = 0;
2184 		iph->payload_len = 0;
2185 		iph->nexthdr = IPPROTO_NONE;
2186 		iph->hop_limit = 0;
2187 		iph->saddr = rt->rt6i_src.addr;
2188 		iph->daddr = rt->rt6i_dst.addr;
2189 
2190 		err = ip6mr_cache_unresolved(mrt, vif, skb2);
2191 		read_unlock(&mrt_lock);
2192 
2193 		return err;
2194 	}
2195 
2196 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2197 		cache->mfc_flags |= MFC_NOTIFY;
2198 
2199 	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2200 	read_unlock(&mrt_lock);
2201 	return err;
2202 }
2203 
2204 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2205 			     u32 pid, u32 seq, struct mfc6_cache *c)
2206 {
2207 	struct nlmsghdr *nlh;
2208 	struct rtmsg *rtm;
2209 
2210 	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2211 	if (nlh == NULL)
2212 		return -EMSGSIZE;
2213 
2214 	rtm = nlmsg_data(nlh);
2215 	rtm->rtm_family   = RTNL_FAMILY_IPMR;
2216 	rtm->rtm_dst_len  = 128;
2217 	rtm->rtm_src_len  = 128;
2218 	rtm->rtm_tos      = 0;
2219 	rtm->rtm_table    = mrt->id;
2220 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2221 		goto nla_put_failure;
2222 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2223 	rtm->rtm_protocol = RTPROT_UNSPEC;
2224 	rtm->rtm_flags    = 0;
2225 
2226 	if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) ||
2227 	    nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp))
2228 		goto nla_put_failure;
2229 	if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
2230 		goto nla_put_failure;
2231 
2232 	return nlmsg_end(skb, nlh);
2233 
2234 nla_put_failure:
2235 	nlmsg_cancel(skb, nlh);
2236 	return -EMSGSIZE;
2237 }
2238 
2239 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2240 {
2241 	struct net *net = sock_net(skb->sk);
2242 	struct mr6_table *mrt;
2243 	struct mfc6_cache *mfc;
2244 	unsigned int t = 0, s_t;
2245 	unsigned int h = 0, s_h;
2246 	unsigned int e = 0, s_e;
2247 
2248 	s_t = cb->args[0];
2249 	s_h = cb->args[1];
2250 	s_e = cb->args[2];
2251 
2252 	read_lock(&mrt_lock);
2253 	ip6mr_for_each_table(mrt, net) {
2254 		if (t < s_t)
2255 			goto next_table;
2256 		if (t > s_t)
2257 			s_h = 0;
2258 		for (h = s_h; h < MFC6_LINES; h++) {
2259 			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2260 				if (e < s_e)
2261 					goto next_entry;
2262 				if (ip6mr_fill_mroute(mrt, skb,
2263 						      NETLINK_CB(cb->skb).pid,
2264 						      cb->nlh->nlmsg_seq,
2265 						      mfc) < 0)
2266 					goto done;
2267 next_entry:
2268 				e++;
2269 			}
2270 			e = s_e = 0;
2271 		}
2272 		s_h = 0;
2273 next_table:
2274 		t++;
2275 	}
2276 done:
2277 	read_unlock(&mrt_lock);
2278 
2279 	cb->args[2] = e;
2280 	cb->args[1] = h;
2281 	cb->args[0] = t;
2282 
2283 	return skb->len;
2284 }
2285