xref: /openbmc/linux/net/ipv6/ip6mr.c (revision 75f25bd3)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <asm/system.h>
20 #include <asm/uaccess.h>
21 #include <linux/types.h>
22 #include <linux/sched.h>
23 #include <linux/errno.h>
24 #include <linux/timer.h>
25 #include <linux/mm.h>
26 #include <linux/kernel.h>
27 #include <linux/fcntl.h>
28 #include <linux/stat.h>
29 #include <linux/socket.h>
30 #include <linux/inet.h>
31 #include <linux/netdevice.h>
32 #include <linux/inetdevice.h>
33 #include <linux/proc_fs.h>
34 #include <linux/seq_file.h>
35 #include <linux/init.h>
36 #include <linux/slab.h>
37 #include <linux/compat.h>
38 #include <net/protocol.h>
39 #include <linux/skbuff.h>
40 #include <net/sock.h>
41 #include <net/raw.h>
42 #include <linux/notifier.h>
43 #include <linux/if_arp.h>
44 #include <net/checksum.h>
45 #include <net/netlink.h>
46 #include <net/fib_rules.h>
47 
48 #include <net/ipv6.h>
49 #include <net/ip6_route.h>
50 #include <linux/mroute6.h>
51 #include <linux/pim.h>
52 #include <net/addrconf.h>
53 #include <linux/netfilter_ipv6.h>
54 #include <net/ip6_checksum.h>
55 
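/*
 * Per-table multicast routing state.  One mr6_table exists per routing
 * table id (several per namespace with CONFIG_IPV6_MROUTE_MULTIPLE_TABLES,
 * otherwise one default table per namespace): the MIF array, the hash of
 * resolved MFC entries, the unresolved queue with its expiry timer, and
 * the single user-space socket (pim6sd) allowed to control the table.
 */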
56 struct mr6_table {
57 	struct list_head	list;
58 #ifdef CONFIG_NET_NS
59 	struct net		*net;
60 #endif
61 	u32			id;
62 	struct sock		*mroute6_sk;
63 	struct timer_list	ipmr_expire_timer;
64 	struct list_head	mfc6_unres_queue;
65 	struct list_head	mfc6_cache_array[MFC6_LINES];
66 	struct mif_device	vif6_table[MAXMIFS];
67 	int			maxvif;
68 	atomic_t		cache_resolve_queue_len;
69 	int			mroute_do_assert;
70 	int			mroute_do_pim;
71 #ifdef CONFIG_IPV6_PIMSM_V2
72 	int			mroute_reg_vif_num;
73 #endif
74 };
75 
76 struct ip6mr_rule {
77 	struct fib_rule		common;
78 };
79 
80 struct ip6mr_result {
81 	struct mr6_table	*mrt;
82 };
83 
84 /* Big lock, protecting the vif table, the mrt cache and the mroute
85    socket state.  Note that changes are serialized under rtnl_lock.
86  */
87 
88 static DEFINE_RWLOCK(mrt_lock);
89 
90 /*
91  *	Multicast router control variables
92  */
93 
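/* A MIF slot counts as live only while a device is attached to it. */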
94 #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
95 
96 /* Special spinlock for queue of unresolved entries */
97 static DEFINE_SPINLOCK(mfc_unres_lock);
98 
99 /* We return to Alan's original scheme.  The hash table of resolved
100    entries is changed only in process context and is protected by the
101    weak lock mrt_lock.  The queue of unresolved entries is protected
102    by the strong spinlock mfc_unres_lock.
103 
104    This keeps the data path entirely free of exclusive locks.
105  */
106 
107 static struct kmem_cache *mrt_cachep __read_mostly;
108 
109 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
110 static void ip6mr_free_table(struct mr6_table *mrt);
111 
112 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
113 			  struct sk_buff *skb, struct mfc6_cache *cache);
114 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
115 			      mifi_t mifi, int assert);
116 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
117 			       struct mfc6_cache *c, struct rtmsg *rtm);
118 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
119 			       struct netlink_callback *cb);
120 static void mroute_clean_tables(struct mr6_table *mrt);
121 static void ipmr_expire_process(unsigned long arg);
122 
123 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
124 #define ip6mr_for_each_table(mrt, net) \
125 	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
126 
127 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
128 {
129 	struct mr6_table *mrt;
130 
131 	ip6mr_for_each_table(mrt, net) {
132 		if (mrt->id == id)
133 			return mrt;
134 	}
135 	return NULL;
136 }
137 
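/*
 * Route a flow to an mr6_table via the fib-rules framework: the
 * RTNL_FAMILY_IP6MR rules (ip6mr_rules_ops_template below) map the flow
 * onto a table id, and ip6mr_rule_action() resolves that id to the table.
 */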
138 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
139 			    struct mr6_table **mrt)
140 {
141 	struct ip6mr_result res;
142 	struct fib_lookup_arg arg = { .result = &res, };
143 	int err;
144 
145 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
146 			       flowi6_to_flowi(flp6), 0, &arg);
147 	if (err < 0)
148 		return err;
149 	*mrt = res.mrt;
150 	return 0;
151 }
152 
153 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
154 			     int flags, struct fib_lookup_arg *arg)
155 {
156 	struct ip6mr_result *res = arg->result;
157 	struct mr6_table *mrt;
158 
159 	switch (rule->action) {
160 	case FR_ACT_TO_TBL:
161 		break;
162 	case FR_ACT_UNREACHABLE:
163 		return -ENETUNREACH;
164 	case FR_ACT_PROHIBIT:
165 		return -EACCES;
166 	case FR_ACT_BLACKHOLE:
167 	default:
168 		return -EINVAL;
169 	}
170 
171 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
172 	if (mrt == NULL)
173 		return -EAGAIN;
174 	res->mrt = mrt;
175 	return 0;
176 }
177 
178 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
179 {
180 	return 1;
181 }
182 
183 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
184 	FRA_GENERIC_POLICY,
185 };
186 
187 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
188 				struct fib_rule_hdr *frh, struct nlattr **tb)
189 {
190 	return 0;
191 }
192 
193 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
194 			      struct nlattr **tb)
195 {
196 	return 1;
197 }
198 
199 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
200 			   struct fib_rule_hdr *frh)
201 {
202 	frh->dst_len = 0;
203 	frh->src_len = 0;
204 	frh->tos     = 0;
205 	return 0;
206 }
207 
208 static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
209 	.family		= RTNL_FAMILY_IP6MR,
210 	.rule_size	= sizeof(struct ip6mr_rule),
211 	.addr_size	= sizeof(struct in6_addr),
212 	.action		= ip6mr_rule_action,
213 	.match		= ip6mr_rule_match,
214 	.configure	= ip6mr_rule_configure,
215 	.compare	= ip6mr_rule_compare,
216 	.default_pref	= fib_default_rule_pref,
217 	.fill		= ip6mr_rule_fill,
218 	.nlgroup	= RTNLGRP_IPV6_RULE,
219 	.policy		= ip6mr_rule_policy,
220 	.owner		= THIS_MODULE,
221 };
222 
223 static int __net_init ip6mr_rules_init(struct net *net)
224 {
225 	struct fib_rules_ops *ops;
226 	struct mr6_table *mrt;
227 	int err;
228 
229 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
230 	if (IS_ERR(ops))
231 		return PTR_ERR(ops);
232 
233 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
234 
235 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
236 	if (mrt == NULL) {
237 		err = -ENOMEM;
238 		goto err1;
239 	}
240 
241 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
242 	if (err < 0)
243 		goto err2;
244 
245 	net->ipv6.mr6_rules_ops = ops;
246 	return 0;
247 
248 err2:
249 	kfree(mrt);
250 err1:
251 	fib_rules_unregister(ops);
252 	return err;
253 }
254 
255 static void __net_exit ip6mr_rules_exit(struct net *net)
256 {
257 	struct mr6_table *mrt, *next;
258 
259 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
260 		list_del(&mrt->list);
261 		ip6mr_free_table(mrt);
262 	}
263 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
264 }
265 #else
266 #define ip6mr_for_each_table(mrt, net) \
267 	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
268 
269 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
270 {
271 	return net->ipv6.mrt6;
272 }
273 
274 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
275 			    struct mr6_table **mrt)
276 {
277 	*mrt = net->ipv6.mrt6;
278 	return 0;
279 }
280 
281 static int __net_init ip6mr_rules_init(struct net *net)
282 {
283 	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
284 	return net->ipv6.mrt6 ? 0 : -ENOMEM;
285 }
286 
287 static void __net_exit ip6mr_rules_exit(struct net *net)
288 {
289 	ip6mr_free_table(net->ipv6.mrt6);
290 }
291 #endif
292 
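/* Look up the table with the given id, creating it on demand. */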
293 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
294 {
295 	struct mr6_table *mrt;
296 	unsigned int i;
297 
298 	mrt = ip6mr_get_table(net, id);
299 	if (mrt != NULL)
300 		return mrt;
301 
302 	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
303 	if (mrt == NULL)
304 		return NULL;
305 	mrt->id = id;
306 	write_pnet(&mrt->net, net);
307 
308 	/* Forwarding cache */
309 	for (i = 0; i < MFC6_LINES; i++)
310 		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
311 
312 	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
313 
314 	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
315 		    (unsigned long)mrt);
316 
317 #ifdef CONFIG_IPV6_PIMSM_V2
318 	mrt->mroute_reg_vif_num = -1;
319 #endif
320 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
321 	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
322 #endif
323 	return mrt;
324 }
325 
326 static void ip6mr_free_table(struct mr6_table *mrt)
327 {
328 	del_timer(&mrt->ipmr_expire_timer);
329 	mroute_clean_tables(mrt);
330 	kfree(mrt);
331 }
332 
333 #ifdef CONFIG_PROC_FS
334 
335 struct ipmr_mfc_iter {
336 	struct seq_net_private p;
337 	struct mr6_table *mrt;
338 	struct list_head *cache;
339 	int ct;
340 };
341 
342 
343 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
344 					   struct ipmr_mfc_iter *it, loff_t pos)
345 {
346 	struct mr6_table *mrt = it->mrt;
347 	struct mfc6_cache *mfc;
348 
349 	read_lock(&mrt_lock);
350 	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
351 		it->cache = &mrt->mfc6_cache_array[it->ct];
352 		list_for_each_entry(mfc, it->cache, list)
353 			if (pos-- == 0)
354 				return mfc;
355 	}
356 	read_unlock(&mrt_lock);
357 
358 	spin_lock_bh(&mfc_unres_lock);
359 	it->cache = &mrt->mfc6_unres_queue;
360 	list_for_each_entry(mfc, it->cache, list)
361 		if (pos-- == 0)
362 			return mfc;
363 	spin_unlock_bh(&mfc_unres_lock);
364 
365 	it->cache = NULL;
366 	return NULL;
367 }
368 
369 /*
370  *	The /proc interfaces to multicast routing: /proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
371  */
372 
373 struct ipmr_vif_iter {
374 	struct seq_net_private p;
375 	struct mr6_table *mrt;
376 	int ct;
377 };
378 
379 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
380 					    struct ipmr_vif_iter *iter,
381 					    loff_t pos)
382 {
383 	struct mr6_table *mrt = iter->mrt;
384 
385 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
386 		if (!MIF_EXISTS(mrt, iter->ct))
387 			continue;
388 		if (pos-- == 0)
389 			return &mrt->vif6_table[iter->ct];
390 	}
391 	return NULL;
392 }
393 
394 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
395 	__acquires(mrt_lock)
396 {
397 	struct ipmr_vif_iter *iter = seq->private;
398 	struct net *net = seq_file_net(seq);
399 	struct mr6_table *mrt;
400 
401 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
402 	if (mrt == NULL)
403 		return ERR_PTR(-ENOENT);
404 
405 	iter->mrt = mrt;
406 
407 	read_lock(&mrt_lock);
408 	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
409 		: SEQ_START_TOKEN;
410 }
411 
412 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
413 {
414 	struct ipmr_vif_iter *iter = seq->private;
415 	struct net *net = seq_file_net(seq);
416 	struct mr6_table *mrt = iter->mrt;
417 
418 	++*pos;
419 	if (v == SEQ_START_TOKEN)
420 		return ip6mr_vif_seq_idx(net, iter, 0);
421 
422 	while (++iter->ct < mrt->maxvif) {
423 		if (!MIF_EXISTS(mrt, iter->ct))
424 			continue;
425 		return &mrt->vif6_table[iter->ct];
426 	}
427 	return NULL;
428 }
429 
430 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
431 	__releases(mrt_lock)
432 {
433 	read_unlock(&mrt_lock);
434 }
435 
436 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
437 {
438 	struct ipmr_vif_iter *iter = seq->private;
439 	struct mr6_table *mrt = iter->mrt;
440 
441 	if (v == SEQ_START_TOKEN) {
442 		seq_puts(seq,
443 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
444 	} else {
445 		const struct mif_device *vif = v;
446 		const char *name = vif->dev ? vif->dev->name : "none";
447 
448 		seq_printf(seq,
449 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
450 			   vif - mrt->vif6_table,
451 			   name, vif->bytes_in, vif->pkt_in,
452 			   vif->bytes_out, vif->pkt_out,
453 			   vif->flags);
454 	}
455 	return 0;
456 }
457 
458 static const struct seq_operations ip6mr_vif_seq_ops = {
459 	.start = ip6mr_vif_seq_start,
460 	.next  = ip6mr_vif_seq_next,
461 	.stop  = ip6mr_vif_seq_stop,
462 	.show  = ip6mr_vif_seq_show,
463 };
464 
465 static int ip6mr_vif_open(struct inode *inode, struct file *file)
466 {
467 	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
468 			    sizeof(struct ipmr_vif_iter));
469 }
470 
471 static const struct file_operations ip6mr_vif_fops = {
472 	.owner	 = THIS_MODULE,
473 	.open    = ip6mr_vif_open,
474 	.read    = seq_read,
475 	.llseek  = seq_lseek,
476 	.release = seq_release_net,
477 };
478 
479 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
480 {
481 	struct ipmr_mfc_iter *it = seq->private;
482 	struct net *net = seq_file_net(seq);
483 	struct mr6_table *mrt;
484 
485 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
486 	if (mrt == NULL)
487 		return ERR_PTR(-ENOENT);
488 
489 	it->mrt = mrt;
490 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
491 		: SEQ_START_TOKEN;
492 }
493 
494 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
495 {
496 	struct mfc6_cache *mfc = v;
497 	struct ipmr_mfc_iter *it = seq->private;
498 	struct net *net = seq_file_net(seq);
499 	struct mr6_table *mrt = it->mrt;
500 
501 	++*pos;
502 
503 	if (v == SEQ_START_TOKEN)
504 		return ipmr_mfc_seq_idx(net, seq->private, 0);
505 
506 	if (mfc->list.next != it->cache)
507 		return list_entry(mfc->list.next, struct mfc6_cache, list);
508 
509 	if (it->cache == &mrt->mfc6_unres_queue)
510 		goto end_of_list;
511 
512 	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
513 
514 	while (++it->ct < MFC6_LINES) {
515 		it->cache = &mrt->mfc6_cache_array[it->ct];
516 		if (list_empty(it->cache))
517 			continue;
518 		return list_first_entry(it->cache, struct mfc6_cache, list);
519 	}
520 
521 	/* exhausted cache_array, show unresolved */
522 	read_unlock(&mrt_lock);
523 	it->cache = &mrt->mfc6_unres_queue;
524 	it->ct = 0;
525 
526 	spin_lock_bh(&mfc_unres_lock);
527 	if (!list_empty(it->cache))
528 		return list_first_entry(it->cache, struct mfc6_cache, list);
529 
530  end_of_list:
531 	spin_unlock_bh(&mfc_unres_lock);
532 	it->cache = NULL;
533 
534 	return NULL;
535 }
536 
537 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
538 {
539 	struct ipmr_mfc_iter *it = seq->private;
540 	struct mr6_table *mrt = it->mrt;
541 
542 	if (it->cache == &mrt->mfc6_unres_queue)
543 		spin_unlock_bh(&mfc_unres_lock);
544 	else if (it->cache == mrt->mfc6_cache_array)
545 		read_unlock(&mrt_lock);
546 }
547 
548 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
549 {
550 	int n;
551 
552 	if (v == SEQ_START_TOKEN) {
553 		seq_puts(seq,
554 			 "Group                            "
555 			 "Origin                           "
556 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
557 	} else {
558 		const struct mfc6_cache *mfc = v;
559 		const struct ipmr_mfc_iter *it = seq->private;
560 		struct mr6_table *mrt = it->mrt;
561 
562 		seq_printf(seq, "%pI6 %pI6 %-3hd",
563 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
564 			   mfc->mf6c_parent);
565 
566 		if (it->cache != &mrt->mfc6_unres_queue) {
567 			seq_printf(seq, " %8lu %8lu %8lu",
568 				   mfc->mfc_un.res.pkt,
569 				   mfc->mfc_un.res.bytes,
570 				   mfc->mfc_un.res.wrong_if);
571 			for (n = mfc->mfc_un.res.minvif;
572 			     n < mfc->mfc_un.res.maxvif; n++) {
573 				if (MIF_EXISTS(mrt, n) &&
574 				    mfc->mfc_un.res.ttls[n] < 255)
575 					seq_printf(seq,
576 						   " %2d:%-3d",
577 						   n, mfc->mfc_un.res.ttls[n]);
578 			}
579 		} else {
580 			/* unresolved mfc_caches don't contain
581 			 * pkt, bytes and wrong_if values
582 			 */
583 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
584 		}
585 		seq_putc(seq, '\n');
586 	}
587 	return 0;
588 }
589 
590 static const struct seq_operations ipmr_mfc_seq_ops = {
591 	.start = ipmr_mfc_seq_start,
592 	.next  = ipmr_mfc_seq_next,
593 	.stop  = ipmr_mfc_seq_stop,
594 	.show  = ipmr_mfc_seq_show,
595 };
596 
597 static int ipmr_mfc_open(struct inode *inode, struct file *file)
598 {
599 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
600 			    sizeof(struct ipmr_mfc_iter));
601 }
602 
603 static const struct file_operations ip6mr_mfc_fops = {
604 	.owner	 = THIS_MODULE,
605 	.open    = ipmr_mfc_open,
606 	.read    = seq_read,
607 	.llseek  = seq_lseek,
608 	.release = seq_release_net,
609 };
610 #endif
611 
612 #ifdef CONFIG_IPV6_PIMSM_V2
613 
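/*
 * Handle a received PIM packet: accept only well-formed, checksummed
 * REGISTER messages whose inner packet is addressed to a multicast group,
 * then decapsulate the inner packet onto this table's pim6reg device via
 * skb_tunnel_rx().
 */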
614 static int pim6_rcv(struct sk_buff *skb)
615 {
616 	struct pimreghdr *pim;
617 	struct ipv6hdr   *encap;
618 	struct net_device  *reg_dev = NULL;
619 	struct net *net = dev_net(skb->dev);
620 	struct mr6_table *mrt;
621 	struct flowi6 fl6 = {
622 		.flowi6_iif	= skb->dev->ifindex,
623 		.flowi6_mark	= skb->mark,
624 	};
625 	int reg_vif_num;
626 
627 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
628 		goto drop;
629 
630 	pim = (struct pimreghdr *)skb_transport_header(skb);
631 	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
632 	    (pim->flags & PIM_NULL_REGISTER) ||
633 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
634 			     sizeof(*pim), IPPROTO_PIM,
635 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
636 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
637 		goto drop;
638 
639 	/* check that the inner packet is destined to a multicast group */
640 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
641 				   sizeof(*pim));
642 
643 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
644 	    encap->payload_len == 0 ||
645 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
646 		goto drop;
647 
648 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
649 		goto drop;
650 	reg_vif_num = mrt->mroute_reg_vif_num;
651 
652 	read_lock(&mrt_lock);
653 	if (reg_vif_num >= 0)
654 		reg_dev = mrt->vif6_table[reg_vif_num].dev;
655 	if (reg_dev)
656 		dev_hold(reg_dev);
657 	read_unlock(&mrt_lock);
658 
659 	if (reg_dev == NULL)
660 		goto drop;
661 
662 	skb->mac_header = skb->network_header;
663 	skb_pull(skb, (u8 *)encap - skb->data);
664 	skb_reset_network_header(skb);
665 	skb->protocol = htons(ETH_P_IPV6);
666 	skb->ip_summed = CHECKSUM_NONE;
667 	skb->pkt_type = PACKET_HOST;
668 
669 	skb_tunnel_rx(skb, reg_dev);
670 
671 	netif_rx(skb);
672 
673 	dev_put(reg_dev);
674 	return 0;
675  drop:
676 	kfree_skb(skb);
677 	return 0;
678 }
679 
680 static const struct inet6_protocol pim6_protocol = {
681 	.handler	=	pim6_rcv,
682 };
683 
684 /* Service routines creating virtual interfaces: PIMREG */
685 
686 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
687 				      struct net_device *dev)
688 {
689 	struct net *net = dev_net(dev);
690 	struct mr6_table *mrt;
691 	struct flowi6 fl6 = {
692 		.flowi6_oif	= dev->ifindex,
693 		.flowi6_iif	= skb->skb_iif,
694 		.flowi6_mark	= skb->mark,
695 	};
696 	int err;
697 
698 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
699 	if (err < 0)
700 		return err;
701 
702 	read_lock(&mrt_lock);
703 	dev->stats.tx_bytes += skb->len;
704 	dev->stats.tx_packets++;
705 	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
706 	read_unlock(&mrt_lock);
707 	kfree_skb(skb);
708 	return NETDEV_TX_OK;
709 }
710 
711 static const struct net_device_ops reg_vif_netdev_ops = {
712 	.ndo_start_xmit	= reg_vif_xmit,
713 };
714 
715 static void reg_vif_setup(struct net_device *dev)
716 {
717 	dev->type		= ARPHRD_PIMREG;
718 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
719 	dev->flags		= IFF_NOARP;
720 	dev->netdev_ops		= &reg_vif_netdev_ops;
721 	dev->destructor		= free_netdev;
722 	dev->features		|= NETIF_F_NETNS_LOCAL;
723 }
724 
725 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
726 {
727 	struct net_device *dev;
728 	char name[IFNAMSIZ];
729 
730 	if (mrt->id == RT6_TABLE_DFLT)
731 		sprintf(name, "pim6reg");
732 	else
733 		sprintf(name, "pim6reg%u", mrt->id);
734 
735 	dev = alloc_netdev(0, name, reg_vif_setup);
736 	if (dev == NULL)
737 		return NULL;
738 
739 	dev_net_set(dev, net);
740 
741 	if (register_netdevice(dev)) {
742 		free_netdev(dev);
743 		return NULL;
744 	}
745 	dev->iflink = 0;
746 
747 	if (dev_open(dev))
748 		goto failure;
749 
750 	dev_hold(dev);
751 	return dev;
752 
753 failure:
754 	/* allow register_netdevice() to complete before unregistering. */
755 	rtnl_unlock();
756 	rtnl_lock();
757 
758 	unregister_netdevice(dev);
759 	return NULL;
760 }
761 #endif
762 
763 /*
764  *	Delete a VIF entry
765  */
766 
767 static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
768 {
769 	struct mif_device *v;
770 	struct net_device *dev;
771 	struct inet6_dev *in6_dev;
772 
773 	if (vifi < 0 || vifi >= mrt->maxvif)
774 		return -EADDRNOTAVAIL;
775 
776 	v = &mrt->vif6_table[vifi];
777 
778 	write_lock_bh(&mrt_lock);
779 	dev = v->dev;
780 	v->dev = NULL;
781 
782 	if (!dev) {
783 		write_unlock_bh(&mrt_lock);
784 		return -EADDRNOTAVAIL;
785 	}
786 
787 #ifdef CONFIG_IPV6_PIMSM_V2
788 	if (vifi == mrt->mroute_reg_vif_num)
789 		mrt->mroute_reg_vif_num = -1;
790 #endif
791 
792 	if (vifi + 1 == mrt->maxvif) {
793 		int tmp;
794 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
795 			if (MIF_EXISTS(mrt, tmp))
796 				break;
797 		}
798 		mrt->maxvif = tmp + 1;
799 	}
800 
801 	write_unlock_bh(&mrt_lock);
802 
803 	dev_set_allmulti(dev, -1);
804 
805 	in6_dev = __in6_dev_get(dev);
806 	if (in6_dev)
807 		in6_dev->cnf.mc_forwarding--;
808 
809 	if (v->flags & MIFF_REGISTER)
810 		unregister_netdevice_queue(dev, head);
811 
812 	dev_put(dev);
813 	return 0;
814 }
815 
816 static inline void ip6mr_cache_free(struct mfc6_cache *c)
817 {
818 	kmem_cache_free(mrt_cachep, c);
819 }
820 
821 /* Destroy an unresolved cache entry, killing queued skbs
822    and reporting error to netlink readers.
823  */
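/* Skbs whose IPv6 "version" field is 0 are not real packets but queued
 * netlink requests (see ip6mr_get_route()); those are answered with an
 * -ETIMEDOUT error instead of being dropped silently.
 */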
824 
825 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
826 {
827 	struct net *net = read_pnet(&mrt->net);
828 	struct sk_buff *skb;
829 
830 	atomic_dec(&mrt->cache_resolve_queue_len);
831 
832 	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
833 		if (ipv6_hdr(skb)->version == 0) {
834 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
835 			nlh->nlmsg_type = NLMSG_ERROR;
836 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
837 			skb_trim(skb, nlh->nlmsg_len);
838 			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
839 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
840 		} else
841 			kfree_skb(skb);
842 	}
843 
844 	ip6mr_cache_free(c);
845 }
846 
847 
848 /* Timer handler for the unresolved-entries queue. */
849 
850 static void ipmr_do_expire_process(struct mr6_table *mrt)
851 {
852 	unsigned long now = jiffies;
853 	unsigned long expires = 10 * HZ;
854 	struct mfc6_cache *c, *next;
855 
856 	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
857 		if (time_after(c->mfc_un.unres.expires, now)) {
858 			/* not yet... */
859 			unsigned long interval = c->mfc_un.unres.expires - now;
860 			if (interval < expires)
861 				expires = interval;
862 			continue;
863 		}
864 
865 		list_del(&c->list);
866 		ip6mr_destroy_unres(mrt, c);
867 	}
868 
869 	if (!list_empty(&mrt->mfc6_unres_queue))
870 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
871 }
872 
873 static void ipmr_expire_process(unsigned long arg)
874 {
875 	struct mr6_table *mrt = (struct mr6_table *)arg;
876 
877 	if (!spin_trylock(&mfc_unres_lock)) {
878 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
879 		return;
880 	}
881 
882 	if (!list_empty(&mrt->mfc6_unres_queue))
883 		ipmr_do_expire_process(mrt);
884 
885 	spin_unlock(&mfc_unres_lock);
886 }
887 
888 /* Fill the oifs list.  Called with mrt_lock held for writing. */
889 
890 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
891 				    unsigned char *ttls)
892 {
893 	int vifi;
894 
895 	cache->mfc_un.res.minvif = MAXMIFS;
896 	cache->mfc_un.res.maxvif = 0;
897 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
898 
899 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
900 		if (MIF_EXISTS(mrt, vifi) &&
901 		    ttls[vifi] && ttls[vifi] < 255) {
902 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
903 			if (cache->mfc_un.res.minvif > vifi)
904 				cache->mfc_un.res.minvif = vifi;
905 			if (cache->mfc_un.res.maxvif <= vifi)
906 				cache->mfc_un.res.maxvif = vifi + 1;
907 		}
908 	}
909 }
910 
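/*
 * Add a MIF.  Two kinds exist: flags == 0 attaches an ordinary network
 * device identified by mif6c_pifi, while MIFF_REGISTER (PIM-SM only)
 * creates the pim6reg pseudo-device that bounces whole packets up to the
 * daemon.  Either way the device is put into allmulti mode and
 * mc_forwarding is bumped on its inet6 device.
 */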
911 static int mif6_add(struct net *net, struct mr6_table *mrt,
912 		    struct mif6ctl *vifc, int mrtsock)
913 {
914 	int vifi = vifc->mif6c_mifi;
915 	struct mif_device *v = &mrt->vif6_table[vifi];
916 	struct net_device *dev;
917 	struct inet6_dev *in6_dev;
918 	int err;
919 
920 	/* Is the vif busy? */
921 	if (MIF_EXISTS(mrt, vifi))
922 		return -EADDRINUSE;
923 
924 	switch (vifc->mif6c_flags) {
925 #ifdef CONFIG_IPV6_PIMSM_V2
926 	case MIFF_REGISTER:
927 		/*
928 		 * Special Purpose VIF in PIM
929 		 * All the packets will be sent to the daemon
930 		 */
931 		if (mrt->mroute_reg_vif_num >= 0)
932 			return -EADDRINUSE;
933 		dev = ip6mr_reg_vif(net, mrt);
934 		if (!dev)
935 			return -ENOBUFS;
936 		err = dev_set_allmulti(dev, 1);
937 		if (err) {
938 			unregister_netdevice(dev);
939 			dev_put(dev);
940 			return err;
941 		}
942 		break;
943 #endif
944 	case 0:
945 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
946 		if (!dev)
947 			return -EADDRNOTAVAIL;
948 		err = dev_set_allmulti(dev, 1);
949 		if (err) {
950 			dev_put(dev);
951 			return err;
952 		}
953 		break;
954 	default:
955 		return -EINVAL;
956 	}
957 
958 	in6_dev = __in6_dev_get(dev);
959 	if (in6_dev)
960 		in6_dev->cnf.mc_forwarding++;
961 
962 	/*
963 	 *	Fill in the VIF structures
964 	 */
965 	v->rate_limit = vifc->vifc_rate_limit;
966 	v->flags = vifc->mif6c_flags;
967 	if (!mrtsock)
968 		v->flags |= VIFF_STATIC;
969 	v->threshold = vifc->vifc_threshold;
970 	v->bytes_in = 0;
971 	v->bytes_out = 0;
972 	v->pkt_in = 0;
973 	v->pkt_out = 0;
974 	v->link = dev->ifindex;
975 	if (v->flags & MIFF_REGISTER)
976 		v->link = dev->iflink;
977 
978 	/* And finish update writing critical data */
979 	write_lock_bh(&mrt_lock);
980 	v->dev = dev;
981 #ifdef CONFIG_IPV6_PIMSM_V2
982 	if (v->flags & MIFF_REGISTER)
983 		mrt->mroute_reg_vif_num = vifi;
984 #endif
985 	if (vifi + 1 > mrt->maxvif)
986 		mrt->maxvif = vifi + 1;
987 	write_unlock_bh(&mrt_lock);
988 	return 0;
989 }
990 
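/* Resolved-cache lookup, hashed on (group, origin).  The caller must hold
 * mrt_lock (at least for reading) to keep the bucket list stable.
 */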
991 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
992 					   const struct in6_addr *origin,
993 					   const struct in6_addr *mcastgrp)
994 {
995 	int line = MFC6_HASH(mcastgrp, origin);
996 	struct mfc6_cache *c;
997 
998 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
999 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1000 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1001 			return c;
1002 	}
1003 	return NULL;
1004 }
1005 
1006 /*
1007  *	Allocate a multicast cache entry
1008  */
1009 static struct mfc6_cache *ip6mr_cache_alloc(void)
1010 {
1011 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1012 	if (c == NULL)
1013 		return NULL;
1014 	c->mfc_un.res.minvif = MAXMIFS;
1015 	return c;
1016 }
1017 
1018 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1019 {
1020 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1021 	if (c == NULL)
1022 		return NULL;
1023 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
1024 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
1025 	return c;
1026 }
1027 
1028 /*
1029  *	A cache entry has moved from the unresolved queue to the resolved state
1030  */
1031 
1032 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1033 				struct mfc6_cache *uc, struct mfc6_cache *c)
1034 {
1035 	struct sk_buff *skb;
1036 
1037 	/*
1038 	 *	Play the pending entries through our router
1039 	 */
1040 
1041 	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1042 		if (ipv6_hdr(skb)->version == 0) {
1043 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1044 
1045 			if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
1046 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1047 			} else {
1048 				nlh->nlmsg_type = NLMSG_ERROR;
1049 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
1050 				skb_trim(skb, nlh->nlmsg_len);
1051 				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
1052 			}
1053 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
1054 		} else
1055 			ip6_mr_forward(net, mrt, skb, c);
1056 	}
1057 }
1058 
1059 /*
1060  *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1061  *	expects the following bizarre scheme.
1062  *
1063  *	Called under mrt_lock.
1064  */
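/* Two report formats are produced: MRT6MSG_WHOLEPKT wraps the entire
 * original packet behind a struct mrt6msg pseudo-header (PIM register
 * path), while MRT6MSG_NOCACHE/MRT6MSG_WRONGMIF carry only the original
 * IPv6 header plus the pseudo-header.  Either way the result is queued
 * on the mroute socket for pim6sd to read.
 */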
1065 
1066 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1067 			      mifi_t mifi, int assert)
1068 {
1069 	struct sk_buff *skb;
1070 	struct mrt6msg *msg;
1071 	int ret;
1072 
1073 #ifdef CONFIG_IPV6_PIMSM_V2
1074 	if (assert == MRT6MSG_WHOLEPKT)
1075 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1076 						+sizeof(*msg));
1077 	else
1078 #endif
1079 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1080 
1081 	if (!skb)
1082 		return -ENOBUFS;
1083 
1084 	/* I suppose that internal messages
1085 	 * do not require checksums */
1086 
1087 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1088 
1089 #ifdef CONFIG_IPV6_PIMSM_V2
1090 	if (assert == MRT6MSG_WHOLEPKT) {
1091 		/* Ugly, but we have no choice with this interface.
1092 		   Duplicate old header, fix length etc.
1093 		   And all this only to mangle msg->im6_msgtype and
1094 		   to set msg->im6_mbz to "mbz" :-)
1095 		 */
1096 		skb_push(skb, -skb_network_offset(pkt));
1097 
1098 		skb_push(skb, sizeof(*msg));
1099 		skb_reset_transport_header(skb);
1100 		msg = (struct mrt6msg *)skb_transport_header(skb);
1101 		msg->im6_mbz = 0;
1102 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1103 		msg->im6_mif = mrt->mroute_reg_vif_num;
1104 		msg->im6_pad = 0;
1105 		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
1106 		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
1107 
1108 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1109 	} else
1110 #endif
1111 	{
1112 	/*
1113 	 *	Copy the IP header
1114 	 */
1115 
1116 	skb_put(skb, sizeof(struct ipv6hdr));
1117 	skb_reset_network_header(skb);
1118 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1119 
1120 	/*
1121 	 *	Add our header
1122 	 */
1123 	skb_put(skb, sizeof(*msg));
1124 	skb_reset_transport_header(skb);
1125 	msg = (struct mrt6msg *)skb_transport_header(skb);
1126 
1127 	msg->im6_mbz = 0;
1128 	msg->im6_msgtype = assert;
1129 	msg->im6_mif = mifi;
1130 	msg->im6_pad = 0;
1131 	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
1132 	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
1133 
1134 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1135 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1136 	}
1137 
1138 	if (mrt->mroute6_sk == NULL) {
1139 		kfree_skb(skb);
1140 		return -EINVAL;
1141 	}
1142 
1143 	/*
1144 	 *	Deliver to the user-space multicast routing daemon
1145 	 */
1146 	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1147 	if (ret < 0) {
1148 		if (net_ratelimit())
1149 			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
1150 		kfree_skb(skb);
1151 	}
1152 
1153 	return ret;
1154 }
1155 
1156 /*
1157  *	Queue a packet for resolution.  The entry is handled under the locked unresolved queue.
1158  */
1159 
1160 static int
1161 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1162 {
1163 	bool found = false;
1164 	int err;
1165 	struct mfc6_cache *c;
1166 
1167 	spin_lock_bh(&mfc_unres_lock);
1168 	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1169 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1170 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1171 			found = true;
1172 			break;
1173 		}
1174 	}
1175 
1176 	if (!found) {
1177 		/*
1178 		 *	Create a new entry if allowable
1179 		 */
1180 
1181 		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1182 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
1183 			spin_unlock_bh(&mfc_unres_lock);
1184 
1185 			kfree_skb(skb);
1186 			return -ENOBUFS;
1187 		}
1188 
1189 		/*
1190 		 *	Fill in the new cache entry
1191 		 */
1192 		c->mf6c_parent = -1;
1193 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1194 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1195 
1196 		/*
1197 		 *	Reflect first query at pim6sd
1198 		 */
1199 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1200 		if (err < 0) {
1201 			/* If the report failed throw the cache entry
1202 			   out - Brad Parker
1203 			 */
1204 			spin_unlock_bh(&mfc_unres_lock);
1205 
1206 			ip6mr_cache_free(c);
1207 			kfree_skb(skb);
1208 			return err;
1209 		}
1210 
1211 		atomic_inc(&mrt->cache_resolve_queue_len);
1212 		list_add(&c->list, &mrt->mfc6_unres_queue);
1213 
1214 		ipmr_do_expire_process(mrt);
1215 	}
1216 
1217 	/*
1218 	 *	See if we can append the packet
1219 	 */
1220 	if (c->mfc_un.unres.unresolved.qlen > 3) {
1221 		kfree_skb(skb);
1222 		err = -ENOBUFS;
1223 	} else {
1224 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1225 		err = 0;
1226 	}
1227 
1228 	spin_unlock_bh(&mfc_unres_lock);
1229 	return err;
1230 }
1231 
1232 /*
1233  *	MFC6 cache manipulation by user space
1234  */
1235 
1236 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
1237 {
1238 	int line;
1239 	struct mfc6_cache *c, *next;
1240 
1241 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1242 
1243 	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1244 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1245 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1246 			write_lock_bh(&mrt_lock);
1247 			list_del(&c->list);
1248 			write_unlock_bh(&mrt_lock);
1249 
1250 			ip6mr_cache_free(c);
1251 			return 0;
1252 		}
1253 	}
1254 	return -ENOENT;
1255 }
1256 
1257 static int ip6mr_device_event(struct notifier_block *this,
1258 			      unsigned long event, void *ptr)
1259 {
1260 	struct net_device *dev = ptr;
1261 	struct net *net = dev_net(dev);
1262 	struct mr6_table *mrt;
1263 	struct mif_device *v;
1264 	int ct;
1265 	LIST_HEAD(list);
1266 
1267 	if (event != NETDEV_UNREGISTER)
1268 		return NOTIFY_DONE;
1269 
1270 	ip6mr_for_each_table(mrt, net) {
1271 		v = &mrt->vif6_table[0];
1272 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1273 			if (v->dev == dev)
1274 				mif6_delete(mrt, ct, &list);
1275 		}
1276 	}
1277 	unregister_netdevice_many(&list);
1278 
1279 	return NOTIFY_DONE;
1280 }
1281 
1282 static struct notifier_block ip6_mr_notifier = {
1283 	.notifier_call = ip6mr_device_event
1284 };
1285 
1286 /*
1287  *	Setup for IP multicast routing
1288  */
1289 
1290 static int __net_init ip6mr_net_init(struct net *net)
1291 {
1292 	int err;
1293 
1294 	err = ip6mr_rules_init(net);
1295 	if (err < 0)
1296 		goto fail;
1297 
1298 #ifdef CONFIG_PROC_FS
1299 	err = -ENOMEM;
1300 	if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1301 		goto proc_vif_fail;
1302 	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1303 		goto proc_cache_fail;
1304 #endif
1305 
1306 	return 0;
1307 
1308 #ifdef CONFIG_PROC_FS
1309 proc_cache_fail:
1310 	proc_net_remove(net, "ip6_mr_vif");
1311 proc_vif_fail:
1312 	ip6mr_rules_exit(net);
1313 #endif
1314 fail:
1315 	return err;
1316 }
1317 
1318 static void __net_exit ip6mr_net_exit(struct net *net)
1319 {
1320 #ifdef CONFIG_PROC_FS
1321 	proc_net_remove(net, "ip6_mr_cache");
1322 	proc_net_remove(net, "ip6_mr_vif");
1323 #endif
1324 	ip6mr_rules_exit(net);
1325 }
1326 
1327 static struct pernet_operations ip6mr_net_ops = {
1328 	.init = ip6mr_net_init,
1329 	.exit = ip6mr_net_exit,
1330 };
1331 
1332 int __init ip6_mr_init(void)
1333 {
1334 	int err;
1335 
1336 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1337 				       sizeof(struct mfc6_cache),
1338 				       0, SLAB_HWCACHE_ALIGN,
1339 				       NULL);
1340 	if (!mrt_cachep)
1341 		return -ENOMEM;
1342 
1343 	err = register_pernet_subsys(&ip6mr_net_ops);
1344 	if (err)
1345 		goto reg_pernet_fail;
1346 
1347 	err = register_netdevice_notifier(&ip6_mr_notifier);
1348 	if (err)
1349 		goto reg_notif_fail;
1350 #ifdef CONFIG_IPV6_PIMSM_V2
1351 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1352 		printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
1353 		err = -EAGAIN;
1354 		goto add_proto_fail;
1355 	}
1356 #endif
1357 	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
1358 		      ip6mr_rtm_dumproute, NULL);
1359 	return 0;
1360 #ifdef CONFIG_IPV6_PIMSM_V2
1361 add_proto_fail:
1362 	unregister_netdevice_notifier(&ip6_mr_notifier);
1363 #endif
1364 reg_notif_fail:
1365 	unregister_pernet_subsys(&ip6mr_net_ops);
1366 reg_pernet_fail:
1367 	kmem_cache_destroy(mrt_cachep);
1368 	return err;
1369 }
1370 
1371 void ip6_mr_cleanup(void)
1372 {
1373 	unregister_netdevice_notifier(&ip6_mr_notifier);
1374 	unregister_pernet_subsys(&ip6mr_net_ops);
1375 	kmem_cache_destroy(mrt_cachep);
1376 }
1377 
1378 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1379 			 struct mf6cctl *mfc, int mrtsock)
1380 {
1381 	bool found = false;
1382 	int line;
1383 	struct mfc6_cache *uc, *c;
1384 	unsigned char ttls[MAXMIFS];
1385 	int i;
1386 
1387 	if (mfc->mf6cc_parent >= MAXMIFS)
1388 		return -ENFILE;
1389 
1390 	memset(ttls, 255, MAXMIFS);
1391 	for (i = 0; i < MAXMIFS; i++) {
1392 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1393 			ttls[i] = 1;
1394 
1395 	}
1396 
1397 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1398 
1399 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1400 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1401 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1402 			found = true;
1403 			break;
1404 		}
1405 	}
1406 
1407 	if (found) {
1408 		write_lock_bh(&mrt_lock);
1409 		c->mf6c_parent = mfc->mf6cc_parent;
1410 		ip6mr_update_thresholds(mrt, c, ttls);
1411 		if (!mrtsock)
1412 			c->mfc_flags |= MFC_STATIC;
1413 		write_unlock_bh(&mrt_lock);
1414 		return 0;
1415 	}
1416 
1417 	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1418 		return -EINVAL;
1419 
1420 	c = ip6mr_cache_alloc();
1421 	if (c == NULL)
1422 		return -ENOMEM;
1423 
1424 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1425 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1426 	c->mf6c_parent = mfc->mf6cc_parent;
1427 	ip6mr_update_thresholds(mrt, c, ttls);
1428 	if (!mrtsock)
1429 		c->mfc_flags |= MFC_STATIC;
1430 
1431 	write_lock_bh(&mrt_lock);
1432 	list_add(&c->list, &mrt->mfc6_cache_array[line]);
1433 	write_unlock_bh(&mrt_lock);
1434 
1435 	/*
1436 	 *	Check whether this entry resolves a queued unresolved entry.
1437 	 *	If so, send the queued frames on and tidy up.
1438 	 */
1439 	found = false;
1440 	spin_lock_bh(&mfc_unres_lock);
1441 	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1442 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1443 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1444 			list_del(&uc->list);
1445 			atomic_dec(&mrt->cache_resolve_queue_len);
1446 			found = true;
1447 			break;
1448 		}
1449 	}
1450 	if (list_empty(&mrt->mfc6_unres_queue))
1451 		del_timer(&mrt->ipmr_expire_timer);
1452 	spin_unlock_bh(&mfc_unres_lock);
1453 
1454 	if (found) {
1455 		ip6mr_cache_resolve(net, mrt, uc, c);
1456 		ip6mr_cache_free(uc);
1457 	}
1458 	return 0;
1459 }
1460 
1461 /*
1462  *	Close the multicast socket, and clear the vif tables etc.
1463  */
1464 
1465 static void mroute_clean_tables(struct mr6_table *mrt)
1466 {
1467 	int i;
1468 	LIST_HEAD(list);
1469 	struct mfc6_cache *c, *next;
1470 
1471 	/*
1472 	 *	Shut down all active vif entries
1473 	 */
1474 	for (i = 0; i < mrt->maxvif; i++) {
1475 		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1476 			mif6_delete(mrt, i, &list);
1477 	}
1478 	unregister_netdevice_many(&list);
1479 
1480 	/*
1481 	 *	Wipe the cache
1482 	 */
1483 	for (i = 0; i < MFC6_LINES; i++) {
1484 		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1485 			if (c->mfc_flags & MFC_STATIC)
1486 				continue;
1487 			write_lock_bh(&mrt_lock);
1488 			list_del(&c->list);
1489 			write_unlock_bh(&mrt_lock);
1490 
1491 			ip6mr_cache_free(c);
1492 		}
1493 	}
1494 
1495 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1496 		spin_lock_bh(&mfc_unres_lock);
1497 		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1498 			list_del(&c->list);
1499 			ip6mr_destroy_unres(mrt, c);
1500 		}
1501 		spin_unlock_bh(&mfc_unres_lock);
1502 	}
1503 }
1504 
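/* Claim sk as the single mroute socket of this table (MRT6_INIT).  The
 * bump of devconf_all->mc_forwarding tells the input path to hand
 * multicast packets to ip6_mr_input().
 */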
1505 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1506 {
1507 	int err = 0;
1508 	struct net *net = sock_net(sk);
1509 
1510 	rtnl_lock();
1511 	write_lock_bh(&mrt_lock);
1512 	if (likely(mrt->mroute6_sk == NULL)) {
1513 		mrt->mroute6_sk = sk;
1514 		net->ipv6.devconf_all->mc_forwarding++;
1515 	} else
1516 		err = -EADDRINUSE;
1518 	write_unlock_bh(&mrt_lock);
1519 
1520 	rtnl_unlock();
1521 
1522 	return err;
1523 }
1524 
1525 int ip6mr_sk_done(struct sock *sk)
1526 {
1527 	int err = -EACCES;
1528 	struct net *net = sock_net(sk);
1529 	struct mr6_table *mrt;
1530 
1531 	rtnl_lock();
1532 	ip6mr_for_each_table(mrt, net) {
1533 		if (sk == mrt->mroute6_sk) {
1534 			write_lock_bh(&mrt_lock);
1535 			mrt->mroute6_sk = NULL;
1536 			net->ipv6.devconf_all->mc_forwarding--;
1537 			write_unlock_bh(&mrt_lock);
1538 
1539 			mroute_clean_tables(mrt);
1540 			err = 0;
1541 			break;
1542 		}
1543 	}
1544 	rtnl_unlock();
1545 
1546 	return err;
1547 }
1548 
1549 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1550 {
1551 	struct mr6_table *mrt;
1552 	struct flowi6 fl6 = {
1553 		.flowi6_iif	= skb->skb_iif,
1554 		.flowi6_oif	= skb->dev->ifindex,
1555 		.flowi6_mark	= skb->mark,
1556 	};
1557 
1558 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1559 		return NULL;
1560 
1561 	return mrt->mroute6_sk;
1562 }
1563 
1564 /*
1565  *	Socket options and virtual interface manipulation. The whole
1566  *	virtual interface system is a complete heap, but unfortunately
1567  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1568  *	MOSPF/PIM router set up we can clean this up.
1569  */
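/*
 * Illustrative (untested) sketch of the user-space side, assuming the
 * usual raw-ICMPv6 control socket; the local variable names are
 * hypothetical:
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int one = 1;
 *	struct mif6ctl mif = {
 *		.mif6c_mifi = 0,
 *		.mif6c_pifi = if_nametoindex("eth0"),
 *	};
 *
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));
 *
 * followed by MRT6_ADD_MFC per (source, group) pair and MRT6_DONE on exit.
 */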
1570 
1571 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1572 {
1573 	int ret;
1574 	struct mif6ctl vif;
1575 	struct mf6cctl mfc;
1576 	mifi_t mifi;
1577 	struct net *net = sock_net(sk);
1578 	struct mr6_table *mrt;
1579 
1580 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1581 	if (mrt == NULL)
1582 		return -ENOENT;
1583 
1584 	if (optname != MRT6_INIT) {
1585 		if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
1586 			return -EACCES;
1587 	}
1588 
1589 	switch (optname) {
1590 	case MRT6_INIT:
1591 		if (sk->sk_type != SOCK_RAW ||
1592 		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1593 			return -EOPNOTSUPP;
1594 		if (optlen < sizeof(int))
1595 			return -EINVAL;
1596 
1597 		return ip6mr_sk_init(mrt, sk);
1598 
1599 	case MRT6_DONE:
1600 		return ip6mr_sk_done(sk);
1601 
1602 	case MRT6_ADD_MIF:
1603 		if (optlen < sizeof(vif))
1604 			return -EINVAL;
1605 		if (copy_from_user(&vif, optval, sizeof(vif)))
1606 			return -EFAULT;
1607 		if (vif.mif6c_mifi >= MAXMIFS)
1608 			return -ENFILE;
1609 		rtnl_lock();
1610 		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1611 		rtnl_unlock();
1612 		return ret;
1613 
1614 	case MRT6_DEL_MIF:
1615 		if (optlen < sizeof(mifi_t))
1616 			return -EINVAL;
1617 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1618 			return -EFAULT;
1619 		rtnl_lock();
1620 		ret = mif6_delete(mrt, mifi, NULL);
1621 		rtnl_unlock();
1622 		return ret;
1623 
1624 	/*
1625 	 *	Manipulate the forwarding caches. These live
1626 	 *	in a sort of kernel/user symbiosis.
1627 	 */
1628 	case MRT6_ADD_MFC:
1629 	case MRT6_DEL_MFC:
1630 		if (optlen < sizeof(mfc))
1631 			return -EINVAL;
1632 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1633 			return -EFAULT;
1634 		rtnl_lock();
1635 		if (optname == MRT6_DEL_MFC)
1636 			ret = ip6mr_mfc_delete(mrt, &mfc);
1637 		else
1638 			ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
1639 		rtnl_unlock();
1640 		return ret;
1641 
1642 	/*
1643 	 *	Control PIM assert mode (enabling PIM also enables asserts)
1644 	 */
1645 	case MRT6_ASSERT:
1646 	{
1647 		int v;
1648 		if (get_user(v, (int __user *)optval))
1649 			return -EFAULT;
1650 		mrt->mroute_do_assert = !!v;
1651 		return 0;
1652 	}
1653 
1654 #ifdef CONFIG_IPV6_PIMSM_V2
1655 	case MRT6_PIM:
1656 	{
1657 		int v;
1658 		if (get_user(v, (int __user *)optval))
1659 			return -EFAULT;
1660 		v = !!v;
1661 		rtnl_lock();
1662 		ret = 0;
1663 		if (v != mrt->mroute_do_pim) {
1664 			mrt->mroute_do_pim = v;
1665 			mrt->mroute_do_assert = v;
1666 		}
1667 		rtnl_unlock();
1668 		return ret;
1669 	}
1670 
1671 #endif
1672 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1673 	case MRT6_TABLE:
1674 	{
1675 		u32 v;
1676 
1677 		if (optlen != sizeof(u32))
1678 			return -EINVAL;
1679 		if (get_user(v, (u32 __user *)optval))
1680 			return -EFAULT;
1681 		if (sk == mrt->mroute6_sk)
1682 			return -EBUSY;
1683 
1684 		rtnl_lock();
1685 		ret = 0;
1686 		if (!ip6mr_new_table(net, v))
1687 			ret = -ENOMEM;
1688 		raw6_sk(sk)->ip6mr_table = v;
1689 		rtnl_unlock();
1690 		return ret;
1691 	}
1692 #endif
1693 	/*
1694 	 *	Spurious command, or MRT6_VERSION which you cannot
1695 	 *	set.
1696 	 */
1697 	default:
1698 		return -ENOPROTOOPT;
1699 	}
1700 }
1701 
1702 /*
1703  *	Getsockopt support for the multicast routing system.
1704  */
1705 
1706 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1707 			  int __user *optlen)
1708 {
1709 	int olr;
1710 	int val;
1711 	struct net *net = sock_net(sk);
1712 	struct mr6_table *mrt;
1713 
1714 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1715 	if (mrt == NULL)
1716 		return -ENOENT;
1717 
1718 	switch (optname) {
1719 	case MRT6_VERSION:
1720 		val = 0x0305;
1721 		break;
1722 #ifdef CONFIG_IPV6_PIMSM_V2
1723 	case MRT6_PIM:
1724 		val = mrt->mroute_do_pim;
1725 		break;
1726 #endif
1727 	case MRT6_ASSERT:
1728 		val = mrt->mroute_do_assert;
1729 		break;
1730 	default:
1731 		return -ENOPROTOOPT;
1732 	}
1733 
1734 	if (get_user(olr, optlen))
1735 		return -EFAULT;
1736 
1737 	olr = min_t(int, olr, sizeof(int));
1738 	if (olr < 0)
1739 		return -EINVAL;
1740 
1741 	if (put_user(olr, optlen))
1742 		return -EFAULT;
1743 	if (copy_to_user(optval, &val, olr))
1744 		return -EFAULT;
1745 	return 0;
1746 }
1747 
1748 /*
1749  *	The IP multicast ioctl support routines.
1750  */
1751 
1752 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1753 {
1754 	struct sioc_sg_req6 sr;
1755 	struct sioc_mif_req6 vr;
1756 	struct mif_device *vif;
1757 	struct mfc6_cache *c;
1758 	struct net *net = sock_net(sk);
1759 	struct mr6_table *mrt;
1760 
1761 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1762 	if (mrt == NULL)
1763 		return -ENOENT;
1764 
1765 	switch (cmd) {
1766 	case SIOCGETMIFCNT_IN6:
1767 		if (copy_from_user(&vr, arg, sizeof(vr)))
1768 			return -EFAULT;
1769 		if (vr.mifi >= mrt->maxvif)
1770 			return -EINVAL;
1771 		read_lock(&mrt_lock);
1772 		vif = &mrt->vif6_table[vr.mifi];
1773 		if (MIF_EXISTS(mrt, vr.mifi)) {
1774 			vr.icount = vif->pkt_in;
1775 			vr.ocount = vif->pkt_out;
1776 			vr.ibytes = vif->bytes_in;
1777 			vr.obytes = vif->bytes_out;
1778 			read_unlock(&mrt_lock);
1779 
1780 			if (copy_to_user(arg, &vr, sizeof(vr)))
1781 				return -EFAULT;
1782 			return 0;
1783 		}
1784 		read_unlock(&mrt_lock);
1785 		return -EADDRNOTAVAIL;
1786 	case SIOCGETSGCNT_IN6:
1787 		if (copy_from_user(&sr, arg, sizeof(sr)))
1788 			return -EFAULT;
1789 
1790 		read_lock(&mrt_lock);
1791 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1792 		if (c) {
1793 			sr.pktcnt = c->mfc_un.res.pkt;
1794 			sr.bytecnt = c->mfc_un.res.bytes;
1795 			sr.wrong_if = c->mfc_un.res.wrong_if;
1796 			read_unlock(&mrt_lock);
1797 
1798 			if (copy_to_user(arg, &sr, sizeof(sr)))
1799 				return -EFAULT;
1800 			return 0;
1801 		}
1802 		read_unlock(&mrt_lock);
1803 		return -EADDRNOTAVAIL;
1804 	default:
1805 		return -ENOIOCTLCMD;
1806 	}
1807 }
1808 
1809 #ifdef CONFIG_COMPAT
1810 struct compat_sioc_sg_req6 {
1811 	struct sockaddr_in6 src;
1812 	struct sockaddr_in6 grp;
1813 	compat_ulong_t pktcnt;
1814 	compat_ulong_t bytecnt;
1815 	compat_ulong_t wrong_if;
1816 };
1817 
1818 struct compat_sioc_mif_req6 {
1819 	mifi_t	mifi;
1820 	compat_ulong_t icount;
1821 	compat_ulong_t ocount;
1822 	compat_ulong_t ibytes;
1823 	compat_ulong_t obytes;
1824 };
1825 
1826 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1827 {
1828 	struct compat_sioc_sg_req6 sr;
1829 	struct compat_sioc_mif_req6 vr;
1830 	struct mif_device *vif;
1831 	struct mfc6_cache *c;
1832 	struct net *net = sock_net(sk);
1833 	struct mr6_table *mrt;
1834 
1835 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1836 	if (mrt == NULL)
1837 		return -ENOENT;
1838 
1839 	switch (cmd) {
1840 	case SIOCGETMIFCNT_IN6:
1841 		if (copy_from_user(&vr, arg, sizeof(vr)))
1842 			return -EFAULT;
1843 		if (vr.mifi >= mrt->maxvif)
1844 			return -EINVAL;
1845 		read_lock(&mrt_lock);
1846 		vif = &mrt->vif6_table[vr.mifi];
1847 		if (MIF_EXISTS(mrt, vr.mifi)) {
1848 			vr.icount = vif->pkt_in;
1849 			vr.ocount = vif->pkt_out;
1850 			vr.ibytes = vif->bytes_in;
1851 			vr.obytes = vif->bytes_out;
1852 			read_unlock(&mrt_lock);
1853 
1854 			if (copy_to_user(arg, &vr, sizeof(vr)))
1855 				return -EFAULT;
1856 			return 0;
1857 		}
1858 		read_unlock(&mrt_lock);
1859 		return -EADDRNOTAVAIL;
1860 	case SIOCGETSGCNT_IN6:
1861 		if (copy_from_user(&sr, arg, sizeof(sr)))
1862 			return -EFAULT;
1863 
1864 		read_lock(&mrt_lock);
1865 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1866 		if (c) {
1867 			sr.pktcnt = c->mfc_un.res.pkt;
1868 			sr.bytecnt = c->mfc_un.res.bytes;
1869 			sr.wrong_if = c->mfc_un.res.wrong_if;
1870 			read_unlock(&mrt_lock);
1871 
1872 			if (copy_to_user(arg, &sr, sizeof(sr)))
1873 				return -EFAULT;
1874 			return 0;
1875 		}
1876 		read_unlock(&mrt_lock);
1877 		return -EADDRNOTAVAIL;
1878 	default:
1879 		return -ENOIOCTLCMD;
1880 	}
1881 }
1882 #endif
1883 
1884 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1885 {
1886 	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1887 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1888 	return dst_output(skb);
1889 }
1890 
1891 /*
1892  *	Processing handlers for ip6mr_forward
1893  */
1894 
1895 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1896 			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1897 {
1898 	struct ipv6hdr *ipv6h;
1899 	struct mif_device *vif = &mrt->vif6_table[vifi];
1900 	struct net_device *dev;
1901 	struct dst_entry *dst;
1902 	struct flowi6 fl6;
1903 
1904 	if (vif->dev == NULL)
1905 		goto out_free;
1906 
1907 #ifdef CONFIG_IPV6_PIMSM_V2
1908 	if (vif->flags & MIFF_REGISTER) {
1909 		vif->pkt_out++;
1910 		vif->bytes_out += skb->len;
1911 		vif->dev->stats.tx_bytes += skb->len;
1912 		vif->dev->stats.tx_packets++;
1913 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1914 		goto out_free;
1915 	}
1916 #endif
1917 
1918 	ipv6h = ipv6_hdr(skb);
1919 
1920 	fl6 = (struct flowi6) {
1921 		.flowi6_oif = vif->link,
1922 		.daddr = ipv6h->daddr,
1923 	};
1924 
1925 	dst = ip6_route_output(net, NULL, &fl6);
1926 	if (!dst)
1927 		goto out_free;
1928 
1929 	skb_dst_drop(skb);
1930 	skb_dst_set(skb, dst);
1931 
1932 	/*
1933 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1934 	 * not only before forwarding, but also after forwarding on all output
1935 	 * interfaces.  Clearly, if an mrouter runs a multicast
1936 	 * program, that program should receive packets regardless of the
1937 	 * interface it has joined on.
1938 	 * If we did not do this, the program would have to join on all
1939 	 * interfaces.  On the other hand, a multihomed host (or router, but
1940 	 * not an mrouter) cannot join on more than one interface - that would
1941 	 * result in receiving multiple copies of the same packet.
1942 	 */
1943 	dev = vif->dev;
1944 	skb->dev = dev;
1945 	vif->pkt_out++;
1946 	vif->bytes_out += skb->len;
1947 
1948 	/* We are about to write */
1949 	/* XXX: extension headers? */
1950 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1951 		goto out_free;
1952 
1953 	ipv6h = ipv6_hdr(skb);
1954 	ipv6h->hop_limit--;
1955 
1956 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1957 
1958 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
1959 		       ip6mr_forward2_finish);
1960 
1961 out_free:
1962 	kfree_skb(skb);
1963 	return 0;
1964 }
1965 
1966 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
1967 {
1968 	int ct;
1969 
1970 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
1971 		if (mrt->vif6_table[ct].dev == dev)
1972 			break;
1973 	}
1974 	return ct;
1975 }
1976 
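/*
 * Forward on every oif whose TTL threshold is exceeded by the packet's
 * hop limit.  All but the last eligible oif receive clones; the final
 * one consumes the original skb, so nothing is leaked on success.
 */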
1977 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
1978 			  struct sk_buff *skb, struct mfc6_cache *cache)
1979 {
1980 	int psend = -1;
1981 	int vif, ct;
1982 
1983 	vif = cache->mf6c_parent;
1984 	cache->mfc_un.res.pkt++;
1985 	cache->mfc_un.res.bytes += skb->len;
1986 
1987 	/*
1988 	 * Wrong interface: drop packet and (maybe) send PIM assert.
1989 	 */
1990 	if (mrt->vif6_table[vif].dev != skb->dev) {
1991 		int true_vifi;
1992 
1993 		cache->mfc_un.res.wrong_if++;
1994 		true_vifi = ip6mr_find_vif(mrt, skb->dev);
1995 
1996 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
1997 		    /* PIM-SM uses asserts when switching from the RPT to the SPT,
1998 		       so we cannot check that the packet arrived on an oif.
1999 		       That is bad, but otherwise we would need to move a pretty
2000 		       large chunk of pimd into the kernel. Ough... --ANK
2001 		     */
2002 		    (mrt->mroute_do_pim ||
2003 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
2004 		    time_after(jiffies,
2005 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2006 			cache->mfc_un.res.last_assert = jiffies;
2007 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2008 		}
2009 		goto dont_forward;
2010 	}
2011 
2012 	mrt->vif6_table[vif].pkt_in++;
2013 	mrt->vif6_table[vif].bytes_in += skb->len;
2014 
2015 	/*
2016 	 *	Forward the frame
2017 	 */
2018 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2019 		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2020 			if (psend != -1) {
2021 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2022 				if (skb2)
2023 					ip6mr_forward2(net, mrt, skb2, cache, psend);
2024 			}
2025 			psend = ct;
2026 		}
2027 	}
2028 	if (psend != -1) {
2029 		ip6mr_forward2(net, mrt, skb, cache, psend);
2030 		return 0;
2031 	}
2032 
2033 dont_forward:
2034 	kfree_skb(skb);
2035 	return 0;
2036 }
2037 
2038 
2039 /*
2040  *	Multicast packets for forwarding arrive here
2041  */
2042 
2043 int ip6_mr_input(struct sk_buff *skb)
2044 {
2045 	struct mfc6_cache *cache;
2046 	struct net *net = dev_net(skb->dev);
2047 	struct mr6_table *mrt;
2048 	struct flowi6 fl6 = {
2049 		.flowi6_iif	= skb->dev->ifindex,
2050 		.flowi6_mark	= skb->mark,
2051 	};
2052 	int err;
2053 
2054 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2055 	if (err < 0)
2056 		return err;
2057 
2058 	read_lock(&mrt_lock);
2059 	cache = ip6mr_cache_find(mrt,
2060 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2061 
2062 	/*
2063 	 *	No usable cache entry
2064 	 */
2065 	if (cache == NULL) {
2066 		int vif;
2067 
2068 		vif = ip6mr_find_vif(mrt, skb->dev);
2069 		if (vif >= 0) {
2070 			int err = ip6mr_cache_unresolved(mrt, vif, skb);
2071 			read_unlock(&mrt_lock);
2072 
2073 			return err;
2074 		}
2075 		read_unlock(&mrt_lock);
2076 		kfree_skb(skb);
2077 		return -ENODEV;
2078 	}
2079 
2080 	ip6_mr_forward(net, mrt, skb, cache);
2081 
2082 	read_unlock(&mrt_lock);
2083 
2084 	return 0;
2085 }
2086 
2087 
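/* Encode one MFC entry into an rtnetlink message: RTA_IIF carries the
 * parent (input) interface and RTA_MULTIPATH holds one rtnexthop per
 * output interface, with rtnh_hops (ab)used as the TTL threshold.
 */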
2088 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2089 			       struct mfc6_cache *c, struct rtmsg *rtm)
2090 {
2091 	int ct;
2092 	struct rtnexthop *nhp;
2093 	u8 *b = skb_tail_pointer(skb);
2094 	struct rtattr *mp_head;
2095 
2096 	/* If cache is unresolved, don't try to parse IIF and OIF */
2097 	if (c->mf6c_parent >= MAXMIFS)
2098 		return -ENOENT;
2099 
2100 	if (MIF_EXISTS(mrt, c->mf6c_parent))
2101 		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);
2102 
2103 	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
2104 
2105 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2106 		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2107 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
2108 				goto rtattr_failure;
2109 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
2110 			nhp->rtnh_flags = 0;
2111 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2112 			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2113 			nhp->rtnh_len = sizeof(*nhp);
2114 		}
2115 	}
2116 	mp_head->rta_type = RTA_MULTIPATH;
2117 	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
2118 	rtm->rtm_type = RTN_MULTICAST;
2119 	return 1;
2120 
2121 rtattr_failure:
2122 	nlmsg_trim(skb, b);
2123 	return -EMSGSIZE;
2124 }
2125 
2126 int ip6mr_get_route(struct net *net,
2127 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2128 {
2129 	int err;
2130 	struct mr6_table *mrt;
2131 	struct mfc6_cache *cache;
2132 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2133 
2134 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2135 	if (mrt == NULL)
2136 		return -ENOENT;
2137 
2138 	read_lock(&mrt_lock);
2139 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2140 
2141 	if (!cache) {
2142 		struct sk_buff *skb2;
2143 		struct ipv6hdr *iph;
2144 		struct net_device *dev;
2145 		int vif;
2146 
2147 		if (nowait) {
2148 			read_unlock(&mrt_lock);
2149 			return -EAGAIN;
2150 		}
2151 
2152 		dev = skb->dev;
2153 		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2154 			read_unlock(&mrt_lock);
2155 			return -ENODEV;
2156 		}
2157 
2158 		/* really correct? */
2159 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2160 		if (!skb2) {
2161 			read_unlock(&mrt_lock);
2162 			return -ENOMEM;
2163 		}
2164 
2165 		skb_reset_transport_header(skb2);
2166 
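		/* Build a dummy IPv6 header with version 0: it marks the skb
		 * as a pending netlink request once it sits on the unresolved
		 * queue (see ip6mr_cache_resolve()).
		 */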
2167 		skb_put(skb2, sizeof(struct ipv6hdr));
2168 		skb_reset_network_header(skb2);
2169 
2170 		iph = ipv6_hdr(skb2);
2171 		iph->version = 0;
2172 		iph->priority = 0;
2173 		iph->flow_lbl[0] = 0;
2174 		iph->flow_lbl[1] = 0;
2175 		iph->flow_lbl[2] = 0;
2176 		iph->payload_len = 0;
2177 		iph->nexthdr = IPPROTO_NONE;
2178 		iph->hop_limit = 0;
2179 		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
2180 		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
2181 
2182 		err = ip6mr_cache_unresolved(mrt, vif, skb2);
2183 		read_unlock(&mrt_lock);
2184 
2185 		return err;
2186 	}
2187 
2188 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2189 		cache->mfc_flags |= MFC_NOTIFY;
2190 
2191 	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2192 	read_unlock(&mrt_lock);
2193 	return err;
2194 }
2195 
2196 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2197 			     u32 pid, u32 seq, struct mfc6_cache *c)
2198 {
2199 	struct nlmsghdr *nlh;
2200 	struct rtmsg *rtm;
2201 
2202 	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2203 	if (nlh == NULL)
2204 		return -EMSGSIZE;
2205 
2206 	rtm = nlmsg_data(nlh);
2207 	rtm->rtm_family   = RTNL_FAMILY_IPMR;
2208 	rtm->rtm_dst_len  = 128;
2209 	rtm->rtm_src_len  = 128;
2210 	rtm->rtm_tos      = 0;
2211 	rtm->rtm_table    = mrt->id;
2212 	NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2213 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2214 	rtm->rtm_protocol = RTPROT_UNSPEC;
2215 	rtm->rtm_flags    = 0;
2216 
2217 	NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin);
2218 	NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp);
2219 
2220 	if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
2221 		goto nla_put_failure;
2222 
2223 	return nlmsg_end(skb, nlh);
2224 
2225 nla_put_failure:
2226 	nlmsg_cancel(skb, nlh);
2227 	return -EMSGSIZE;
2228 }
2229 
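/* Dump all MFC entries to netlink.  cb->args[] holds a three-level resume
 * cursor: table index, hash bucket, and entry within the bucket.
 */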
2230 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2231 {
2232 	struct net *net = sock_net(skb->sk);
2233 	struct mr6_table *mrt;
2234 	struct mfc6_cache *mfc;
2235 	unsigned int t = 0, s_t;
2236 	unsigned int h = 0, s_h;
2237 	unsigned int e = 0, s_e;
2238 
2239 	s_t = cb->args[0];
2240 	s_h = cb->args[1];
2241 	s_e = cb->args[2];
2242 
2243 	read_lock(&mrt_lock);
2244 	ip6mr_for_each_table(mrt, net) {
2245 		if (t < s_t)
2246 			goto next_table;
2247 		if (t > s_t)
2248 			s_h = 0;
2249 		for (h = s_h; h < MFC6_LINES; h++) {
2250 			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2251 				if (e < s_e)
2252 					goto next_entry;
2253 				if (ip6mr_fill_mroute(mrt, skb,
2254 						      NETLINK_CB(cb->skb).pid,
2255 						      cb->nlh->nlmsg_seq,
2256 						      mfc) < 0)
2257 					goto done;
2258 next_entry:
2259 				e++;
2260 			}
2261 			e = s_e = 0;
2262 		}
2263 		s_h = 0;
2264 next_table:
2265 		t++;
2266 	}
2267 done:
2268 	read_unlock(&mrt_lock);
2269 
2270 	cb->args[2] = e;
2271 	cb->args[1] = h;
2272 	cb->args[0] = t;
2273 
2274 	return skb->len;
2275 }
2276