xref: /openbmc/linux/net/ipv6/ip6mr.c (revision 184748cc)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <asm/system.h>
20 #include <asm/uaccess.h>
21 #include <linux/types.h>
22 #include <linux/sched.h>
23 #include <linux/errno.h>
24 #include <linux/timer.h>
25 #include <linux/mm.h>
26 #include <linux/kernel.h>
27 #include <linux/fcntl.h>
28 #include <linux/stat.h>
29 #include <linux/socket.h>
30 #include <linux/inet.h>
31 #include <linux/netdevice.h>
32 #include <linux/inetdevice.h>
33 #include <linux/proc_fs.h>
34 #include <linux/seq_file.h>
35 #include <linux/init.h>
36 #include <linux/slab.h>
37 #include <linux/compat.h>
38 #include <net/protocol.h>
39 #include <linux/skbuff.h>
40 #include <net/sock.h>
41 #include <net/raw.h>
42 #include <linux/notifier.h>
43 #include <linux/if_arp.h>
44 #include <net/checksum.h>
45 #include <net/netlink.h>
46 #include <net/fib_rules.h>
47 
48 #include <net/ipv6.h>
49 #include <net/ip6_route.h>
50 #include <linux/mroute6.h>
51 #include <linux/pim.h>
52 #include <net/addrconf.h>
53 #include <linux/netfilter_ipv6.h>
54 #include <net/ip6_checksum.h>
55 
/*
 * State of one IPv6 multicast routing table: its MIF (multicast interface)
 * slots, the hashed cache of resolved routes and the queue of routes that
 * are still waiting to be resolved.
 */
struct mr6_table {
	struct list_head	list;		/* link in net->ipv6.mr6_tables (multiple-tables build) */
#ifdef CONFIG_NET_NS
	struct net		*net;		/* owning namespace, stored with write_pnet() */
#endif
	u32			id;		/* table id given to ip6mr_new_table() */
	struct sock		*mroute6_sk;	/* socket that cache reports are queued to */
	struct timer_list	ipmr_expire_timer;	/* runs ipmr_expire_process() */
	struct list_head	mfc6_unres_queue;	/* unresolved entries */
	struct list_head	mfc6_cache_array[MFC6_LINES];	/* resolved entries, indexed by MFC6_HASH() */
	struct mif_device	vif6_table[MAXMIFS];	/* MIF slots, indexed by MIF number */
	int			maxvif;		/* highest MIF index in use, plus one */
	atomic_t		cache_resolve_queue_len;	/* number of entries on mfc6_unres_queue */
	int			mroute_do_assert;
	int			mroute_do_pim;
#ifdef CONFIG_IPV6_PIMSM_V2
	int			mroute_reg_vif_num;	/* MIF index of the PIM register device, -1 if none */
#endif
};
75 
/*
 * ip6mr policy rule.  It only wraps the generic fib_rule; its size is what
 * ip6mr_rules_ops_template reports as .rule_size.
 */
struct ip6mr_rule {
	struct fib_rule		common;
};
79 
/*
 * Result slot for a rule lookup: ip6mr_rule_action() stores the selected
 * table here and ip6mr_fib_lookup() hands it back to its caller.
 */
struct ip6mr_result {
	struct mr6_table	*mrt;
};
83 
/*
 * Big lock, protecting the vif table, the mrt cache and the mroute socket
 * state.  Note that changes are additionally serialised via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

/* True when MIF slot _idx of table _mrt currently has a device attached. */
#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)

/* Special spinlock for the queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/*
 * We return to Alan's original scheme.  The hash table of resolved
 * entries is changed only in process context and is protected by the
 * weak lock mrt_lock.  The queue of unresolved entries is protected by
 * the strong spinlock mfc_unres_lock.
 *
 * As a result the data path takes no exclusive locks at all.
 */

/* Slab cache that struct mfc6_cache entries are allocated from. */
static struct kmem_cache *mrt_cachep __read_mostly;
108 
/* Forward declarations for helpers that are used before they are defined. */
static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr6_table *mrt);

static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr6_table *mrt);
static void ipmr_expire_process(unsigned long arg);
122 
123 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/* Walk every multicast routing table registered in namespace 'net'. */
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
126 
127 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
128 {
129 	struct mr6_table *mrt;
130 
131 	ip6mr_for_each_table(mrt, net) {
132 		if (mrt->id == id)
133 			return mrt;
134 	}
135 	return NULL;
136 }
137 
138 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
139 			    struct mr6_table **mrt)
140 {
141 	struct ip6mr_result res;
142 	struct fib_lookup_arg arg = { .result = &res, };
143 	int err;
144 
145 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
146 			       flowi6_to_flowi(flp6), 0, &arg);
147 	if (err < 0)
148 		return err;
149 	*mrt = res.mrt;
150 	return 0;
151 }
152 
153 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
154 			     int flags, struct fib_lookup_arg *arg)
155 {
156 	struct ip6mr_result *res = arg->result;
157 	struct mr6_table *mrt;
158 
159 	switch (rule->action) {
160 	case FR_ACT_TO_TBL:
161 		break;
162 	case FR_ACT_UNREACHABLE:
163 		return -ENETUNREACH;
164 	case FR_ACT_PROHIBIT:
165 		return -EACCES;
166 	case FR_ACT_BLACKHOLE:
167 	default:
168 		return -EINVAL;
169 	}
170 
171 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
172 	if (mrt == NULL)
173 		return -EAGAIN;
174 	res->mrt = mrt;
175 	return 0;
176 }
177 
/* Match callback: no ip6mr-specific selector is checked, every flow matches. */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	const int matched = 1;

	return matched;
}
182 
/* Netlink attribute policy for ip6mr rules: only the generic FRA_* entries. */
static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};
186 
/* Configure callback: nothing is read from the request; always succeeds. */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	const int ok = 0;

	return ok;
}
192 
/* Compare callback: no ip6mr-specific field is compared; always returns 1. */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	const int same = 1;

	return same;
}
198 
199 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
200 			   struct fib_rule_hdr *frh)
201 {
202 	frh->dst_len = 0;
203 	frh->src_len = 0;
204 	frh->tos     = 0;
205 	return 0;
206 }
207 
/*
 * Template of the fib_rules operations for the RTNL_FAMILY_IP6MR family.
 * ip6mr_rules_init() passes it to fib_rules_register() for each namespace.
 */
static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};
222 
223 static int __net_init ip6mr_rules_init(struct net *net)
224 {
225 	struct fib_rules_ops *ops;
226 	struct mr6_table *mrt;
227 	int err;
228 
229 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
230 	if (IS_ERR(ops))
231 		return PTR_ERR(ops);
232 
233 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
234 
235 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
236 	if (mrt == NULL) {
237 		err = -ENOMEM;
238 		goto err1;
239 	}
240 
241 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
242 	if (err < 0)
243 		goto err2;
244 
245 	net->ipv6.mr6_rules_ops = ops;
246 	return 0;
247 
248 err2:
249 	kfree(mrt);
250 err1:
251 	fib_rules_unregister(ops);
252 	return err;
253 }
254 
255 static void __net_exit ip6mr_rules_exit(struct net *net)
256 {
257 	struct mr6_table *mrt, *next;
258 
259 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
260 		list_del(&mrt->list);
261 		ip6mr_free_table(mrt);
262 	}
263 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
264 }
265 #else
/*
 * Single-table build: "iterate" over the one table of the namespace.
 * The body runs once if net->ipv6.mrt6 is set and not at all otherwise.
 */
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
268 
269 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
270 {
271 	return net->ipv6.mrt6;
272 }
273 
274 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
275 			    struct mr6_table **mrt)
276 {
277 	*mrt = net->ipv6.mrt6;
278 	return 0;
279 }
280 
281 static int __net_init ip6mr_rules_init(struct net *net)
282 {
283 	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
284 	return net->ipv6.mrt6 ? 0 : -ENOMEM;
285 }
286 
287 static void __net_exit ip6mr_rules_exit(struct net *net)
288 {
289 	ip6mr_free_table(net->ipv6.mrt6);
290 }
291 #endif
292 
293 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
294 {
295 	struct mr6_table *mrt;
296 	unsigned int i;
297 
298 	mrt = ip6mr_get_table(net, id);
299 	if (mrt != NULL)
300 		return mrt;
301 
302 	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
303 	if (mrt == NULL)
304 		return NULL;
305 	mrt->id = id;
306 	write_pnet(&mrt->net, net);
307 
308 	/* Forwarding cache */
309 	for (i = 0; i < MFC6_LINES; i++)
310 		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
311 
312 	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
313 
314 	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
315 		    (unsigned long)mrt);
316 
317 #ifdef CONFIG_IPV6_PIMSM_V2
318 	mrt->mroute_reg_vif_num = -1;
319 #endif
320 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
321 	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
322 #endif
323 	return mrt;
324 }
325 
326 static void ip6mr_free_table(struct mr6_table *mrt)
327 {
328 	del_timer(&mrt->ipmr_expire_timer);
329 	mroute_clean_tables(mrt);
330 	kfree(mrt);
331 }
332 
333 #ifdef CONFIG_PROC_FS
334 
/* Iterator state of the MFC cache seq_file (see ipmr_mfc_seq_ops). */
struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;		/* table being dumped */
	struct list_head *cache;	/* list currently walked: a hash line, the
					 * unresolved queue, or NULL when done */
	int ct;				/* index of the current hash line */
};
341 
342 
/*
 * Position the iterator on the pos-th cache entry, counting the resolved
 * hash lines first and the unresolved queue after them.
 *
 * Locking: when an entry is returned, the lock that protects its list is
 * left held (mrt_lock for a hash line, mfc_unres_lock for the unresolved
 * queue) and it->cache identifies that list.  When NULL is returned, no
 * lock is held and it->cache is NULL.
 */
static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
					   struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr6_table *mrt = it->mrt;
	struct mfc6_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;	/* mrt_lock stays held */
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc6_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;	/* mfc_unres_lock stays held */
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}
368 
369 /*
370  *	The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
371  */
372 
/* Iterator state of the MIF table seq_file (see ip6mr_vif_seq_ops). */
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;	/* table being dumped */
	int ct;			/* index of the current MIF slot */
};
378 
379 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
380 					    struct ipmr_vif_iter *iter,
381 					    loff_t pos)
382 {
383 	struct mr6_table *mrt = iter->mrt;
384 
385 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
386 		if (!MIF_EXISTS(mrt, iter->ct))
387 			continue;
388 		if (pos-- == 0)
389 			return &mrt->vif6_table[iter->ct];
390 	}
391 	return NULL;
392 }
393 
394 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
395 	__acquires(mrt_lock)
396 {
397 	struct ipmr_vif_iter *iter = seq->private;
398 	struct net *net = seq_file_net(seq);
399 	struct mr6_table *mrt;
400 
401 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
402 	if (mrt == NULL)
403 		return ERR_PTR(-ENOENT);
404 
405 	iter->mrt = mrt;
406 
407 	read_lock(&mrt_lock);
408 	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
409 		: SEQ_START_TOKEN;
410 }
411 
412 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
413 {
414 	struct ipmr_vif_iter *iter = seq->private;
415 	struct net *net = seq_file_net(seq);
416 	struct mr6_table *mrt = iter->mrt;
417 
418 	++*pos;
419 	if (v == SEQ_START_TOKEN)
420 		return ip6mr_vif_seq_idx(net, iter, 0);
421 
422 	while (++iter->ct < mrt->maxvif) {
423 		if (!MIF_EXISTS(mrt, iter->ct))
424 			continue;
425 		return &mrt->vif6_table[iter->ct];
426 	}
427 	return NULL;
428 }
429 
/*
 * seq_file ->stop() for the MIF table dump: drops the read side of
 * mrt_lock, pairing with the read_lock() in ip6mr_vif_seq_start().
 */
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}
435 
436 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
437 {
438 	struct ipmr_vif_iter *iter = seq->private;
439 	struct mr6_table *mrt = iter->mrt;
440 
441 	if (v == SEQ_START_TOKEN) {
442 		seq_puts(seq,
443 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
444 	} else {
445 		const struct mif_device *vif = v;
446 		const char *name = vif->dev ? vif->dev->name : "none";
447 
448 		seq_printf(seq,
449 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
450 			   vif - mrt->vif6_table,
451 			   name, vif->bytes_in, vif->pkt_in,
452 			   vif->bytes_out, vif->pkt_out,
453 			   vif->flags);
454 	}
455 	return 0;
456 }
457 
/* seq_file iterator callbacks for the MIF table dump. */
static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};
464 
465 static int ip6mr_vif_open(struct inode *inode, struct file *file)
466 {
467 	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
468 			    sizeof(struct ipmr_vif_iter));
469 }
470 
/* File operations of the MIF table file; reading goes through seq_file. */
static const struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
478 
479 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
480 {
481 	struct ipmr_mfc_iter *it = seq->private;
482 	struct net *net = seq_file_net(seq);
483 	struct mr6_table *mrt;
484 
485 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
486 	if (mrt == NULL)
487 		return ERR_PTR(-ENOENT);
488 
489 	it->mrt = mrt;
490 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
491 		: SEQ_START_TOKEN;
492 }
493 
/*
 * seq_file ->next() for the MFC cache dump.
 *
 * Advances *pos and returns the entry after @v: first the rest of the
 * current list, then the following non-empty hash lines, finally the
 * unresolved queue.  When moving from the hash lines to the unresolved
 * queue, mrt_lock is dropped and mfc_unres_lock is taken instead.  At the
 * end of the unresolved queue mfc_unres_lock is released, it->cache is
 * set to NULL and NULL is returned.
 */
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	/* more entries on the list we are currently walking? */
	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc6_cache, list);

	if (it->cache == &mrt->mfc6_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);

	/* find the next non-empty hash line */
	while (++it->ct < MFC6_LINES) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc6_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc6_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc6_cache, list);

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}
536 
537 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
538 {
539 	struct ipmr_mfc_iter *it = seq->private;
540 	struct mr6_table *mrt = it->mrt;
541 
542 	if (it->cache == &mrt->mfc6_unres_queue)
543 		spin_unlock_bh(&mfc_unres_lock);
544 	else if (it->cache == mrt->mfc6_cache_array)
545 		read_unlock(&mrt_lock);
546 }
547 
548 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
549 {
550 	int n;
551 
552 	if (v == SEQ_START_TOKEN) {
553 		seq_puts(seq,
554 			 "Group                            "
555 			 "Origin                           "
556 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
557 	} else {
558 		const struct mfc6_cache *mfc = v;
559 		const struct ipmr_mfc_iter *it = seq->private;
560 		struct mr6_table *mrt = it->mrt;
561 
562 		seq_printf(seq, "%pI6 %pI6 %-3hd",
563 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
564 			   mfc->mf6c_parent);
565 
566 		if (it->cache != &mrt->mfc6_unres_queue) {
567 			seq_printf(seq, " %8lu %8lu %8lu",
568 				   mfc->mfc_un.res.pkt,
569 				   mfc->mfc_un.res.bytes,
570 				   mfc->mfc_un.res.wrong_if);
571 			for (n = mfc->mfc_un.res.minvif;
572 			     n < mfc->mfc_un.res.maxvif; n++) {
573 				if (MIF_EXISTS(mrt, n) &&
574 				    mfc->mfc_un.res.ttls[n] < 255)
575 					seq_printf(seq,
576 						   " %2d:%-3d",
577 						   n, mfc->mfc_un.res.ttls[n]);
578 			}
579 		} else {
580 			/* unresolved mfc_caches don't contain
581 			 * pkt, bytes and wrong_if values
582 			 */
583 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
584 		}
585 		seq_putc(seq, '\n');
586 	}
587 	return 0;
588 }
589 
/* seq_file iterator callbacks for the MFC cache dump. */
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
596 
597 static int ipmr_mfc_open(struct inode *inode, struct file *file)
598 {
599 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
600 			    sizeof(struct ipmr_mfc_iter));
601 }
602 
/* File operations of the MFC cache file; reading goes through seq_file. */
static const struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
610 #endif
611 
612 #ifdef CONFIG_IPV6_PIMSM_V2
613 
/*
 * Receive handler for PIM packets (installed via pim6_protocol).
 *
 * Only non-null PIM REGISTER messages with a valid checksum and an
 * encapsulated IPv6 packet addressed to a multicast group are accepted.
 * For those, the outer headers are pulled off and the inner packet is
 * passed to skb_tunnel_rx() with the table's register device and then
 * fed back into the stack through netif_rx().  Anything else is dropped.
 *
 * The skb is consumed on every path; the function always returns 0.
 */
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	/* need the PIM register header plus the inner IPv6 header */
	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	/*
	 * Drop unless this is a REGISTER without the NULL-register flag.
	 * The checksum is accepted if it verifies either over the PIM
	 * header alone or over the whole skb.
	 */
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	/* take a reference on the register device while mrt_lock pins it */
	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	/* strip everything in front of the inner IPv6 header */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;
	skb->pkt_type = PACKET_HOST;

	skb_tunnel_rx(skb, reg_dev);

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
679 
/* IPv6 protocol descriptor whose receive handler is pim6_rcv(). */
static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};
683 
684 /* Service routines creating virtual interfaces: PIMREG */
685 
686 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
687 				      struct net_device *dev)
688 {
689 	struct net *net = dev_net(dev);
690 	struct mr6_table *mrt;
691 	struct flowi6 fl6 = {
692 		.flowi6_oif	= dev->ifindex,
693 		.flowi6_iif	= skb->skb_iif,
694 		.flowi6_mark	= skb->mark,
695 	};
696 	int err;
697 
698 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
699 	if (err < 0)
700 		return err;
701 
702 	read_lock(&mrt_lock);
703 	dev->stats.tx_bytes += skb->len;
704 	dev->stats.tx_packets++;
705 	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
706 	read_unlock(&mrt_lock);
707 	kfree_skb(skb);
708 	return NETDEV_TX_OK;
709 }
710 
/* Device operations of the PIM register device: only transmit is provided. */
static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};
714 
715 static void reg_vif_setup(struct net_device *dev)
716 {
717 	dev->type		= ARPHRD_PIMREG;
718 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
719 	dev->flags		= IFF_NOARP;
720 	dev->netdev_ops		= &reg_vif_netdev_ops;
721 	dev->destructor		= free_netdev;
722 	dev->features		|= NETIF_F_NETNS_LOCAL;
723 }
724 
725 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
726 {
727 	struct net_device *dev;
728 	char name[IFNAMSIZ];
729 
730 	if (mrt->id == RT6_TABLE_DFLT)
731 		sprintf(name, "pim6reg");
732 	else
733 		sprintf(name, "pim6reg%u", mrt->id);
734 
735 	dev = alloc_netdev(0, name, reg_vif_setup);
736 	if (dev == NULL)
737 		return NULL;
738 
739 	dev_net_set(dev, net);
740 
741 	if (register_netdevice(dev)) {
742 		free_netdev(dev);
743 		return NULL;
744 	}
745 	dev->iflink = 0;
746 
747 	if (dev_open(dev))
748 		goto failure;
749 
750 	dev_hold(dev);
751 	return dev;
752 
753 failure:
754 	/* allow the register to be completed before unregistering. */
755 	rtnl_unlock();
756 	rtnl_lock();
757 
758 	unregister_netdevice(dev);
759 	return NULL;
760 }
761 #endif
762 
763 /*
764  *	Delete a VIF entry
765  */
766 
767 static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
768 {
769 	struct mif_device *v;
770 	struct net_device *dev;
771 	struct inet6_dev *in6_dev;
772 
773 	if (vifi < 0 || vifi >= mrt->maxvif)
774 		return -EADDRNOTAVAIL;
775 
776 	v = &mrt->vif6_table[vifi];
777 
778 	write_lock_bh(&mrt_lock);
779 	dev = v->dev;
780 	v->dev = NULL;
781 
782 	if (!dev) {
783 		write_unlock_bh(&mrt_lock);
784 		return -EADDRNOTAVAIL;
785 	}
786 
787 #ifdef CONFIG_IPV6_PIMSM_V2
788 	if (vifi == mrt->mroute_reg_vif_num)
789 		mrt->mroute_reg_vif_num = -1;
790 #endif
791 
792 	if (vifi + 1 == mrt->maxvif) {
793 		int tmp;
794 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
795 			if (MIF_EXISTS(mrt, tmp))
796 				break;
797 		}
798 		mrt->maxvif = tmp + 1;
799 	}
800 
801 	write_unlock_bh(&mrt_lock);
802 
803 	dev_set_allmulti(dev, -1);
804 
805 	in6_dev = __in6_dev_get(dev);
806 	if (in6_dev)
807 		in6_dev->cnf.mc_forwarding--;
808 
809 	if (v->flags & MIFF_REGISTER)
810 		unregister_netdevice_queue(dev, head);
811 
812 	dev_put(dev);
813 	return 0;
814 }
815 
816 static inline void ip6mr_cache_free(struct mfc6_cache *c)
817 {
818 	kmem_cache_free(mrt_cachep, c);
819 }
820 
821 /* Destroy an unresolved cache entry, killing queued skbs
822    and reporting error to netlink readers.
823  */
824 
825 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
826 {
827 	struct net *net = read_pnet(&mrt->net);
828 	struct sk_buff *skb;
829 
830 	atomic_dec(&mrt->cache_resolve_queue_len);
831 
832 	while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
833 		if (ipv6_hdr(skb)->version == 0) {
834 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
835 			nlh->nlmsg_type = NLMSG_ERROR;
836 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
837 			skb_trim(skb, nlh->nlmsg_len);
838 			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
839 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
840 		} else
841 			kfree_skb(skb);
842 	}
843 
844 	ip6mr_cache_free(c);
845 }
846 
847 
848 /* Timer process for all the unresolved queue. */
849 
850 static void ipmr_do_expire_process(struct mr6_table *mrt)
851 {
852 	unsigned long now = jiffies;
853 	unsigned long expires = 10 * HZ;
854 	struct mfc6_cache *c, *next;
855 
856 	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
857 		if (time_after(c->mfc_un.unres.expires, now)) {
858 			/* not yet... */
859 			unsigned long interval = c->mfc_un.unres.expires - now;
860 			if (interval < expires)
861 				expires = interval;
862 			continue;
863 		}
864 
865 		list_del(&c->list);
866 		ip6mr_destroy_unres(mrt, c);
867 	}
868 
869 	if (!list_empty(&mrt->mfc6_unres_queue))
870 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
871 }
872 
873 static void ipmr_expire_process(unsigned long arg)
874 {
875 	struct mr6_table *mrt = (struct mr6_table *)arg;
876 
877 	if (!spin_trylock(&mfc_unres_lock)) {
878 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
879 		return;
880 	}
881 
882 	if (!list_empty(&mrt->mfc6_unres_queue))
883 		ipmr_do_expire_process(mrt);
884 
885 	spin_unlock(&mfc_unres_lock);
886 }
887 
888 /* Fill oifs list. It is called under write locked mrt_lock. */
889 
890 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
891 				    unsigned char *ttls)
892 {
893 	int vifi;
894 
895 	cache->mfc_un.res.minvif = MAXMIFS;
896 	cache->mfc_un.res.maxvif = 0;
897 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
898 
899 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
900 		if (MIF_EXISTS(mrt, vifi) &&
901 		    ttls[vifi] && ttls[vifi] < 255) {
902 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
903 			if (cache->mfc_un.res.minvif > vifi)
904 				cache->mfc_un.res.minvif = vifi;
905 			if (cache->mfc_un.res.maxvif <= vifi)
906 				cache->mfc_un.res.maxvif = vifi + 1;
907 		}
908 	}
909 }
910 
/*
 * Attach a device to MIF slot vifc->mif6c_mifi of table @mrt.
 *
 * With MIFF_REGISTER a new PIM register device is created; with flags 0
 * the existing device with ifindex vifc->mif6c_pifi is used.  The device
 * is put into allmulti mode and its mc_forwarding counter is bumped.
 * @mrtsock == 0 marks the MIF as VIFF_STATIC.
 *
 * Returns 0 on success, -EADDRINUSE if the slot (or the register MIF) is
 * already taken, -EADDRNOTAVAIL if the device does not exist, -ENOBUFS
 * if the register device cannot be created, -EINVAL for other flags, or
 * the error from dev_set_allmulti().
 *
 * NOTE(review): vifi indexes vif6_table[] without a range check here;
 * presumably the caller validates mif6c_mifi against MAXMIFS - confirm.
 */
static int mif6_add(struct net *net, struct mr6_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (MIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			/* undo the device creation and drop our reference */
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev)
		in6_dev->cnf.mc_forwarding++;

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	/* setting v->dev is what makes MIF_EXISTS() true for this slot */
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
990 
991 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
992 					   struct in6_addr *origin,
993 					   struct in6_addr *mcastgrp)
994 {
995 	int line = MFC6_HASH(mcastgrp, origin);
996 	struct mfc6_cache *c;
997 
998 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
999 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1000 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1001 			return c;
1002 	}
1003 	return NULL;
1004 }
1005 
1006 /*
1007  *	Allocate a multicast cache entry
1008  */
1009 static struct mfc6_cache *ip6mr_cache_alloc(void)
1010 {
1011 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1012 	if (c == NULL)
1013 		return NULL;
1014 	c->mfc_un.res.minvif = MAXMIFS;
1015 	return c;
1016 }
1017 
1018 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1019 {
1020 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1021 	if (c == NULL)
1022 		return NULL;
1023 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
1024 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
1025 	return c;
1026 }
1027 
1028 /*
1029  *	A cache entry has gone into a resolved state from queued
1030  */
1031 
1032 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1033 				struct mfc6_cache *uc, struct mfc6_cache *c)
1034 {
1035 	struct sk_buff *skb;
1036 
1037 	/*
1038 	 *	Play the pending entries through our router
1039 	 */
1040 
1041 	while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1042 		if (ipv6_hdr(skb)->version == 0) {
1043 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1044 
1045 			if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
1046 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1047 			} else {
1048 				nlh->nlmsg_type = NLMSG_ERROR;
1049 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
1050 				skb_trim(skb, nlh->nlmsg_len);
1051 				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
1052 			}
1053 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
1054 		} else
1055 			ip6_mr_forward(net, mrt, skb, c);
1056 	}
1057 }
1058 
/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 *
 *	A new skb carrying a struct mrt6msg is built and queued on the
 *	table's mroute6_sk socket:
 *	 - MRT6MSG_WHOLEPKT (PIM builds only): a copy of @pkt with extra
 *	   headroom, with the mrt6msg pushed in front of it;
 *	 - any other @assert value: a fresh skb holding a copy of @pkt's
 *	   IPv6 header followed by the mrt6msg.
 *	@pkt itself is not consumed.
 *
 *	Returns 0 on success, -ENOBUFS if no skb could be allocated,
 *	-EINVAL if no mroute socket is set, or the negative error from
 *	sock_queue_rcv_skb().
 */

static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		/* the register MIF is reported here, not the mifi argument */
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	/* nobody to report to: discard the message built above */
	if (mrt->mroute6_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
1155 
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 *
 *	The unresolved queue is searched, under mfc_unres_lock, for an
 *	entry matching the packet's source and destination.  If there is
 *	none, a new entry is created (unless 10 are already pending) and
 *	the packet is reported to the daemon as MRT6MSG_NOCACHE.  The skb
 *	is then appended to the entry unless more than 3 skbs are already
 *	queued on it.
 *
 *	The skb is consumed on every path.  Returns 0 if it was queued,
 *	-ENOBUFS if a limit was hit or allocation failed, or the error
 *	from ip6mr_cache_report().
 */

static int
ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc6_unres_queue);

		/* the queue is non-empty now, so this (re)arms the expiry timer */
		ipmr_do_expire_process(mrt);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
1231 
1232 /*
1233  *	MFC6 cache manipulation by user space
1234  */
1235 
1236 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
1237 {
1238 	int line;
1239 	struct mfc6_cache *c, *next;
1240 
1241 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1242 
1243 	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1244 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1245 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1246 			write_lock_bh(&mrt_lock);
1247 			list_del(&c->list);
1248 			write_unlock_bh(&mrt_lock);
1249 
1250 			ip6mr_cache_free(c);
1251 			return 0;
1252 		}
1253 	}
1254 	return -ENOENT;
1255 }
1256 
1257 static int ip6mr_device_event(struct notifier_block *this,
1258 			      unsigned long event, void *ptr)
1259 {
1260 	struct net_device *dev = ptr;
1261 	struct net *net = dev_net(dev);
1262 	struct mr6_table *mrt;
1263 	struct mif_device *v;
1264 	int ct;
1265 	LIST_HEAD(list);
1266 
1267 	if (event != NETDEV_UNREGISTER)
1268 		return NOTIFY_DONE;
1269 
1270 	ip6mr_for_each_table(mrt, net) {
1271 		v = &mrt->vif6_table[0];
1272 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1273 			if (v->dev == dev)
1274 				mif6_delete(mrt, ct, &list);
1275 		}
1276 	}
1277 	unregister_netdevice_many(&list);
1278 
1279 	return NOTIFY_DONE;
1280 }
1281 
1282 static struct notifier_block ip6_mr_notifier = {
1283 	.notifier_call = ip6mr_device_event
1284 };
1285 
1286 /*
1287  *	Setup for IP multicast routing
1288  */
1289 
1290 static int __net_init ip6mr_net_init(struct net *net)
1291 {
1292 	int err;
1293 
1294 	err = ip6mr_rules_init(net);
1295 	if (err < 0)
1296 		goto fail;
1297 
1298 #ifdef CONFIG_PROC_FS
1299 	err = -ENOMEM;
1300 	if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1301 		goto proc_vif_fail;
1302 	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1303 		goto proc_cache_fail;
1304 #endif
1305 
1306 	return 0;
1307 
1308 #ifdef CONFIG_PROC_FS
1309 proc_cache_fail:
1310 	proc_net_remove(net, "ip6_mr_vif");
1311 proc_vif_fail:
1312 	ip6mr_rules_exit(net);
1313 #endif
1314 fail:
1315 	return err;
1316 }
1317 
1318 static void __net_exit ip6mr_net_exit(struct net *net)
1319 {
1320 #ifdef CONFIG_PROC_FS
1321 	proc_net_remove(net, "ip6_mr_cache");
1322 	proc_net_remove(net, "ip6_mr_vif");
1323 #endif
1324 	ip6mr_rules_exit(net);
1325 }
1326 
1327 static struct pernet_operations ip6mr_net_ops = {
1328 	.init = ip6mr_net_init,
1329 	.exit = ip6mr_net_exit,
1330 };
1331 
1332 int __init ip6_mr_init(void)
1333 {
1334 	int err;
1335 
1336 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1337 				       sizeof(struct mfc6_cache),
1338 				       0, SLAB_HWCACHE_ALIGN,
1339 				       NULL);
1340 	if (!mrt_cachep)
1341 		return -ENOMEM;
1342 
1343 	err = register_pernet_subsys(&ip6mr_net_ops);
1344 	if (err)
1345 		goto reg_pernet_fail;
1346 
1347 	err = register_netdevice_notifier(&ip6_mr_notifier);
1348 	if (err)
1349 		goto reg_notif_fail;
1350 #ifdef CONFIG_IPV6_PIMSM_V2
1351 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1352 		printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
1353 		err = -EAGAIN;
1354 		goto add_proto_fail;
1355 	}
1356 #endif
1357 	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, ip6mr_rtm_dumproute);
1358 	return 0;
1359 #ifdef CONFIG_IPV6_PIMSM_V2
1360 add_proto_fail:
1361 	unregister_netdevice_notifier(&ip6_mr_notifier);
1362 #endif
1363 reg_notif_fail:
1364 	unregister_pernet_subsys(&ip6mr_net_ops);
1365 reg_pernet_fail:
1366 	kmem_cache_destroy(mrt_cachep);
1367 	return err;
1368 }
1369 
1370 void ip6_mr_cleanup(void)
1371 {
1372 	unregister_netdevice_notifier(&ip6_mr_notifier);
1373 	unregister_pernet_subsys(&ip6mr_net_ops);
1374 	kmem_cache_destroy(mrt_cachep);
1375 }
1376 
1377 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1378 			 struct mf6cctl *mfc, int mrtsock)
1379 {
1380 	bool found = false;
1381 	int line;
1382 	struct mfc6_cache *uc, *c;
1383 	unsigned char ttls[MAXMIFS];
1384 	int i;
1385 
1386 	if (mfc->mf6cc_parent >= MAXMIFS)
1387 		return -ENFILE;
1388 
1389 	memset(ttls, 255, MAXMIFS);
1390 	for (i = 0; i < MAXMIFS; i++) {
1391 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1392 			ttls[i] = 1;
1393 
1394 	}
1395 
1396 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1397 
1398 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1399 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1400 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1401 			found = true;
1402 			break;
1403 		}
1404 	}
1405 
1406 	if (found) {
1407 		write_lock_bh(&mrt_lock);
1408 		c->mf6c_parent = mfc->mf6cc_parent;
1409 		ip6mr_update_thresholds(mrt, c, ttls);
1410 		if (!mrtsock)
1411 			c->mfc_flags |= MFC_STATIC;
1412 		write_unlock_bh(&mrt_lock);
1413 		return 0;
1414 	}
1415 
1416 	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1417 		return -EINVAL;
1418 
1419 	c = ip6mr_cache_alloc();
1420 	if (c == NULL)
1421 		return -ENOMEM;
1422 
1423 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1424 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1425 	c->mf6c_parent = mfc->mf6cc_parent;
1426 	ip6mr_update_thresholds(mrt, c, ttls);
1427 	if (!mrtsock)
1428 		c->mfc_flags |= MFC_STATIC;
1429 
1430 	write_lock_bh(&mrt_lock);
1431 	list_add(&c->list, &mrt->mfc6_cache_array[line]);
1432 	write_unlock_bh(&mrt_lock);
1433 
1434 	/*
1435 	 *	Check to see if we resolved a queued list. If so we
1436 	 *	need to send on the frames and tidy up.
1437 	 */
1438 	found = false;
1439 	spin_lock_bh(&mfc_unres_lock);
1440 	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1441 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1442 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1443 			list_del(&uc->list);
1444 			atomic_dec(&mrt->cache_resolve_queue_len);
1445 			found = true;
1446 			break;
1447 		}
1448 	}
1449 	if (list_empty(&mrt->mfc6_unres_queue))
1450 		del_timer(&mrt->ipmr_expire_timer);
1451 	spin_unlock_bh(&mfc_unres_lock);
1452 
1453 	if (found) {
1454 		ip6mr_cache_resolve(net, mrt, uc, c);
1455 		ip6mr_cache_free(uc);
1456 	}
1457 	return 0;
1458 }
1459 
1460 /*
1461  *	Close the multicast socket, and clear the vif tables etc
1462  */
1463 
1464 static void mroute_clean_tables(struct mr6_table *mrt)
1465 {
1466 	int i;
1467 	LIST_HEAD(list);
1468 	struct mfc6_cache *c, *next;
1469 
1470 	/*
1471 	 *	Shut down all active vif entries
1472 	 */
1473 	for (i = 0; i < mrt->maxvif; i++) {
1474 		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1475 			mif6_delete(mrt, i, &list);
1476 	}
1477 	unregister_netdevice_many(&list);
1478 
1479 	/*
1480 	 *	Wipe the cache
1481 	 */
1482 	for (i = 0; i < MFC6_LINES; i++) {
1483 		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1484 			if (c->mfc_flags & MFC_STATIC)
1485 				continue;
1486 			write_lock_bh(&mrt_lock);
1487 			list_del(&c->list);
1488 			write_unlock_bh(&mrt_lock);
1489 
1490 			ip6mr_cache_free(c);
1491 		}
1492 	}
1493 
1494 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1495 		spin_lock_bh(&mfc_unres_lock);
1496 		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1497 			list_del(&c->list);
1498 			ip6mr_destroy_unres(mrt, c);
1499 		}
1500 		spin_unlock_bh(&mfc_unres_lock);
1501 	}
1502 }
1503 
1504 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1505 {
1506 	int err = 0;
1507 	struct net *net = sock_net(sk);
1508 
1509 	rtnl_lock();
1510 	write_lock_bh(&mrt_lock);
1511 	if (likely(mrt->mroute6_sk == NULL)) {
1512 		mrt->mroute6_sk = sk;
1513 		net->ipv6.devconf_all->mc_forwarding++;
1514 	}
1515 	else
1516 		err = -EADDRINUSE;
1517 	write_unlock_bh(&mrt_lock);
1518 
1519 	rtnl_unlock();
1520 
1521 	return err;
1522 }
1523 
1524 int ip6mr_sk_done(struct sock *sk)
1525 {
1526 	int err = -EACCES;
1527 	struct net *net = sock_net(sk);
1528 	struct mr6_table *mrt;
1529 
1530 	rtnl_lock();
1531 	ip6mr_for_each_table(mrt, net) {
1532 		if (sk == mrt->mroute6_sk) {
1533 			write_lock_bh(&mrt_lock);
1534 			mrt->mroute6_sk = NULL;
1535 			net->ipv6.devconf_all->mc_forwarding--;
1536 			write_unlock_bh(&mrt_lock);
1537 
1538 			mroute_clean_tables(mrt);
1539 			err = 0;
1540 			break;
1541 		}
1542 	}
1543 	rtnl_unlock();
1544 
1545 	return err;
1546 }
1547 
1548 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1549 {
1550 	struct mr6_table *mrt;
1551 	struct flowi6 fl6 = {
1552 		.flowi6_iif	= skb->skb_iif,
1553 		.flowi6_oif	= skb->dev->ifindex,
1554 		.flowi6_mark	= skb->mark,
1555 	};
1556 
1557 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1558 		return NULL;
1559 
1560 	return mrt->mroute6_sk;
1561 }
1562 
1563 /*
1564  *	Socket options and virtual interface manipulation. The whole
1565  *	virtual interface system is a complete heap, but unfortunately
1566  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1567  *	MOSPF/PIM router set up we can clean this up.
1568  */
1569 
1570 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1571 {
1572 	int ret;
1573 	struct mif6ctl vif;
1574 	struct mf6cctl mfc;
1575 	mifi_t mifi;
1576 	struct net *net = sock_net(sk);
1577 	struct mr6_table *mrt;
1578 
1579 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1580 	if (mrt == NULL)
1581 		return -ENOENT;
1582 
1583 	if (optname != MRT6_INIT) {
1584 		if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
1585 			return -EACCES;
1586 	}
1587 
1588 	switch (optname) {
1589 	case MRT6_INIT:
1590 		if (sk->sk_type != SOCK_RAW ||
1591 		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1592 			return -EOPNOTSUPP;
1593 		if (optlen < sizeof(int))
1594 			return -EINVAL;
1595 
1596 		return ip6mr_sk_init(mrt, sk);
1597 
1598 	case MRT6_DONE:
1599 		return ip6mr_sk_done(sk);
1600 
1601 	case MRT6_ADD_MIF:
1602 		if (optlen < sizeof(vif))
1603 			return -EINVAL;
1604 		if (copy_from_user(&vif, optval, sizeof(vif)))
1605 			return -EFAULT;
1606 		if (vif.mif6c_mifi >= MAXMIFS)
1607 			return -ENFILE;
1608 		rtnl_lock();
1609 		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1610 		rtnl_unlock();
1611 		return ret;
1612 
1613 	case MRT6_DEL_MIF:
1614 		if (optlen < sizeof(mifi_t))
1615 			return -EINVAL;
1616 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1617 			return -EFAULT;
1618 		rtnl_lock();
1619 		ret = mif6_delete(mrt, mifi, NULL);
1620 		rtnl_unlock();
1621 		return ret;
1622 
1623 	/*
1624 	 *	Manipulate the forwarding caches. These live
1625 	 *	in a sort of kernel/user symbiosis.
1626 	 */
1627 	case MRT6_ADD_MFC:
1628 	case MRT6_DEL_MFC:
1629 		if (optlen < sizeof(mfc))
1630 			return -EINVAL;
1631 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1632 			return -EFAULT;
1633 		rtnl_lock();
1634 		if (optname == MRT6_DEL_MFC)
1635 			ret = ip6mr_mfc_delete(mrt, &mfc);
1636 		else
1637 			ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
1638 		rtnl_unlock();
1639 		return ret;
1640 
1641 	/*
1642 	 *	Control PIM assert (to activate pim will activate assert)
1643 	 */
1644 	case MRT6_ASSERT:
1645 	{
1646 		int v;
1647 		if (get_user(v, (int __user *)optval))
1648 			return -EFAULT;
1649 		mrt->mroute_do_assert = !!v;
1650 		return 0;
1651 	}
1652 
1653 #ifdef CONFIG_IPV6_PIMSM_V2
1654 	case MRT6_PIM:
1655 	{
1656 		int v;
1657 		if (get_user(v, (int __user *)optval))
1658 			return -EFAULT;
1659 		v = !!v;
1660 		rtnl_lock();
1661 		ret = 0;
1662 		if (v != mrt->mroute_do_pim) {
1663 			mrt->mroute_do_pim = v;
1664 			mrt->mroute_do_assert = v;
1665 		}
1666 		rtnl_unlock();
1667 		return ret;
1668 	}
1669 
1670 #endif
1671 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1672 	case MRT6_TABLE:
1673 	{
1674 		u32 v;
1675 
1676 		if (optlen != sizeof(u32))
1677 			return -EINVAL;
1678 		if (get_user(v, (u32 __user *)optval))
1679 			return -EFAULT;
1680 		if (sk == mrt->mroute6_sk)
1681 			return -EBUSY;
1682 
1683 		rtnl_lock();
1684 		ret = 0;
1685 		if (!ip6mr_new_table(net, v))
1686 			ret = -ENOMEM;
1687 		raw6_sk(sk)->ip6mr_table = v;
1688 		rtnl_unlock();
1689 		return ret;
1690 	}
1691 #endif
1692 	/*
1693 	 *	Spurious command, or MRT6_VERSION which you cannot
1694 	 *	set.
1695 	 */
1696 	default:
1697 		return -ENOPROTOOPT;
1698 	}
1699 }
1700 
1701 /*
1702  *	Getsock opt support for the multicast routing system.
1703  */
1704 
1705 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1706 			  int __user *optlen)
1707 {
1708 	int olr;
1709 	int val;
1710 	struct net *net = sock_net(sk);
1711 	struct mr6_table *mrt;
1712 
1713 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1714 	if (mrt == NULL)
1715 		return -ENOENT;
1716 
1717 	switch (optname) {
1718 	case MRT6_VERSION:
1719 		val = 0x0305;
1720 		break;
1721 #ifdef CONFIG_IPV6_PIMSM_V2
1722 	case MRT6_PIM:
1723 		val = mrt->mroute_do_pim;
1724 		break;
1725 #endif
1726 	case MRT6_ASSERT:
1727 		val = mrt->mroute_do_assert;
1728 		break;
1729 	default:
1730 		return -ENOPROTOOPT;
1731 	}
1732 
1733 	if (get_user(olr, optlen))
1734 		return -EFAULT;
1735 
1736 	olr = min_t(int, olr, sizeof(int));
1737 	if (olr < 0)
1738 		return -EINVAL;
1739 
1740 	if (put_user(olr, optlen))
1741 		return -EFAULT;
1742 	if (copy_to_user(optval, &val, olr))
1743 		return -EFAULT;
1744 	return 0;
1745 }
1746 
1747 /*
1748  *	The IP multicast ioctl support routines.
1749  */
1750 
1751 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1752 {
1753 	struct sioc_sg_req6 sr;
1754 	struct sioc_mif_req6 vr;
1755 	struct mif_device *vif;
1756 	struct mfc6_cache *c;
1757 	struct net *net = sock_net(sk);
1758 	struct mr6_table *mrt;
1759 
1760 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1761 	if (mrt == NULL)
1762 		return -ENOENT;
1763 
1764 	switch (cmd) {
1765 	case SIOCGETMIFCNT_IN6:
1766 		if (copy_from_user(&vr, arg, sizeof(vr)))
1767 			return -EFAULT;
1768 		if (vr.mifi >= mrt->maxvif)
1769 			return -EINVAL;
1770 		read_lock(&mrt_lock);
1771 		vif = &mrt->vif6_table[vr.mifi];
1772 		if (MIF_EXISTS(mrt, vr.mifi)) {
1773 			vr.icount = vif->pkt_in;
1774 			vr.ocount = vif->pkt_out;
1775 			vr.ibytes = vif->bytes_in;
1776 			vr.obytes = vif->bytes_out;
1777 			read_unlock(&mrt_lock);
1778 
1779 			if (copy_to_user(arg, &vr, sizeof(vr)))
1780 				return -EFAULT;
1781 			return 0;
1782 		}
1783 		read_unlock(&mrt_lock);
1784 		return -EADDRNOTAVAIL;
1785 	case SIOCGETSGCNT_IN6:
1786 		if (copy_from_user(&sr, arg, sizeof(sr)))
1787 			return -EFAULT;
1788 
1789 		read_lock(&mrt_lock);
1790 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1791 		if (c) {
1792 			sr.pktcnt = c->mfc_un.res.pkt;
1793 			sr.bytecnt = c->mfc_un.res.bytes;
1794 			sr.wrong_if = c->mfc_un.res.wrong_if;
1795 			read_unlock(&mrt_lock);
1796 
1797 			if (copy_to_user(arg, &sr, sizeof(sr)))
1798 				return -EFAULT;
1799 			return 0;
1800 		}
1801 		read_unlock(&mrt_lock);
1802 		return -EADDRNOTAVAIL;
1803 	default:
1804 		return -ENOIOCTLCMD;
1805 	}
1806 }
1807 
#ifdef CONFIG_COMPAT
/* Compat-task layout of the SIOCGETSGCNT_IN6 argument (compat_ulong_t counters). */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

/* Compat-task layout of the SIOCGETMIFCNT_IN6 argument (compat_ulong_t counters). */
struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

/*
 *	Compat variant of ip6mr_ioctl(): same commands and return values,
 *	but the request structures use the compat layouts defined above.
 */
int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct net *net = sock_net(sk);
	struct compat_sioc_mif_req6 vr;
	struct compat_sioc_sg_req6 sr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;

		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (!MIF_EXISTS(mrt, vr.mifi)) {
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		}
		vr.icount = vif->pkt_in;
		vr.ocount = vif->pkt_out;
		vr.ibytes = vif->bytes_in;
		vr.obytes = vif->bytes_out;
		read_unlock(&mrt_lock);

		if (copy_to_user(arg, &vr, sizeof(vr)))
			return -EFAULT;
		return 0;

	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (!c) {
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		}
		sr.pktcnt = c->mfc_un.res.pkt;
		sr.bytecnt = c->mfc_un.res.bytes;
		sr.wrong_if = c->mfc_un.res.wrong_if;
		read_unlock(&mrt_lock);

		if (copy_to_user(arg, &sr, sizeof(sr)))
			return -EFAULT;
		return 0;

	default:
		return -ENOIOCTLCMD;
	}
}
#endif
1882 
1883 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1884 {
1885 	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1886 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1887 	return dst_output(skb);
1888 }
1889 
1890 /*
1891  *	Processing handlers for ip6mr_forward
1892  */
1893 
1894 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1895 			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1896 {
1897 	struct ipv6hdr *ipv6h;
1898 	struct mif_device *vif = &mrt->vif6_table[vifi];
1899 	struct net_device *dev;
1900 	struct dst_entry *dst;
1901 	struct flowi6 fl6;
1902 
1903 	if (vif->dev == NULL)
1904 		goto out_free;
1905 
1906 #ifdef CONFIG_IPV6_PIMSM_V2
1907 	if (vif->flags & MIFF_REGISTER) {
1908 		vif->pkt_out++;
1909 		vif->bytes_out += skb->len;
1910 		vif->dev->stats.tx_bytes += skb->len;
1911 		vif->dev->stats.tx_packets++;
1912 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1913 		goto out_free;
1914 	}
1915 #endif
1916 
1917 	ipv6h = ipv6_hdr(skb);
1918 
1919 	fl6 = (struct flowi6) {
1920 		.flowi6_oif = vif->link,
1921 		.daddr = ipv6h->daddr,
1922 	};
1923 
1924 	dst = ip6_route_output(net, NULL, &fl6);
1925 	if (!dst)
1926 		goto out_free;
1927 
1928 	skb_dst_drop(skb);
1929 	skb_dst_set(skb, dst);
1930 
1931 	/*
1932 	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1933 	 * not only before forwarding, but after forwarding on all output
1934 	 * interfaces. It is clear, if mrouter runs a multicasting
1935 	 * program, it should receive packets not depending to what interface
1936 	 * program is joined.
1937 	 * If we will not make it, the program will have to join on all
1938 	 * interfaces. On the other hand, multihoming host (or router, but
1939 	 * not mrouter) cannot join to more than one interface - it will
1940 	 * result in receiving multiple packets.
1941 	 */
1942 	dev = vif->dev;
1943 	skb->dev = dev;
1944 	vif->pkt_out++;
1945 	vif->bytes_out += skb->len;
1946 
1947 	/* We are about to write */
1948 	/* XXX: extension headers? */
1949 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1950 		goto out_free;
1951 
1952 	ipv6h = ipv6_hdr(skb);
1953 	ipv6h->hop_limit--;
1954 
1955 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1956 
1957 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
1958 		       ip6mr_forward2_finish);
1959 
1960 out_free:
1961 	kfree_skb(skb);
1962 	return 0;
1963 }
1964 
1965 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
1966 {
1967 	int ct;
1968 
1969 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
1970 		if (mrt->vif6_table[ct].dev == dev)
1971 			break;
1972 	}
1973 	return ct;
1974 }
1975 
1976 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
1977 			  struct sk_buff *skb, struct mfc6_cache *cache)
1978 {
1979 	int psend = -1;
1980 	int vif, ct;
1981 
1982 	vif = cache->mf6c_parent;
1983 	cache->mfc_un.res.pkt++;
1984 	cache->mfc_un.res.bytes += skb->len;
1985 
1986 	/*
1987 	 * Wrong interface: drop packet and (maybe) send PIM assert.
1988 	 */
1989 	if (mrt->vif6_table[vif].dev != skb->dev) {
1990 		int true_vifi;
1991 
1992 		cache->mfc_un.res.wrong_if++;
1993 		true_vifi = ip6mr_find_vif(mrt, skb->dev);
1994 
1995 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
1996 		    /* pimsm uses asserts, when switching from RPT to SPT,
1997 		       so that we cannot check that packet arrived on an oif.
1998 		       It is bad, but otherwise we would need to move pretty
1999 		       large chunk of pimd to kernel. Ough... --ANK
2000 		     */
2001 		    (mrt->mroute_do_pim ||
2002 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
2003 		    time_after(jiffies,
2004 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2005 			cache->mfc_un.res.last_assert = jiffies;
2006 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2007 		}
2008 		goto dont_forward;
2009 	}
2010 
2011 	mrt->vif6_table[vif].pkt_in++;
2012 	mrt->vif6_table[vif].bytes_in += skb->len;
2013 
2014 	/*
2015 	 *	Forward the frame
2016 	 */
2017 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2018 		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2019 			if (psend != -1) {
2020 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2021 				if (skb2)
2022 					ip6mr_forward2(net, mrt, skb2, cache, psend);
2023 			}
2024 			psend = ct;
2025 		}
2026 	}
2027 	if (psend != -1) {
2028 		ip6mr_forward2(net, mrt, skb, cache, psend);
2029 		return 0;
2030 	}
2031 
2032 dont_forward:
2033 	kfree_skb(skb);
2034 	return 0;
2035 }
2036 
2037 
2038 /*
2039  *	Multicast packets for forwarding arrive here
2040  */
2041 
2042 int ip6_mr_input(struct sk_buff *skb)
2043 {
2044 	struct mfc6_cache *cache;
2045 	struct net *net = dev_net(skb->dev);
2046 	struct mr6_table *mrt;
2047 	struct flowi6 fl6 = {
2048 		.flowi6_iif	= skb->dev->ifindex,
2049 		.flowi6_mark	= skb->mark,
2050 	};
2051 	int err;
2052 
2053 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2054 	if (err < 0)
2055 		return err;
2056 
2057 	read_lock(&mrt_lock);
2058 	cache = ip6mr_cache_find(mrt,
2059 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2060 
2061 	/*
2062 	 *	No usable cache entry
2063 	 */
2064 	if (cache == NULL) {
2065 		int vif;
2066 
2067 		vif = ip6mr_find_vif(mrt, skb->dev);
2068 		if (vif >= 0) {
2069 			int err = ip6mr_cache_unresolved(mrt, vif, skb);
2070 			read_unlock(&mrt_lock);
2071 
2072 			return err;
2073 		}
2074 		read_unlock(&mrt_lock);
2075 		kfree_skb(skb);
2076 		return -ENODEV;
2077 	}
2078 
2079 	ip6_mr_forward(net, mrt, skb, cache);
2080 
2081 	read_unlock(&mrt_lock);
2082 
2083 	return 0;
2084 }
2085 
2086 
2087 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2088 			       struct mfc6_cache *c, struct rtmsg *rtm)
2089 {
2090 	int ct;
2091 	struct rtnexthop *nhp;
2092 	u8 *b = skb_tail_pointer(skb);
2093 	struct rtattr *mp_head;
2094 
2095 	/* If cache is unresolved, don't try to parse IIF and OIF */
2096 	if (c->mf6c_parent >= MAXMIFS)
2097 		return -ENOENT;
2098 
2099 	if (MIF_EXISTS(mrt, c->mf6c_parent))
2100 		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);
2101 
2102 	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
2103 
2104 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2105 		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2106 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
2107 				goto rtattr_failure;
2108 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
2109 			nhp->rtnh_flags = 0;
2110 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2111 			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2112 			nhp->rtnh_len = sizeof(*nhp);
2113 		}
2114 	}
2115 	mp_head->rta_type = RTA_MULTIPATH;
2116 	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
2117 	rtm->rtm_type = RTN_MULTICAST;
2118 	return 1;
2119 
2120 rtattr_failure:
2121 	nlmsg_trim(skb, b);
2122 	return -EMSGSIZE;
2123 }
2124 
2125 int ip6mr_get_route(struct net *net,
2126 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2127 {
2128 	int err;
2129 	struct mr6_table *mrt;
2130 	struct mfc6_cache *cache;
2131 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2132 
2133 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2134 	if (mrt == NULL)
2135 		return -ENOENT;
2136 
2137 	read_lock(&mrt_lock);
2138 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2139 
2140 	if (!cache) {
2141 		struct sk_buff *skb2;
2142 		struct ipv6hdr *iph;
2143 		struct net_device *dev;
2144 		int vif;
2145 
2146 		if (nowait) {
2147 			read_unlock(&mrt_lock);
2148 			return -EAGAIN;
2149 		}
2150 
2151 		dev = skb->dev;
2152 		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2153 			read_unlock(&mrt_lock);
2154 			return -ENODEV;
2155 		}
2156 
2157 		/* really correct? */
2158 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2159 		if (!skb2) {
2160 			read_unlock(&mrt_lock);
2161 			return -ENOMEM;
2162 		}
2163 
2164 		skb_reset_transport_header(skb2);
2165 
2166 		skb_put(skb2, sizeof(struct ipv6hdr));
2167 		skb_reset_network_header(skb2);
2168 
2169 		iph = ipv6_hdr(skb2);
2170 		iph->version = 0;
2171 		iph->priority = 0;
2172 		iph->flow_lbl[0] = 0;
2173 		iph->flow_lbl[1] = 0;
2174 		iph->flow_lbl[2] = 0;
2175 		iph->payload_len = 0;
2176 		iph->nexthdr = IPPROTO_NONE;
2177 		iph->hop_limit = 0;
2178 		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
2179 		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
2180 
2181 		err = ip6mr_cache_unresolved(mrt, vif, skb2);
2182 		read_unlock(&mrt_lock);
2183 
2184 		return err;
2185 	}
2186 
2187 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2188 		cache->mfc_flags |= MFC_NOTIFY;
2189 
2190 	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2191 	read_unlock(&mrt_lock);
2192 	return err;
2193 }
2194 
2195 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2196 			     u32 pid, u32 seq, struct mfc6_cache *c)
2197 {
2198 	struct nlmsghdr *nlh;
2199 	struct rtmsg *rtm;
2200 
2201 	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2202 	if (nlh == NULL)
2203 		return -EMSGSIZE;
2204 
2205 	rtm = nlmsg_data(nlh);
2206 	rtm->rtm_family   = RTNL_FAMILY_IPMR;
2207 	rtm->rtm_dst_len  = 128;
2208 	rtm->rtm_src_len  = 128;
2209 	rtm->rtm_tos      = 0;
2210 	rtm->rtm_table    = mrt->id;
2211 	NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2212 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2213 	rtm->rtm_protocol = RTPROT_UNSPEC;
2214 	rtm->rtm_flags    = 0;
2215 
2216 	NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin);
2217 	NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp);
2218 
2219 	if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
2220 		goto nla_put_failure;
2221 
2222 	return nlmsg_end(skb, nlh);
2223 
2224 nla_put_failure:
2225 	nlmsg_cancel(skb, nlh);
2226 	return -EMSGSIZE;
2227 }
2228 
2229 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2230 {
2231 	struct net *net = sock_net(skb->sk);
2232 	struct mr6_table *mrt;
2233 	struct mfc6_cache *mfc;
2234 	unsigned int t = 0, s_t;
2235 	unsigned int h = 0, s_h;
2236 	unsigned int e = 0, s_e;
2237 
2238 	s_t = cb->args[0];
2239 	s_h = cb->args[1];
2240 	s_e = cb->args[2];
2241 
2242 	read_lock(&mrt_lock);
2243 	ip6mr_for_each_table(mrt, net) {
2244 		if (t < s_t)
2245 			goto next_table;
2246 		if (t > s_t)
2247 			s_h = 0;
2248 		for (h = s_h; h < MFC6_LINES; h++) {
2249 			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2250 				if (e < s_e)
2251 					goto next_entry;
2252 				if (ip6mr_fill_mroute(mrt, skb,
2253 						      NETLINK_CB(cb->skb).pid,
2254 						      cb->nlh->nlmsg_seq,
2255 						      mfc) < 0)
2256 					goto done;
2257 next_entry:
2258 				e++;
2259 			}
2260 			e = s_e = 0;
2261 		}
2262 		s_h = 0;
2263 next_table:
2264 		t++;
2265 	}
2266 done:
2267 	read_unlock(&mrt_lock);
2268 
2269 	cb->args[2] = e;
2270 	cb->args[1] = h;
2271 	cb->args[0] = t;
2272 
2273 	return skb->len;
2274 }
2275