xref: /openbmc/linux/net/ipv6/ip6mr.c (revision 8ac727c1)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <asm/system.h>
20 #include <asm/uaccess.h>
21 #include <linux/types.h>
22 #include <linux/sched.h>
23 #include <linux/errno.h>
24 #include <linux/timer.h>
25 #include <linux/mm.h>
26 #include <linux/kernel.h>
27 #include <linux/fcntl.h>
28 #include <linux/stat.h>
29 #include <linux/socket.h>
30 #include <linux/inet.h>
31 #include <linux/netdevice.h>
32 #include <linux/inetdevice.h>
33 #include <linux/proc_fs.h>
34 #include <linux/seq_file.h>
35 #include <linux/init.h>
36 #include <linux/slab.h>
37 #include <linux/compat.h>
38 #include <net/protocol.h>
39 #include <linux/skbuff.h>
40 #include <net/sock.h>
41 #include <net/raw.h>
42 #include <linux/notifier.h>
43 #include <linux/if_arp.h>
44 #include <net/checksum.h>
45 #include <net/netlink.h>
46 #include <net/fib_rules.h>
47 
48 #include <net/ipv6.h>
49 #include <net/ip6_route.h>
50 #include <linux/mroute6.h>
51 #include <linux/pim.h>
52 #include <net/addrconf.h>
53 #include <linux/netfilter_ipv6.h>
54 #include <net/ip6_checksum.h>
55 
56 struct mr6_table {
57 	struct list_head	list;
58 #ifdef CONFIG_NET_NS
59 	struct net		*net;
60 #endif
61 	u32			id;
62 	struct sock		*mroute6_sk;
63 	struct timer_list	ipmr_expire_timer;
64 	struct list_head	mfc6_unres_queue;
65 	struct list_head	mfc6_cache_array[MFC6_LINES];
66 	struct mif_device	vif6_table[MAXMIFS];
67 	int			maxvif;
68 	atomic_t		cache_resolve_queue_len;
69 	int			mroute_do_assert;
70 	int			mroute_do_pim;
71 #ifdef CONFIG_IPV6_PIMSM_V2
72 	int			mroute_reg_vif_num;
73 #endif
74 };
75 
76 struct ip6mr_rule {
77 	struct fib_rule		common;
78 };
79 
80 struct ip6mr_result {
81 	struct mr6_table	*mrt;
82 };
83 
84 /* Big lock, protecting the vif table, mrt cache and mroute socket state.
85    Note that changes are serialized via rtnl_lock.
86  */
87 
88 static DEFINE_RWLOCK(mrt_lock);
89 
90 /*
91  *	Multicast router control variables
92  */
93 
94 #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
95 
96 /* Special spinlock for queue of unresolved entries */
97 static DEFINE_SPINLOCK(mfc_unres_lock);
98 
99 /* We have returned to Alan's original scheme. The hash table of resolved
100    entries is changed only in process context and is protected by the
101    weak lock mrt_lock. The queue of unresolved entries is protected by
102    the strong spinlock mfc_unres_lock.
103 
104    This way the data path is entirely free of exclusive locks.
105  */
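
/* A minimal sketch of the lock pairing the scheme above implies.  The
 * forwarding path takes only the read side of mrt_lock, while control
 * path updates take the write side in process context:
 *
 *	read_lock(&mrt_lock);				(data path)
 *	c = ip6mr_cache_find(mrt, &src, &grp);
 *	...
 *	read_unlock(&mrt_lock);
 *
 *	write_lock_bh(&mrt_lock);			(setsockopt path)
 *	list_add(&c->list, &mrt->mfc6_cache_array[line]);
 *	write_unlock_bh(&mrt_lock);
 *
 * Unresolved entries may be touched from softirq context, hence the
 * strong mfc_unres_lock is taken with spin_lock_bh() around them.
 */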
106 
107 static struct kmem_cache *mrt_cachep __read_mostly;
108 
109 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
110 static void ip6mr_free_table(struct mr6_table *mrt);
111 
112 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
113 			  struct sk_buff *skb, struct mfc6_cache *cache);
114 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
115 			      mifi_t mifi, int assert);
116 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
117 			       struct mfc6_cache *c, struct rtmsg *rtm);
118 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
119 			       struct netlink_callback *cb);
120 static void mroute_clean_tables(struct mr6_table *mrt);
121 static void ipmr_expire_process(unsigned long arg);
122 
123 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
124 #define ip6mr_for_each_table(mrt, net) \
125 	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
126 
127 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
128 {
129 	struct mr6_table *mrt;
130 
131 	ip6mr_for_each_table(mrt, net) {
132 		if (mrt->id == id)
133 			return mrt;
134 	}
135 	return NULL;
136 }
137 
138 static int ip6mr_fib_lookup(struct net *net, struct flowi *flp,
139 			    struct mr6_table **mrt)
140 {
141 	struct ip6mr_result res;
142 	struct fib_lookup_arg arg = { .result = &res, };
143 	int err;
144 
145 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flp, 0, &arg);
146 	if (err < 0)
147 		return err;
148 	*mrt = res.mrt;
149 	return 0;
150 }
151 
152 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
153 			     int flags, struct fib_lookup_arg *arg)
154 {
155 	struct ip6mr_result *res = arg->result;
156 	struct mr6_table *mrt;
157 
158 	switch (rule->action) {
159 	case FR_ACT_TO_TBL:
160 		break;
161 	case FR_ACT_UNREACHABLE:
162 		return -ENETUNREACH;
163 	case FR_ACT_PROHIBIT:
164 		return -EACCES;
165 	case FR_ACT_BLACKHOLE:
166 	default:
167 		return -EINVAL;
168 	}
169 
170 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
171 	if (mrt == NULL)
172 		return -EAGAIN;
173 	res->mrt = mrt;
174 	return 0;
175 }
176 
177 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
178 {
179 	return 1;
180 }
181 
182 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
183 	FRA_GENERIC_POLICY,
184 };
185 
186 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
187 				struct fib_rule_hdr *frh, struct nlattr **tb)
188 {
189 	return 0;
190 }
191 
192 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
193 			      struct nlattr **tb)
194 {
195 	return 1;
196 }
197 
198 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
199 			   struct fib_rule_hdr *frh)
200 {
201 	frh->dst_len = 0;
202 	frh->src_len = 0;
203 	frh->tos     = 0;
204 	return 0;
205 }
206 
207 static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
208 	.family		= RTNL_FAMILY_IP6MR,
209 	.rule_size	= sizeof(struct ip6mr_rule),
210 	.addr_size	= sizeof(struct in6_addr),
211 	.action		= ip6mr_rule_action,
212 	.match		= ip6mr_rule_match,
213 	.configure	= ip6mr_rule_configure,
214 	.compare	= ip6mr_rule_compare,
215 	.default_pref	= fib_default_rule_pref,
216 	.fill		= ip6mr_rule_fill,
217 	.nlgroup	= RTNLGRP_IPV6_RULE,
218 	.policy		= ip6mr_rule_policy,
219 	.owner		= THIS_MODULE,
220 };
221 
222 static int __net_init ip6mr_rules_init(struct net *net)
223 {
224 	struct fib_rules_ops *ops;
225 	struct mr6_table *mrt;
226 	int err;
227 
228 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
229 	if (IS_ERR(ops))
230 		return PTR_ERR(ops);
231 
232 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
233 
234 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
235 	if (mrt == NULL) {
236 		err = -ENOMEM;
237 		goto err1;
238 	}
239 
240 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
241 	if (err < 0)
242 		goto err2;
243 
244 	net->ipv6.mr6_rules_ops = ops;
245 	return 0;
246 
247 err2:
248 	kfree(mrt);
249 err1:
250 	fib_rules_unregister(ops);
251 	return err;
252 }
253 
254 static void __net_exit ip6mr_rules_exit(struct net *net)
255 {
256 	struct mr6_table *mrt, *next;
257 
258 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
259 		list_del(&mrt->list);
260 		ip6mr_free_table(mrt);
261 	}
262 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
263 }
264 #else
265 #define ip6mr_for_each_table(mrt, net) \
266 	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
267 
268 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
269 {
270 	return net->ipv6.mrt6;
271 }
272 
273 static int ip6mr_fib_lookup(struct net *net, struct flowi *flp,
274 			    struct mr6_table **mrt)
275 {
276 	*mrt = net->ipv6.mrt6;
277 	return 0;
278 }
279 
280 static int __net_init ip6mr_rules_init(struct net *net)
281 {
282 	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
283 	return net->ipv6.mrt6 ? 0 : -ENOMEM;
284 }
285 
286 static void __net_exit ip6mr_rules_exit(struct net *net)
287 {
288 	ip6mr_free_table(net->ipv6.mrt6);
289 }
290 #endif
291 
292 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
293 {
294 	struct mr6_table *mrt;
295 	unsigned int i;
296 
297 	mrt = ip6mr_get_table(net, id);
298 	if (mrt != NULL)
299 		return mrt;
300 
301 	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
302 	if (mrt == NULL)
303 		return NULL;
304 	mrt->id = id;
305 	write_pnet(&mrt->net, net);
306 
307 	/* Forwarding cache */
308 	for (i = 0; i < MFC6_LINES; i++)
309 		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
310 
311 	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
312 
313 	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
314 		    (unsigned long)mrt);
315 
316 #ifdef CONFIG_IPV6_PIMSM_V2
317 	mrt->mroute_reg_vif_num = -1;
318 #endif
319 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
320 	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
321 #endif
322 	return mrt;
323 }
324 
325 static void ip6mr_free_table(struct mr6_table *mrt)
326 {
327 	del_timer(&mrt->ipmr_expire_timer);
328 	mroute_clean_tables(mrt);
329 	kfree(mrt);
330 }
331 
332 #ifdef CONFIG_PROC_FS
333 
334 struct ipmr_mfc_iter {
335 	struct seq_net_private p;
336 	struct mr6_table *mrt;
337 	struct list_head *cache;
338 	int ct;
339 };
340 
341 
342 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
343 					   struct ipmr_mfc_iter *it, loff_t pos)
344 {
345 	struct mr6_table *mrt = it->mrt;
346 	struct mfc6_cache *mfc;
347 
348 	read_lock(&mrt_lock);
349 	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
350 		it->cache = &mrt->mfc6_cache_array[it->ct];
351 		list_for_each_entry(mfc, it->cache, list)
352 			if (pos-- == 0)
353 				return mfc;
354 	}
355 	read_unlock(&mrt_lock);
356 
357 	spin_lock_bh(&mfc_unres_lock);
358 	it->cache = &mrt->mfc6_unres_queue;
359 	list_for_each_entry(mfc, it->cache, list)
360 		if (pos-- == 0)
361 			return mfc;
362 	spin_unlock_bh(&mfc_unres_lock);
363 
364 	it->cache = NULL;
365 	return NULL;
366 }
367 
368 /*
369  *	The /proc interfaces to multicast routing: /proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
370  */
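
/* Example of what /proc/net/ip6_mr_vif renders (hypothetical values;
 * the format string lives in ip6mr_vif_seq_show() below), with one
 * physical interface and one register mif:
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0          123456     789    654321     987 00000
 *	 1 pim6reg            0       0      1200      10 00001
 *
 * The Flags column is vif->flags in hex; 00001 is MIFF_REGISTER.
 */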
371 
372 struct ipmr_vif_iter {
373 	struct seq_net_private p;
374 	struct mr6_table *mrt;
375 	int ct;
376 };
377 
378 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
379 					    struct ipmr_vif_iter *iter,
380 					    loff_t pos)
381 {
382 	struct mr6_table *mrt = iter->mrt;
383 
384 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
385 		if (!MIF_EXISTS(mrt, iter->ct))
386 			continue;
387 		if (pos-- == 0)
388 			return &mrt->vif6_table[iter->ct];
389 	}
390 	return NULL;
391 }
392 
393 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
394 	__acquires(mrt_lock)
395 {
396 	struct ipmr_vif_iter *iter = seq->private;
397 	struct net *net = seq_file_net(seq);
398 	struct mr6_table *mrt;
399 
400 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
401 	if (mrt == NULL)
402 		return ERR_PTR(-ENOENT);
403 
404 	iter->mrt = mrt;
405 
406 	read_lock(&mrt_lock);
407 	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
408 		: SEQ_START_TOKEN;
409 }
410 
411 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
412 {
413 	struct ipmr_vif_iter *iter = seq->private;
414 	struct net *net = seq_file_net(seq);
415 	struct mr6_table *mrt = iter->mrt;
416 
417 	++*pos;
418 	if (v == SEQ_START_TOKEN)
419 		return ip6mr_vif_seq_idx(net, iter, 0);
420 
421 	while (++iter->ct < mrt->maxvif) {
422 		if (!MIF_EXISTS(mrt, iter->ct))
423 			continue;
424 		return &mrt->vif6_table[iter->ct];
425 	}
426 	return NULL;
427 }
428 
429 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
430 	__releases(mrt_lock)
431 {
432 	read_unlock(&mrt_lock);
433 }
434 
435 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
436 {
437 	struct ipmr_vif_iter *iter = seq->private;
438 	struct mr6_table *mrt = iter->mrt;
439 
440 	if (v == SEQ_START_TOKEN) {
441 		seq_puts(seq,
442 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
443 	} else {
444 		const struct mif_device *vif = v;
445 		const char *name = vif->dev ? vif->dev->name : "none";
446 
447 		seq_printf(seq,
448 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
449 			   vif - mrt->vif6_table,
450 			   name, vif->bytes_in, vif->pkt_in,
451 			   vif->bytes_out, vif->pkt_out,
452 			   vif->flags);
453 	}
454 	return 0;
455 }
456 
457 static const struct seq_operations ip6mr_vif_seq_ops = {
458 	.start = ip6mr_vif_seq_start,
459 	.next  = ip6mr_vif_seq_next,
460 	.stop  = ip6mr_vif_seq_stop,
461 	.show  = ip6mr_vif_seq_show,
462 };
463 
464 static int ip6mr_vif_open(struct inode *inode, struct file *file)
465 {
466 	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
467 			    sizeof(struct ipmr_vif_iter));
468 }
469 
470 static const struct file_operations ip6mr_vif_fops = {
471 	.owner	 = THIS_MODULE,
472 	.open    = ip6mr_vif_open,
473 	.read    = seq_read,
474 	.llseek  = seq_lseek,
475 	.release = seq_release_net,
476 };
477 
478 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
479 {
480 	struct ipmr_mfc_iter *it = seq->private;
481 	struct net *net = seq_file_net(seq);
482 	struct mr6_table *mrt;
483 
484 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
485 	if (mrt == NULL)
486 		return ERR_PTR(-ENOENT);
487 
488 	it->mrt = mrt;
489 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
490 		: SEQ_START_TOKEN;
491 }
492 
493 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
494 {
495 	struct mfc6_cache *mfc = v;
496 	struct ipmr_mfc_iter *it = seq->private;
497 	struct net *net = seq_file_net(seq);
498 	struct mr6_table *mrt = it->mrt;
499 
500 	++*pos;
501 
502 	if (v == SEQ_START_TOKEN)
503 		return ipmr_mfc_seq_idx(net, seq->private, 0);
504 
505 	if (mfc->list.next != it->cache)
506 		return list_entry(mfc->list.next, struct mfc6_cache, list);
507 
508 	if (it->cache == &mrt->mfc6_unres_queue)
509 		goto end_of_list;
510 
511 	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
512 
513 	while (++it->ct < MFC6_LINES) {
514 		it->cache = &mrt->mfc6_cache_array[it->ct];
515 		if (list_empty(it->cache))
516 			continue;
517 		return list_first_entry(it->cache, struct mfc6_cache, list);
518 	}
519 
520 	/* exhausted cache_array, show unresolved */
521 	read_unlock(&mrt_lock);
522 	it->cache = &mrt->mfc6_unres_queue;
523 	it->ct = 0;
524 
525 	spin_lock_bh(&mfc_unres_lock);
526 	if (!list_empty(it->cache))
527 		return list_first_entry(it->cache, struct mfc6_cache, list);
528 
529  end_of_list:
530 	spin_unlock_bh(&mfc_unres_lock);
531 	it->cache = NULL;
532 
533 	return NULL;
534 }
535 
536 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
537 {
538 	struct ipmr_mfc_iter *it = seq->private;
539 	struct mr6_table *mrt = it->mrt;
540 
541 	if (it->cache == &mrt->mfc6_unres_queue)
542 		spin_unlock_bh(&mfc_unres_lock);
543 	else if (it->cache == &mrt->mfc6_cache_array[it->ct])
544 		read_unlock(&mrt_lock);
545 }
546 
547 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
548 {
549 	int n;
550 
551 	if (v == SEQ_START_TOKEN) {
552 		seq_puts(seq,
553 			 "Group                            "
554 			 "Origin                           "
555 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
556 	} else {
557 		const struct mfc6_cache *mfc = v;
558 		const struct ipmr_mfc_iter *it = seq->private;
559 		struct mr6_table *mrt = it->mrt;
560 
561 		seq_printf(seq, "%pI6 %pI6 %-3hd",
562 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
563 			   mfc->mf6c_parent);
564 
565 		if (it->cache != &mrt->mfc6_unres_queue) {
566 			seq_printf(seq, " %8lu %8lu %8lu",
567 				   mfc->mfc_un.res.pkt,
568 				   mfc->mfc_un.res.bytes,
569 				   mfc->mfc_un.res.wrong_if);
570 			for (n = mfc->mfc_un.res.minvif;
571 			     n < mfc->mfc_un.res.maxvif; n++) {
572 				if (MIF_EXISTS(mrt, n) &&
573 				    mfc->mfc_un.res.ttls[n] < 255)
574 					seq_printf(seq,
575 						   " %2d:%-3d",
576 						   n, mfc->mfc_un.res.ttls[n]);
577 			}
578 		} else {
579 			/* unresolved mfc_caches don't contain
580 			 * pkt, bytes and wrong_if values
581 			 */
582 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
583 		}
584 		seq_putc(seq, '\n');
585 	}
586 	return 0;
587 }
588 
589 static const struct seq_operations ipmr_mfc_seq_ops = {
590 	.start = ipmr_mfc_seq_start,
591 	.next  = ipmr_mfc_seq_next,
592 	.stop  = ipmr_mfc_seq_stop,
593 	.show  = ipmr_mfc_seq_show,
594 };
595 
596 static int ipmr_mfc_open(struct inode *inode, struct file *file)
597 {
598 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
599 			    sizeof(struct ipmr_mfc_iter));
600 }
601 
602 static const struct file_operations ip6mr_mfc_fops = {
603 	.owner	 = THIS_MODULE,
604 	.open    = ipmr_mfc_open,
605 	.read    = seq_read,
606 	.llseek  = seq_lseek,
607 	.release = seq_release_net,
608 };
609 #endif
610 
611 #ifdef CONFIG_IPV6_PIMSM_V2
612 
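/* Handle an incoming PIM register packet: validate the register header
 * and checksum, strip the outer headers, and feed the encapsulated
 * multicast packet back into the stack through the pim6reg device.
 */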
613 static int pim6_rcv(struct sk_buff *skb)
614 {
615 	struct pimreghdr *pim;
616 	struct ipv6hdr   *encap;
617 	struct net_device  *reg_dev = NULL;
618 	struct net *net = dev_net(skb->dev);
619 	struct mr6_table *mrt;
620 	struct flowi fl = {
621 		.iif	= skb->dev->ifindex,
622 		.mark	= skb->mark,
623 	};
624 	int reg_vif_num;
625 
626 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
627 		goto drop;
628 
629 	pim = (struct pimreghdr *)skb_transport_header(skb);
630 	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
631 	    (pim->flags & PIM_NULL_REGISTER) ||
632 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
633 			     sizeof(*pim), IPPROTO_PIM,
634 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
635 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
636 		goto drop;
637 
638 	/* Check that the inner packet is destined to a multicast group */
639 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
640 				   sizeof(*pim));
641 
642 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
643 	    encap->payload_len == 0 ||
644 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
645 		goto drop;
646 
647 	if (ip6mr_fib_lookup(net, &fl, &mrt) < 0)
648 		goto drop;
649 	reg_vif_num = mrt->mroute_reg_vif_num;
650 
651 	read_lock(&mrt_lock);
652 	if (reg_vif_num >= 0)
653 		reg_dev = mrt->vif6_table[reg_vif_num].dev;
654 	if (reg_dev)
655 		dev_hold(reg_dev);
656 	read_unlock(&mrt_lock);
657 
658 	if (reg_dev == NULL)
659 		goto drop;
660 
661 	skb->mac_header = skb->network_header;
662 	skb_pull(skb, (u8 *)encap - skb->data);
663 	skb_reset_network_header(skb);
664 	skb->protocol = htons(ETH_P_IPV6);
665 	skb->ip_summed = CHECKSUM_NONE;
666 	skb->pkt_type = PACKET_HOST;
667 
668 	skb_tunnel_rx(skb, reg_dev);
669 
670 	netif_rx(skb);
671 
672 	dev_put(reg_dev);
673 	return 0;
674  drop:
675 	kfree_skb(skb);
676 	return 0;
677 }
678 
679 static const struct inet6_protocol pim6_protocol = {
680 	.handler	=	pim6_rcv,
681 };
682 
683 /* Service routines creating virtual interfaces: PIMREG */
684 
685 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
686 				      struct net_device *dev)
687 {
688 	struct net *net = dev_net(dev);
689 	struct mr6_table *mrt;
690 	struct flowi fl = {
691 		.oif		= dev->ifindex,
692 		.iif		= skb->skb_iif,
693 		.mark		= skb->mark,
694 	};
695 	int err;
696 
697 	err = ip6mr_fib_lookup(net, &fl, &mrt);
698 	if (err < 0)
699 		return err;
700 
701 	read_lock(&mrt_lock);
702 	dev->stats.tx_bytes += skb->len;
703 	dev->stats.tx_packets++;
704 	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
705 	read_unlock(&mrt_lock);
706 	kfree_skb(skb);
707 	return NETDEV_TX_OK;
708 }
709 
710 static const struct net_device_ops reg_vif_netdev_ops = {
711 	.ndo_start_xmit	= reg_vif_xmit,
712 };
713 
714 static void reg_vif_setup(struct net_device *dev)
715 {
716 	dev->type		= ARPHRD_PIMREG;
717 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
718 	dev->flags		= IFF_NOARP;
719 	dev->netdev_ops		= &reg_vif_netdev_ops;
720 	dev->destructor		= free_netdev;
721 	dev->features		|= NETIF_F_NETNS_LOCAL;
722 }
723 
724 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
725 {
726 	struct net_device *dev;
727 	char name[IFNAMSIZ];
728 
729 	if (mrt->id == RT6_TABLE_DFLT)
730 		sprintf(name, "pim6reg");
731 	else
732 		sprintf(name, "pim6reg%u", mrt->id);
733 
734 	dev = alloc_netdev(0, name, reg_vif_setup);
735 	if (dev == NULL)
736 		return NULL;
737 
738 	dev_net_set(dev, net);
739 
740 	if (register_netdevice(dev)) {
741 		free_netdev(dev);
742 		return NULL;
743 	}
744 	dev->iflink = 0;
745 
746 	if (dev_open(dev))
747 		goto failure;
748 
749 	dev_hold(dev);
750 	return dev;
751 
752 failure:
753 	/* allow the register to be completed before unregistering. */
754 	rtnl_unlock();
755 	rtnl_lock();
756 
757 	unregister_netdevice(dev);
758 	return NULL;
759 }
760 #endif
761 
762 /*
763  *	Delete a VIF entry
764  */
765 
766 static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
767 {
768 	struct mif_device *v;
769 	struct net_device *dev;
770 	struct inet6_dev *in6_dev;
771 
772 	if (vifi < 0 || vifi >= mrt->maxvif)
773 		return -EADDRNOTAVAIL;
774 
775 	v = &mrt->vif6_table[vifi];
776 
777 	write_lock_bh(&mrt_lock);
778 	dev = v->dev;
779 	v->dev = NULL;
780 
781 	if (!dev) {
782 		write_unlock_bh(&mrt_lock);
783 		return -EADDRNOTAVAIL;
784 	}
785 
786 #ifdef CONFIG_IPV6_PIMSM_V2
787 	if (vifi == mrt->mroute_reg_vif_num)
788 		mrt->mroute_reg_vif_num = -1;
789 #endif
790 
791 	if (vifi + 1 == mrt->maxvif) {
792 		int tmp;
793 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
794 			if (MIF_EXISTS(mrt, tmp))
795 				break;
796 		}
797 		mrt->maxvif = tmp + 1;
798 	}
799 
800 	write_unlock_bh(&mrt_lock);
801 
802 	dev_set_allmulti(dev, -1);
803 
804 	in6_dev = __in6_dev_get(dev);
805 	if (in6_dev)
806 		in6_dev->cnf.mc_forwarding--;
807 
808 	if (v->flags & MIFF_REGISTER)
809 		unregister_netdevice_queue(dev, head);
810 
811 	dev_put(dev);
812 	return 0;
813 }
814 
815 static inline void ip6mr_cache_free(struct mfc6_cache *c)
816 {
817 	kmem_cache_free(mrt_cachep, c);
818 }
819 
820 /* Destroy an unresolved cache entry, killing queued skbs
821    and reporting error to netlink readers.
822  */
823 
824 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
825 {
826 	struct net *net = read_pnet(&mrt->net);
827 	struct sk_buff *skb;
828 
829 	atomic_dec(&mrt->cache_resolve_queue_len);
830 
831 	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
832 		if (ipv6_hdr(skb)->version == 0) {
833 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
834 			nlh->nlmsg_type = NLMSG_ERROR;
835 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
836 			skb_trim(skb, nlh->nlmsg_len);
837 			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
838 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
839 		} else
840 			kfree_skb(skb);
841 	}
842 
843 	ip6mr_cache_free(c);
844 }
845 
846 
847 /* Timer handler for the queue of unresolved entries. */
848 
849 static void ipmr_do_expire_process(struct mr6_table *mrt)
850 {
851 	unsigned long now = jiffies;
852 	unsigned long expires = 10 * HZ;
853 	struct mfc6_cache *c, *next;
854 
855 	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
856 		if (time_after(c->mfc_un.unres.expires, now)) {
857 			/* not yet... */
858 			unsigned long interval = c->mfc_un.unres.expires - now;
859 			if (interval < expires)
860 				expires = interval;
861 			continue;
862 		}
863 
864 		list_del(&c->list);
865 		ip6mr_destroy_unres(mrt, c);
866 	}
867 
868 	if (!list_empty(&mrt->mfc6_unres_queue))
869 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
870 }
871 
872 static void ipmr_expire_process(unsigned long arg)
873 {
874 	struct mr6_table *mrt = (struct mr6_table *)arg;
875 
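	/* Runs in timer (softirq) context, so never spin on mfc_unres_lock
	 * against a process-context holder; if it is contended, simply try
	 * again one jiffy later.
	 */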
876 	if (!spin_trylock(&mfc_unres_lock)) {
877 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
878 		return;
879 	}
880 
881 	if (!list_empty(&mrt->mfc6_unres_queue))
882 		ipmr_do_expire_process(mrt);
883 
884 	spin_unlock(&mfc_unres_lock);
885 }
886 
887 /* Fill the oif list. Called with mrt_lock held for writing. */
888 
889 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
890 				    unsigned char *ttls)
891 {
892 	int vifi;
893 
894 	cache->mfc_un.res.minvif = MAXMIFS;
895 	cache->mfc_un.res.maxvif = 0;
896 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
897 
898 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
899 		if (MIF_EXISTS(mrt, vifi) &&
900 		    ttls[vifi] && ttls[vifi] < 255) {
901 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
902 			if (cache->mfc_un.res.minvif > vifi)
903 				cache->mfc_un.res.minvif = vifi;
904 			if (cache->mfc_un.res.maxvif <= vifi)
905 				cache->mfc_un.res.maxvif = vifi + 1;
906 		}
907 	}
908 }
909 
910 static int mif6_add(struct net *net, struct mr6_table *mrt,
911 		    struct mif6ctl *vifc, int mrtsock)
912 {
913 	int vifi = vifc->mif6c_mifi;
914 	struct mif_device *v = &mrt->vif6_table[vifi];
915 	struct net_device *dev;
916 	struct inet6_dev *in6_dev;
917 	int err;
918 
919 	/* Is this mif already in use? */
920 	if (MIF_EXISTS(mrt, vifi))
921 		return -EADDRINUSE;
922 
923 	switch (vifc->mif6c_flags) {
924 #ifdef CONFIG_IPV6_PIMSM_V2
925 	case MIFF_REGISTER:
926 		/*
927 		 * Special Purpose VIF in PIM
928 		 * All the packets will be sent to the daemon
929 		 */
930 		if (mrt->mroute_reg_vif_num >= 0)
931 			return -EADDRINUSE;
932 		dev = ip6mr_reg_vif(net, mrt);
933 		if (!dev)
934 			return -ENOBUFS;
935 		err = dev_set_allmulti(dev, 1);
936 		if (err) {
937 			unregister_netdevice(dev);
938 			dev_put(dev);
939 			return err;
940 		}
941 		break;
942 #endif
943 	case 0:
944 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
945 		if (!dev)
946 			return -EADDRNOTAVAIL;
947 		err = dev_set_allmulti(dev, 1);
948 		if (err) {
949 			dev_put(dev);
950 			return err;
951 		}
952 		break;
953 	default:
954 		return -EINVAL;
955 	}
956 
957 	in6_dev = __in6_dev_get(dev);
958 	if (in6_dev)
959 		in6_dev->cnf.mc_forwarding++;
960 
961 	/*
962 	 *	Fill in the VIF structures
963 	 */
964 	v->rate_limit = vifc->vifc_rate_limit;
965 	v->flags = vifc->mif6c_flags;
966 	if (!mrtsock)
967 		v->flags |= VIFF_STATIC;
968 	v->threshold = vifc->vifc_threshold;
969 	v->bytes_in = 0;
970 	v->bytes_out = 0;
971 	v->pkt_in = 0;
972 	v->pkt_out = 0;
973 	v->link = dev->ifindex;
974 	if (v->flags & MIFF_REGISTER)
975 		v->link = dev->iflink;
976 
977 	/* And finish the update by writing the critical data */
978 	write_lock_bh(&mrt_lock);
979 	v->dev = dev;
980 #ifdef CONFIG_IPV6_PIMSM_V2
981 	if (v->flags & MIFF_REGISTER)
982 		mrt->mroute_reg_vif_num = vifi;
983 #endif
984 	if (vifi + 1 > mrt->maxvif)
985 		mrt->maxvif = vifi + 1;
986 	write_unlock_bh(&mrt_lock);
987 	return 0;
988 }
989 
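/* Look up a resolved (S,G) entry. MFC6_HASH() (from <linux/mroute6.h>)
 * selects the bucket from the group and origin addresses; callers run
 * under mrt_lock.
 */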
990 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
991 					   struct in6_addr *origin,
992 					   struct in6_addr *mcastgrp)
993 {
994 	int line = MFC6_HASH(mcastgrp, origin);
995 	struct mfc6_cache *c;
996 
997 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
998 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
999 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1000 			return c;
1001 	}
1002 	return NULL;
1003 }
1004 
1005 /*
1006  *	Allocate a multicast cache entry
1007  */
1008 static struct mfc6_cache *ip6mr_cache_alloc(void)
1009 {
1010 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1011 	if (c == NULL)
1012 		return NULL;
1013 	c->mfc_un.res.minvif = MAXMIFS;
1014 	return c;
1015 }
1016 
1017 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1018 {
1019 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1020 	if (c == NULL)
1021 		return NULL;
1022 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
1023 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
1024 	return c;
1025 }
1026 
1027 /*
1028  *	A cache entry has moved from the unresolved queue to the resolved state
1029  */
1030 
1031 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1032 				struct mfc6_cache *uc, struct mfc6_cache *c)
1033 {
1034 	struct sk_buff *skb;
1035 
1036 	/*
1037 	 *	Play the pending entries through our router
1038 	 */
1039 
1040 	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1041 		if (ipv6_hdr(skb)->version == 0) {
1042 			int err;
1043 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1044 
1045 			if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
1046 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1047 			} else {
1048 				nlh->nlmsg_type = NLMSG_ERROR;
1049 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
1050 				skb_trim(skb, nlh->nlmsg_len);
1051 				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
1052 			}
1053 			err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
1054 		} else
1055 			ip6_mr_forward(net, mrt, skb, c);
1056 	}
1057 }
1058 
1059 /*
1060  *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1061  *	expects the following bizarre scheme.
1062  *
1063  *	Called under mrt_lock.
1064  */
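
/* A minimal sketch (assumed, not part of this file) of the pim6sd side
 * of that scheme: the daemon recv()s on the raw ICMPv6 socket it issued
 * MRT6_INIT on, and interprets the payload as a struct mrt6msg whenever
 * im6_mbz is zero (im6_mbz overlaps the ICMPv6 type field, and type 0
 * is reserved, so real ICMPv6 traffic never has a zero byte there):
 *
 *	struct mrt6msg msg;
 *	ssize_t n = recv(mrt6_sock, &msg, sizeof(msg), 0);
 *
 *	if (n >= (ssize_t)sizeof(msg) && msg.im6_mbz == 0 &&
 *	    msg.im6_msgtype == MRT6MSG_NOCACHE) {
 *		... resolve (im6_src, im6_dst), then MRT6_ADD_MFC ...
 *	}
 */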
1065 
1066 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1067 			      mifi_t mifi, int assert)
1068 {
1069 	struct sk_buff *skb;
1070 	struct mrt6msg *msg;
1071 	int ret;
1072 
1073 #ifdef CONFIG_IPV6_PIMSM_V2
1074 	if (assert == MRT6MSG_WHOLEPKT)
1075 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1076 						+sizeof(*msg));
1077 	else
1078 #endif
1079 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1080 
1081 	if (!skb)
1082 		return -ENOBUFS;
1083 
1084 	/* I suppose that internal messages
1085 	 * do not require checksums */
1086 
1087 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1088 
1089 #ifdef CONFIG_IPV6_PIMSM_V2
1090 	if (assert == MRT6MSG_WHOLEPKT) {
1091 		/* Ugly, but we have no choice with this interface.
1092 		   Duplicate old header, fix length etc.
1093 		   And all this only to mangle msg->im6_msgtype and
1094 		   to set msg->im6_mbz to "mbz" :-)
1095 		 */
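		/* Resulting upcall layout: the whole original packet
		 * follows the header we are about to prepend:
		 *
		 *	+----------------+--------------------------+
		 *	| struct mrt6msg | original IPv6 packet ... |
		 *	+----------------+--------------------------+
		 */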
1096 		skb_push(skb, -skb_network_offset(pkt));
1097 
1098 		skb_push(skb, sizeof(*msg));
1099 		skb_reset_transport_header(skb);
1100 		msg = (struct mrt6msg *)skb_transport_header(skb);
1101 		msg->im6_mbz = 0;
1102 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1103 		msg->im6_mif = mrt->mroute_reg_vif_num;
1104 		msg->im6_pad = 0;
1105 		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
1106 		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
1107 
1108 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1109 	} else
1110 #endif
1111 	{
1112 	/*
1113 	 *	Copy the IP header
1114 	 */
1115 
1116 	skb_put(skb, sizeof(struct ipv6hdr));
1117 	skb_reset_network_header(skb);
1118 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1119 
1120 	/*
1121 	 *	Add our header
1122 	 */
1123 	skb_put(skb, sizeof(*msg));
1124 	skb_reset_transport_header(skb);
1125 	msg = (struct mrt6msg *)skb_transport_header(skb);
1126 
1127 	msg->im6_mbz = 0;
1128 	msg->im6_msgtype = assert;
1129 	msg->im6_mif = mifi;
1130 	msg->im6_pad = 0;
1131 	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
1132 	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
1133 
1134 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1135 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1136 	}
1137 
1138 	if (mrt->mroute6_sk == NULL) {
1139 		kfree_skb(skb);
1140 		return -EINVAL;
1141 	}
1142 
1143 	/*
1144 	 *	Deliver to user space multicast routing algorithms
1145 	 */
1146 	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1147 	if (ret < 0) {
1148 		if (net_ratelimit())
1149 			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
1150 		kfree_skb(skb);
1151 	}
1152 
1153 	return ret;
1154 }
1155 
1156 /*
1157  *	Queue a packet for resolution. It is attached to a locked unresolved cache entry!
1158  */
1159 
1160 static int
1161 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1162 {
1163 	bool found = false;
1164 	int err;
1165 	struct mfc6_cache *c;
1166 
1167 	spin_lock_bh(&mfc_unres_lock);
1168 	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1169 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1170 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1171 			found = true;
1172 			break;
1173 		}
1174 	}
1175 
1176 	if (!found) {
1177 		/*
1178 		 *	Create a new entry if allowable
1179 		 */
1180 
1181 		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1182 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
1183 			spin_unlock_bh(&mfc_unres_lock);
1184 
1185 			kfree_skb(skb);
1186 			return -ENOBUFS;
1187 		}
1188 
1189 		/*
1190 		 *	Fill in the new cache entry
1191 		 */
1192 		c->mf6c_parent = -1;
1193 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1194 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1195 
1196 		/*
1197 		 *	Reflect first query at pim6sd
1198 		 */
1199 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1200 		if (err < 0) {
1201 			/* If the report failed throw the cache entry
1202 			   out - Brad Parker
1203 			 */
1204 			spin_unlock_bh(&mfc_unres_lock);
1205 
1206 			ip6mr_cache_free(c);
1207 			kfree_skb(skb);
1208 			return err;
1209 		}
1210 
1211 		atomic_inc(&mrt->cache_resolve_queue_len);
1212 		list_add(&c->list, &mrt->mfc6_unres_queue);
1213 
1214 		ipmr_do_expire_process(mrt);
1215 	}
1216 
1217 	/*
1218 	 *	See if we can append the packet
1219 	 */
1220 	if (c->mfc_un.unres.unresolved.qlen > 3) {
1221 		kfree_skb(skb);
1222 		err = -ENOBUFS;
1223 	} else {
1224 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1225 		err = 0;
1226 	}
1227 
1228 	spin_unlock_bh(&mfc_unres_lock);
1229 	return err;
1230 }
1231 
1232 /*
1233  *	MFC6 cache manipulation by user space
1234  */
1235 
1236 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
1237 {
1238 	int line;
1239 	struct mfc6_cache *c, *next;
1240 
1241 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1242 
1243 	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1244 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1245 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1246 			write_lock_bh(&mrt_lock);
1247 			list_del(&c->list);
1248 			write_unlock_bh(&mrt_lock);
1249 
1250 			ip6mr_cache_free(c);
1251 			return 0;
1252 		}
1253 	}
1254 	return -ENOENT;
1255 }
1256 
1257 static int ip6mr_device_event(struct notifier_block *this,
1258 			      unsigned long event, void *ptr)
1259 {
1260 	struct net_device *dev = ptr;
1261 	struct net *net = dev_net(dev);
1262 	struct mr6_table *mrt;
1263 	struct mif_device *v;
1264 	int ct;
1265 	LIST_HEAD(list);
1266 
1267 	if (event != NETDEV_UNREGISTER)
1268 		return NOTIFY_DONE;
1269 
1270 	ip6mr_for_each_table(mrt, net) {
1271 		v = &mrt->vif6_table[0];
1272 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1273 			if (v->dev == dev)
1274 				mif6_delete(mrt, ct, &list);
1275 		}
1276 	}
1277 	unregister_netdevice_many(&list);
1278 
1279 	return NOTIFY_DONE;
1280 }
1281 
1282 static struct notifier_block ip6_mr_notifier = {
1283 	.notifier_call = ip6mr_device_event
1284 };
1285 
1286 /*
1287  *	Setup for IPv6 multicast routing
1288  */
1289 
1290 static int __net_init ip6mr_net_init(struct net *net)
1291 {
1292 	int err;
1293 
1294 	err = ip6mr_rules_init(net);
1295 	if (err < 0)
1296 		goto fail;
1297 
1298 #ifdef CONFIG_PROC_FS
1299 	err = -ENOMEM;
1300 	if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1301 		goto proc_vif_fail;
1302 	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1303 		goto proc_cache_fail;
1304 #endif
1305 
1306 	return 0;
1307 
1308 #ifdef CONFIG_PROC_FS
1309 proc_cache_fail:
1310 	proc_net_remove(net, "ip6_mr_vif");
1311 proc_vif_fail:
1312 	ip6mr_rules_exit(net);
1313 #endif
1314 fail:
1315 	return err;
1316 }
1317 
1318 static void __net_exit ip6mr_net_exit(struct net *net)
1319 {
1320 #ifdef CONFIG_PROC_FS
1321 	proc_net_remove(net, "ip6_mr_cache");
1322 	proc_net_remove(net, "ip6_mr_vif");
1323 #endif
1324 	ip6mr_rules_exit(net);
1325 }
1326 
1327 static struct pernet_operations ip6mr_net_ops = {
1328 	.init = ip6mr_net_init,
1329 	.exit = ip6mr_net_exit,
1330 };
1331 
1332 int __init ip6_mr_init(void)
1333 {
1334 	int err;
1335 
1336 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1337 				       sizeof(struct mfc6_cache),
1338 				       0, SLAB_HWCACHE_ALIGN,
1339 				       NULL);
1340 	if (!mrt_cachep)
1341 		return -ENOMEM;
1342 
1343 	err = register_pernet_subsys(&ip6mr_net_ops);
1344 	if (err)
1345 		goto reg_pernet_fail;
1346 
1347 	err = register_netdevice_notifier(&ip6_mr_notifier);
1348 	if (err)
1349 		goto reg_notif_fail;
1350 #ifdef CONFIG_IPV6_PIMSM_V2
1351 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1352 		printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
1353 		err = -EAGAIN;
1354 		goto add_proto_fail;
1355 	}
1356 #endif
1357 	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, ip6mr_rtm_dumproute);
1358 	return 0;
1359 #ifdef CONFIG_IPV6_PIMSM_V2
1360 add_proto_fail:
1361 	unregister_netdevice_notifier(&ip6_mr_notifier);
1362 #endif
1363 reg_notif_fail:
1364 	unregister_pernet_subsys(&ip6mr_net_ops);
1365 reg_pernet_fail:
1366 	kmem_cache_destroy(mrt_cachep);
1367 	return err;
1368 }
1369 
1370 void ip6_mr_cleanup(void)
1371 {
1372 	unregister_netdevice_notifier(&ip6_mr_notifier);
1373 	unregister_pernet_subsys(&ip6mr_net_ops);
1374 	kmem_cache_destroy(mrt_cachep);
1375 }
1376 
1377 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1378 			 struct mf6cctl *mfc, int mrtsock)
1379 {
1380 	bool found = false;
1381 	int line;
1382 	struct mfc6_cache *uc, *c;
1383 	unsigned char ttls[MAXMIFS];
1384 	int i;
1385 
1386 	if (mfc->mf6cc_parent >= MAXMIFS)
1387 		return -ENFILE;
1388 
1389 	memset(ttls, 255, MAXMIFS);
1390 	for (i = 0; i < MAXMIFS; i++) {
1391 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1392 			ttls[i] = 1;
1393 
1394 	}
1395 
1396 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1397 
1398 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1399 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1400 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1401 			found = true;
1402 			break;
1403 		}
1404 	}
1405 
1406 	if (found) {
1407 		write_lock_bh(&mrt_lock);
1408 		c->mf6c_parent = mfc->mf6cc_parent;
1409 		ip6mr_update_thresholds(mrt, c, ttls);
1410 		if (!mrtsock)
1411 			c->mfc_flags |= MFC_STATIC;
1412 		write_unlock_bh(&mrt_lock);
1413 		return 0;
1414 	}
1415 
1416 	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1417 		return -EINVAL;
1418 
1419 	c = ip6mr_cache_alloc();
1420 	if (c == NULL)
1421 		return -ENOMEM;
1422 
1423 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1424 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1425 	c->mf6c_parent = mfc->mf6cc_parent;
1426 	ip6mr_update_thresholds(mrt, c, ttls);
1427 	if (!mrtsock)
1428 		c->mfc_flags |= MFC_STATIC;
1429 
1430 	write_lock_bh(&mrt_lock);
1431 	list_add(&c->list, &mrt->mfc6_cache_array[line]);
1432 	write_unlock_bh(&mrt_lock);
1433 
1434 	/*
1435 	 *	Check to see if this entry resolves a queued unresolved entry.
1436 	 *	If so, we need to send the queued frames on and tidy up.
1437 	 */
1438 	found = false;
1439 	spin_lock_bh(&mfc_unres_lock);
1440 	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1441 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1442 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1443 			list_del(&uc->list);
1444 			atomic_dec(&mrt->cache_resolve_queue_len);
1445 			found = true;
1446 			break;
1447 		}
1448 	}
1449 	if (list_empty(&mrt->mfc6_unres_queue))
1450 		del_timer(&mrt->ipmr_expire_timer);
1451 	spin_unlock_bh(&mfc_unres_lock);
1452 
1453 	if (found) {
1454 		ip6mr_cache_resolve(net, mrt, uc, c);
1455 		ip6mr_cache_free(uc);
1456 	}
1457 	return 0;
1458 }
1459 
1460 /*
1461  *	Close the multicast socket, and clear the vif tables etc
1462  */
1463 
1464 static void mroute_clean_tables(struct mr6_table *mrt)
1465 {
1466 	int i;
1467 	LIST_HEAD(list);
1468 	struct mfc6_cache *c, *next;
1469 
1470 	/*
1471 	 *	Shut down all active vif entries
1472 	 */
1473 	for (i = 0; i < mrt->maxvif; i++) {
1474 		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1475 			mif6_delete(mrt, i, &list);
1476 	}
1477 	unregister_netdevice_many(&list);
1478 
1479 	/*
1480 	 *	Wipe the cache
1481 	 */
1482 	for (i = 0; i < MFC6_LINES; i++) {
1483 		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1484 			if (c->mfc_flags & MFC_STATIC)
1485 				continue;
1486 			write_lock_bh(&mrt_lock);
1487 			list_del(&c->list);
1488 			write_unlock_bh(&mrt_lock);
1489 
1490 			ip6mr_cache_free(c);
1491 		}
1492 	}
1493 
1494 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1495 		spin_lock_bh(&mfc_unres_lock);
1496 		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1497 			list_del(&c->list);
1498 			ip6mr_destroy_unres(mrt, c);
1499 		}
1500 		spin_unlock_bh(&mfc_unres_lock);
1501 	}
1502 }
1503 
1504 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1505 {
1506 	int err = 0;
1507 	struct net *net = sock_net(sk);
1508 
1509 	rtnl_lock();
1510 	write_lock_bh(&mrt_lock);
1511 	if (likely(mrt->mroute6_sk == NULL)) {
1512 		mrt->mroute6_sk = sk;
1513 		net->ipv6.devconf_all->mc_forwarding++;
1514 	}
1515 	else
1516 		err = -EADDRINUSE;
1517 	write_unlock_bh(&mrt_lock);
1518 
1519 	rtnl_unlock();
1520 
1521 	return err;
1522 }
1523 
1524 int ip6mr_sk_done(struct sock *sk)
1525 {
1526 	int err = -EACCES;
1527 	struct net *net = sock_net(sk);
1528 	struct mr6_table *mrt;
1529 
1530 	rtnl_lock();
1531 	ip6mr_for_each_table(mrt, net) {
1532 		if (sk == mrt->mroute6_sk) {
1533 			write_lock_bh(&mrt_lock);
1534 			mrt->mroute6_sk = NULL;
1535 			net->ipv6.devconf_all->mc_forwarding--;
1536 			write_unlock_bh(&mrt_lock);
1537 
1538 			mroute_clean_tables(mrt);
1539 			err = 0;
1540 			break;
1541 		}
1542 	}
1543 	rtnl_unlock();
1544 
1545 	return err;
1546 }
1547 
1548 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1549 {
1550 	struct mr6_table *mrt;
1551 	struct flowi fl = {
1552 		.iif	= skb->skb_iif,
1553 		.oif	= skb->dev->ifindex,
1554 		.mark	= skb->mark,
1555 	};
1556 
1557 	if (ip6mr_fib_lookup(net, &fl, &mrt) < 0)
1558 		return NULL;
1559 
1560 	return mrt->mroute6_sk;
1561 }
1562 
1563 /*
1564  *	Socket options and virtual interface manipulation. The whole
1565  *	virtual interface system is a complete heap, but unfortunately
1566  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1567  *	MOSPF/PIM router set up we can clean this up.
1568  */
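
/* A minimal user-space sketch (the interface name, addresses and mif
 * numbering are assumptions; error handling omitted) of driving the
 * options handled below:
 *
 *	int one = 1;
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *
 *	struct mif6ctl mif = { .mif6c_mifi = 0,
 *			       .mif6c_pifi = if_nametoindex("eth0") };
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));
 *
 *	struct mf6cctl mfc = { .mf6cc_parent = 0 };
 *	inet_pton(AF_INET6, "2001:db8::1", &mfc.mf6cc_origin.sin6_addr);
 *	inet_pton(AF_INET6, "ff3e::1234", &mfc.mf6cc_mcastgrp.sin6_addr);
 *	IF_SET(0, &mfc.mf6cc_ifset);
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MFC, &mfc, sizeof(mfc));
 */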
1569 
1570 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1571 {
1572 	int ret;
1573 	struct mif6ctl vif;
1574 	struct mf6cctl mfc;
1575 	mifi_t mifi;
1576 	struct net *net = sock_net(sk);
1577 	struct mr6_table *mrt;
1578 
1579 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1580 	if (mrt == NULL)
1581 		return -ENOENT;
1582 
1583 	if (optname != MRT6_INIT) {
1584 		if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
1585 			return -EACCES;
1586 	}
1587 
1588 	switch (optname) {
1589 	case MRT6_INIT:
1590 		if (sk->sk_type != SOCK_RAW ||
1591 		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1592 			return -EOPNOTSUPP;
1593 		if (optlen < sizeof(int))
1594 			return -EINVAL;
1595 
1596 		return ip6mr_sk_init(mrt, sk);
1597 
1598 	case MRT6_DONE:
1599 		return ip6mr_sk_done(sk);
1600 
1601 	case MRT6_ADD_MIF:
1602 		if (optlen < sizeof(vif))
1603 			return -EINVAL;
1604 		if (copy_from_user(&vif, optval, sizeof(vif)))
1605 			return -EFAULT;
1606 		if (vif.mif6c_mifi >= MAXMIFS)
1607 			return -ENFILE;
1608 		rtnl_lock();
1609 		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1610 		rtnl_unlock();
1611 		return ret;
1612 
1613 	case MRT6_DEL_MIF:
1614 		if (optlen < sizeof(mifi_t))
1615 			return -EINVAL;
1616 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1617 			return -EFAULT;
1618 		rtnl_lock();
1619 		ret = mif6_delete(mrt, mifi, NULL);
1620 		rtnl_unlock();
1621 		return ret;
1622 
1623 	/*
1624 	 *	Manipulate the forwarding caches. These live
1625 	 *	in a sort of kernel/user symbiosis.
1626 	 */
1627 	case MRT6_ADD_MFC:
1628 	case MRT6_DEL_MFC:
1629 		if (optlen < sizeof(mfc))
1630 			return -EINVAL;
1631 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1632 			return -EFAULT;
1633 		rtnl_lock();
1634 		if (optname == MRT6_DEL_MFC)
1635 			ret = ip6mr_mfc_delete(mrt, &mfc);
1636 		else
1637 			ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
1638 		rtnl_unlock();
1639 		return ret;
1640 
1641 	/*
1642 	 *	Control PIM assert (activating PIM also activates assert)
1643 	 */
1644 	case MRT6_ASSERT:
1645 	{
1646 		int v;
1647 		if (get_user(v, (int __user *)optval))
1648 			return -EFAULT;
1649 		mrt->mroute_do_assert = !!v;
1650 		return 0;
1651 	}
1652 
1653 #ifdef CONFIG_IPV6_PIMSM_V2
1654 	case MRT6_PIM:
1655 	{
1656 		int v;
1657 		if (get_user(v, (int __user *)optval))
1658 			return -EFAULT;
1659 		v = !!v;
1660 		rtnl_lock();
1661 		ret = 0;
1662 		if (v != mrt->mroute_do_pim) {
1663 			mrt->mroute_do_pim = v;
1664 			mrt->mroute_do_assert = v;
1665 		}
1666 		rtnl_unlock();
1667 		return ret;
1668 	}
1669 
1670 #endif
1671 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1672 	case MRT6_TABLE:
1673 	{
1674 		u32 v;
1675 
1676 		if (optlen != sizeof(u32))
1677 			return -EINVAL;
1678 		if (get_user(v, (u32 __user *)optval))
1679 			return -EFAULT;
1680 		if (sk == mrt->mroute6_sk)
1681 			return -EBUSY;
1682 
1683 		rtnl_lock();
1684 		ret = 0;
1685 		if (!ip6mr_new_table(net, v))
1686 			ret = -ENOMEM;
1687 		raw6_sk(sk)->ip6mr_table = v;
1688 		rtnl_unlock();
1689 		return ret;
1690 	}
1691 #endif
1692 	/*
1693 	 *	Spurious command, or MRT6_VERSION which you cannot
1694 	 *	set.
1695 	 */
1696 	default:
1697 		return -ENOPROTOOPT;
1698 	}
1699 }
1700 
1701 /*
1702  *	Getsockopt support for the multicast routing system.
1703  */
1704 
1705 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1706 			  int __user *optlen)
1707 {
1708 	int olr;
1709 	int val;
1710 	struct net *net = sock_net(sk);
1711 	struct mr6_table *mrt;
1712 
1713 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1714 	if (mrt == NULL)
1715 		return -ENOENT;
1716 
1717 	switch (optname) {
1718 	case MRT6_VERSION:
1719 		val = 0x0305;
1720 		break;
1721 #ifdef CONFIG_IPV6_PIMSM_V2
1722 	case MRT6_PIM:
1723 		val = mrt->mroute_do_pim;
1724 		break;
1725 #endif
1726 	case MRT6_ASSERT:
1727 		val = mrt->mroute_do_assert;
1728 		break;
1729 	default:
1730 		return -ENOPROTOOPT;
1731 	}
1732 
1733 	if (get_user(olr, optlen))
1734 		return -EFAULT;
1735 
1736 	olr = min_t(int, olr, sizeof(int));
1737 	if (olr < 0)
1738 		return -EINVAL;
1739 
1740 	if (put_user(olr, optlen))
1741 		return -EFAULT;
1742 	if (copy_to_user(optval, &val, olr))
1743 		return -EFAULT;
1744 	return 0;
1745 }
1746 
1747 /*
1748  *	The IPv6 multicast ioctl support routines.
1749  */
1750 
1751 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1752 {
1753 	struct sioc_sg_req6 sr;
1754 	struct sioc_mif_req6 vr;
1755 	struct mif_device *vif;
1756 	struct mfc6_cache *c;
1757 	struct net *net = sock_net(sk);
1758 	struct mr6_table *mrt;
1759 
1760 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1761 	if (mrt == NULL)
1762 		return -ENOENT;
1763 
1764 	switch (cmd) {
1765 	case SIOCGETMIFCNT_IN6:
1766 		if (copy_from_user(&vr, arg, sizeof(vr)))
1767 			return -EFAULT;
1768 		if (vr.mifi >= mrt->maxvif)
1769 			return -EINVAL;
1770 		read_lock(&mrt_lock);
1771 		vif = &mrt->vif6_table[vr.mifi];
1772 		if (MIF_EXISTS(mrt, vr.mifi)) {
1773 			vr.icount = vif->pkt_in;
1774 			vr.ocount = vif->pkt_out;
1775 			vr.ibytes = vif->bytes_in;
1776 			vr.obytes = vif->bytes_out;
1777 			read_unlock(&mrt_lock);
1778 
1779 			if (copy_to_user(arg, &vr, sizeof(vr)))
1780 				return -EFAULT;
1781 			return 0;
1782 		}
1783 		read_unlock(&mrt_lock);
1784 		return -EADDRNOTAVAIL;
1785 	case SIOCGETSGCNT_IN6:
1786 		if (copy_from_user(&sr, arg, sizeof(sr)))
1787 			return -EFAULT;
1788 
1789 		read_lock(&mrt_lock);
1790 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1791 		if (c) {
1792 			sr.pktcnt = c->mfc_un.res.pkt;
1793 			sr.bytecnt = c->mfc_un.res.bytes;
1794 			sr.wrong_if = c->mfc_un.res.wrong_if;
1795 			read_unlock(&mrt_lock);
1796 
1797 			if (copy_to_user(arg, &sr, sizeof(sr)))
1798 				return -EFAULT;
1799 			return 0;
1800 		}
1801 		read_unlock(&mrt_lock);
1802 		return -EADDRNOTAVAIL;
1803 	default:
1804 		return -ENOIOCTLCMD;
1805 	}
1806 }
1807 
1808 #ifdef CONFIG_COMPAT
1809 struct compat_sioc_sg_req6 {
1810 	struct sockaddr_in6 src;
1811 	struct sockaddr_in6 grp;
1812 	compat_ulong_t pktcnt;
1813 	compat_ulong_t bytecnt;
1814 	compat_ulong_t wrong_if;
1815 };
1816 
1817 struct compat_sioc_mif_req6 {
1818 	mifi_t	mifi;
1819 	compat_ulong_t icount;
1820 	compat_ulong_t ocount;
1821 	compat_ulong_t ibytes;
1822 	compat_ulong_t obytes;
1823 };
1824 
1825 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1826 {
1827 	struct compat_sioc_sg_req6 sr;
1828 	struct compat_sioc_mif_req6 vr;
1829 	struct mif_device *vif;
1830 	struct mfc6_cache *c;
1831 	struct net *net = sock_net(sk);
1832 	struct mr6_table *mrt;
1833 
1834 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1835 	if (mrt == NULL)
1836 		return -ENOENT;
1837 
1838 	switch (cmd) {
1839 	case SIOCGETMIFCNT_IN6:
1840 		if (copy_from_user(&vr, arg, sizeof(vr)))
1841 			return -EFAULT;
1842 		if (vr.mifi >= mrt->maxvif)
1843 			return -EINVAL;
1844 		read_lock(&mrt_lock);
1845 		vif = &mrt->vif6_table[vr.mifi];
1846 		if (MIF_EXISTS(mrt, vr.mifi)) {
1847 			vr.icount = vif->pkt_in;
1848 			vr.ocount = vif->pkt_out;
1849 			vr.ibytes = vif->bytes_in;
1850 			vr.obytes = vif->bytes_out;
1851 			read_unlock(&mrt_lock);
1852 
1853 			if (copy_to_user(arg, &vr, sizeof(vr)))
1854 				return -EFAULT;
1855 			return 0;
1856 		}
1857 		read_unlock(&mrt_lock);
1858 		return -EADDRNOTAVAIL;
1859 	case SIOCGETSGCNT_IN6:
1860 		if (copy_from_user(&sr, arg, sizeof(sr)))
1861 			return -EFAULT;
1862 
1863 		read_lock(&mrt_lock);
1864 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1865 		if (c) {
1866 			sr.pktcnt = c->mfc_un.res.pkt;
1867 			sr.bytecnt = c->mfc_un.res.bytes;
1868 			sr.wrong_if = c->mfc_un.res.wrong_if;
1869 			read_unlock(&mrt_lock);
1870 
1871 			if (copy_to_user(arg, &sr, sizeof(sr)))
1872 				return -EFAULT;
1873 			return 0;
1874 		}
1875 		read_unlock(&mrt_lock);
1876 		return -EADDRNOTAVAIL;
1877 	default:
1878 		return -ENOIOCTLCMD;
1879 	}
1880 }
1881 #endif
1882 
1883 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1884 {
1885 	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1886 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1887 	return dst_output(skb);
1888 }
1889 
1890 /*
1891  *	Processing handlers for ip6mr_forward
1892  */
1893 
1894 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1895 			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1896 {
1897 	struct ipv6hdr *ipv6h;
1898 	struct mif_device *vif = &mrt->vif6_table[vifi];
1899 	struct net_device *dev;
1900 	struct dst_entry *dst;
1901 	struct flowi fl;
1902 
1903 	if (vif->dev == NULL)
1904 		goto out_free;
1905 
1906 #ifdef CONFIG_IPV6_PIMSM_V2
1907 	if (vif->flags & MIFF_REGISTER) {
1908 		vif->pkt_out++;
1909 		vif->bytes_out += skb->len;
1910 		vif->dev->stats.tx_bytes += skb->len;
1911 		vif->dev->stats.tx_packets++;
1912 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1913 		goto out_free;
1914 	}
1915 #endif
1916 
1917 	ipv6h = ipv6_hdr(skb);
1918 
1919 	fl = (struct flowi) {
1920 		.oif = vif->link,
1921 		.fl6_dst = ipv6h->daddr,
1922 	};
1923 
1924 	dst = ip6_route_output(net, NULL, &fl);
1925 	if (!dst)
1926 		goto out_free;
1927 
1928 	skb_dst_drop(skb);
1929 	skb_dst_set(skb, dst);
1930 
1931 	/*
1932 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1933 	 * not only before forwarding, but also after forwarding on all output
1934 	 * interfaces. Clearly, if the mrouter runs a multicast program, it
1935 	 * should receive packets regardless of which interface the program
1936 	 * joined on.
1937 	 * If we did not do this, the program would have to join on all
1938 	 * interfaces. On the other hand, a multihomed host (or router, but
1939 	 * not an mrouter) cannot join on more than one interface - that
1940 	 * would result in receiving multiple copies of each packet.
1941 	 */
1942 	dev = vif->dev;
1943 	skb->dev = dev;
1944 	vif->pkt_out++;
1945 	vif->bytes_out += skb->len;
1946 
1947 	/* We are about to write */
1948 	/* XXX: extension headers? */
1949 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1950 		goto out_free;
1951 
1952 	ipv6h = ipv6_hdr(skb);
1953 	ipv6h->hop_limit--;
1954 
1955 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1956 
1957 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
1958 		       ip6mr_forward2_finish);
1959 
1960 out_free:
1961 	kfree_skb(skb);
1962 	return 0;
1963 }
1964 
1965 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
1966 {
1967 	int ct;
1968 
1969 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
1970 		if (mrt->vif6_table[ct].dev == dev)
1971 			break;
1972 	}
1973 	return ct;
1974 }
1975 
1976 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
1977 			  struct sk_buff *skb, struct mfc6_cache *cache)
1978 {
1979 	int psend = -1;
1980 	int vif, ct;
1981 
1982 	vif = cache->mf6c_parent;
1983 	cache->mfc_un.res.pkt++;
1984 	cache->mfc_un.res.bytes += skb->len;
1985 
1986 	/*
1987 	 * Wrong interface: drop packet and (maybe) send PIM assert.
1988 	 */
1989 	if (mrt->vif6_table[vif].dev != skb->dev) {
1990 		int true_vifi;
1991 
1992 		cache->mfc_un.res.wrong_if++;
1993 		true_vifi = ip6mr_find_vif(mrt, skb->dev);
1994 
1995 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
1996 		    /* PIM-SM uses asserts when switching from the RPT to the
1997 		       SPT, so we cannot check that the packet arrived on an
1998 		       oif. That is bad, but otherwise we would need to move a
1999 		       pretty large chunk of pimd into the kernel. Ough... --ANK
2000 		     */
2001 		    (mrt->mroute_do_pim ||
2002 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
2003 		    time_after(jiffies,
2004 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2005 			cache->mfc_un.res.last_assert = jiffies;
2006 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2007 		}
2008 		goto dont_forward;
2009 	}
2010 
2011 	mrt->vif6_table[vif].pkt_in++;
2012 	mrt->vif6_table[vif].bytes_in += skb->len;
2013 
2014 	/*
2015 	 *	Forward the frame
2016 	 */
2017 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2018 		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2019 			if (psend != -1) {
2020 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2021 				if (skb2)
2022 					ip6mr_forward2(net, mrt, skb2, cache, psend);
2023 			}
2024 			psend = ct;
2025 		}
2026 	}
2027 	if (psend != -1) {
2028 		ip6mr_forward2(net, mrt, skb, cache, psend);
2029 		return 0;
2030 	}
2031 
2032 dont_forward:
2033 	kfree_skb(skb);
2034 	return 0;
2035 }
2036 
2037 
2038 /*
2039  *	Multicast packets for forwarding arrive here
2040  */
2041 
2042 int ip6_mr_input(struct sk_buff *skb)
2043 {
2044 	struct mfc6_cache *cache;
2045 	struct net *net = dev_net(skb->dev);
2046 	struct mr6_table *mrt;
2047 	struct flowi fl = {
2048 		.iif	= skb->dev->ifindex,
2049 		.mark	= skb->mark,
2050 	};
2051 	int err;
2052 
2053 	err = ip6mr_fib_lookup(net, &fl, &mrt);
2054 	if (err < 0)
2055 		return err;
2056 
2057 	read_lock(&mrt_lock);
2058 	cache = ip6mr_cache_find(mrt,
2059 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2060 
2061 	/*
2062 	 *	No usable cache entry
2063 	 */
2064 	if (cache == NULL) {
2065 		int vif;
2066 
2067 		vif = ip6mr_find_vif(mrt, skb->dev);
2068 		if (vif >= 0) {
2069 			int err = ip6mr_cache_unresolved(mrt, vif, skb);
2070 			read_unlock(&mrt_lock);
2071 
2072 			return err;
2073 		}
2074 		read_unlock(&mrt_lock);
2075 		kfree_skb(skb);
2076 		return -ENODEV;
2077 	}
2078 
2079 	ip6_mr_forward(net, mrt, skb, cache);
2080 
2081 	read_unlock(&mrt_lock);
2082 
2083 	return 0;
2084 }
2085 
2086 
2087 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2088 			       struct mfc6_cache *c, struct rtmsg *rtm)
2089 {
2090 	int ct;
2091 	struct rtnexthop *nhp;
2092 	u8 *b = skb_tail_pointer(skb);
2093 	struct rtattr *mp_head;
2094 
2095 	/* If cache is unresolved, don't try to parse IIF and OIF */
2096 	if (c->mf6c_parent >= MAXMIFS)
2097 		return -ENOENT;
2098 
2099 	if (MIF_EXISTS(mrt, c->mf6c_parent))
2100 		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);
2101 
2102 	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
2103 
2104 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2105 		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2106 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
2107 				goto rtattr_failure;
2108 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
2109 			nhp->rtnh_flags = 0;
2110 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2111 			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2112 			nhp->rtnh_len = sizeof(*nhp);
2113 		}
2114 	}
2115 	mp_head->rta_type = RTA_MULTIPATH;
2116 	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
2117 	rtm->rtm_type = RTN_MULTICAST;
2118 	return 1;
2119 
2120 rtattr_failure:
2121 	nlmsg_trim(skb, b);
2122 	return -EMSGSIZE;
2123 }
2124 
2125 int ip6mr_get_route(struct net *net,
2126 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2127 {
2128 	int err;
2129 	struct mr6_table *mrt;
2130 	struct mfc6_cache *cache;
2131 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2132 
2133 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2134 	if (mrt == NULL)
2135 		return -ENOENT;
2136 
2137 	read_lock(&mrt_lock);
2138 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2139 
2140 	if (!cache) {
2141 		struct sk_buff *skb2;
2142 		struct ipv6hdr *iph;
2143 		struct net_device *dev;
2144 		int vif;
2145 
2146 		if (nowait) {
2147 			read_unlock(&mrt_lock);
2148 			return -EAGAIN;
2149 		}
2150 
2151 		dev = skb->dev;
2152 		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2153 			read_unlock(&mrt_lock);
2154 			return -ENODEV;
2155 		}
2156 
2157 		/* really correct? */
2158 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2159 		if (!skb2) {
2160 			read_unlock(&mrt_lock);
2161 			return -ENOMEM;
2162 		}
2163 
2164 		skb_reset_transport_header(skb2);
2165 
2166 		skb_put(skb2, sizeof(struct ipv6hdr));
2167 		skb_reset_network_header(skb2);
2168 
2169 		iph = ipv6_hdr(skb2);
2170 		iph->version = 0;
2171 		iph->priority = 0;
2172 		iph->flow_lbl[0] = 0;
2173 		iph->flow_lbl[1] = 0;
2174 		iph->flow_lbl[2] = 0;
2175 		iph->payload_len = 0;
2176 		iph->nexthdr = IPPROTO_NONE;
2177 		iph->hop_limit = 0;
2178 		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
2179 		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
2180 
2181 		err = ip6mr_cache_unresolved(mrt, vif, skb2);
2182 		read_unlock(&mrt_lock);
2183 
2184 		return err;
2185 	}
2186 
2187 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2188 		cache->mfc_flags |= MFC_NOTIFY;
2189 
2190 	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2191 	read_unlock(&mrt_lock);
2192 	return err;
2193 }
2194 
2195 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2196 			     u32 pid, u32 seq, struct mfc6_cache *c)
2197 {
2198 	struct nlmsghdr *nlh;
2199 	struct rtmsg *rtm;
2200 
2201 	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2202 	if (nlh == NULL)
2203 		return -EMSGSIZE;
2204 
2205 	rtm = nlmsg_data(nlh);
2206 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2207 	rtm->rtm_dst_len  = 128;
2208 	rtm->rtm_src_len  = 128;
2209 	rtm->rtm_tos      = 0;
2210 	rtm->rtm_table    = mrt->id;
2211 	NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2212 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2213 	rtm->rtm_protocol = RTPROT_UNSPEC;
2214 	rtm->rtm_flags    = 0;
2215 
2216 	NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin);
2217 	NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp);
2218 
2219 	if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
2220 		goto nla_put_failure;
2221 
2222 	return nlmsg_end(skb, nlh);
2223 
2224 nla_put_failure:
2225 	nlmsg_cancel(skb, nlh);
2226 	return -EMSGSIZE;
2227 }
2228 
2229 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2230 {
2231 	struct net *net = sock_net(skb->sk);
2232 	struct mr6_table *mrt;
2233 	struct mfc6_cache *mfc;
2234 	unsigned int t = 0, s_t;
2235 	unsigned int h = 0, s_h;
2236 	unsigned int e = 0, s_e;
2237 
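	/* cb->args[] is the resume cursor for this multi-part dump:
	 * [0] = table index, [1] = hash bucket, [2] = entry in bucket.
	 */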
2238 	s_t = cb->args[0];
2239 	s_h = cb->args[1];
2240 	s_e = cb->args[2];
2241 
2242 	read_lock(&mrt_lock);
2243 	ip6mr_for_each_table(mrt, net) {
2244 		if (t < s_t)
2245 			goto next_table;
2246 		if (t > s_t)
2247 			s_h = 0;
2248 		for (h = s_h; h < MFC6_LINES; h++) {
2249 			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2250 				if (e < s_e)
2251 					goto next_entry;
2252 				if (ip6mr_fill_mroute(mrt, skb,
2253 						      NETLINK_CB(cb->skb).pid,
2254 						      cb->nlh->nlmsg_seq,
2255 						      mfc) < 0)
2256 					goto done;
2257 next_entry:
2258 				e++;
2259 			}
2260 			e = s_e = 0;
2261 		}
2262 		s_h = 0;
2263 next_table:
2264 		t++;
2265 	}
2266 done:
2267 	read_unlock(&mrt_lock);
2268 
2269 	cb->args[2] = e;
2270 	cb->args[1] = h;
2271 	cb->args[0] = t;
2272 
2273 	return skb->len;
2274 }
2275