xref: /openbmc/linux/net/ipv6/ip6mr.c (revision 9f380456)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <asm/uaccess.h>
20 #include <linux/types.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
23 #include <linux/timer.h>
24 #include <linux/mm.h>
25 #include <linux/kernel.h>
26 #include <linux/fcntl.h>
27 #include <linux/stat.h>
28 #include <linux/socket.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/inetdevice.h>
32 #include <linux/proc_fs.h>
33 #include <linux/seq_file.h>
34 #include <linux/init.h>
35 #include <linux/slab.h>
36 #include <linux/compat.h>
37 #include <net/protocol.h>
38 #include <linux/skbuff.h>
39 #include <net/sock.h>
40 #include <net/raw.h>
41 #include <linux/notifier.h>
42 #include <linux/if_arp.h>
43 #include <net/checksum.h>
44 #include <net/netlink.h>
45 #include <net/fib_rules.h>
46 
47 #include <net/ipv6.h>
48 #include <net/ip6_route.h>
49 #include <linux/mroute6.h>
50 #include <linux/pim.h>
51 #include <net/addrconf.h>
52 #include <linux/netfilter_ipv6.h>
53 #include <linux/export.h>
54 #include <net/ip6_checksum.h>
55 
/*
 * One IPv6 multicast routing table.  With
 * CONFIG_IPV6_MROUTE_MULTIPLE_TABLES the tables of a namespace are linked
 * through @list; otherwise the namespace holds a single table.
 */
struct mr6_table {
	struct list_head	list;		/* link in net->ipv6.mr6_tables */
#ifdef CONFIG_NET_NS
	struct net		*net;		/* owning namespace (write_pnet/read_pnet) */
#endif
	u32			id;		/* table id, e.g. RT6_TABLE_DFLT */
	struct sock		*mroute6_sk;	/* socket cache reports are queued to */
	struct timer_list	ipmr_expire_timer; /* runs ipmr_expire_process() */
	struct list_head	mfc6_unres_queue; /* unresolved cache entries */
	struct list_head	mfc6_cache_array[MFC6_LINES]; /* resolved entries, by MFC6_HASH */
	struct mif_device	vif6_table[MAXMIFS]; /* multicast interfaces */
	int			maxvif;		/* highest used vif6_table index + 1 */
	atomic_t		cache_resolve_queue_len; /* entries on mfc6_unres_queue */
	int			mroute_do_assert;
	int			mroute_do_pim;
#ifdef CONFIG_IPV6_PIMSM_V2
	int			mroute_reg_vif_num; /* index of the register vif, -1 if none */
#endif
};
75 
/* Policy rule for multicast table selection; wraps only the generic fib_rule. */
struct ip6mr_rule {
	struct fib_rule		common;
};
79 
/* Lookup result: ip6mr_rule_action() stores the selected table here. */
struct ip6mr_result {
	struct mr6_table	*mrt;
};
83 
/*
 * Big lock protecting the vif table, the mrt cache and the mroute socket
 * state.  Note that changes are additionally serialized via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

/* True when slot _idx of _mrt's vif table has a device attached. */
#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)

/* Special spinlock for the queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/*
 * We return to Alan's original scheme.  The hash table of resolved
 * entries is changed only in process context and is protected by the
 * weak lock mrt_lock.  The queue of unresolved entries is protected by
 * the strong spinlock mfc_unres_lock.
 *
 * This way the data path is entirely free of exclusive locks.
 */

/* Slab cache from which struct mfc6_cache entries are allocated. */
static struct kmem_cache *mrt_cachep __read_mostly;
108 
/* Forward declarations of helpers that are used before they are defined. */
static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr6_table *mrt);

static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr6_table *mrt);
static void ipmr_expire_process(unsigned long arg);
122 
123 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/* Walk every multicast routing table linked on net->ipv6.mr6_tables. */
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
126 
127 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
128 {
129 	struct mr6_table *mrt;
130 
131 	ip6mr_for_each_table(mrt, net) {
132 		if (mrt->id == id)
133 			return mrt;
134 	}
135 	return NULL;
136 }
137 
138 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
139 			    struct mr6_table **mrt)
140 {
141 	struct ip6mr_result res;
142 	struct fib_lookup_arg arg = { .result = &res, };
143 	int err;
144 
145 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
146 			       flowi6_to_flowi(flp6), 0, &arg);
147 	if (err < 0)
148 		return err;
149 	*mrt = res.mrt;
150 	return 0;
151 }
152 
153 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
154 			     int flags, struct fib_lookup_arg *arg)
155 {
156 	struct ip6mr_result *res = arg->result;
157 	struct mr6_table *mrt;
158 
159 	switch (rule->action) {
160 	case FR_ACT_TO_TBL:
161 		break;
162 	case FR_ACT_UNREACHABLE:
163 		return -ENETUNREACH;
164 	case FR_ACT_PROHIBIT:
165 		return -EACCES;
166 	case FR_ACT_BLACKHOLE:
167 	default:
168 		return -EINVAL;
169 	}
170 
171 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
172 	if (mrt == NULL)
173 		return -EAGAIN;
174 	res->mrt = mrt;
175 	return 0;
176 }
177 
/* Rule match callback: every flow matches every rule (always returns 1). */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}
182 
/* Netlink attribute policy for rules: only the generic FRA_* attributes. */
static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};
186 
/* Rule configure callback: nothing family-specific to set up; returns 0. */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}
192 
/* Rule compare callback: no family-specific fields to compare; returns 1. */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}
198 
199 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
200 			   struct fib_rule_hdr *frh)
201 {
202 	frh->dst_len = 0;
203 	frh->src_len = 0;
204 	frh->tos     = 0;
205 	return 0;
206 }
207 
/*
 * Template of the rule operations for the RTNL_FAMILY_IP6MR family; it is
 * passed to fib_rules_register() for each namespace in ip6mr_rules_init().
 */
static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};
222 
223 static int __net_init ip6mr_rules_init(struct net *net)
224 {
225 	struct fib_rules_ops *ops;
226 	struct mr6_table *mrt;
227 	int err;
228 
229 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
230 	if (IS_ERR(ops))
231 		return PTR_ERR(ops);
232 
233 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
234 
235 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
236 	if (mrt == NULL) {
237 		err = -ENOMEM;
238 		goto err1;
239 	}
240 
241 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
242 	if (err < 0)
243 		goto err2;
244 
245 	net->ipv6.mr6_rules_ops = ops;
246 	return 0;
247 
248 err2:
249 	kfree(mrt);
250 err1:
251 	fib_rules_unregister(ops);
252 	return err;
253 }
254 
255 static void __net_exit ip6mr_rules_exit(struct net *net)
256 {
257 	struct mr6_table *mrt, *next;
258 
259 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
260 		list_del(&mrt->list);
261 		ip6mr_free_table(mrt);
262 	}
263 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
264 }
265 #else
/*
 * Single-table variant: the loop body runs at most once, for
 * net->ipv6.mrt6 when that pointer is non-NULL.
 */
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
268 
/* Single-table variant: @id is ignored, the namespace's only table is returned. */
static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}
273 
/*
 * Single-table variant: no rules are consulted; *@mrt is set to the
 * namespace's only table and 0 is returned.
 */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}
280 
/*
 * Single-table variant: create the namespace's table.
 * Returns 0 on success, -ENOMEM when the table could not be allocated.
 */
static int __net_init ip6mr_rules_init(struct net *net)
{
	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
	return net->ipv6.mrt6 ? 0 : -ENOMEM;
}
286 
/* Single-table variant: release the namespace's only table. */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	ip6mr_free_table(net->ipv6.mrt6);
}
291 #endif
292 
293 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
294 {
295 	struct mr6_table *mrt;
296 	unsigned int i;
297 
298 	mrt = ip6mr_get_table(net, id);
299 	if (mrt != NULL)
300 		return mrt;
301 
302 	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
303 	if (mrt == NULL)
304 		return NULL;
305 	mrt->id = id;
306 	write_pnet(&mrt->net, net);
307 
308 	/* Forwarding cache */
309 	for (i = 0; i < MFC6_LINES; i++)
310 		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
311 
312 	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
313 
314 	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
315 		    (unsigned long)mrt);
316 
317 #ifdef CONFIG_IPV6_PIMSM_V2
318 	mrt->mroute_reg_vif_num = -1;
319 #endif
320 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
321 	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
322 #endif
323 	return mrt;
324 }
325 
326 static void ip6mr_free_table(struct mr6_table *mrt)
327 {
328 	del_timer(&mrt->ipmr_expire_timer);
329 	mroute_clean_tables(mrt);
330 	kfree(mrt);
331 }
332 
333 #ifdef CONFIG_PROC_FS
334 
/* Iterator state of the mfc cache seq_file. */
struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;		/* table being dumped */
	struct list_head *cache;	/* list currently walked; NULL when none */
	int ct;				/* index of the current hash line */
};
341 
342 
/*
 * Find the entry at position @pos, counting the resolved hash lines first
 * and the unresolved queue after them.
 *
 * Locking is handed over to the caller: when an entry in a hash line is
 * returned, mrt_lock is left read-locked; when an entry on the unresolved
 * queue is returned, mfc_unres_lock is left held.  it->cache records which
 * list the entry is on.  When @pos is past the end, NULL is returned with
 * no lock held and it->cache set to NULL.
 */
static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
					   struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr6_table *mrt = it->mrt;
	struct mfc6_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;	/* mrt_lock stays held */
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc6_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;	/* mfc_unres_lock stays held */
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}
368 
369 /*
370  *	The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
371  */
372 
/* Iterator state of the vif table seq_file. */
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;	/* table being dumped */
	int ct;			/* current index into mrt->vif6_table */
};
378 
379 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
380 					    struct ipmr_vif_iter *iter,
381 					    loff_t pos)
382 {
383 	struct mr6_table *mrt = iter->mrt;
384 
385 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
386 		if (!MIF_EXISTS(mrt, iter->ct))
387 			continue;
388 		if (pos-- == 0)
389 			return &mrt->vif6_table[iter->ct];
390 	}
391 	return NULL;
392 }
393 
394 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
395 	__acquires(mrt_lock)
396 {
397 	struct ipmr_vif_iter *iter = seq->private;
398 	struct net *net = seq_file_net(seq);
399 	struct mr6_table *mrt;
400 
401 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
402 	if (mrt == NULL)
403 		return ERR_PTR(-ENOENT);
404 
405 	iter->mrt = mrt;
406 
407 	read_lock(&mrt_lock);
408 	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
409 		: SEQ_START_TOKEN;
410 }
411 
412 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
413 {
414 	struct ipmr_vif_iter *iter = seq->private;
415 	struct net *net = seq_file_net(seq);
416 	struct mr6_table *mrt = iter->mrt;
417 
418 	++*pos;
419 	if (v == SEQ_START_TOKEN)
420 		return ip6mr_vif_seq_idx(net, iter, 0);
421 
422 	while (++iter->ct < mrt->maxvif) {
423 		if (!MIF_EXISTS(mrt, iter->ct))
424 			continue;
425 		return &mrt->vif6_table[iter->ct];
426 	}
427 	return NULL;
428 }
429 
/* seq_file stop callback: drop the read lock taken in ip6mr_vif_seq_start(). */
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}
435 
436 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
437 {
438 	struct ipmr_vif_iter *iter = seq->private;
439 	struct mr6_table *mrt = iter->mrt;
440 
441 	if (v == SEQ_START_TOKEN) {
442 		seq_puts(seq,
443 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
444 	} else {
445 		const struct mif_device *vif = v;
446 		const char *name = vif->dev ? vif->dev->name : "none";
447 
448 		seq_printf(seq,
449 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
450 			   vif - mrt->vif6_table,
451 			   name, vif->bytes_in, vif->pkt_in,
452 			   vif->bytes_out, vif->pkt_out,
453 			   vif->flags);
454 	}
455 	return 0;
456 }
457 
/* seq_file callbacks that dump the vif table. */
static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};
464 
/* Open handler: set up a per-net seq_file with an ipmr_vif_iter as private data. */
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}
470 
/* File operations of the vif table proc entry. */
static const struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
478 
479 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
480 {
481 	struct ipmr_mfc_iter *it = seq->private;
482 	struct net *net = seq_file_net(seq);
483 	struct mr6_table *mrt;
484 
485 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
486 	if (mrt == NULL)
487 		return ERR_PTR(-ENOENT);
488 
489 	it->mrt = mrt;
490 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
491 		: SEQ_START_TOKEN;
492 }
493 
/*
 * seq_file next callback for the mfc cache: advance *@pos and return the
 * entry following @v, or NULL at the end.
 *
 * Resolved hash lines are walked under mrt_lock; once they are exhausted
 * mrt_lock is dropped and the unresolved queue is walked under
 * mfc_unres_lock.  it->cache always names the list whose lock is currently
 * held, and is NULL once no lock is held any more.
 */
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	/* more entries on the list currently being walked? */
	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc6_cache, list);

	if (it->cache == &mrt->mfc6_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);

	/* move on to the next non-empty hash line */
	while (++it->ct < MFC6_LINES) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc6_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc6_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc6_cache, list);

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}
536 
537 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
538 {
539 	struct ipmr_mfc_iter *it = seq->private;
540 	struct mr6_table *mrt = it->mrt;
541 
542 	if (it->cache == &mrt->mfc6_unres_queue)
543 		spin_unlock_bh(&mfc_unres_lock);
544 	else if (it->cache == mrt->mfc6_cache_array)
545 		read_unlock(&mrt_lock);
546 }
547 
548 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
549 {
550 	int n;
551 
552 	if (v == SEQ_START_TOKEN) {
553 		seq_puts(seq,
554 			 "Group                            "
555 			 "Origin                           "
556 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
557 	} else {
558 		const struct mfc6_cache *mfc = v;
559 		const struct ipmr_mfc_iter *it = seq->private;
560 		struct mr6_table *mrt = it->mrt;
561 
562 		seq_printf(seq, "%pI6 %pI6 %-3hd",
563 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
564 			   mfc->mf6c_parent);
565 
566 		if (it->cache != &mrt->mfc6_unres_queue) {
567 			seq_printf(seq, " %8lu %8lu %8lu",
568 				   mfc->mfc_un.res.pkt,
569 				   mfc->mfc_un.res.bytes,
570 				   mfc->mfc_un.res.wrong_if);
571 			for (n = mfc->mfc_un.res.minvif;
572 			     n < mfc->mfc_un.res.maxvif; n++) {
573 				if (MIF_EXISTS(mrt, n) &&
574 				    mfc->mfc_un.res.ttls[n] < 255)
575 					seq_printf(seq,
576 						   " %2d:%-3d",
577 						   n, mfc->mfc_un.res.ttls[n]);
578 			}
579 		} else {
580 			/* unresolved mfc_caches don't contain
581 			 * pkt, bytes and wrong_if values
582 			 */
583 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
584 		}
585 		seq_putc(seq, '\n');
586 	}
587 	return 0;
588 }
589 
/* seq_file callbacks that dump the mfc cache. */
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
596 
/* Open handler: set up a per-net seq_file with an ipmr_mfc_iter as private data. */
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}
602 
/* File operations of the mfc cache proc entry. */
static const struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
610 #endif
611 
612 #ifdef CONFIG_IPV6_PIMSM_V2
613 
/*
 * Receive handler for PIM packets.
 *
 * Accepts only non-null PIM register messages whose checksum verifies and
 * whose encapsulated IPv6 packet is addressed to a multicast group.  The
 * inner packet is then stripped of the outer headers and fed back into the
 * stack through netif_rx() as if it had arrived on the table's register
 * vif device.  Everything else is dropped.
 *
 * The skb is always consumed and 0 is always returned.
 */
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	/*
	 * Must be a PIM register that is not a null-register.  The packet
	 * is dropped only when both the checksum over the PIM header alone
	 * and the checksum over the whole packet come out non-zero.
	 */
	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	/* pin the register device while the vif table cannot change */
	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	/* strip everything in front of the inner IPv6 header */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;
	skb->pkt_type = PACKET_HOST;

	skb_tunnel_rx(skb, reg_dev);

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
679 
/* IPv6 protocol descriptor whose receive handler is pim6_rcv(). */
static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};
683 
684 /* Service routines creating virtual interfaces: PIMREG */
685 
686 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
687 				      struct net_device *dev)
688 {
689 	struct net *net = dev_net(dev);
690 	struct mr6_table *mrt;
691 	struct flowi6 fl6 = {
692 		.flowi6_oif	= dev->ifindex,
693 		.flowi6_iif	= skb->skb_iif,
694 		.flowi6_mark	= skb->mark,
695 	};
696 	int err;
697 
698 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
699 	if (err < 0) {
700 		kfree_skb(skb);
701 		return err;
702 	}
703 
704 	read_lock(&mrt_lock);
705 	dev->stats.tx_bytes += skb->len;
706 	dev->stats.tx_packets++;
707 	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
708 	read_unlock(&mrt_lock);
709 	kfree_skb(skb);
710 	return NETDEV_TX_OK;
711 }
712 
/* Device operations of the register device: only transmit is implemented. */
static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};
716 
717 static void reg_vif_setup(struct net_device *dev)
718 {
719 	dev->type		= ARPHRD_PIMREG;
720 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
721 	dev->flags		= IFF_NOARP;
722 	dev->netdev_ops		= &reg_vif_netdev_ops;
723 	dev->destructor		= free_netdev;
724 	dev->features		|= NETIF_F_NETNS_LOCAL;
725 }
726 
727 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
728 {
729 	struct net_device *dev;
730 	char name[IFNAMSIZ];
731 
732 	if (mrt->id == RT6_TABLE_DFLT)
733 		sprintf(name, "pim6reg");
734 	else
735 		sprintf(name, "pim6reg%u", mrt->id);
736 
737 	dev = alloc_netdev(0, name, reg_vif_setup);
738 	if (dev == NULL)
739 		return NULL;
740 
741 	dev_net_set(dev, net);
742 
743 	if (register_netdevice(dev)) {
744 		free_netdev(dev);
745 		return NULL;
746 	}
747 	dev->iflink = 0;
748 
749 	if (dev_open(dev))
750 		goto failure;
751 
752 	dev_hold(dev);
753 	return dev;
754 
755 failure:
756 	/* allow the register to be completed before unregistering. */
757 	rtnl_unlock();
758 	rtnl_lock();
759 
760 	unregister_netdevice(dev);
761 	return NULL;
762 }
763 #endif
764 
/*
 *	Delete a VIF entry
 *
 *	Detaches the device from slot @vifi of @mrt, undoes the allmulti and
 *	mc_forwarding accounting done when the vif was added, and drops the
 *	table's device reference.  A register device is additionally queued
 *	for unregistration on @head.
 *
 *	Returns 0 on success, or -EADDRNOTAVAIL when @vifi is out of range or
 *	the slot has no device.
 */

static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
{
	struct mif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif6_table[vifi];

	/* detach the device under the write lock so readers never see it */
	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	/* the top slot went away: shrink maxvif to the highest slot still in use */
	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev)
		in6_dev->cnf.mc_forwarding--;

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}
817 
/* Return a cache entry to the mrt_cachep slab cache. */
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}
822 
823 /* Destroy an unresolved cache entry, killing queued skbs
824    and reporting error to netlink readers.
825  */
826 
827 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
828 {
829 	struct net *net = read_pnet(&mrt->net);
830 	struct sk_buff *skb;
831 
832 	atomic_dec(&mrt->cache_resolve_queue_len);
833 
834 	while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
835 		if (ipv6_hdr(skb)->version == 0) {
836 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
837 			nlh->nlmsg_type = NLMSG_ERROR;
838 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
839 			skb_trim(skb, nlh->nlmsg_len);
840 			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
841 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
842 		} else
843 			kfree_skb(skb);
844 	}
845 
846 	ip6mr_cache_free(c);
847 }
848 
849 
850 /* Timer process for all the unresolved queue. */
851 
852 static void ipmr_do_expire_process(struct mr6_table *mrt)
853 {
854 	unsigned long now = jiffies;
855 	unsigned long expires = 10 * HZ;
856 	struct mfc6_cache *c, *next;
857 
858 	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
859 		if (time_after(c->mfc_un.unres.expires, now)) {
860 			/* not yet... */
861 			unsigned long interval = c->mfc_un.unres.expires - now;
862 			if (interval < expires)
863 				expires = interval;
864 			continue;
865 		}
866 
867 		list_del(&c->list);
868 		ip6mr_destroy_unres(mrt, c);
869 	}
870 
871 	if (!list_empty(&mrt->mfc6_unres_queue))
872 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
873 }
874 
875 static void ipmr_expire_process(unsigned long arg)
876 {
877 	struct mr6_table *mrt = (struct mr6_table *)arg;
878 
879 	if (!spin_trylock(&mfc_unres_lock)) {
880 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
881 		return;
882 	}
883 
884 	if (!list_empty(&mrt->mfc6_unres_queue))
885 		ipmr_do_expire_process(mrt);
886 
887 	spin_unlock(&mfc_unres_lock);
888 }
889 
890 /* Fill oifs list. It is called under write locked mrt_lock. */
891 
892 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
893 				    unsigned char *ttls)
894 {
895 	int vifi;
896 
897 	cache->mfc_un.res.minvif = MAXMIFS;
898 	cache->mfc_un.res.maxvif = 0;
899 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
900 
901 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
902 		if (MIF_EXISTS(mrt, vifi) &&
903 		    ttls[vifi] && ttls[vifi] < 255) {
904 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
905 			if (cache->mfc_un.res.minvif > vifi)
906 				cache->mfc_un.res.minvif = vifi;
907 			if (cache->mfc_un.res.maxvif <= vifi)
908 				cache->mfc_un.res.maxvif = vifi + 1;
909 		}
910 	}
911 }
912 
/*
 * Add a multicast interface to @mrt as described by @vifc.
 *
 * For MIFF_REGISTER a new register device is created (only one per
 * table); for flags == 0 the existing device with index mif6c_pifi is
 * used.  The device is put into allmulti mode, its mc_forwarding counter
 * is bumped, and the vif slot is filled in and published under mrt_lock.
 * When @mrtsock is zero the vif is additionally flagged VIFF_STATIC.
 *
 * Returns 0 on success; -EADDRINUSE if the slot (or the register vif) is
 * already in use, -ENOBUFS if the register device could not be created,
 * -EADDRNOTAVAIL if the device index is unknown, -EINVAL for unsupported
 * flags, or the error from dev_set_allmulti().
 */
static int mif6_add(struct net *net, struct mr6_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	/* NOTE(review): vifi is not range-checked here; presumably the
	 * caller validates it against MAXMIFS - confirm.
	 */
	struct mif_device *v = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (MIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev)
		in6_dev->cnf.mc_forwarding++;

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
992 
993 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
994 					   const struct in6_addr *origin,
995 					   const struct in6_addr *mcastgrp)
996 {
997 	int line = MFC6_HASH(mcastgrp, origin);
998 	struct mfc6_cache *c;
999 
1000 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1001 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1002 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1003 			return c;
1004 	}
1005 	return NULL;
1006 }
1007 
1008 /*
1009  *	Allocate a multicast cache entry
1010  */
1011 static struct mfc6_cache *ip6mr_cache_alloc(void)
1012 {
1013 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1014 	if (c == NULL)
1015 		return NULL;
1016 	c->mfc_un.res.minvif = MAXMIFS;
1017 	return c;
1018 }
1019 
1020 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1021 {
1022 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1023 	if (c == NULL)
1024 		return NULL;
1025 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
1026 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
1027 	return c;
1028 }
1029 
1030 /*
1031  *	A cache entry has gone into a resolved state from queued
1032  */
1033 
1034 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1035 				struct mfc6_cache *uc, struct mfc6_cache *c)
1036 {
1037 	struct sk_buff *skb;
1038 
1039 	/*
1040 	 *	Play the pending entries through our router
1041 	 */
1042 
1043 	while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1044 		if (ipv6_hdr(skb)->version == 0) {
1045 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1046 
1047 			if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
1048 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1049 			} else {
1050 				nlh->nlmsg_type = NLMSG_ERROR;
1051 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
1052 				skb_trim(skb, nlh->nlmsg_len);
1053 				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
1054 			}
1055 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
1056 		} else
1057 			ip6_mr_forward(net, mrt, skb, c);
1058 	}
1059 }
1060 
/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 *
 *	Builds a message consisting of a struct mrt6msg describing @pkt
 *	(message type @assert, interface @mifi, source and destination
 *	address) and queues it on the table's mroute socket.  For
 *	MRT6MSG_WHOLEPKT the message is prepended to a copy of the whole
 *	packet; otherwise only the IPv6 header of @pkt is copied.
 *
 *	Returns the result of sock_queue_rcv_skb(), -ENOBUFS when no skb
 *	could be allocated, or -EINVAL when the table has no mroute socket.
 */

static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	/* nobody is listening: discard the message that was just built */
	if (mrt->mroute6_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
1157 
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 *
 *	Looks for an unresolved entry matching the packet's source and
 *	destination; if there is none, a new entry is created and the packet
 *	is reported to the daemon as MRT6MSG_NOCACHE.  The packet is then
 *	appended to the entry's queue.  @skb is consumed on every path.
 *
 *	Returns 0 when the packet was queued, -ENOBUFS when the table already
 *	has 10 unresolved entries, the entry could not be allocated or the
 *	entry's queue is full, or the error from ip6mr_cache_report().
 */

static int
ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc6_unres_queue);

		/* expires stale entries and (re)arms the expiry timer */
		ipmr_do_expire_process(mrt);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
1233 
1234 /*
1235  *	MFC6 cache manipulation by user space
1236  */
1237 
1238 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
1239 {
1240 	int line;
1241 	struct mfc6_cache *c, *next;
1242 
1243 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1244 
1245 	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1246 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1247 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1248 			write_lock_bh(&mrt_lock);
1249 			list_del(&c->list);
1250 			write_unlock_bh(&mrt_lock);
1251 
1252 			ip6mr_cache_free(c);
1253 			return 0;
1254 		}
1255 	}
1256 	return -ENOENT;
1257 }
1258 
1259 static int ip6mr_device_event(struct notifier_block *this,
1260 			      unsigned long event, void *ptr)
1261 {
1262 	struct net_device *dev = ptr;
1263 	struct net *net = dev_net(dev);
1264 	struct mr6_table *mrt;
1265 	struct mif_device *v;
1266 	int ct;
1267 	LIST_HEAD(list);
1268 
1269 	if (event != NETDEV_UNREGISTER)
1270 		return NOTIFY_DONE;
1271 
1272 	ip6mr_for_each_table(mrt, net) {
1273 		v = &mrt->vif6_table[0];
1274 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1275 			if (v->dev == dev)
1276 				mif6_delete(mrt, ct, &list);
1277 		}
1278 	}
1279 	unregister_netdevice_many(&list);
1280 
1281 	return NOTIFY_DONE;
1282 }
1283 
/* Notifier block that routes netdevice events to ip6mr_device_event(). */
static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
1287 
1288 /*
1289  *	Setup for IP multicast routing
1290  */
1291 
1292 static int __net_init ip6mr_net_init(struct net *net)
1293 {
1294 	int err;
1295 
1296 	err = ip6mr_rules_init(net);
1297 	if (err < 0)
1298 		goto fail;
1299 
1300 #ifdef CONFIG_PROC_FS
1301 	err = -ENOMEM;
1302 	if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1303 		goto proc_vif_fail;
1304 	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1305 		goto proc_cache_fail;
1306 #endif
1307 
1308 	return 0;
1309 
1310 #ifdef CONFIG_PROC_FS
1311 proc_cache_fail:
1312 	proc_net_remove(net, "ip6_mr_vif");
1313 proc_vif_fail:
1314 	ip6mr_rules_exit(net);
1315 #endif
1316 fail:
1317 	return err;
1318 }
1319 
/*
 *	Per-namespace teardown: remove the proc entries created by
 *	ip6mr_net_init() (when CONFIG_PROC_FS is set) and release the
 *	multicast routing rules.
 */
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	/* Removed in the reverse order of their creation. */
	proc_net_remove(net, "ip6_mr_cache");
	proc_net_remove(net, "ip6_mr_vif");
#endif
	ip6mr_rules_exit(net);
}
1328 
/* Per-network-namespace init/exit hooks registered by ip6_mr_init(). */
static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};
1333 
/*
 *	Subsystem initialisation. In order: create the slab cache for
 *	struct mfc6_cache, register the per-namespace operations, register
 *	the netdevice notifier, add the PIM protocol handler (only with
 *	CONFIG_IPV6_PIMSM_V2) and register the RTM_GETROUTE dump handler.
 *
 *	Returns 0 on success or a negative errno; on failure every step
 *	completed so far is undone in reverse order.
 */
int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	/* The return value of rtnl_register() is not checked here. */
	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
		      ip6mr_rtm_dumproute, NULL);
	return 0;
	/* Error unwinding: each label undoes the step preceding the failure. */
#ifdef CONFIG_IPV6_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}
1372 
1373 void ip6_mr_cleanup(void)
1374 {
1375 	unregister_netdevice_notifier(&ip6_mr_notifier);
1376 	unregister_pernet_subsys(&ip6mr_net_ops);
1377 	kmem_cache_destroy(mrt_cachep);
1378 }
1379 
1380 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1381 			 struct mf6cctl *mfc, int mrtsock)
1382 {
1383 	bool found = false;
1384 	int line;
1385 	struct mfc6_cache *uc, *c;
1386 	unsigned char ttls[MAXMIFS];
1387 	int i;
1388 
1389 	if (mfc->mf6cc_parent >= MAXMIFS)
1390 		return -ENFILE;
1391 
1392 	memset(ttls, 255, MAXMIFS);
1393 	for (i = 0; i < MAXMIFS; i++) {
1394 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1395 			ttls[i] = 1;
1396 
1397 	}
1398 
1399 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1400 
1401 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1402 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1403 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1404 			found = true;
1405 			break;
1406 		}
1407 	}
1408 
1409 	if (found) {
1410 		write_lock_bh(&mrt_lock);
1411 		c->mf6c_parent = mfc->mf6cc_parent;
1412 		ip6mr_update_thresholds(mrt, c, ttls);
1413 		if (!mrtsock)
1414 			c->mfc_flags |= MFC_STATIC;
1415 		write_unlock_bh(&mrt_lock);
1416 		return 0;
1417 	}
1418 
1419 	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1420 		return -EINVAL;
1421 
1422 	c = ip6mr_cache_alloc();
1423 	if (c == NULL)
1424 		return -ENOMEM;
1425 
1426 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1427 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1428 	c->mf6c_parent = mfc->mf6cc_parent;
1429 	ip6mr_update_thresholds(mrt, c, ttls);
1430 	if (!mrtsock)
1431 		c->mfc_flags |= MFC_STATIC;
1432 
1433 	write_lock_bh(&mrt_lock);
1434 	list_add(&c->list, &mrt->mfc6_cache_array[line]);
1435 	write_unlock_bh(&mrt_lock);
1436 
1437 	/*
1438 	 *	Check to see if we resolved a queued list. If so we
1439 	 *	need to send on the frames and tidy up.
1440 	 */
1441 	found = false;
1442 	spin_lock_bh(&mfc_unres_lock);
1443 	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1444 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1445 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1446 			list_del(&uc->list);
1447 			atomic_dec(&mrt->cache_resolve_queue_len);
1448 			found = true;
1449 			break;
1450 		}
1451 	}
1452 	if (list_empty(&mrt->mfc6_unres_queue))
1453 		del_timer(&mrt->ipmr_expire_timer);
1454 	spin_unlock_bh(&mfc_unres_lock);
1455 
1456 	if (found) {
1457 		ip6mr_cache_resolve(net, mrt, uc, c);
1458 		ip6mr_cache_free(uc);
1459 	}
1460 	return 0;
1461 }
1462 
1463 /*
1464  *	Close the multicast socket, and clear the vif tables etc
1465  */
1466 
1467 static void mroute_clean_tables(struct mr6_table *mrt)
1468 {
1469 	int i;
1470 	LIST_HEAD(list);
1471 	struct mfc6_cache *c, *next;
1472 
1473 	/*
1474 	 *	Shut down all active vif entries
1475 	 */
1476 	for (i = 0; i < mrt->maxvif; i++) {
1477 		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1478 			mif6_delete(mrt, i, &list);
1479 	}
1480 	unregister_netdevice_many(&list);
1481 
1482 	/*
1483 	 *	Wipe the cache
1484 	 */
1485 	for (i = 0; i < MFC6_LINES; i++) {
1486 		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1487 			if (c->mfc_flags & MFC_STATIC)
1488 				continue;
1489 			write_lock_bh(&mrt_lock);
1490 			list_del(&c->list);
1491 			write_unlock_bh(&mrt_lock);
1492 
1493 			ip6mr_cache_free(c);
1494 		}
1495 	}
1496 
1497 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1498 		spin_lock_bh(&mfc_unres_lock);
1499 		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1500 			list_del(&c->list);
1501 			ip6mr_destroy_unres(mrt, c);
1502 		}
1503 		spin_unlock_bh(&mfc_unres_lock);
1504 	}
1505 }
1506 
1507 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1508 {
1509 	int err = 0;
1510 	struct net *net = sock_net(sk);
1511 
1512 	rtnl_lock();
1513 	write_lock_bh(&mrt_lock);
1514 	if (likely(mrt->mroute6_sk == NULL)) {
1515 		mrt->mroute6_sk = sk;
1516 		net->ipv6.devconf_all->mc_forwarding++;
1517 	}
1518 	else
1519 		err = -EADDRINUSE;
1520 	write_unlock_bh(&mrt_lock);
1521 
1522 	rtnl_unlock();
1523 
1524 	return err;
1525 }
1526 
1527 int ip6mr_sk_done(struct sock *sk)
1528 {
1529 	int err = -EACCES;
1530 	struct net *net = sock_net(sk);
1531 	struct mr6_table *mrt;
1532 
1533 	rtnl_lock();
1534 	ip6mr_for_each_table(mrt, net) {
1535 		if (sk == mrt->mroute6_sk) {
1536 			write_lock_bh(&mrt_lock);
1537 			mrt->mroute6_sk = NULL;
1538 			net->ipv6.devconf_all->mc_forwarding--;
1539 			write_unlock_bh(&mrt_lock);
1540 
1541 			mroute_clean_tables(mrt);
1542 			err = 0;
1543 			break;
1544 		}
1545 	}
1546 	rtnl_unlock();
1547 
1548 	return err;
1549 }
1550 
1551 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1552 {
1553 	struct mr6_table *mrt;
1554 	struct flowi6 fl6 = {
1555 		.flowi6_iif	= skb->skb_iif,
1556 		.flowi6_oif	= skb->dev->ifindex,
1557 		.flowi6_mark	= skb->mark,
1558 	};
1559 
1560 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1561 		return NULL;
1562 
1563 	return mrt->mroute6_sk;
1564 }
1565 
1566 /*
1567  *	Socket options and virtual interface manipulation. The whole
1568  *	virtual interface system is a complete heap, but unfortunately
1569  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1570  *	MOSPF/PIM router set up we can clean this up.
1571  */
1572 
1573 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1574 {
1575 	int ret;
1576 	struct mif6ctl vif;
1577 	struct mf6cctl mfc;
1578 	mifi_t mifi;
1579 	struct net *net = sock_net(sk);
1580 	struct mr6_table *mrt;
1581 
1582 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1583 	if (mrt == NULL)
1584 		return -ENOENT;
1585 
1586 	if (optname != MRT6_INIT) {
1587 		if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
1588 			return -EACCES;
1589 	}
1590 
1591 	switch (optname) {
1592 	case MRT6_INIT:
1593 		if (sk->sk_type != SOCK_RAW ||
1594 		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1595 			return -EOPNOTSUPP;
1596 		if (optlen < sizeof(int))
1597 			return -EINVAL;
1598 
1599 		return ip6mr_sk_init(mrt, sk);
1600 
1601 	case MRT6_DONE:
1602 		return ip6mr_sk_done(sk);
1603 
1604 	case MRT6_ADD_MIF:
1605 		if (optlen < sizeof(vif))
1606 			return -EINVAL;
1607 		if (copy_from_user(&vif, optval, sizeof(vif)))
1608 			return -EFAULT;
1609 		if (vif.mif6c_mifi >= MAXMIFS)
1610 			return -ENFILE;
1611 		rtnl_lock();
1612 		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1613 		rtnl_unlock();
1614 		return ret;
1615 
1616 	case MRT6_DEL_MIF:
1617 		if (optlen < sizeof(mifi_t))
1618 			return -EINVAL;
1619 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1620 			return -EFAULT;
1621 		rtnl_lock();
1622 		ret = mif6_delete(mrt, mifi, NULL);
1623 		rtnl_unlock();
1624 		return ret;
1625 
1626 	/*
1627 	 *	Manipulate the forwarding caches. These live
1628 	 *	in a sort of kernel/user symbiosis.
1629 	 */
1630 	case MRT6_ADD_MFC:
1631 	case MRT6_DEL_MFC:
1632 		if (optlen < sizeof(mfc))
1633 			return -EINVAL;
1634 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1635 			return -EFAULT;
1636 		rtnl_lock();
1637 		if (optname == MRT6_DEL_MFC)
1638 			ret = ip6mr_mfc_delete(mrt, &mfc);
1639 		else
1640 			ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
1641 		rtnl_unlock();
1642 		return ret;
1643 
1644 	/*
1645 	 *	Control PIM assert (to activate pim will activate assert)
1646 	 */
1647 	case MRT6_ASSERT:
1648 	{
1649 		int v;
1650 		if (get_user(v, (int __user *)optval))
1651 			return -EFAULT;
1652 		mrt->mroute_do_assert = !!v;
1653 		return 0;
1654 	}
1655 
1656 #ifdef CONFIG_IPV6_PIMSM_V2
1657 	case MRT6_PIM:
1658 	{
1659 		int v;
1660 		if (get_user(v, (int __user *)optval))
1661 			return -EFAULT;
1662 		v = !!v;
1663 		rtnl_lock();
1664 		ret = 0;
1665 		if (v != mrt->mroute_do_pim) {
1666 			mrt->mroute_do_pim = v;
1667 			mrt->mroute_do_assert = v;
1668 		}
1669 		rtnl_unlock();
1670 		return ret;
1671 	}
1672 
1673 #endif
1674 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1675 	case MRT6_TABLE:
1676 	{
1677 		u32 v;
1678 
1679 		if (optlen != sizeof(u32))
1680 			return -EINVAL;
1681 		if (get_user(v, (u32 __user *)optval))
1682 			return -EFAULT;
1683 		if (sk == mrt->mroute6_sk)
1684 			return -EBUSY;
1685 
1686 		rtnl_lock();
1687 		ret = 0;
1688 		if (!ip6mr_new_table(net, v))
1689 			ret = -ENOMEM;
1690 		raw6_sk(sk)->ip6mr_table = v;
1691 		rtnl_unlock();
1692 		return ret;
1693 	}
1694 #endif
1695 	/*
1696 	 *	Spurious command, or MRT6_VERSION which you cannot
1697 	 *	set.
1698 	 */
1699 	default:
1700 		return -ENOPROTOOPT;
1701 	}
1702 }
1703 
1704 /*
1705  *	Getsock opt support for the multicast routing system.
1706  */
1707 
1708 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1709 			  int __user *optlen)
1710 {
1711 	int olr;
1712 	int val;
1713 	struct net *net = sock_net(sk);
1714 	struct mr6_table *mrt;
1715 
1716 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1717 	if (mrt == NULL)
1718 		return -ENOENT;
1719 
1720 	switch (optname) {
1721 	case MRT6_VERSION:
1722 		val = 0x0305;
1723 		break;
1724 #ifdef CONFIG_IPV6_PIMSM_V2
1725 	case MRT6_PIM:
1726 		val = mrt->mroute_do_pim;
1727 		break;
1728 #endif
1729 	case MRT6_ASSERT:
1730 		val = mrt->mroute_do_assert;
1731 		break;
1732 	default:
1733 		return -ENOPROTOOPT;
1734 	}
1735 
1736 	if (get_user(olr, optlen))
1737 		return -EFAULT;
1738 
1739 	olr = min_t(int, olr, sizeof(int));
1740 	if (olr < 0)
1741 		return -EINVAL;
1742 
1743 	if (put_user(olr, optlen))
1744 		return -EFAULT;
1745 	if (copy_to_user(optval, &val, olr))
1746 		return -EFAULT;
1747 	return 0;
1748 }
1749 
1750 /*
1751  *	The IP multicast ioctl support routines.
1752  */
1753 
1754 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1755 {
1756 	struct sioc_sg_req6 sr;
1757 	struct sioc_mif_req6 vr;
1758 	struct mif_device *vif;
1759 	struct mfc6_cache *c;
1760 	struct net *net = sock_net(sk);
1761 	struct mr6_table *mrt;
1762 
1763 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1764 	if (mrt == NULL)
1765 		return -ENOENT;
1766 
1767 	switch (cmd) {
1768 	case SIOCGETMIFCNT_IN6:
1769 		if (copy_from_user(&vr, arg, sizeof(vr)))
1770 			return -EFAULT;
1771 		if (vr.mifi >= mrt->maxvif)
1772 			return -EINVAL;
1773 		read_lock(&mrt_lock);
1774 		vif = &mrt->vif6_table[vr.mifi];
1775 		if (MIF_EXISTS(mrt, vr.mifi)) {
1776 			vr.icount = vif->pkt_in;
1777 			vr.ocount = vif->pkt_out;
1778 			vr.ibytes = vif->bytes_in;
1779 			vr.obytes = vif->bytes_out;
1780 			read_unlock(&mrt_lock);
1781 
1782 			if (copy_to_user(arg, &vr, sizeof(vr)))
1783 				return -EFAULT;
1784 			return 0;
1785 		}
1786 		read_unlock(&mrt_lock);
1787 		return -EADDRNOTAVAIL;
1788 	case SIOCGETSGCNT_IN6:
1789 		if (copy_from_user(&sr, arg, sizeof(sr)))
1790 			return -EFAULT;
1791 
1792 		read_lock(&mrt_lock);
1793 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1794 		if (c) {
1795 			sr.pktcnt = c->mfc_un.res.pkt;
1796 			sr.bytecnt = c->mfc_un.res.bytes;
1797 			sr.wrong_if = c->mfc_un.res.wrong_if;
1798 			read_unlock(&mrt_lock);
1799 
1800 			if (copy_to_user(arg, &sr, sizeof(sr)))
1801 				return -EFAULT;
1802 			return 0;
1803 		}
1804 		read_unlock(&mrt_lock);
1805 		return -EADDRNOTAVAIL;
1806 	default:
1807 		return -ENOIOCTLCMD;
1808 	}
1809 }
1810 
1811 #ifdef CONFIG_COMPAT
/*
 * Request layout used by ip6mr_compat_ioctl() for SIOCGETSGCNT_IN6;
 * the counters are compat_ulong_t.
 */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;	/* source address to look up */
	struct sockaddr_in6 grp;	/* group address to look up */
	compat_ulong_t pktcnt;		/* filled from mfc_un.res.pkt */
	compat_ulong_t bytecnt;		/* filled from mfc_un.res.bytes */
	compat_ulong_t wrong_if;	/* filled from mfc_un.res.wrong_if */
};
1819 
/*
 * Request layout used by ip6mr_compat_ioctl() for SIOCGETMIFCNT_IN6;
 * the counters are compat_ulong_t.
 */
struct compat_sioc_mif_req6 {
	mifi_t	mifi;			/* interface index to query */
	compat_ulong_t icount;		/* filled from the interface's pkt_in */
	compat_ulong_t ocount;		/* filled from the interface's pkt_out */
	compat_ulong_t ibytes;		/* filled from the interface's bytes_in */
	compat_ulong_t obytes;		/* filled from the interface's bytes_out */
};
1827 
1828 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1829 {
1830 	struct compat_sioc_sg_req6 sr;
1831 	struct compat_sioc_mif_req6 vr;
1832 	struct mif_device *vif;
1833 	struct mfc6_cache *c;
1834 	struct net *net = sock_net(sk);
1835 	struct mr6_table *mrt;
1836 
1837 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1838 	if (mrt == NULL)
1839 		return -ENOENT;
1840 
1841 	switch (cmd) {
1842 	case SIOCGETMIFCNT_IN6:
1843 		if (copy_from_user(&vr, arg, sizeof(vr)))
1844 			return -EFAULT;
1845 		if (vr.mifi >= mrt->maxvif)
1846 			return -EINVAL;
1847 		read_lock(&mrt_lock);
1848 		vif = &mrt->vif6_table[vr.mifi];
1849 		if (MIF_EXISTS(mrt, vr.mifi)) {
1850 			vr.icount = vif->pkt_in;
1851 			vr.ocount = vif->pkt_out;
1852 			vr.ibytes = vif->bytes_in;
1853 			vr.obytes = vif->bytes_out;
1854 			read_unlock(&mrt_lock);
1855 
1856 			if (copy_to_user(arg, &vr, sizeof(vr)))
1857 				return -EFAULT;
1858 			return 0;
1859 		}
1860 		read_unlock(&mrt_lock);
1861 		return -EADDRNOTAVAIL;
1862 	case SIOCGETSGCNT_IN6:
1863 		if (copy_from_user(&sr, arg, sizeof(sr)))
1864 			return -EFAULT;
1865 
1866 		read_lock(&mrt_lock);
1867 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1868 		if (c) {
1869 			sr.pktcnt = c->mfc_un.res.pkt;
1870 			sr.bytecnt = c->mfc_un.res.bytes;
1871 			sr.wrong_if = c->mfc_un.res.wrong_if;
1872 			read_unlock(&mrt_lock);
1873 
1874 			if (copy_to_user(arg, &sr, sizeof(sr)))
1875 				return -EFAULT;
1876 			return 0;
1877 		}
1878 		read_unlock(&mrt_lock);
1879 		return -EADDRNOTAVAIL;
1880 	default:
1881 		return -ENOIOCTLCMD;
1882 	}
1883 }
1884 #endif
1885 
1886 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1887 {
1888 	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1889 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1890 	return dst_output(skb);
1891 }
1892 
1893 /*
1894  *	Processing handlers for ip6mr_forward
1895  */
1896 
/*
 *	Send @skb out of multicast interface @vifi of table @mrt.
 *
 *	The skb is consumed on every path. It is freed when the interface
 *	has no device, when it is a register interface (after the whole
 *	packet has been reported to the routing daemon), when no output
 *	route is found or when skb_cow() fails; in those cases 0 is
 *	returned. Otherwise the packet is handed to the NF_INET_FORWARD
 *	hook with ip6mr_forward2_finish() as continuation and the hook's
 *	result is returned.
 */
static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	/*
	 * Register interface: nothing is transmitted; the packet is counted
	 * and passed up as MRT6MSG_WHOLEPKT. The report's return value is
	 * deliberately not examined here.
	 */
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	/* Route lookup keyed on the interface's link and the destination. */
	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	/* Replace whatever route the skb carried with the output route. */
	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	/* Re-read the header pointer after skb_cow() before modifying it. */
	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
1969 
1970 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
1971 {
1972 	int ct;
1973 
1974 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
1975 		if (mrt->vif6_table[ct].dev == dev)
1976 			break;
1977 	}
1978 	return ct;
1979 }
1980 
1981 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
1982 			  struct sk_buff *skb, struct mfc6_cache *cache)
1983 {
1984 	int psend = -1;
1985 	int vif, ct;
1986 
1987 	vif = cache->mf6c_parent;
1988 	cache->mfc_un.res.pkt++;
1989 	cache->mfc_un.res.bytes += skb->len;
1990 
1991 	/*
1992 	 * Wrong interface: drop packet and (maybe) send PIM assert.
1993 	 */
1994 	if (mrt->vif6_table[vif].dev != skb->dev) {
1995 		int true_vifi;
1996 
1997 		cache->mfc_un.res.wrong_if++;
1998 		true_vifi = ip6mr_find_vif(mrt, skb->dev);
1999 
2000 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2001 		    /* pimsm uses asserts, when switching from RPT to SPT,
2002 		       so that we cannot check that packet arrived on an oif.
2003 		       It is bad, but otherwise we would need to move pretty
2004 		       large chunk of pimd to kernel. Ough... --ANK
2005 		     */
2006 		    (mrt->mroute_do_pim ||
2007 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
2008 		    time_after(jiffies,
2009 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2010 			cache->mfc_un.res.last_assert = jiffies;
2011 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2012 		}
2013 		goto dont_forward;
2014 	}
2015 
2016 	mrt->vif6_table[vif].pkt_in++;
2017 	mrt->vif6_table[vif].bytes_in += skb->len;
2018 
2019 	/*
2020 	 *	Forward the frame
2021 	 */
2022 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2023 		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2024 			if (psend != -1) {
2025 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2026 				if (skb2)
2027 					ip6mr_forward2(net, mrt, skb2, cache, psend);
2028 			}
2029 			psend = ct;
2030 		}
2031 	}
2032 	if (psend != -1) {
2033 		ip6mr_forward2(net, mrt, skb, cache, psend);
2034 		return 0;
2035 	}
2036 
2037 dont_forward:
2038 	kfree_skb(skb);
2039 	return 0;
2040 }
2041 
2042 
2043 /*
2044  *	Multicast packets for forwarding arrive here
2045  */
2046 
2047 int ip6_mr_input(struct sk_buff *skb)
2048 {
2049 	struct mfc6_cache *cache;
2050 	struct net *net = dev_net(skb->dev);
2051 	struct mr6_table *mrt;
2052 	struct flowi6 fl6 = {
2053 		.flowi6_iif	= skb->dev->ifindex,
2054 		.flowi6_mark	= skb->mark,
2055 	};
2056 	int err;
2057 
2058 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2059 	if (err < 0) {
2060 		kfree_skb(skb);
2061 		return err;
2062 	}
2063 
2064 	read_lock(&mrt_lock);
2065 	cache = ip6mr_cache_find(mrt,
2066 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2067 
2068 	/*
2069 	 *	No usable cache entry
2070 	 */
2071 	if (cache == NULL) {
2072 		int vif;
2073 
2074 		vif = ip6mr_find_vif(mrt, skb->dev);
2075 		if (vif >= 0) {
2076 			int err = ip6mr_cache_unresolved(mrt, vif, skb);
2077 			read_unlock(&mrt_lock);
2078 
2079 			return err;
2080 		}
2081 		read_unlock(&mrt_lock);
2082 		kfree_skb(skb);
2083 		return -ENODEV;
2084 	}
2085 
2086 	ip6_mr_forward(net, mrt, skb, cache);
2087 
2088 	read_unlock(&mrt_lock);
2089 
2090 	return 0;
2091 }
2092 
2093 
/*
 *	Append the incoming-interface and multipath (output interface)
 *	attributes describing cache entry @c to @skb and set
 *	rtm->rtm_type to RTN_MULTICAST.
 *
 *	Returns 1 on success, -ENOENT when the entry's parent index is not
 *	a valid interface index (>= MAXMIFS), or -EMSGSIZE when the skb
 *	runs out of room, in which case the skb is trimmed back to the
 *	tail it had on entry.
 */
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	u8 *b = skb_tail_pointer(skb);	/* remembered for rollback on failure */
	struct rtattr *mp_head;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mf6c_parent >= MAXMIFS)
		return -ENOENT;

	/* NOTE(review): RTA_PUT presumably jumps to rtattr_failure when the
	 * attribute does not fit - confirm against the macro definition.
	 */
	if (MIF_EXISTS(mrt, c->mf6c_parent))
		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);

	/* Reserve the RTA_MULTIPATH header; type and length are set below. */
	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	/* One rtnexthop per existing interface with a threshold below 255. */
	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	/* The attribute spans everything appended since mp_head. */
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}
2131 
2132 int ip6mr_get_route(struct net *net,
2133 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2134 {
2135 	int err;
2136 	struct mr6_table *mrt;
2137 	struct mfc6_cache *cache;
2138 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2139 
2140 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2141 	if (mrt == NULL)
2142 		return -ENOENT;
2143 
2144 	read_lock(&mrt_lock);
2145 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2146 
2147 	if (!cache) {
2148 		struct sk_buff *skb2;
2149 		struct ipv6hdr *iph;
2150 		struct net_device *dev;
2151 		int vif;
2152 
2153 		if (nowait) {
2154 			read_unlock(&mrt_lock);
2155 			return -EAGAIN;
2156 		}
2157 
2158 		dev = skb->dev;
2159 		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2160 			read_unlock(&mrt_lock);
2161 			return -ENODEV;
2162 		}
2163 
2164 		/* really correct? */
2165 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2166 		if (!skb2) {
2167 			read_unlock(&mrt_lock);
2168 			return -ENOMEM;
2169 		}
2170 
2171 		skb_reset_transport_header(skb2);
2172 
2173 		skb_put(skb2, sizeof(struct ipv6hdr));
2174 		skb_reset_network_header(skb2);
2175 
2176 		iph = ipv6_hdr(skb2);
2177 		iph->version = 0;
2178 		iph->priority = 0;
2179 		iph->flow_lbl[0] = 0;
2180 		iph->flow_lbl[1] = 0;
2181 		iph->flow_lbl[2] = 0;
2182 		iph->payload_len = 0;
2183 		iph->nexthdr = IPPROTO_NONE;
2184 		iph->hop_limit = 0;
2185 		iph->saddr = rt->rt6i_src.addr;
2186 		iph->daddr = rt->rt6i_dst.addr;
2187 
2188 		err = ip6mr_cache_unresolved(mrt, vif, skb2);
2189 		read_unlock(&mrt_lock);
2190 
2191 		return err;
2192 	}
2193 
2194 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2195 		cache->mfc_flags |= MFC_NOTIFY;
2196 
2197 	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2198 	read_unlock(&mrt_lock);
2199 	return err;
2200 }
2201 
2202 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2203 			     u32 pid, u32 seq, struct mfc6_cache *c)
2204 {
2205 	struct nlmsghdr *nlh;
2206 	struct rtmsg *rtm;
2207 
2208 	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2209 	if (nlh == NULL)
2210 		return -EMSGSIZE;
2211 
2212 	rtm = nlmsg_data(nlh);
2213 	rtm->rtm_family   = RTNL_FAMILY_IPMR;
2214 	rtm->rtm_dst_len  = 128;
2215 	rtm->rtm_src_len  = 128;
2216 	rtm->rtm_tos      = 0;
2217 	rtm->rtm_table    = mrt->id;
2218 	NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2219 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2220 	rtm->rtm_protocol = RTPROT_UNSPEC;
2221 	rtm->rtm_flags    = 0;
2222 
2223 	NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin);
2224 	NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp);
2225 
2226 	if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
2227 		goto nla_put_failure;
2228 
2229 	return nlmsg_end(skb, nlh);
2230 
2231 nla_put_failure:
2232 	nlmsg_cancel(skb, nlh);
2233 	return -EMSGSIZE;
2234 }
2235 
/*
 *	Netlink dump callback: emit one message per resolved cache entry
 *	of every table in the requesting socket's namespace.
 *
 *	The dump is resumable. cb->args[0..2] hold the table index, hash
 *	line and entry index within that line at which the previous call
 *	stopped; they are rewritten before returning. The walk stops as
 *	soon as ip6mr_fill_mroute() fails.
 *
 *	Returns skb->len.
 */
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr6_table *mrt;
	struct mfc6_cache *mfc;
	unsigned int t = 0, s_t;	/* current / starting table index */
	unsigned int h = 0, s_h;	/* current / starting hash line */
	unsigned int e = 0, s_e;	/* current / starting entry in line */

	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	read_lock(&mrt_lock);
	ip6mr_for_each_table(mrt, net) {
		/* Skip tables that were completely dumped by earlier calls. */
		if (t < s_t)
			goto next_table;
		/* Past the resume table: start from its first hash line. */
		if (t > s_t)
			s_h = 0;
		for (h = s_h; h < MFC6_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
				/* Skip entries already sent from the resume line. */
				if (e < s_e)
					goto next_entry;
				if (ip6mr_fill_mroute(mrt, skb,
						      NETLINK_CB(cb->skb).pid,
						      cb->nlh->nlmsg_seq,
						      mfc) < 0)
					goto done;
next_entry:
				e++;
			}
			/* The entry offset only applies to the resume line. */
			e = s_e = 0;
		}
		s_h = 0;
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);

	/* Save the cursor for the next invocation. */
	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}
2282