xref: /openbmc/linux/net/ipv6/ip6mr.c (revision 8ff374b9)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <asm/uaccess.h>
20 #include <linux/types.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
23 #include <linux/timer.h>
24 #include <linux/mm.h>
25 #include <linux/kernel.h>
26 #include <linux/fcntl.h>
27 #include <linux/stat.h>
28 #include <linux/socket.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/inetdevice.h>
32 #include <linux/proc_fs.h>
33 #include <linux/seq_file.h>
34 #include <linux/init.h>
35 #include <linux/slab.h>
36 #include <linux/compat.h>
37 #include <net/protocol.h>
38 #include <linux/skbuff.h>
39 #include <net/sock.h>
40 #include <net/raw.h>
41 #include <linux/notifier.h>
42 #include <linux/if_arp.h>
43 #include <net/checksum.h>
44 #include <net/netlink.h>
45 #include <net/fib_rules.h>
46 
47 #include <net/ipv6.h>
48 #include <net/ip6_route.h>
49 #include <linux/mroute6.h>
50 #include <linux/pim.h>
51 #include <net/addrconf.h>
52 #include <linux/netfilter_ipv6.h>
53 #include <linux/export.h>
54 #include <net/ip6_checksum.h>
55 #include <linux/netconf.h>
56 
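/*
 * Per-namespace IPv6 multicast routing table.  With
 * CONFIG_IPV6_MROUTE_MULTIPLE_TABLES there is one mr6_table per policy
 * routing table; otherwise a single default table is used.  Each table
 * holds the MIF (multicast interface) array, the hash of resolved MFC
 * cache entries, and the queue of unresolved entries together with its
 * expiry timer.
 */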
57 struct mr6_table {
58 	struct list_head	list;
59 #ifdef CONFIG_NET_NS
60 	struct net		*net;
61 #endif
62 	u32			id;
63 	struct sock		*mroute6_sk;
64 	struct timer_list	ipmr_expire_timer;
65 	struct list_head	mfc6_unres_queue;
66 	struct list_head	mfc6_cache_array[MFC6_LINES];
67 	struct mif_device	vif6_table[MAXMIFS];
68 	int			maxvif;
69 	atomic_t		cache_resolve_queue_len;
70 	bool			mroute_do_assert;
71 	bool			mroute_do_pim;
72 #ifdef CONFIG_IPV6_PIMSM_V2
73 	int			mroute_reg_vif_num;
74 #endif
75 };
76 
77 struct ip6mr_rule {
78 	struct fib_rule		common;
79 };
80 
81 struct ip6mr_result {
82 	struct mr6_table	*mrt;
83 };
84 
85 /* Big lock, protecting the vif table, the mrt cache and the mroute
86    socket state.  Note that all changes are serialized via rtnl_lock.
87  */
88 
89 static DEFINE_RWLOCK(mrt_lock);
90 
91 /*
92  *	Multicast router control variables
93  */
94 
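/* True iff MIF slot _idx in table _mrt is currently bound to a device */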
95 #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
96 
97 /* Special spinlock for queue of unresolved entries */
98 static DEFINE_SPINLOCK(mfc_unres_lock);
99 
100 /* We return to Alan's original scheme.  The hash table of resolved
101    entries is changed only in process context and protected by the
102    weak lock mrt_lock.  The queue of unresolved entries is protected
103    by the strong spinlock mfc_unres_lock.
104 
105    This keeps the data path entirely free of exclusive locks.
106  */
107 
108 static struct kmem_cache *mrt_cachep __read_mostly;
109 
110 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
111 static void ip6mr_free_table(struct mr6_table *mrt);
112 
113 static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
114 			   struct sk_buff *skb, struct mfc6_cache *cache);
115 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
116 			      mifi_t mifi, int assert);
117 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
118 			       struct mfc6_cache *c, struct rtmsg *rtm);
119 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
120 			      int cmd);
121 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
122 			       struct netlink_callback *cb);
123 static void mroute_clean_tables(struct mr6_table *mrt);
124 static void ipmr_expire_process(unsigned long arg);
125 
126 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
127 #define ip6mr_for_each_table(mrt, net) \
128 	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
129 
130 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
131 {
132 	struct mr6_table *mrt;
133 
134 	ip6mr_for_each_table(mrt, net) {
135 		if (mrt->id == id)
136 			return mrt;
137 	}
138 	return NULL;
139 }
140 
141 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
142 			    struct mr6_table **mrt)
143 {
144 	struct ip6mr_result res;
145 	struct fib_lookup_arg arg = { .result = &res, };
146 	int err;
147 
148 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
149 			       flowi6_to_flowi(flp6), 0, &arg);
150 	if (err < 0)
151 		return err;
152 	*mrt = res.mrt;
153 	return 0;
154 }
155 
156 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
157 			     int flags, struct fib_lookup_arg *arg)
158 {
159 	struct ip6mr_result *res = arg->result;
160 	struct mr6_table *mrt;
161 
162 	switch (rule->action) {
163 	case FR_ACT_TO_TBL:
164 		break;
165 	case FR_ACT_UNREACHABLE:
166 		return -ENETUNREACH;
167 	case FR_ACT_PROHIBIT:
168 		return -EACCES;
169 	case FR_ACT_BLACKHOLE:
170 	default:
171 		return -EINVAL;
172 	}
173 
174 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
175 	if (mrt == NULL)
176 		return -EAGAIN;
177 	res->mrt = mrt;
178 	return 0;
179 }
180 
181 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
182 {
183 	return 1;
184 }
185 
186 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
187 	FRA_GENERIC_POLICY,
188 };
189 
190 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
191 				struct fib_rule_hdr *frh, struct nlattr **tb)
192 {
193 	return 0;
194 }
195 
196 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
197 			      struct nlattr **tb)
198 {
199 	return 1;
200 }
201 
202 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
203 			   struct fib_rule_hdr *frh)
204 {
205 	frh->dst_len = 0;
206 	frh->src_len = 0;
207 	frh->tos     = 0;
208 	return 0;
209 }
210 
211 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
212 	.family		= RTNL_FAMILY_IP6MR,
213 	.rule_size	= sizeof(struct ip6mr_rule),
214 	.addr_size	= sizeof(struct in6_addr),
215 	.action		= ip6mr_rule_action,
216 	.match		= ip6mr_rule_match,
217 	.configure	= ip6mr_rule_configure,
218 	.compare	= ip6mr_rule_compare,
219 	.default_pref	= fib_default_rule_pref,
220 	.fill		= ip6mr_rule_fill,
221 	.nlgroup	= RTNLGRP_IPV6_RULE,
222 	.policy		= ip6mr_rule_policy,
223 	.owner		= THIS_MODULE,
224 };
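/*
 * Table selection mirrors unicast policy routing: ip6mr_fib_lookup()
 * walks the RTNL_FAMILY_IP6MR rules via fib_rules_lookup(), and
 * ip6mr_rule_action() maps a matching FR_ACT_TO_TBL rule to its
 * mr6_table.  ip6mr_rules_init() installs a default rule pointing at
 * RT6_TABLE_DFLT, so the single-table behaviour is preserved by default.
 */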
225 
226 static int __net_init ip6mr_rules_init(struct net *net)
227 {
228 	struct fib_rules_ops *ops;
229 	struct mr6_table *mrt;
230 	int err;
231 
232 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
233 	if (IS_ERR(ops))
234 		return PTR_ERR(ops);
235 
236 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
237 
238 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
239 	if (mrt == NULL) {
240 		err = -ENOMEM;
241 		goto err1;
242 	}
243 
244 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
245 	if (err < 0)
246 		goto err2;
247 
248 	net->ipv6.mr6_rules_ops = ops;
249 	return 0;
250 
251 err2:
252 	kfree(mrt);
253 err1:
254 	fib_rules_unregister(ops);
255 	return err;
256 }
257 
258 static void __net_exit ip6mr_rules_exit(struct net *net)
259 {
260 	struct mr6_table *mrt, *next;
261 
262 	rtnl_lock();
263 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
264 		list_del(&mrt->list);
265 		ip6mr_free_table(mrt);
266 	}
267 	rtnl_unlock();
268 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
269 }
270 #else
271 #define ip6mr_for_each_table(mrt, net) \
272 	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
273 
274 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
275 {
276 	return net->ipv6.mrt6;
277 }
278 
279 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
280 			    struct mr6_table **mrt)
281 {
282 	*mrt = net->ipv6.mrt6;
283 	return 0;
284 }
285 
286 static int __net_init ip6mr_rules_init(struct net *net)
287 {
288 	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
289 	return net->ipv6.mrt6 ? 0 : -ENOMEM;
290 }
291 
292 static void __net_exit ip6mr_rules_exit(struct net *net)
293 {
294 	rtnl_lock();
295 	ip6mr_free_table(net->ipv6.mrt6);
296 	net->ipv6.mrt6 = NULL;
297 	rtnl_unlock();
298 }
299 #endif
300 
301 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
302 {
303 	struct mr6_table *mrt;
304 	unsigned int i;
305 
306 	mrt = ip6mr_get_table(net, id);
307 	if (mrt != NULL)
308 		return mrt;
309 
310 	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
311 	if (mrt == NULL)
312 		return NULL;
313 	mrt->id = id;
314 	write_pnet(&mrt->net, net);
315 
316 	/* Forwarding cache */
317 	for (i = 0; i < MFC6_LINES; i++)
318 		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
319 
320 	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
321 
322 	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
323 		    (unsigned long)mrt);
324 
325 #ifdef CONFIG_IPV6_PIMSM_V2
326 	mrt->mroute_reg_vif_num = -1;
327 #endif
328 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
329 	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
330 #endif
331 	return mrt;
332 }
333 
334 static void ip6mr_free_table(struct mr6_table *mrt)
335 {
336 	del_timer(&mrt->ipmr_expire_timer);
337 	mroute_clean_tables(mrt);
338 	kfree(mrt);
339 }
340 
341 #ifdef CONFIG_PROC_FS
342 
343 struct ipmr_mfc_iter {
344 	struct seq_net_private p;
345 	struct mr6_table *mrt;
346 	struct list_head *cache;
347 	int ct;
348 };
349 
350 
351 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
352 					   struct ipmr_mfc_iter *it, loff_t pos)
353 {
354 	struct mr6_table *mrt = it->mrt;
355 	struct mfc6_cache *mfc;
356 
357 	read_lock(&mrt_lock);
358 	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
359 		it->cache = &mrt->mfc6_cache_array[it->ct];
360 		list_for_each_entry(mfc, it->cache, list)
361 			if (pos-- == 0)
362 				return mfc;
363 	}
364 	read_unlock(&mrt_lock);
365 
366 	spin_lock_bh(&mfc_unres_lock);
367 	it->cache = &mrt->mfc6_unres_queue;
368 	list_for_each_entry(mfc, it->cache, list)
369 		if (pos-- == 0)
370 			return mfc;
371 	spin_unlock_bh(&mfc_unres_lock);
372 
373 	it->cache = NULL;
374 	return NULL;
375 }
376 
377 /*
378  *	The /proc interfaces to multicast routing: /proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
379  */
380 
381 struct ipmr_vif_iter {
382 	struct seq_net_private p;
383 	struct mr6_table *mrt;
384 	int ct;
385 };
386 
387 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
388 					    struct ipmr_vif_iter *iter,
389 					    loff_t pos)
390 {
391 	struct mr6_table *mrt = iter->mrt;
392 
393 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
394 		if (!MIF_EXISTS(mrt, iter->ct))
395 			continue;
396 		if (pos-- == 0)
397 			return &mrt->vif6_table[iter->ct];
398 	}
399 	return NULL;
400 }
401 
402 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
403 	__acquires(mrt_lock)
404 {
405 	struct ipmr_vif_iter *iter = seq->private;
406 	struct net *net = seq_file_net(seq);
407 	struct mr6_table *mrt;
408 
409 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
410 	if (mrt == NULL)
411 		return ERR_PTR(-ENOENT);
412 
413 	iter->mrt = mrt;
414 
415 	read_lock(&mrt_lock);
416 	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
417 		: SEQ_START_TOKEN;
418 }
419 
420 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
421 {
422 	struct ipmr_vif_iter *iter = seq->private;
423 	struct net *net = seq_file_net(seq);
424 	struct mr6_table *mrt = iter->mrt;
425 
426 	++*pos;
427 	if (v == SEQ_START_TOKEN)
428 		return ip6mr_vif_seq_idx(net, iter, 0);
429 
430 	while (++iter->ct < mrt->maxvif) {
431 		if (!MIF_EXISTS(mrt, iter->ct))
432 			continue;
433 		return &mrt->vif6_table[iter->ct];
434 	}
435 	return NULL;
436 }
437 
438 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
439 	__releases(mrt_lock)
440 {
441 	read_unlock(&mrt_lock);
442 }
443 
444 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
445 {
446 	struct ipmr_vif_iter *iter = seq->private;
447 	struct mr6_table *mrt = iter->mrt;
448 
449 	if (v == SEQ_START_TOKEN) {
450 		seq_puts(seq,
451 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
452 	} else {
453 		const struct mif_device *vif = v;
454 		const char *name = vif->dev ? vif->dev->name : "none";
455 
456 		seq_printf(seq,
457 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
458 			   vif - mrt->vif6_table,
459 			   name, vif->bytes_in, vif->pkt_in,
460 			   vif->bytes_out, vif->pkt_out,
461 			   vif->flags);
462 	}
463 	return 0;
464 }
465 
466 static const struct seq_operations ip6mr_vif_seq_ops = {
467 	.start = ip6mr_vif_seq_start,
468 	.next  = ip6mr_vif_seq_next,
469 	.stop  = ip6mr_vif_seq_stop,
470 	.show  = ip6mr_vif_seq_show,
471 };
472 
473 static int ip6mr_vif_open(struct inode *inode, struct file *file)
474 {
475 	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
476 			    sizeof(struct ipmr_vif_iter));
477 }
478 
479 static const struct file_operations ip6mr_vif_fops = {
480 	.owner	 = THIS_MODULE,
481 	.open    = ip6mr_vif_open,
482 	.read    = seq_read,
483 	.llseek  = seq_lseek,
484 	.release = seq_release_net,
485 };
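/*
 * Illustrative /proc/net/ip6_mr_vif output (the values are made up):
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0           1504       2       1504       2 00000
 */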
486 
487 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
488 {
489 	struct ipmr_mfc_iter *it = seq->private;
490 	struct net *net = seq_file_net(seq);
491 	struct mr6_table *mrt;
492 
493 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
494 	if (mrt == NULL)
495 		return ERR_PTR(-ENOENT);
496 
497 	it->mrt = mrt;
498 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
499 		: SEQ_START_TOKEN;
500 }
501 
502 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
503 {
504 	struct mfc6_cache *mfc = v;
505 	struct ipmr_mfc_iter *it = seq->private;
506 	struct net *net = seq_file_net(seq);
507 	struct mr6_table *mrt = it->mrt;
508 
509 	++*pos;
510 
511 	if (v == SEQ_START_TOKEN)
512 		return ipmr_mfc_seq_idx(net, seq->private, 0);
513 
514 	if (mfc->list.next != it->cache)
515 		return list_entry(mfc->list.next, struct mfc6_cache, list);
516 
517 	if (it->cache == &mrt->mfc6_unres_queue)
518 		goto end_of_list;
519 
520 	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
521 
522 	while (++it->ct < MFC6_LINES) {
523 		it->cache = &mrt->mfc6_cache_array[it->ct];
524 		if (list_empty(it->cache))
525 			continue;
526 		return list_first_entry(it->cache, struct mfc6_cache, list);
527 	}
528 
529 	/* exhausted cache_array, show unresolved */
530 	read_unlock(&mrt_lock);
531 	it->cache = &mrt->mfc6_unres_queue;
532 	it->ct = 0;
533 
534 	spin_lock_bh(&mfc_unres_lock);
535 	if (!list_empty(it->cache))
536 		return list_first_entry(it->cache, struct mfc6_cache, list);
537 
538  end_of_list:
539 	spin_unlock_bh(&mfc_unres_lock);
540 	it->cache = NULL;
541 
542 	return NULL;
543 }
544 
545 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
546 {
547 	struct ipmr_mfc_iter *it = seq->private;
548 	struct mr6_table *mrt = it->mrt;
549 
550 	if (it->cache == &mrt->mfc6_unres_queue)
551 		spin_unlock_bh(&mfc_unres_lock);
552 	else if (it->cache == mrt->mfc6_cache_array)
553 		read_unlock(&mrt_lock);
554 }
555 
556 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
557 {
558 	int n;
559 
560 	if (v == SEQ_START_TOKEN) {
561 		seq_puts(seq,
562 			 "Group                            "
563 			 "Origin                           "
564 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
565 	} else {
566 		const struct mfc6_cache *mfc = v;
567 		const struct ipmr_mfc_iter *it = seq->private;
568 		struct mr6_table *mrt = it->mrt;
569 
570 		seq_printf(seq, "%pI6 %pI6 %-3hd",
571 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
572 			   mfc->mf6c_parent);
573 
574 		if (it->cache != &mrt->mfc6_unres_queue) {
575 			seq_printf(seq, " %8lu %8lu %8lu",
576 				   mfc->mfc_un.res.pkt,
577 				   mfc->mfc_un.res.bytes,
578 				   mfc->mfc_un.res.wrong_if);
579 			for (n = mfc->mfc_un.res.minvif;
580 			     n < mfc->mfc_un.res.maxvif; n++) {
581 				if (MIF_EXISTS(mrt, n) &&
582 				    mfc->mfc_un.res.ttls[n] < 255)
583 					seq_printf(seq,
584 						   " %2d:%-3d",
585 						   n, mfc->mfc_un.res.ttls[n]);
586 			}
587 		} else {
588 			/* unresolved mfc_caches don't contain
589 			 * pkt, bytes and wrong_if values
590 			 */
591 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
592 		}
593 		seq_putc(seq, '\n');
594 	}
595 	return 0;
596 }
597 
598 static const struct seq_operations ipmr_mfc_seq_ops = {
599 	.start = ipmr_mfc_seq_start,
600 	.next  = ipmr_mfc_seq_next,
601 	.stop  = ipmr_mfc_seq_stop,
602 	.show  = ipmr_mfc_seq_show,
603 };
604 
605 static int ipmr_mfc_open(struct inode *inode, struct file *file)
606 {
607 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
608 			    sizeof(struct ipmr_mfc_iter));
609 }
610 
611 static const struct file_operations ip6mr_mfc_fops = {
612 	.owner	 = THIS_MODULE,
613 	.open    = ipmr_mfc_open,
614 	.read    = seq_read,
615 	.llseek  = seq_lseek,
616 	.release = seq_release_net,
617 };
618 #endif
619 
620 #ifdef CONFIG_IPV6_PIMSM_V2
621 
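/*
 * Handle an incoming PIM REGISTER message: validate the register header
 * and checksum, then decapsulate the inner multicast packet and re-inject
 * it via the pim6reg pseudo-device so it is forwarded like native traffic.
 */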
622 static int pim6_rcv(struct sk_buff *skb)
623 {
624 	struct pimreghdr *pim;
625 	struct ipv6hdr   *encap;
626 	struct net_device  *reg_dev = NULL;
627 	struct net *net = dev_net(skb->dev);
628 	struct mr6_table *mrt;
629 	struct flowi6 fl6 = {
630 		.flowi6_iif	= skb->dev->ifindex,
631 		.flowi6_mark	= skb->mark,
632 	};
633 	int reg_vif_num;
634 
635 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
636 		goto drop;
637 
638 	pim = (struct pimreghdr *)skb_transport_header(skb);
639 	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
640 	    (pim->flags & PIM_NULL_REGISTER) ||
641 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
642 			     sizeof(*pim), IPPROTO_PIM,
643 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
644 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
645 		goto drop;
646 
647 	/* Check whether the inner packet is destined to a multicast group */
648 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
649 				   sizeof(*pim));
650 
651 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
652 	    encap->payload_len == 0 ||
653 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
654 		goto drop;
655 
656 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
657 		goto drop;
658 	reg_vif_num = mrt->mroute_reg_vif_num;
659 
660 	read_lock(&mrt_lock);
661 	if (reg_vif_num >= 0)
662 		reg_dev = mrt->vif6_table[reg_vif_num].dev;
663 	if (reg_dev)
664 		dev_hold(reg_dev);
665 	read_unlock(&mrt_lock);
666 
667 	if (reg_dev == NULL)
668 		goto drop;
669 
670 	skb->mac_header = skb->network_header;
671 	skb_pull(skb, (u8 *)encap - skb->data);
672 	skb_reset_network_header(skb);
673 	skb->protocol = htons(ETH_P_IPV6);
674 	skb->ip_summed = CHECKSUM_NONE;
675 
676 	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
677 
678 	netif_rx(skb);
679 
680 	dev_put(reg_dev);
681 	return 0;
682  drop:
683 	kfree_skb(skb);
684 	return 0;
685 }
686 
687 static const struct inet6_protocol pim6_protocol = {
688 	.handler	=	pim6_rcv,
689 };
690 
691 /* Service routines creating virtual interfaces: PIMREG */
692 
693 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
694 				      struct net_device *dev)
695 {
696 	struct net *net = dev_net(dev);
697 	struct mr6_table *mrt;
698 	struct flowi6 fl6 = {
699 		.flowi6_oif	= dev->ifindex,
700 		.flowi6_iif	= skb->skb_iif,
701 		.flowi6_mark	= skb->mark,
702 	};
703 	int err;
704 
705 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
706 	if (err < 0) {
707 		kfree_skb(skb);
708 		return err;
709 	}
710 
711 	read_lock(&mrt_lock);
712 	dev->stats.tx_bytes += skb->len;
713 	dev->stats.tx_packets++;
714 	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
715 	read_unlock(&mrt_lock);
716 	kfree_skb(skb);
717 	return NETDEV_TX_OK;
718 }
719 
720 static const struct net_device_ops reg_vif_netdev_ops = {
721 	.ndo_start_xmit	= reg_vif_xmit,
722 };
723 
724 static void reg_vif_setup(struct net_device *dev)
725 {
726 	dev->type		= ARPHRD_PIMREG;
727 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
728 	dev->flags		= IFF_NOARP;
729 	dev->netdev_ops		= &reg_vif_netdev_ops;
730 	dev->destructor		= free_netdev;
731 	dev->features		|= NETIF_F_NETNS_LOCAL;
732 }
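/* The MTU above leaves room for an outer IPv6 header plus the 8-byte PIM
 * register header (sizeof(struct pimreghdr)) added on the encapsulation
 * path.
 */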
733 
734 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
735 {
736 	struct net_device *dev;
737 	char name[IFNAMSIZ];
738 
739 	if (mrt->id == RT6_TABLE_DFLT)
740 		sprintf(name, "pim6reg");
741 	else
742 		sprintf(name, "pim6reg%u", mrt->id);
743 
744 	dev = alloc_netdev(0, name, reg_vif_setup);
745 	if (dev == NULL)
746 		return NULL;
747 
748 	dev_net_set(dev, net);
749 
750 	if (register_netdevice(dev)) {
751 		free_netdev(dev);
752 		return NULL;
753 	}
754 	dev->iflink = 0;
755 
756 	if (dev_open(dev))
757 		goto failure;
758 
759 	dev_hold(dev);
760 	return dev;
761 
762 failure:
763 	/* allow the registration to complete before unregistering. */
764 	rtnl_unlock();
765 	rtnl_lock();
766 
767 	unregister_netdevice(dev);
768 	return NULL;
769 }
770 #endif
771 
772 /*
773  *	Delete a VIF entry
774  */
775 
776 static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
777 {
778 	struct mif_device *v;
779 	struct net_device *dev;
780 	struct inet6_dev *in6_dev;
781 
782 	if (vifi < 0 || vifi >= mrt->maxvif)
783 		return -EADDRNOTAVAIL;
784 
785 	v = &mrt->vif6_table[vifi];
786 
787 	write_lock_bh(&mrt_lock);
788 	dev = v->dev;
789 	v->dev = NULL;
790 
791 	if (!dev) {
792 		write_unlock_bh(&mrt_lock);
793 		return -EADDRNOTAVAIL;
794 	}
795 
796 #ifdef CONFIG_IPV6_PIMSM_V2
797 	if (vifi == mrt->mroute_reg_vif_num)
798 		mrt->mroute_reg_vif_num = -1;
799 #endif
800 
801 	if (vifi + 1 == mrt->maxvif) {
802 		int tmp;
803 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
804 			if (MIF_EXISTS(mrt, tmp))
805 				break;
806 		}
807 		mrt->maxvif = tmp + 1;
808 	}
809 
810 	write_unlock_bh(&mrt_lock);
811 
812 	dev_set_allmulti(dev, -1);
813 
814 	in6_dev = __in6_dev_get(dev);
815 	if (in6_dev) {
816 		in6_dev->cnf.mc_forwarding--;
817 		inet6_netconf_notify_devconf(dev_net(dev),
818 					     NETCONFA_MC_FORWARDING,
819 					     dev->ifindex, &in6_dev->cnf);
820 	}
821 
822 	if (v->flags & MIFF_REGISTER)
823 		unregister_netdevice_queue(dev, head);
824 
825 	dev_put(dev);
826 	return 0;
827 }
828 
829 static inline void ip6mr_cache_free(struct mfc6_cache *c)
830 {
831 	kmem_cache_free(mrt_cachep, c);
832 }
833 
834 /* Destroy an unresolved cache entry, killing queued skbs
835    and reporting an error to netlink readers.
836  */
837 
838 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
839 {
840 	struct net *net = read_pnet(&mrt->net);
841 	struct sk_buff *skb;
842 
843 	atomic_dec(&mrt->cache_resolve_queue_len);
844 
845 	while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
846 		if (ipv6_hdr(skb)->version == 0) {
847 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
848 			nlh->nlmsg_type = NLMSG_ERROR;
849 			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
850 			skb_trim(skb, nlh->nlmsg_len);
851 			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
852 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
853 		} else
854 			kfree_skb(skb);
855 	}
856 
857 	ip6mr_cache_free(c);
858 }
859 
860 
861 /* Timer handler for the unresolved queue. */
862 
863 static void ipmr_do_expire_process(struct mr6_table *mrt)
864 {
865 	unsigned long now = jiffies;
866 	unsigned long expires = 10 * HZ;
867 	struct mfc6_cache *c, *next;
868 
869 	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
870 		if (time_after(c->mfc_un.unres.expires, now)) {
871 			/* not yet... */
872 			unsigned long interval = c->mfc_un.unres.expires - now;
873 			if (interval < expires)
874 				expires = interval;
875 			continue;
876 		}
877 
878 		list_del(&c->list);
879 		mr6_netlink_event(mrt, c, RTM_DELROUTE);
880 		ip6mr_destroy_unres(mrt, c);
881 	}
882 
883 	if (!list_empty(&mrt->mfc6_unres_queue))
884 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
885 }
886 
887 static void ipmr_expire_process(unsigned long arg)
888 {
889 	struct mr6_table *mrt = (struct mr6_table *)arg;
890 
891 	if (!spin_trylock(&mfc_unres_lock)) {
892 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
893 		return;
894 	}
895 
896 	if (!list_empty(&mrt->mfc6_unres_queue))
897 		ipmr_do_expire_process(mrt);
898 
899 	spin_unlock(&mfc_unres_lock);
900 }
901 
902 /* Fill the oif list.  Called with mrt_lock held for writing. */
903 
904 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
905 				    unsigned char *ttls)
906 {
907 	int vifi;
908 
909 	cache->mfc_un.res.minvif = MAXMIFS;
910 	cache->mfc_un.res.maxvif = 0;
911 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
912 
913 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
914 		if (MIF_EXISTS(mrt, vifi) &&
915 		    ttls[vifi] && ttls[vifi] < 255) {
916 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
917 			if (cache->mfc_un.res.minvif > vifi)
918 				cache->mfc_un.res.minvif = vifi;
919 			if (cache->mfc_un.res.maxvif <= vifi)
920 				cache->mfc_un.res.maxvif = vifi + 1;
921 		}
922 	}
923 }
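/* After this, ttls[vifi] < 255 marks vifi as an output interface, and
 * [minvif, maxvif) bounds the range the forwarding loop needs to scan.
 */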
924 
925 static int mif6_add(struct net *net, struct mr6_table *mrt,
926 		    struct mif6ctl *vifc, int mrtsock)
927 {
928 	int vifi = vifc->mif6c_mifi;
929 	struct mif_device *v = &mrt->vif6_table[vifi];
930 	struct net_device *dev;
931 	struct inet6_dev *in6_dev;
932 	int err;
933 
934 	/* Is the vif busy? */
935 	if (MIF_EXISTS(mrt, vifi))
936 		return -EADDRINUSE;
937 
938 	switch (vifc->mif6c_flags) {
939 #ifdef CONFIG_IPV6_PIMSM_V2
940 	case MIFF_REGISTER:
941 		/*
942 		 * Special-purpose VIF in PIM:
943 		 * all packets will be sent to the daemon.
944 		 */
945 		if (mrt->mroute_reg_vif_num >= 0)
946 			return -EADDRINUSE;
947 		dev = ip6mr_reg_vif(net, mrt);
948 		if (!dev)
949 			return -ENOBUFS;
950 		err = dev_set_allmulti(dev, 1);
951 		if (err) {
952 			unregister_netdevice(dev);
953 			dev_put(dev);
954 			return err;
955 		}
956 		break;
957 #endif
958 	case 0:
959 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
960 		if (!dev)
961 			return -EADDRNOTAVAIL;
962 		err = dev_set_allmulti(dev, 1);
963 		if (err) {
964 			dev_put(dev);
965 			return err;
966 		}
967 		break;
968 	default:
969 		return -EINVAL;
970 	}
971 
972 	in6_dev = __in6_dev_get(dev);
973 	if (in6_dev) {
974 		in6_dev->cnf.mc_forwarding++;
975 		inet6_netconf_notify_devconf(dev_net(dev),
976 					     NETCONFA_MC_FORWARDING,
977 					     dev->ifindex, &in6_dev->cnf);
978 	}
979 
980 	/*
981 	 *	Fill in the VIF structures
982 	 */
983 	v->rate_limit = vifc->vifc_rate_limit;
984 	v->flags = vifc->mif6c_flags;
985 	if (!mrtsock)
986 		v->flags |= VIFF_STATIC;
987 	v->threshold = vifc->vifc_threshold;
988 	v->bytes_in = 0;
989 	v->bytes_out = 0;
990 	v->pkt_in = 0;
991 	v->pkt_out = 0;
992 	v->link = dev->ifindex;
993 	if (v->flags & MIFF_REGISTER)
994 		v->link = dev->iflink;
995 
996 	/* And finish the update by writing the critical data */
997 	write_lock_bh(&mrt_lock);
998 	v->dev = dev;
999 #ifdef CONFIG_IPV6_PIMSM_V2
1000 	if (v->flags & MIFF_REGISTER)
1001 		mrt->mroute_reg_vif_num = vifi;
1002 #endif
1003 	if (vifi + 1 > mrt->maxvif)
1004 		mrt->maxvif = vifi + 1;
1005 	write_unlock_bh(&mrt_lock);
1006 	return 0;
1007 }
1008 
1009 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
1010 					   const struct in6_addr *origin,
1011 					   const struct in6_addr *mcastgrp)
1012 {
1013 	int line = MFC6_HASH(mcastgrp, origin);
1014 	struct mfc6_cache *c;
1015 
1016 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1017 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1018 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1019 			return c;
1020 	}
1021 	return NULL;
1022 }
1023 
1024 /* Look for a (*,*,oif) entry */
1025 static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
1026 						      mifi_t mifi)
1027 {
1028 	int line = MFC6_HASH(&in6addr_any, &in6addr_any);
1029 	struct mfc6_cache *c;
1030 
1031 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1032 		if (ipv6_addr_any(&c->mf6c_origin) &&
1033 		    ipv6_addr_any(&c->mf6c_mcastgrp) &&
1034 		    (c->mfc_un.res.ttls[mifi] < 255))
1035 			return c;
1036 
1037 	return NULL;
1038 }
1039 
1040 /* Look for a (*,G) entry */
1041 static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
1042 					       struct in6_addr *mcastgrp,
1043 					       mifi_t mifi)
1044 {
1045 	int line = MFC6_HASH(mcastgrp, &in6addr_any);
1046 	struct mfc6_cache *c, *proxy;
1047 
1048 	if (ipv6_addr_any(mcastgrp))
1049 		goto skip;
1050 
1051 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1052 		if (ipv6_addr_any(&c->mf6c_origin) &&
1053 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
1054 			if (c->mfc_un.res.ttls[mifi] < 255)
1055 				return c;
1056 
1057 			/* It's ok if the mifi is part of the static tree */
1058 			proxy = ip6mr_cache_find_any_parent(mrt,
1059 							    c->mf6c_parent);
1060 			if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
1061 				return c;
1062 		}
1063 
1064 skip:
1065 	return ip6mr_cache_find_any_parent(mrt, mifi);
1066 }
1067 
1068 /*
1069  *	Allocate a multicast cache entry
1070  */
1071 static struct mfc6_cache *ip6mr_cache_alloc(void)
1072 {
1073 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1074 	if (c == NULL)
1075 		return NULL;
1076 	c->mfc_un.res.minvif = MAXMIFS;
1077 	return c;
1078 }
1079 
1080 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1081 {
1082 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1083 	if (c == NULL)
1084 		return NULL;
1085 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
1086 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
1087 	return c;
1088 }
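/* Note: the unresolved variant allocates with GFP_ATOMIC because it is
 * called from the packet receive path; ip6mr_cache_alloc() runs in
 * process context and may sleep.
 */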
1089 
1090 /*
1091  *	A cache entry has moved from the unresolved queue to the resolved state
1092  */
1093 
1094 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1095 				struct mfc6_cache *uc, struct mfc6_cache *c)
1096 {
1097 	struct sk_buff *skb;
1098 
1099 	/*
1100 	 *	Play the pending entries through our router
1101 	 */
1102 
1103 	while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1104 		if (ipv6_hdr(skb)->version == 0) {
1105 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1106 
1107 			if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
1108 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1109 			} else {
1110 				nlh->nlmsg_type = NLMSG_ERROR;
1111 				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1112 				skb_trim(skb, nlh->nlmsg_len);
1113 				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1114 			}
1115 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1116 		} else
1117 			ip6_mr_forward(net, mrt, skb, c);
1118 	}
1119 }
1120 
1121 /*
1122  *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1123  *	expects the following bizarre scheme.
1124  *
1125  *	Called under mrt_lock.
1126  */
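/*
 * The upcall is delivered as a fake IPv6 packet whose payload is a
 * struct mrt6msg (<linux/mroute6.h>): im6_mbz (must be zero),
 * im6_msgtype (MRT6MSG_NOCACHE, MRT6MSG_WRONGMIF or MRT6MSG_WHOLEPKT),
 * im6_mif, im6_pad and the im6_src/im6_dst addresses, filled in below.
 */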
1127 
1128 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1129 			      mifi_t mifi, int assert)
1130 {
1131 	struct sk_buff *skb;
1132 	struct mrt6msg *msg;
1133 	int ret;
1134 
1135 #ifdef CONFIG_IPV6_PIMSM_V2
1136 	if (assert == MRT6MSG_WHOLEPKT)
1137 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1138 						+sizeof(*msg));
1139 	else
1140 #endif
1141 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1142 
1143 	if (!skb)
1144 		return -ENOBUFS;
1145 
1146 	/* I suppose that internal messages
1147 	 * do not require checksums */
1148 
1149 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1150 
1151 #ifdef CONFIG_IPV6_PIMSM_V2
1152 	if (assert == MRT6MSG_WHOLEPKT) {
1153 		/* Ugly, but we have no choice with this interface.
1154 		   Duplicate old header, fix length etc.
1155 		   And all this only to mangle msg->im6_msgtype and
1156 		   to set msg->im6_mbz to "mbz" :-)
1157 		 */
1158 		skb_push(skb, -skb_network_offset(pkt));
1159 
1160 		skb_push(skb, sizeof(*msg));
1161 		skb_reset_transport_header(skb);
1162 		msg = (struct mrt6msg *)skb_transport_header(skb);
1163 		msg->im6_mbz = 0;
1164 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1165 		msg->im6_mif = mrt->mroute_reg_vif_num;
1166 		msg->im6_pad = 0;
1167 		msg->im6_src = ipv6_hdr(pkt)->saddr;
1168 		msg->im6_dst = ipv6_hdr(pkt)->daddr;
1169 
1170 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1171 	} else
1172 #endif
1173 	{
1174 	/*
1175 	 *	Copy the IP header
1176 	 */
1177 
1178 	skb_put(skb, sizeof(struct ipv6hdr));
1179 	skb_reset_network_header(skb);
1180 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1181 
1182 	/*
1183 	 *	Add our header
1184 	 */
1185 	skb_put(skb, sizeof(*msg));
1186 	skb_reset_transport_header(skb);
1187 	msg = (struct mrt6msg *)skb_transport_header(skb);
1188 
1189 	msg->im6_mbz = 0;
1190 	msg->im6_msgtype = assert;
1191 	msg->im6_mif = mifi;
1192 	msg->im6_pad = 0;
1193 	msg->im6_src = ipv6_hdr(pkt)->saddr;
1194 	msg->im6_dst = ipv6_hdr(pkt)->daddr;
1195 
1196 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1197 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1198 	}
1199 
1200 	if (mrt->mroute6_sk == NULL) {
1201 		kfree_skb(skb);
1202 		return -EINVAL;
1203 	}
1204 
1205 	/*
1206 	 *	Deliver to user space multicast routing algorithms
1207 	 */
1208 	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1209 	if (ret < 0) {
1210 		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1211 		kfree_skb(skb);
1212 	}
1213 
1214 	return ret;
1215 }
1216 
1217 /*
1218  *	Queue a packet for resolution. The packet is attached to a locked cache entry!
1219  */
1220 
1221 static int
1222 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1223 {
1224 	bool found = false;
1225 	int err;
1226 	struct mfc6_cache *c;
1227 
1228 	spin_lock_bh(&mfc_unres_lock);
1229 	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1230 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1231 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1232 			found = true;
1233 			break;
1234 		}
1235 	}
1236 
1237 	if (!found) {
1238 		/*
1239 		 *	Create a new entry if allowable
1240 		 */
1241 
1242 		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1243 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
1244 			spin_unlock_bh(&mfc_unres_lock);
1245 
1246 			kfree_skb(skb);
1247 			return -ENOBUFS;
1248 		}
1249 
1250 		/*
1251 		 *	Fill in the new cache entry
1252 		 */
1253 		c->mf6c_parent = -1;
1254 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1255 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1256 
1257 		/*
1258 		 *	Reflect the first packet up to pim6sd
1259 		 */
1260 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1261 		if (err < 0) {
1262 			/* If the report failed, throw the cache entry
1263 			   out - Brad Parker
1264 			 */
1265 			spin_unlock_bh(&mfc_unres_lock);
1266 
1267 			ip6mr_cache_free(c);
1268 			kfree_skb(skb);
1269 			return err;
1270 		}
1271 
1272 		atomic_inc(&mrt->cache_resolve_queue_len);
1273 		list_add(&c->list, &mrt->mfc6_unres_queue);
1274 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1275 
1276 		ipmr_do_expire_process(mrt);
1277 	}
1278 
1279 	/*
1280 	 *	See if we can append the packet
1281 	 */
1282 	if (c->mfc_un.unres.unresolved.qlen > 3) {
1283 		kfree_skb(skb);
1284 		err = -ENOBUFS;
1285 	} else {
1286 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1287 		err = 0;
1288 	}
1289 
1290 	spin_unlock_bh(&mfc_unres_lock);
1291 	return err;
1292 }
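/* Two hard limits apply above: at most 10 unresolved entries per table,
 * and at most 4 skbs queued on any single unresolved entry.
 */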
1293 
1294 /*
1295  *	MFC6 cache manipulation by user space
1296  */
1297 
1298 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
1299 			    int parent)
1300 {
1301 	int line;
1302 	struct mfc6_cache *c, *next;
1303 
1304 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1305 
1306 	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1307 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1308 		    ipv6_addr_equal(&c->mf6c_mcastgrp,
1309 				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1310 		    (parent == -1 || parent == c->mf6c_parent)) {
1311 			write_lock_bh(&mrt_lock);
1312 			list_del(&c->list);
1313 			write_unlock_bh(&mrt_lock);
1314 
1315 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1316 			ip6mr_cache_free(c);
1317 			return 0;
1318 		}
1319 	}
1320 	return -ENOENT;
1321 }
1322 
1323 static int ip6mr_device_event(struct notifier_block *this,
1324 			      unsigned long event, void *ptr)
1325 {
1326 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1327 	struct net *net = dev_net(dev);
1328 	struct mr6_table *mrt;
1329 	struct mif_device *v;
1330 	int ct;
1331 	LIST_HEAD(list);
1332 
1333 	if (event != NETDEV_UNREGISTER)
1334 		return NOTIFY_DONE;
1335 
1336 	ip6mr_for_each_table(mrt, net) {
1337 		v = &mrt->vif6_table[0];
1338 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1339 			if (v->dev == dev)
1340 				mif6_delete(mrt, ct, &list);
1341 		}
1342 	}
1343 	unregister_netdevice_many(&list);
1344 
1345 	return NOTIFY_DONE;
1346 }
1347 
1348 static struct notifier_block ip6_mr_notifier = {
1349 	.notifier_call = ip6mr_device_event
1350 };
1351 
1352 /*
1353  *	Setup for IPv6 multicast routing
1354  */
1355 
1356 static int __net_init ip6mr_net_init(struct net *net)
1357 {
1358 	int err;
1359 
1360 	err = ip6mr_rules_init(net);
1361 	if (err < 0)
1362 		goto fail;
1363 
1364 #ifdef CONFIG_PROC_FS
1365 	err = -ENOMEM;
1366 	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
1367 		goto proc_vif_fail;
1368 	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
1369 		goto proc_cache_fail;
1370 #endif
1371 
1372 	return 0;
1373 
1374 #ifdef CONFIG_PROC_FS
1375 proc_cache_fail:
1376 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1377 proc_vif_fail:
1378 	ip6mr_rules_exit(net);
1379 #endif
1380 fail:
1381 	return err;
1382 }
1383 
1384 static void __net_exit ip6mr_net_exit(struct net *net)
1385 {
1386 #ifdef CONFIG_PROC_FS
1387 	remove_proc_entry("ip6_mr_cache", net->proc_net);
1388 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1389 #endif
1390 	ip6mr_rules_exit(net);
1391 }
1392 
1393 static struct pernet_operations ip6mr_net_ops = {
1394 	.init = ip6mr_net_init,
1395 	.exit = ip6mr_net_exit,
1396 };
1397 
1398 int __init ip6_mr_init(void)
1399 {
1400 	int err;
1401 
1402 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1403 				       sizeof(struct mfc6_cache),
1404 				       0, SLAB_HWCACHE_ALIGN,
1405 				       NULL);
1406 	if (!mrt_cachep)
1407 		return -ENOMEM;
1408 
1409 	err = register_pernet_subsys(&ip6mr_net_ops);
1410 	if (err)
1411 		goto reg_pernet_fail;
1412 
1413 	err = register_netdevice_notifier(&ip6_mr_notifier);
1414 	if (err)
1415 		goto reg_notif_fail;
1416 #ifdef CONFIG_IPV6_PIMSM_V2
1417 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1418 		pr_err("%s: can't add PIM protocol\n", __func__);
1419 		err = -EAGAIN;
1420 		goto add_proto_fail;
1421 	}
1422 #endif
1423 	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
1424 		      ip6mr_rtm_dumproute, NULL);
1425 	return 0;
1426 #ifdef CONFIG_IPV6_PIMSM_V2
1427 add_proto_fail:
1428 	unregister_netdevice_notifier(&ip6_mr_notifier);
1429 #endif
1430 reg_notif_fail:
1431 	unregister_pernet_subsys(&ip6mr_net_ops);
1432 reg_pernet_fail:
1433 	kmem_cache_destroy(mrt_cachep);
1434 	return err;
1435 }
1436 
1437 void ip6_mr_cleanup(void)
1438 {
1439 	unregister_netdevice_notifier(&ip6_mr_notifier);
1440 	unregister_pernet_subsys(&ip6mr_net_ops);
1441 	kmem_cache_destroy(mrt_cachep);
1442 }
1443 
1444 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1445 			 struct mf6cctl *mfc, int mrtsock, int parent)
1446 {
1447 	bool found = false;
1448 	int line;
1449 	struct mfc6_cache *uc, *c;
1450 	unsigned char ttls[MAXMIFS];
1451 	int i;
1452 
1453 	if (mfc->mf6cc_parent >= MAXMIFS)
1454 		return -ENFILE;
1455 
1456 	memset(ttls, 255, MAXMIFS);
1457 	for (i = 0; i < MAXMIFS; i++) {
1458 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1459 			ttls[i] = 1;
1460 
1461 	}
1462 
1463 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1464 
1465 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1466 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1467 		    ipv6_addr_equal(&c->mf6c_mcastgrp,
1468 				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1469 		    (parent == -1 || parent == mfc->mf6cc_parent)) {
1470 			found = true;
1471 			break;
1472 		}
1473 	}
1474 
1475 	if (found) {
1476 		write_lock_bh(&mrt_lock);
1477 		c->mf6c_parent = mfc->mf6cc_parent;
1478 		ip6mr_update_thresholds(mrt, c, ttls);
1479 		if (!mrtsock)
1480 			c->mfc_flags |= MFC_STATIC;
1481 		write_unlock_bh(&mrt_lock);
1482 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1483 		return 0;
1484 	}
1485 
1486 	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1487 	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1488 		return -EINVAL;
1489 
1490 	c = ip6mr_cache_alloc();
1491 	if (c == NULL)
1492 		return -ENOMEM;
1493 
1494 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1495 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1496 	c->mf6c_parent = mfc->mf6cc_parent;
1497 	ip6mr_update_thresholds(mrt, c, ttls);
1498 	if (!mrtsock)
1499 		c->mfc_flags |= MFC_STATIC;
1500 
1501 	write_lock_bh(&mrt_lock);
1502 	list_add(&c->list, &mrt->mfc6_cache_array[line]);
1503 	write_unlock_bh(&mrt_lock);
1504 
1505 	/*
1506 	 *	Check whether we resolved a queued entry. If so, we
1507 	 *	need to send the pending frames on and tidy up.
1508 	 */
1509 	found = false;
1510 	spin_lock_bh(&mfc_unres_lock);
1511 	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1512 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1513 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1514 			list_del(&uc->list);
1515 			atomic_dec(&mrt->cache_resolve_queue_len);
1516 			found = true;
1517 			break;
1518 		}
1519 	}
1520 	if (list_empty(&mrt->mfc6_unres_queue))
1521 		del_timer(&mrt->ipmr_expire_timer);
1522 	spin_unlock_bh(&mfc_unres_lock);
1523 
1524 	if (found) {
1525 		ip6mr_cache_resolve(net, mrt, uc, c);
1526 		ip6mr_cache_free(uc);
1527 	}
1528 	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1529 	return 0;
1530 }
1531 
1532 /*
1533  *	Close the multicast socket, and clear the vif tables etc.
1534  */
1535 
1536 static void mroute_clean_tables(struct mr6_table *mrt)
1537 {
1538 	int i;
1539 	LIST_HEAD(list);
1540 	struct mfc6_cache *c, *next;
1541 
1542 	/*
1543 	 *	Shut down all active vif entries
1544 	 */
1545 	for (i = 0; i < mrt->maxvif; i++) {
1546 		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1547 			mif6_delete(mrt, i, &list);
1548 	}
1549 	unregister_netdevice_many(&list);
1550 
1551 	/*
1552 	 *	Wipe the cache
1553 	 */
1554 	for (i = 0; i < MFC6_LINES; i++) {
1555 		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1556 			if (c->mfc_flags & MFC_STATIC)
1557 				continue;
1558 			write_lock_bh(&mrt_lock);
1559 			list_del(&c->list);
1560 			write_unlock_bh(&mrt_lock);
1561 
1562 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1563 			ip6mr_cache_free(c);
1564 		}
1565 	}
1566 
1567 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1568 		spin_lock_bh(&mfc_unres_lock);
1569 		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1570 			list_del(&c->list);
1571 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1572 			ip6mr_destroy_unres(mrt, c);
1573 		}
1574 		spin_unlock_bh(&mfc_unres_lock);
1575 	}
1576 }
1577 
1578 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1579 {
1580 	int err = 0;
1581 	struct net *net = sock_net(sk);
1582 
1583 	rtnl_lock();
1584 	write_lock_bh(&mrt_lock);
1585 	if (likely(mrt->mroute6_sk == NULL)) {
1586 		mrt->mroute6_sk = sk;
1587 		net->ipv6.devconf_all->mc_forwarding++;
1588 		inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
1589 					     NETCONFA_IFINDEX_ALL,
1590 					     net->ipv6.devconf_all);
1591 	}
1592 	else
1593 		err = -EADDRINUSE;
1594 	write_unlock_bh(&mrt_lock);
1595 
1596 	rtnl_unlock();
1597 
1598 	return err;
1599 }
1600 
1601 int ip6mr_sk_done(struct sock *sk)
1602 {
1603 	int err = -EACCES;
1604 	struct net *net = sock_net(sk);
1605 	struct mr6_table *mrt;
1606 
1607 	rtnl_lock();
1608 	ip6mr_for_each_table(mrt, net) {
1609 		if (sk == mrt->mroute6_sk) {
1610 			write_lock_bh(&mrt_lock);
1611 			mrt->mroute6_sk = NULL;
1612 			net->ipv6.devconf_all->mc_forwarding--;
1613 			inet6_netconf_notify_devconf(net,
1614 						     NETCONFA_MC_FORWARDING,
1615 						     NETCONFA_IFINDEX_ALL,
1616 						     net->ipv6.devconf_all);
1617 			write_unlock_bh(&mrt_lock);
1618 
1619 			mroute_clean_tables(mrt);
1620 			err = 0;
1621 			break;
1622 		}
1623 	}
1624 	rtnl_unlock();
1625 
1626 	return err;
1627 }
1628 
1629 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1630 {
1631 	struct mr6_table *mrt;
1632 	struct flowi6 fl6 = {
1633 		.flowi6_iif	= skb->skb_iif,
1634 		.flowi6_oif	= skb->dev->ifindex,
1635 		.flowi6_mark	= skb->mark,
1636 	};
1637 
1638 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1639 		return NULL;
1640 
1641 	return mrt->mroute6_sk;
1642 }
1643 
1644 /*
1645  *	Socket options and virtual interface manipulation. The whole
1646  *	virtual interface system is a complete heap, but unfortunately
1647  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1648  *	MOSPF/PIM router set up we can clean this up.
1649  */
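/*
 * Typical user-space sequence (a sketch, not part of this file): the
 * routing daemon opens an ICMPv6 raw socket and issues the MRT6_*
 * options at the IPPROTO_IPV6 level, e.g.:
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int on = 1;
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &on, sizeof(on));
 *	... MRT6_ADD_MIF / MRT6_ADD_MFC for each interface and route ...
 */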
1650 
1651 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1652 {
1653 	int ret, parent = 0;
1654 	struct mif6ctl vif;
1655 	struct mf6cctl mfc;
1656 	mifi_t mifi;
1657 	struct net *net = sock_net(sk);
1658 	struct mr6_table *mrt;
1659 
1660 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1661 	if (mrt == NULL)
1662 		return -ENOENT;
1663 
1664 	if (optname != MRT6_INIT) {
1665 		if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
1666 			return -EACCES;
1667 	}
1668 
1669 	switch (optname) {
1670 	case MRT6_INIT:
1671 		if (sk->sk_type != SOCK_RAW ||
1672 		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1673 			return -EOPNOTSUPP;
1674 		if (optlen < sizeof(int))
1675 			return -EINVAL;
1676 
1677 		return ip6mr_sk_init(mrt, sk);
1678 
1679 	case MRT6_DONE:
1680 		return ip6mr_sk_done(sk);
1681 
1682 	case MRT6_ADD_MIF:
1683 		if (optlen < sizeof(vif))
1684 			return -EINVAL;
1685 		if (copy_from_user(&vif, optval, sizeof(vif)))
1686 			return -EFAULT;
1687 		if (vif.mif6c_mifi >= MAXMIFS)
1688 			return -ENFILE;
1689 		rtnl_lock();
1690 		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1691 		rtnl_unlock();
1692 		return ret;
1693 
1694 	case MRT6_DEL_MIF:
1695 		if (optlen < sizeof(mifi_t))
1696 			return -EINVAL;
1697 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1698 			return -EFAULT;
1699 		rtnl_lock();
1700 		ret = mif6_delete(mrt, mifi, NULL);
1701 		rtnl_unlock();
1702 		return ret;
1703 
1704 	/*
1705 	 *	Manipulate the forwarding caches. These live
1706 	 *	in a sort of kernel/user symbiosis.
1707 	 */
1708 	case MRT6_ADD_MFC:
1709 	case MRT6_DEL_MFC:
1710 		parent = -1;
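		/* fall through */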
1711 	case MRT6_ADD_MFC_PROXY:
1712 	case MRT6_DEL_MFC_PROXY:
1713 		if (optlen < sizeof(mfc))
1714 			return -EINVAL;
1715 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1716 			return -EFAULT;
1717 		if (parent == 0)
1718 			parent = mfc.mf6cc_parent;
1719 		rtnl_lock();
1720 		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1721 			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1722 		else
1723 			ret = ip6mr_mfc_add(net, mrt, &mfc,
1724 					    sk == mrt->mroute6_sk, parent);
1725 		rtnl_unlock();
1726 		return ret;
1727 
1728 	/*
1729 	 *	Control PIM assert (activating PIM also activates assert)
1730 	 */
1731 	case MRT6_ASSERT:
1732 	{
1733 		int v;
1734 
1735 		if (optlen != sizeof(v))
1736 			return -EINVAL;
1737 		if (get_user(v, (int __user *)optval))
1738 			return -EFAULT;
1739 		mrt->mroute_do_assert = v;
1740 		return 0;
1741 	}
1742 
1743 #ifdef CONFIG_IPV6_PIMSM_V2
1744 	case MRT6_PIM:
1745 	{
1746 		int v;
1747 
1748 		if (optlen != sizeof(v))
1749 			return -EINVAL;
1750 		if (get_user(v, (int __user *)optval))
1751 			return -EFAULT;
1752 		v = !!v;
1753 		rtnl_lock();
1754 		ret = 0;
1755 		if (v != mrt->mroute_do_pim) {
1756 			mrt->mroute_do_pim = v;
1757 			mrt->mroute_do_assert = v;
1758 		}
1759 		rtnl_unlock();
1760 		return ret;
1761 	}
1762 
1763 #endif
1764 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1765 	case MRT6_TABLE:
1766 	{
1767 		u32 v;
1768 
1769 		if (optlen != sizeof(u32))
1770 			return -EINVAL;
1771 		if (get_user(v, (u32 __user *)optval))
1772 			return -EFAULT;
1773 		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1774 		if (v != RT_TABLE_DEFAULT && v >= 100000000)
1775 			return -EINVAL;
1776 		if (sk == mrt->mroute6_sk)
1777 			return -EBUSY;
1778 
1779 		rtnl_lock();
1780 		ret = 0;
1781 		if (!ip6mr_new_table(net, v))
1782 			ret = -ENOMEM;
1783 		raw6_sk(sk)->ip6mr_table = v;
1784 		rtnl_unlock();
1785 		return ret;
1786 	}
1787 #endif
1788 	/*
1789 	 *	Spurious command, or MRT6_VERSION which you cannot
1790 	 *	set.
1791 	 */
1792 	default:
1793 		return -ENOPROTOOPT;
1794 	}
1795 }
1796 
1797 /*
1798  *	Getsockopt support for the multicast routing system.
1799  */
1800 
1801 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1802 			  int __user *optlen)
1803 {
1804 	int olr;
1805 	int val;
1806 	struct net *net = sock_net(sk);
1807 	struct mr6_table *mrt;
1808 
1809 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1810 	if (mrt == NULL)
1811 		return -ENOENT;
1812 
1813 	switch (optname) {
1814 	case MRT6_VERSION:
1815 		val = 0x0305;
1816 		break;
1817 #ifdef CONFIG_IPV6_PIMSM_V2
1818 	case MRT6_PIM:
1819 		val = mrt->mroute_do_pim;
1820 		break;
1821 #endif
1822 	case MRT6_ASSERT:
1823 		val = mrt->mroute_do_assert;
1824 		break;
1825 	default:
1826 		return -ENOPROTOOPT;
1827 	}
1828 
1829 	if (get_user(olr, optlen))
1830 		return -EFAULT;
1831 
1832 	olr = min_t(int, olr, sizeof(int));
1833 	if (olr < 0)
1834 		return -EINVAL;
1835 
1836 	if (put_user(olr, optlen))
1837 		return -EFAULT;
1838 	if (copy_to_user(optval, &val, olr))
1839 		return -EFAULT;
1840 	return 0;
1841 }
1842 
1843 /*
1844  *	The IPv6 multicast ioctl support routines.
1845  */
1846 
1847 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1848 {
1849 	struct sioc_sg_req6 sr;
1850 	struct sioc_mif_req6 vr;
1851 	struct mif_device *vif;
1852 	struct mfc6_cache *c;
1853 	struct net *net = sock_net(sk);
1854 	struct mr6_table *mrt;
1855 
1856 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1857 	if (mrt == NULL)
1858 		return -ENOENT;
1859 
1860 	switch (cmd) {
1861 	case SIOCGETMIFCNT_IN6:
1862 		if (copy_from_user(&vr, arg, sizeof(vr)))
1863 			return -EFAULT;
1864 		if (vr.mifi >= mrt->maxvif)
1865 			return -EINVAL;
1866 		read_lock(&mrt_lock);
1867 		vif = &mrt->vif6_table[vr.mifi];
1868 		if (MIF_EXISTS(mrt, vr.mifi)) {
1869 			vr.icount = vif->pkt_in;
1870 			vr.ocount = vif->pkt_out;
1871 			vr.ibytes = vif->bytes_in;
1872 			vr.obytes = vif->bytes_out;
1873 			read_unlock(&mrt_lock);
1874 
1875 			if (copy_to_user(arg, &vr, sizeof(vr)))
1876 				return -EFAULT;
1877 			return 0;
1878 		}
1879 		read_unlock(&mrt_lock);
1880 		return -EADDRNOTAVAIL;
1881 	case SIOCGETSGCNT_IN6:
1882 		if (copy_from_user(&sr, arg, sizeof(sr)))
1883 			return -EFAULT;
1884 
1885 		read_lock(&mrt_lock);
1886 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1887 		if (c) {
1888 			sr.pktcnt = c->mfc_un.res.pkt;
1889 			sr.bytecnt = c->mfc_un.res.bytes;
1890 			sr.wrong_if = c->mfc_un.res.wrong_if;
1891 			read_unlock(&mrt_lock);
1892 
1893 			if (copy_to_user(arg, &sr, sizeof(sr)))
1894 				return -EFAULT;
1895 			return 0;
1896 		}
1897 		read_unlock(&mrt_lock);
1898 		return -EADDRNOTAVAIL;
1899 	default:
1900 		return -ENOIOCTLCMD;
1901 	}
1902 }
1903 
1904 #ifdef CONFIG_COMPAT
1905 struct compat_sioc_sg_req6 {
1906 	struct sockaddr_in6 src;
1907 	struct sockaddr_in6 grp;
1908 	compat_ulong_t pktcnt;
1909 	compat_ulong_t bytecnt;
1910 	compat_ulong_t wrong_if;
1911 };
1912 
1913 struct compat_sioc_mif_req6 {
1914 	mifi_t	mifi;
1915 	compat_ulong_t icount;
1916 	compat_ulong_t ocount;
1917 	compat_ulong_t ibytes;
1918 	compat_ulong_t obytes;
1919 };
1920 
1921 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1922 {
1923 	struct compat_sioc_sg_req6 sr;
1924 	struct compat_sioc_mif_req6 vr;
1925 	struct mif_device *vif;
1926 	struct mfc6_cache *c;
1927 	struct net *net = sock_net(sk);
1928 	struct mr6_table *mrt;
1929 
1930 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1931 	if (mrt == NULL)
1932 		return -ENOENT;
1933 
1934 	switch (cmd) {
1935 	case SIOCGETMIFCNT_IN6:
1936 		if (copy_from_user(&vr, arg, sizeof(vr)))
1937 			return -EFAULT;
1938 		if (vr.mifi >= mrt->maxvif)
1939 			return -EINVAL;
1940 		read_lock(&mrt_lock);
1941 		vif = &mrt->vif6_table[vr.mifi];
1942 		if (MIF_EXISTS(mrt, vr.mifi)) {
1943 			vr.icount = vif->pkt_in;
1944 			vr.ocount = vif->pkt_out;
1945 			vr.ibytes = vif->bytes_in;
1946 			vr.obytes = vif->bytes_out;
1947 			read_unlock(&mrt_lock);
1948 
1949 			if (copy_to_user(arg, &vr, sizeof(vr)))
1950 				return -EFAULT;
1951 			return 0;
1952 		}
1953 		read_unlock(&mrt_lock);
1954 		return -EADDRNOTAVAIL;
1955 	case SIOCGETSGCNT_IN6:
1956 		if (copy_from_user(&sr, arg, sizeof(sr)))
1957 			return -EFAULT;
1958 
1959 		read_lock(&mrt_lock);
1960 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1961 		if (c) {
1962 			sr.pktcnt = c->mfc_un.res.pkt;
1963 			sr.bytecnt = c->mfc_un.res.bytes;
1964 			sr.wrong_if = c->mfc_un.res.wrong_if;
1965 			read_unlock(&mrt_lock);
1966 
1967 			if (copy_to_user(arg, &sr, sizeof(sr)))
1968 				return -EFAULT;
1969 			return 0;
1970 		}
1971 		read_unlock(&mrt_lock);
1972 		return -EADDRNOTAVAIL;
1973 	default:
1974 		return -ENOIOCTLCMD;
1975 	}
1976 }
1977 #endif
1978 
1979 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1980 {
1981 	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1982 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1983 	IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1984 			 IPSTATS_MIB_OUTOCTETS, skb->len);
1985 	return dst_output(skb);
1986 }
1987 
1988 /*
1989  *	Processing handlers for ip6mr_forward
1990  */
1991 
1992 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1993 			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1994 {
1995 	struct ipv6hdr *ipv6h;
1996 	struct mif_device *vif = &mrt->vif6_table[vifi];
1997 	struct net_device *dev;
1998 	struct dst_entry *dst;
1999 	struct flowi6 fl6;
2000 
2001 	if (vif->dev == NULL)
2002 		goto out_free;
2003 
2004 #ifdef CONFIG_IPV6_PIMSM_V2
2005 	if (vif->flags & MIFF_REGISTER) {
2006 		vif->pkt_out++;
2007 		vif->bytes_out += skb->len;
2008 		vif->dev->stats.tx_bytes += skb->len;
2009 		vif->dev->stats.tx_packets++;
2010 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2011 		goto out_free;
2012 	}
2013 #endif
2014 
2015 	ipv6h = ipv6_hdr(skb);
2016 
2017 	fl6 = (struct flowi6) {
2018 		.flowi6_oif = vif->link,
2019 		.daddr = ipv6h->daddr,
2020 	};
2021 
2022 	dst = ip6_route_output(net, NULL, &fl6);
2023 	if (dst->error) {
2024 		dst_release(dst);
2025 		goto out_free;
2026 	}
2027 
2028 	skb_dst_drop(skb);
2029 	skb_dst_set(skb, dst);
2030 
2031 	/*
2032 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
2033 	 * not only before forwarding, but also after forwarding on all output
2034 	 * interfaces.  Clearly, if the mrouter runs a multicast program, that
2035 	 * program should receive packets regardless of which interface it is
2036 	 * joined on.
2037 	 * If we did not do this, the program would have to join on all
2038 	 * interfaces.  On the other hand, a multihomed host (or router, but
2039 	 * not an mrouter) cannot join on more than one interface - it would
2040 	 * receive multiple copies of each packet.
2041 	 */
2042 	dev = vif->dev;
2043 	skb->dev = dev;
2044 	vif->pkt_out++;
2045 	vif->bytes_out += skb->len;
2046 
2047 	/* We are about to write */
2048 	/* XXX: extension headers? */
2049 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2050 		goto out_free;
2051 
2052 	ipv6h = ipv6_hdr(skb);
2053 	ipv6h->hop_limit--;
2054 
2055 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2056 
2057 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
2058 		       ip6mr_forward2_finish);
2059 
2060 out_free:
2061 	kfree_skb(skb);
2062 	return 0;
2063 }
2064 
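/*
 *	Map a network device to its mif index; returns -1 if the device
 *	is not a configured mif.
 */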
static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
		if (mrt->vif6_table[ct].dev == dev)
			break;
	}
	return ct;
}

static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;
	int true_vifi = ip6mr_find_vif(mrt, skb->dev);

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
			goto forward;
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif6_table[vif].dev != skb->dev) {
		cache->mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts when switching from RPT to SPT,
		       so we cannot check that the packet arrived on an oif.
		       This is bad, but otherwise we would have to move a
		       pretty large chunk of pimd into the kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

forward:
	mrt->vif6_table[vif].pkt_in++;
	mrt->vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&cache->mf6c_origin) &&
	    ipv6_addr_any(&cache->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != cache->mf6c_parent &&
		    ipv6_hdr(skb)->hop_limit >
				cache->mfc_un.res.ttls[cache->mf6c_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = cache->mf6c_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
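	/*
	 * Walk the ttl vector from high to low, cloning the skb for every
	 * eligible vif except the last one, which gets the original skb
	 * via last_forward.
	 */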
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2, cache, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, cache, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int err;

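	/* Select the multicast routing table for this packet; with
	 * fib-rules support the lookup can key on the input interface
	 * and mark, otherwise the default table is used.
	 */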
	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
	if (cache == NULL) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt,
						     &ipv6_hdr(skb)->daddr,
						     vif);
	}

	/*
	 *	No usable cache entry: if the packet arrived on a known mif,
	 *	queue it as unresolved so the routing daemon can be told;
	 *	otherwise drop it.
	 */
	if (cache == NULL) {
		int vif;

		vif = ip6mr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(net, mrt, skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}

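/*
 *	Fill the route part of an MFC dump: the input interface, an
 *	RTA_MULTIPATH nest with one nexthop per forwarding mif, and the
 *	MFC packet/byte/wrong-if counters.
 */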
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct nlattr *mp_attr;
	struct rta_mfc_stats mfcs;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mf6c_parent >= MAXMIFS)
		return -ENOENT;

	if (MIF_EXISTS(mrt, c->mf6c_parent) &&
	    nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
		return -EMSGSIZE;
	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
	if (mp_attr == NULL)
		return -EMSGSIZE;

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
			if (nhp == NULL) {
				nla_nest_cancel(skb, mp_attr);
				return -EMSGSIZE;
			}

			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}

	nla_nest_end(skb, mp_attr);

	mfcs.mfcs_packets = c->mfc_un.res.pkt;
	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
	if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
		return -EMSGSIZE;

	rtm->rtm_type = RTN_MULTICAST;
	return 1;
}

int ip6mr_get_route(struct net *net,
		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mr6_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

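		/*
		 * No cache entry yet: fabricate a minimal IPv6 header that
		 * carries only the addresses, so the packet can be queued
		 * as unresolved and reported to the daemon.
		 */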
		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

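/*
 *	Build one RTM_NEWROUTE/RTM_DELROUTE message describing MFC entry
 *	@c, for both netlink dumps and multicast event notifications.
 */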
static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int err;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
	rtm->rtm_dst_len  = 128;
	rtm->rtm_src_len  = 128;
	rtm->rtm_tos      = 0;
	rtm->rtm_table    = mrt->id;
	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto nla_put_failure;
	rtm->rtm_type = RTN_MULTICAST;
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	if (c->mfc_flags & MFC_STATIC)
		rtm->rtm_protocol = RTPROT_STATIC;
	else
		rtm->rtm_protocol = RTPROT_MROUTED;
	rtm->rtm_flags    = 0;

	if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) ||
	    nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp))
		goto nla_put_failure;
	err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
	/* do not break the dump if cache is unresolved */
	if (err < 0 && err != -ENOENT)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

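/*
 *	Worst-case netlink message size for one MFC entry; unresolved
 *	entries carry no interface or counter attributes.
 */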
static int mr6_msgsize(bool unresolved, int maxvif)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4)	/* RTA_TABLE */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
		;

	if (!unresolved)
		len = len
		      + nla_total_size(4)	/* RTA_IIF */
		      + nla_total_size(0)	/* RTA_MULTIPATH */
		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
						/* RTA_MFC_STATS */
		      + nla_total_size(sizeof(struct rta_mfc_stats))
		;

	return len;
}

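/*
 *	Broadcast an MFC add/delete to RTNLGRP_IPV6_MROUTE listeners.
 */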
static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
			      int cmd)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
			GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd);
	if (err < 0)
		goto errout;

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
	return;

errout:
	kfree_skb(skb);
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
}

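/*
 *	Dump all MFC entries to netlink, resuming from the table/hash
 *	bucket/entry cursors stashed in cb->args[] on the previous pass.
 */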
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr6_table *mrt;
	struct mfc6_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	read_lock(&mrt_lock);
	ip6mr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		if (t > s_t)
			s_h = 0;
		for (h = s_h; h < MFC6_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
				if (e < s_e)
					goto next_entry;
				if (ip6mr_fill_mroute(mrt, skb,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      mfc, RTM_NEWROUTE) < 0)
					goto done;
next_entry:
				e++;
			}
			e = s_e = 0;
		}
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
			if (e < s_e)
				goto next_entry2;
			if (ip6mr_fill_mroute(mrt, skb,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      mfc, RTM_NEWROUTE) < 0) {
				spin_unlock_bh(&mfc_unres_lock);
				goto done;
			}
next_entry2:
			e++;
		}
		spin_unlock_bh(&mfc_unres_lock);
		e = s_e = 0;
		s_h = 0;
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);

	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}