xref: /openbmc/linux/net/ipv6/ip6mr.c (revision 6a551c11)
/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
#include <linux/export.h>
#include <net/ip6_checksum.h>
#include <linux/netconf.h>

struct mr6_table {
	struct list_head	list;
	possible_net_t		net;
	u32			id;
	struct sock		*mroute6_sk;
	struct timer_list	ipmr_expire_timer;
	struct list_head	mfc6_unres_queue;
	struct list_head	mfc6_cache_array[MFC6_LINES];
	struct mif_device	vif6_table[MAXMIFS];
	int			maxvif;
	atomic_t		cache_resolve_queue_len;
	bool			mroute_do_assert;
	bool			mroute_do_pim;
#ifdef CONFIG_IPV6_PIMSM_V2
	int			mroute_reg_vif_num;
#endif
};

struct ip6mr_rule {
	struct fib_rule		common;
};

struct ip6mr_result {
	struct mr6_table	*mrt;
};
/* Big lock, protecting the vif table, mrt cache and mroute socket state.
   Note that changes are serialized via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)

/* Special spinlock for the queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and protected with the
   weak lock mrt_lock. The queue of unresolved entries is protected
   with the strong spinlock mfc_unres_lock.

   In this case the data path is entirely free of exclusive locks.
 */
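
/* An illustrative sketch (not part of the build) of the resulting
 * pattern: data-path readers take mrt_lock shared, while configuration
 * paths take it exclusively, e.g. (error handling omitted):
 *
 *	read_lock(&mrt_lock);
 *	cache = ip6mr_cache_find(mrt, &hdr->saddr, &hdr->daddr);
 *	if (cache)
 *		ip6_mr_forward(net, mrt, skb, cache);
 *	read_unlock(&mrt_lock);
 */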

static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr6_table *mrt);

static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm);
static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr6_table *mrt, bool all);
static void ipmr_expire_process(unsigned long arg);

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)

static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}
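
/* With multiple tables, table selection is driven by fib rules of
 * family RTNL_FAMILY_IP6MR. A daemon would typically steer traffic
 * arriving on one interface into table 100 with iproute2 along the
 * lines of (illustrative; exact syntax depends on the iproute2
 * version):
 *
 *	ip -6 mrule add iif eth0 lookup 100
 */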

static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr6_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ip6mr_get_table(rule->fr_net, rule->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}

static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr6_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (!mrt) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	ip6mr_free_table(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr6_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
	rtnl_unlock();
}
#else
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}

static int __net_init ip6mr_rules_init(struct net *net)
{
	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
	return net->ipv6.mrt6 ? 0 : -ENOMEM;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}
#endif

static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;
	unsigned int i;

	mrt = ip6mr_get_table(net, id);
	if (mrt)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (!mrt)
		return NULL;
	mrt->id = id;
	write_pnet(&mrt->net, net);

	/* Forwarding cache */
	for (i = 0; i < MFC6_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IPV6_PIMSM_V2
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
	return mrt;
}

static void ip6mr_free_table(struct mr6_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, true);
	kfree(mrt);
}

#ifdef CONFIG_PROC_FS

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	struct list_head *cache;
	int ct;
};

static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
					   struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr6_table *mrt = it->mrt;
	struct mfc6_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc6_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

/*
 *	The /proc interfaces to multicast routing:
 *	/proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
 */
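
/* Illustrative (made-up counters) output of /proc/net/ip6_mr_vif, as
 * produced by ip6mr_vif_seq_show() below:
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0            1040      10      2080      20 00000
 *	 1 pim6reg            0       0       520       5 00001
 */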

struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	int ct;
};

static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
					    struct ipmr_vif_iter *iter,
					    loff_t pos)
{
	struct mr6_table *mrt = iter->mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!MIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif6_table[iter->ct];
	}
	return NULL;
}

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip6mr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!MIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif6_table[iter->ct];
	}
	return NULL;
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr6_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - mrt->vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc6_cache, list);

	if (it->cache == &mrt->mfc6_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);

	while (++it->ct < MFC6_LINES) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc6_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc6_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc6_cache, list);

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr6_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc6_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == &mrt->mfc6_cache_array[it->ct])
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		struct mr6_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->mf6c_parent);

		if (it->cache != &mrt->mfc6_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IPV6_PIMSM_V2

static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to a multicast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (!reg_dev)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}

static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT6_TABLE_DFLT)
		sprintf(name, "pim6reg");
	else
		sprintf(name, "pim6reg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}
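
/* The register vif is an ordinary (if odd) network device, so once a
 * MIFF_REGISTER mif is configured it can be inspected with standard
 * tooling, e.g. "ip -6 link show pim6reg" ("pim6reg<id>" for
 * non-default tables, per the naming above).
 */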
#endif

/*
 *	Delete a VIF entry
 */

static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
{
	struct mif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding--;
		inet6_netconf_notify_devconf(dev_net(dev),
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting the error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}

/* Timer handler for the unresolved queue. */

static void ipmr_do_expire_process(struct mr6_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mfc6_cache *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(unsigned long arg)
{
	struct mr6_table *mrt = (struct mr6_table *)arg;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}

/* Fill the oif list. Called with mrt_lock held for writing. */

static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
				    unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (MIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
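
/* Worked example (hypothetical values): a mf6cctl whose mf6cc_ifset
 * has bits 1 and 2 set becomes ttls[] = { 255, 1, 1, 255, ... } in
 * ip6mr_mfc_add() below; assuming both mifs exist, this function then
 * yields minvif = 1 and maxvif = 3, so the forwarding loop in
 * ip6_mr_forward() scans exactly mifs 1 and 2.
 */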

static int mif6_add(struct net *net, struct mr6_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is the vif busy? */
	if (MIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special-purpose VIF in PIM:
		 * all packets will be sent to the daemon.
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding++;
		inet6_netconf_notify_devconf(dev_net(dev),
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev_get_iflink(dev);

	/* And finish the update by writing the critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	int line = MFC6_HASH(mcastgrp, origin);
	struct mfc6_cache *c;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
			return c;
	}
	return NULL;
}

/* Look for a (*,*,oif) entry */
static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
						      mifi_t mifi)
{
	int line = MFC6_HASH(&in6addr_any, &in6addr_any);
	struct mfc6_cache *c;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
		if (ipv6_addr_any(&c->mf6c_origin) &&
		    ipv6_addr_any(&c->mf6c_mcastgrp) &&
		    (c->mfc_un.res.ttls[mifi] < 255))
			return c;

	return NULL;
}

/* Look for a (*,G) entry */
static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	int line = MFC6_HASH(mcastgrp, &in6addr_any);
	struct mfc6_cache *c, *proxy;

	if (ipv6_addr_any(mcastgrp))
		goto skip;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
		if (ipv6_addr_any(&c->mf6c_origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
			if (c->mfc_un.res.ttls[mifi] < 255)
				return c;

			/* It's ok if the mifi is part of the static tree */
			proxy = ip6mr_cache_find_any_parent(mrt,
							    c->mf6c_parent);
			if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
				return c;
		}

skip:
	return ip6mr_cache_find_any_parent(mrt, mifi);
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (!c)
		return NULL;
	c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
	c->mfc_un.res.minvif = MAXMIFS;
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (!c)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone from queued into a resolved state
 */

static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			ip6_mr_forward(net, mrt, skb, c);
	}
}

/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this
 *	but pim6sd expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */
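
/* Sketch of the user-space side (illustrative, error handling
 * omitted): pim6sd reads these messages from the same raw ICMPv6
 * socket it passed to MRT6_INIT and dispatches on im6_msgtype:
 *
 *	struct mrt6msg msg;
 *	ssize_t n = recv(mrt_fd, &msg, sizeof(msg), 0);
 *
 *	if (n >= (ssize_t)sizeof(msg) && msg.im6_msgtype == MRT6MSG_NOCACHE)
 *		resolve_and_add_mfc(&msg.im6_src, &msg.im6_dst, msg.im6_mif);
 *
 * where resolve_and_add_mfc() stands in for the daemon's own logic.
 */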

static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+ sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate the old header, fix the length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	if (!mrt->mroute6_sk) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution, creating an unresolved cache entry
 *	if necessary.
 */

static int
ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect the first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed, throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc6_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		ipmr_do_expire_process(mrt);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
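
/* Note the hard limits enforced above: at most 10 unresolved entries
 * per table (cache_resolve_queue_len), at most 4 skbs queued per entry
 * (the qlen > 3 check), and a 10 second lifetime per entry as set in
 * ip6mr_cache_alloc_unres(); anything beyond these is dropped with
 * -ENOBUFS.
 */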

/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	int line;
	struct mfc6_cache *c, *next;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp,
				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
		    (parent == -1 || parent == c->mf6c_parent)) {
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr6_table *mrt;
	struct mif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif6_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				mif6_delete(mrt, ct, &list);
		}
	}
	unregister_netdevice_many(&list);

	return NOTIFY_DONE;
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

/*
 *	Setup for IP multicast routing
 */

static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
fail:
	return err;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};

int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
		      ip6mr_rtm_dumproute, NULL);
	return 0;
#ifdef CONFIG_IPV6_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}

void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}

static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	bool found = false;
	int line;
	struct mfc6_cache *uc, *c;
	unsigned char ttls[MAXMIFS];
	int i;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp,
				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
		    (parent == -1 || parent == mfc->mf6cc_parent)) {
			found = true;
			break;
		}
	}

	if (found) {
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc6_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued entry. If so we
	 *	need to send out the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc6_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc.
 */

static void mroute_clean_tables(struct mr6_table *mrt, bool all)
{
	int i;
	LIST_HEAD(list);
	struct mfc6_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
			continue;
		mif6_delete(mrt, i, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC6_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
			if (!all && (c->mfc_flags & MFC_STATIC))
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_cache_free(c);
		}
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
			list_del(&c->list);
			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(mrt->mroute6_sk == NULL)) {
		mrt->mroute6_sk = sk;
		net->ipv6.devconf_all->mc_forwarding++;
		inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	} else
		err = -EADDRINUSE;
	write_unlock_bh(&mrt_lock);

	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == mrt->mroute6_sk) {
			write_lock_bh(&mrt_lock);
			mrt->mroute6_sk = NULL;
			net->ipv6.devconf_all->mc_forwarding--;
			inet6_netconf_notify_devconf(net,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);
			write_unlock_bh(&mrt_lock);

			mroute_clean_tables(mrt, false);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}

struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
{
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_oif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		return NULL;

	return mrt->mroute6_sk;
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */
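
/* Sketch of the expected user-space usage (illustrative, error
 * handling omitted; the socket must be a raw ICMPv6 socket, which
 * itself requires privilege, as checked in ip6_mroute_setsockopt()
 * below):
 *
 *	int fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int one = 1;
 *	struct mif6ctl mif = {
 *		.mif6c_mifi = 0,
 *		.mif6c_pifi = if_nametoindex("eth0"),
 *	};
 *
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));
 */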

int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret, parent = 0;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
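	/* Continuing the sketch above (illustrative): with mifs 0 and 1
	 * configured, the daemon installs an (S,G) entry forwarding
	 * traffic that arrives on mif 0 out of mif 1:
	 *
	 *	struct mf6cctl mc = { .mf6cc_parent = 0 };
	 *
	 *	inet_pton(AF_INET6, "2001:db8::1", &mc.mf6cc_origin.sin6_addr);
	 *	inet_pton(AF_INET6, "ff3e::4321", &mc.mf6cc_mcastgrp.sin6_addr);
	 *	IF_SET(1, &mc.mf6cc_ifset);
	 *	setsockopt(fd, IPPROTO_IPV6, MRT6_ADD_MFC, &mc, sizeof(mc));
	 */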
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		parent = -1;
		/* fall through */
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		if (parent == 0)
			parent = mfc.mf6cc_parent;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk == mrt->mroute6_sk, parent);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (activating PIM also activates assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		if (sk == mrt->mroute6_sk)
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ip6mr_new_table(net, v))
			ret = -ENOMEM;
		raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsockopt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */
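
/* Illustrative use of the counter ioctls from user space (error
 * handling omitted):
 *
 *	struct sioc_sg_req6 sg;
 *
 *	memset(&sg, 0, sizeof(sg));
 *	inet_pton(AF_INET6, "2001:db8::1", &sg.src.sin6_addr);
 *	inet_pton(AF_INET6, "ff3e::4321", &sg.grp.sin6_addr);
 *	if (ioctl(fd, SIOCGETSGCNT_IN6, &sg) == 0)
 *		printf("pkts %lu bytes %lu\n", sg.pktcnt, sg.bytecnt);
 */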
1853 
1854 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1855 {
1856 	struct sioc_sg_req6 sr;
1857 	struct sioc_mif_req6 vr;
1858 	struct mif_device *vif;
1859 	struct mfc6_cache *c;
1860 	struct net *net = sock_net(sk);
1861 	struct mr6_table *mrt;
1862 
1863 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1864 	if (!mrt)
1865 		return -ENOENT;
1866 
1867 	switch (cmd) {
1868 	case SIOCGETMIFCNT_IN6:
1869 		if (copy_from_user(&vr, arg, sizeof(vr)))
1870 			return -EFAULT;
1871 		if (vr.mifi >= mrt->maxvif)
1872 			return -EINVAL;
1873 		read_lock(&mrt_lock);
1874 		vif = &mrt->vif6_table[vr.mifi];
1875 		if (MIF_EXISTS(mrt, vr.mifi)) {
1876 			vr.icount = vif->pkt_in;
1877 			vr.ocount = vif->pkt_out;
1878 			vr.ibytes = vif->bytes_in;
1879 			vr.obytes = vif->bytes_out;
1880 			read_unlock(&mrt_lock);
1881 
1882 			if (copy_to_user(arg, &vr, sizeof(vr)))
1883 				return -EFAULT;
1884 			return 0;
1885 		}
1886 		read_unlock(&mrt_lock);
1887 		return -EADDRNOTAVAIL;
1888 	case SIOCGETSGCNT_IN6:
1889 		if (copy_from_user(&sr, arg, sizeof(sr)))
1890 			return -EFAULT;
1891 
1892 		read_lock(&mrt_lock);
1893 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1894 		if (c) {
1895 			sr.pktcnt = c->mfc_un.res.pkt;
1896 			sr.bytecnt = c->mfc_un.res.bytes;
1897 			sr.wrong_if = c->mfc_un.res.wrong_if;
1898 			read_unlock(&mrt_lock);
1899 
1900 			if (copy_to_user(arg, &sr, sizeof(sr)))
1901 				return -EFAULT;
1902 			return 0;
1903 		}
1904 		read_unlock(&mrt_lock);
1905 		return -EADDRNOTAVAIL;
1906 	default:
1907 		return -ENOIOCTLCMD;
1908 	}
1909 }
1910 
1911 #ifdef CONFIG_COMPAT
1912 struct compat_sioc_sg_req6 {
1913 	struct sockaddr_in6 src;
1914 	struct sockaddr_in6 grp;
1915 	compat_ulong_t pktcnt;
1916 	compat_ulong_t bytecnt;
1917 	compat_ulong_t wrong_if;
1918 };
1919 
1920 struct compat_sioc_mif_req6 {
1921 	mifi_t	mifi;
1922 	compat_ulong_t icount;
1923 	compat_ulong_t ocount;
1924 	compat_ulong_t ibytes;
1925 	compat_ulong_t obytes;
1926 };
1927 
1928 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1929 {
1930 	struct compat_sioc_sg_req6 sr;
1931 	struct compat_sioc_mif_req6 vr;
1932 	struct mif_device *vif;
1933 	struct mfc6_cache *c;
1934 	struct net *net = sock_net(sk);
1935 	struct mr6_table *mrt;
1936 
1937 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1938 	if (!mrt)
1939 		return -ENOENT;
1940 
1941 	switch (cmd) {
1942 	case SIOCGETMIFCNT_IN6:
1943 		if (copy_from_user(&vr, arg, sizeof(vr)))
1944 			return -EFAULT;
1945 		if (vr.mifi >= mrt->maxvif)
1946 			return -EINVAL;
1947 		read_lock(&mrt_lock);
1948 		vif = &mrt->vif6_table[vr.mifi];
1949 		if (MIF_EXISTS(mrt, vr.mifi)) {
1950 			vr.icount = vif->pkt_in;
1951 			vr.ocount = vif->pkt_out;
1952 			vr.ibytes = vif->bytes_in;
1953 			vr.obytes = vif->bytes_out;
1954 			read_unlock(&mrt_lock);
1955 
1956 			if (copy_to_user(arg, &vr, sizeof(vr)))
1957 				return -EFAULT;
1958 			return 0;
1959 		}
1960 		read_unlock(&mrt_lock);
1961 		return -EADDRNOTAVAIL;
1962 	case SIOCGETSGCNT_IN6:
1963 		if (copy_from_user(&sr, arg, sizeof(sr)))
1964 			return -EFAULT;
1965 
1966 		read_lock(&mrt_lock);
1967 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1968 		if (c) {
1969 			sr.pktcnt = c->mfc_un.res.pkt;
1970 			sr.bytecnt = c->mfc_un.res.bytes;
1971 			sr.wrong_if = c->mfc_un.res.wrong_if;
1972 			read_unlock(&mrt_lock);
1973 
1974 			if (copy_to_user(arg, &sr, sizeof(sr)))
1975 				return -EFAULT;
1976 			return 0;
1977 		}
1978 		read_unlock(&mrt_lock);
1979 		return -EADDRNOTAVAIL;
1980 	default:
1981 		return -ENOIOCTLCMD;
1982 	}
1983 }
1984 #endif
1985 
1986 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
1987 {
1988 	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1989 			IPSTATS_MIB_OUTFORWDATAGRAMS);
1990 	__IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
1991 			IPSTATS_MIB_OUTOCTETS, skb->len);
1992 	return dst_output(net, sk, skb);
1993 }
1994 
1995 /*
1996  *	Processing handlers for ip6mr_forward
1997  */
1998 
1999 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
2000 			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
2001 {
2002 	struct ipv6hdr *ipv6h;
2003 	struct mif_device *vif = &mrt->vif6_table[vifi];
2004 	struct net_device *dev;
2005 	struct dst_entry *dst;
2006 	struct flowi6 fl6;
2007 
2008 	if (!vif->dev)
2009 		goto out_free;
2010 
2011 #ifdef CONFIG_IPV6_PIMSM_V2
2012 	if (vif->flags & MIFF_REGISTER) {
2013 		vif->pkt_out++;
2014 		vif->bytes_out += skb->len;
2015 		vif->dev->stats.tx_bytes += skb->len;
2016 		vif->dev->stats.tx_packets++;
2017 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2018 		goto out_free;
2019 	}
2020 #endif
2021 
2022 	ipv6h = ipv6_hdr(skb);
2023 
2024 	fl6 = (struct flowi6) {
2025 		.flowi6_oif = vif->link,
2026 		.daddr = ipv6h->daddr,
2027 	};
2028 
2029 	dst = ip6_route_output(net, NULL, &fl6);
2030 	if (dst->error) {
2031 		dst_release(dst);
2032 		goto out_free;
2033 	}
2034 
2035 	skb_dst_drop(skb);
2036 	skb_dst_set(skb, dst);
2037 
2038 	/*
2039 	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2040 	 * not only before forwarding, but after forwarding on all output
2041 	 * interfaces. It is clear, if mrouter runs a multicasting
2042 	 * program, it should receive packets not depending to what interface
2043 	 * program is joined.
2044 	 * If we will not make it, the program will have to join on all
2045 	 * interfaces. On the other hand, multihoming host (or router, but
2046 	 * not mrouter) cannot join to more than one interface - it will
2047 	 * result in receiving multiple packets.
2048 	 */
2049 	dev = vif->dev;
2050 	skb->dev = dev;
2051 	vif->pkt_out++;
2052 	vif->bytes_out += skb->len;
2053 
2054 	/* We are about to write to the IPv6 header: make the skb writable */
2055 	/* XXX: extension headers? */
2056 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2057 		goto out_free;
2058 
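	/* skb_cow() above may have reallocated the header, so re-read the
	 * pointer before decrementing the hop limit of the forwarded copy.
	 */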
2059 	ipv6h = ipv6_hdr(skb);
2060 	ipv6h->hop_limit--;
2061 
2062 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2063 
2064 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2065 		       net, NULL, skb, skb->dev, dev,
2066 		       ip6mr_forward2_finish);
2067 
2068 out_free:
2069 	kfree_skb(skb);
2070 	return 0;
2071 }
2072 
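/* Map a net_device back to its MIF index in @mrt; returns -1 when the
 * device is not a configured multicast interface.  A linear scan, so
 * callers must hold mrt_lock across it.
 */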
2073 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2074 {
2075 	int ct;
2076 
2077 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2078 		if (mrt->vif6_table[ct].dev == dev)
2079 			break;
2080 	}
2081 	return ct;
2082 }
2083 
2084 static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
2085 			   struct sk_buff *skb, struct mfc6_cache *cache)
2086 {
2087 	int psend = -1;
2088 	int vif, ct;
2089 	int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2090 
2091 	vif = cache->mf6c_parent;
2092 	cache->mfc_un.res.pkt++;
2093 	cache->mfc_un.res.bytes += skb->len;
2094 
2095 	if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
2096 		struct mfc6_cache *cache_proxy;
2097 
2098 		/* For an (*,G) entry, we only check that the incoming
2099 		 * interface is part of the static tree.
2100 		 */
2101 		cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
2102 		if (cache_proxy &&
2103 		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
2104 			goto forward;
2105 	}
2106 
2107 	/*
2108 	 * Wrong interface: drop packet and (maybe) send PIM assert.
2109 	 */
2110 	if (mrt->vif6_table[vif].dev != skb->dev) {
2111 		cache->mfc_un.res.wrong_if++;
2112 
2113 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2114 		    /* pimsm uses asserts when switching from RPT to SPT,
2115 		       so we cannot check that the packet arrived on an oif.
2116 		       That is bad, but otherwise we would need to move a
2117 		       pretty large chunk of pimd into the kernel. Ough... --ANK
2118 		     */
2119 		    (mrt->mroute_do_pim ||
2120 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
2121 		    time_after(jiffies,
2122 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2123 			cache->mfc_un.res.last_assert = jiffies;
2124 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2125 		}
2126 		goto dont_forward;
2127 	}
2128 
2129 forward:
2130 	mrt->vif6_table[vif].pkt_in++;
2131 	mrt->vif6_table[vif].bytes_in += skb->len;
2132 
2133 	/*
2134 	 *	Forward the frame
2135 	 */
2136 	if (ipv6_addr_any(&cache->mf6c_origin) &&
2137 	    ipv6_addr_any(&cache->mf6c_mcastgrp)) {
2138 		if (true_vifi >= 0 &&
2139 		    true_vifi != cache->mf6c_parent &&
2140 		    ipv6_hdr(skb)->hop_limit >
2141 				cache->mfc_un.res.ttls[cache->mf6c_parent]) {
2142 			/* It's an (*,*) entry and the packet is not coming from
2143 			 * the upstream: forward the packet to the upstream
2144 			 * only.
2145 			 */
2146 			psend = cache->mf6c_parent;
2147 			goto last_forward;
2148 		}
2149 		goto dont_forward;
2150 	}
2151 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2152 		/* For (*,G) entry, don't forward to the incoming interface */
2153 		if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
2154 		    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2155 			if (psend != -1) {
2156 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2157 				if (skb2)
2158 					ip6mr_forward2(net, mrt, skb2, cache, psend);
2159 			}
2160 			psend = ct;
2161 		}
2162 	}
2163 last_forward:
2164 	if (psend != -1) {
2165 		ip6mr_forward2(net, mrt, skb, cache, psend);
2166 		return;
2167 	}
2168 
2169 dont_forward:
2170 	kfree_skb(skb);
2171 }
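
/*
 * Example (not part of this file): how a routing daemon installs the
 * MFC entries that ip6_mr_forward() above consumes.  A minimal sketch
 * with error handling elided; the socket is an mroute6 control socket
 * (raw ICMPv6 after MRT6_INIT) and the MIF indices are placeholders
 * chosen by the daemon when it added the MIFs.
 */
#if 0
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/mroute6.h>

static int add_mroute(int fd, mifi_t parent, mifi_t oif,
		      const struct in6_addr *src,
		      const struct in6_addr *grp)
{
	struct mf6cctl mfc;

	memset(&mfc, 0, sizeof(mfc));
	mfc.mf6cc_origin.sin6_family = AF_INET6;
	mfc.mf6cc_origin.sin6_addr = *src;
	mfc.mf6cc_mcastgrp.sin6_family = AF_INET6;
	mfc.mf6cc_mcastgrp.sin6_addr = *grp;
	mfc.mf6cc_parent = parent;		/* expected incoming MIF */
	IF_ZERO(&mfc.mf6cc_ifset);
	IF_SET(oif, &mfc.mf6cc_ifset);		/* forward out of this MIF */

	return setsockopt(fd, IPPROTO_IPV6, MRT6_ADD_MFC,
			  &mfc, sizeof(mfc));
}
#endif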
2172 
2173 
2174 /*
2175  *	Multicast packets for forwarding arrive here
2176  */
2177 
2178 int ip6_mr_input(struct sk_buff *skb)
2179 {
2180 	struct mfc6_cache *cache;
2181 	struct net *net = dev_net(skb->dev);
2182 	struct mr6_table *mrt;
2183 	struct flowi6 fl6 = {
2184 		.flowi6_iif	= skb->dev->ifindex,
2185 		.flowi6_mark	= skb->mark,
2186 	};
2187 	int err;
2188 
2189 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2190 	if (err < 0) {
2191 		kfree_skb(skb);
2192 		return err;
2193 	}
2194 
2195 	read_lock(&mrt_lock);
2196 	cache = ip6mr_cache_find(mrt,
2197 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2198 	if (!cache) {
2199 		int vif = ip6mr_find_vif(mrt, skb->dev);
2200 
2201 		if (vif >= 0)
2202 			cache = ip6mr_cache_find_any(mrt,
2203 						     &ipv6_hdr(skb)->daddr,
2204 						     vif);
2205 	}
2206 
2207 	/*
2208 	 *	No usable cache entry
2209 	 */
2210 	if (!cache) {
2211 		int vif;
2212 
2213 		vif = ip6mr_find_vif(mrt, skb->dev);
2214 		if (vif >= 0) {
2215 			int err = ip6mr_cache_unresolved(mrt, vif, skb);
2216 			read_unlock(&mrt_lock);
2217 
2218 			return err;
2219 		}
2220 		read_unlock(&mrt_lock);
2221 		kfree_skb(skb);
2222 		return -ENODEV;
2223 	}
2224 
2225 	ip6_mr_forward(net, mrt, skb, cache);
2226 
2227 	read_unlock(&mrt_lock);
2228 
2229 	return 0;
2230 }
2231 
2232 
2233 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2234 			       struct mfc6_cache *c, struct rtmsg *rtm)
2235 {
2236 	int ct;
2237 	struct rtnexthop *nhp;
2238 	struct nlattr *mp_attr;
2239 	struct rta_mfc_stats mfcs;
2240 
2241 	/* If cache is unresolved, don't try to parse IIF and OIF */
2242 	if (c->mf6c_parent >= MAXMIFS)
2243 		return -ENOENT;
2244 
2245 	if (MIF_EXISTS(mrt, c->mf6c_parent) &&
2246 	    nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
2247 		return -EMSGSIZE;
2248 	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
2249 	if (!mp_attr)
2250 		return -EMSGSIZE;
2251 
2252 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2253 		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2254 			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
2255 			if (!nhp) {
2256 				nla_nest_cancel(skb, mp_attr);
2257 				return -EMSGSIZE;
2258 			}
2259 
2260 			nhp->rtnh_flags = 0;
2261 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2262 			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2263 			nhp->rtnh_len = sizeof(*nhp);
2264 		}
2265 	}
2266 
2267 	nla_nest_end(skb, mp_attr);
2268 
2269 	mfcs.mfcs_packets = c->mfc_un.res.pkt;
2270 	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2271 	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
2272 	if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) < 0)
2273 		return -EMSGSIZE;
2274 
2275 	rtm->rtm_type = RTN_MULTICAST;
2276 	return 1;
2277 }
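
/*
 * Example (not part of this file): how a dump consumer might walk the
 * RTA_MULTIPATH attribute built above, using the standard rtnetlink
 * macros.  A minimal sketch; @attr is assumed to be the already-located
 * RTA_MULTIPATH attribute and the function name is illustrative.
 */
#if 0
#include <stdio.h>
#include <linux/rtnetlink.h>

static void print_oifs(struct rtattr *attr)
{
	struct rtnexthop *rtnh = RTA_DATA(attr);
	int len = RTA_PAYLOAD(attr);

	while (RTNH_OK(rtnh, len)) {
		/* rtnh_hops carries the TTL threshold set for the MIF */
		printf("oif %d, threshold %d\n",
		       rtnh->rtnh_ifindex, rtnh->rtnh_hops);
		len -= NLMSG_ALIGN(rtnh->rtnh_len);
		rtnh = RTNH_NEXT(rtnh);
	}
}
#endif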
2278 
2279 int ip6mr_get_route(struct net *net,
2280 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2281 {
2282 	int err;
2283 	struct mr6_table *mrt;
2284 	struct mfc6_cache *cache;
2285 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2286 
2287 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2288 	if (!mrt)
2289 		return -ENOENT;
2290 
2291 	read_lock(&mrt_lock);
2292 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2293 	if (!cache && skb->dev) {
2294 		int vif = ip6mr_find_vif(mrt, skb->dev);
2295 
2296 		if (vif >= 0)
2297 			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2298 						     vif);
2299 	}
2300 
2301 	if (!cache) {
2302 		struct sk_buff *skb2;
2303 		struct ipv6hdr *iph;
2304 		struct net_device *dev;
2305 		int vif;
2306 
2307 		if (nowait) {
2308 			read_unlock(&mrt_lock);
2309 			return -EAGAIN;
2310 		}
2311 
2312 		dev = skb->dev;
2313 		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2314 			read_unlock(&mrt_lock);
2315 			return -ENODEV;
2316 		}
2317 
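		/* Build a dummy header carrying only the (S,G) pair (version
		 * 0 marks it as a netlink request rather than a real packet)
		 * and queue it on the unresolved list; the reply is completed
		 * once the entry is resolved.
		 */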
2318 		/* really correct? */
2319 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2320 		if (!skb2) {
2321 			read_unlock(&mrt_lock);
2322 			return -ENOMEM;
2323 		}
2324 
2325 		skb_reset_transport_header(skb2);
2326 
2327 		skb_put(skb2, sizeof(struct ipv6hdr));
2328 		skb_reset_network_header(skb2);
2329 
2330 		iph = ipv6_hdr(skb2);
2331 		iph->version = 0;
2332 		iph->priority = 0;
2333 		iph->flow_lbl[0] = 0;
2334 		iph->flow_lbl[1] = 0;
2335 		iph->flow_lbl[2] = 0;
2336 		iph->payload_len = 0;
2337 		iph->nexthdr = IPPROTO_NONE;
2338 		iph->hop_limit = 0;
2339 		iph->saddr = rt->rt6i_src.addr;
2340 		iph->daddr = rt->rt6i_dst.addr;
2341 
2342 		err = ip6mr_cache_unresolved(mrt, vif, skb2);
2343 		read_unlock(&mrt_lock);
2344 
2345 		return err;
2346 	}
2347 
2348 	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
2349 		cache->mfc_flags |= MFC_NOTIFY;
2350 
2351 	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2352 	read_unlock(&mrt_lock);
2353 	return err;
2354 }
2355 
2356 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2357 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2358 			     int flags)
2359 {
2360 	struct nlmsghdr *nlh;
2361 	struct rtmsg *rtm;
2362 	int err;
2363 
2364 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2365 	if (!nlh)
2366 		return -EMSGSIZE;
2367 
2368 	rtm = nlmsg_data(nlh);
2369 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2370 	rtm->rtm_dst_len  = 128;
2371 	rtm->rtm_src_len  = 128;
2372 	rtm->rtm_tos      = 0;
2373 	rtm->rtm_table    = mrt->id;
2374 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2375 		goto nla_put_failure;
2376 	rtm->rtm_type = RTN_MULTICAST;
2377 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2378 	if (c->mfc_flags & MFC_STATIC)
2379 		rtm->rtm_protocol = RTPROT_STATIC;
2380 	else
2381 		rtm->rtm_protocol = RTPROT_MROUTED;
2382 	rtm->rtm_flags    = 0;
2383 
2384 	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2385 	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2386 		goto nla_put_failure;
2387 	err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
2388 	/* do not break the dump if cache is unresolved */
2389 	if (err < 0 && err != -ENOENT)
2390 		goto nla_put_failure;
2391 
2392 	nlmsg_end(skb, nlh);
2393 	return 0;
2394 
2395 nla_put_failure:
2396 	nlmsg_cancel(skb, nlh);
2397 	return -EMSGSIZE;
2398 }
2399 
2400 static int mr6_msgsize(bool unresolved, int maxvif)
2401 {
2402 	size_t len =
2403 		NLMSG_ALIGN(sizeof(struct rtmsg))
2404 		+ nla_total_size(4)	/* RTA_TABLE */
2405 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2406 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2407 		;
2408 
2409 	if (!unresolved)
2410 		len = len
2411 		      + nla_total_size(4)	/* RTA_IIF */
2412 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2413 		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2414 						/* RTA_MFC_STATS */
2415 		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2416 		;
2417 
2418 	return len;
2419 }
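
/*
 * For illustration, assuming the usual layouts (12-byte rtmsg,
 * 16-byte in6_addr, 8-byte rtnexthop, 24-byte rta_mfc_stats): an
 * unresolved entry needs 12 + 8 + 20 + 20 = 60 bytes, and a resolved
 * entry with maxvif == 2 adds 8 + 4 + 2 * 8 + 28 more (plus 4 bytes
 * for an RTA_PAD header on architectures without efficient unaligned
 * access), i.e. 116 or 120 bytes in total.
 */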
2420 
2421 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
2422 			      int cmd)
2423 {
2424 	struct net *net = read_pnet(&mrt->net);
2425 	struct sk_buff *skb;
2426 	int err = -ENOBUFS;
2427 
2428 	skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
2429 			GFP_ATOMIC);
2430 	if (!skb)
2431 		goto errout;
2432 
2433 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2434 	if (err < 0)
2435 		goto errout;
2436 
2437 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2438 	return;
2439 
2440 errout:
2441 	kfree_skb(skb);
2442 	if (err < 0)
2443 		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2444 }
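
/*
 * Example (not part of this file): a userspace listener for the
 * notifications sent above.  A minimal sketch with the function name
 * illustrative; real code would parse the RTM_NEWROUTE/RTM_DELROUTE
 * payload with the rtnetlink macros rather than just count bytes.
 */
#if 0
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

static int monitor_ip6mr(void)
{
	struct sockaddr_nl snl = { .nl_family = AF_NETLINK };
	int grp = RTNLGRP_IPV6_MROUTE;
	char buf[8192];
	int fd;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd < 0)
		return -1;
	bind(fd, (struct sockaddr *)&snl, sizeof(snl));
	/* Groups above 31 cannot use nl_groups; join via setsockopt. */
	setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
		   &grp, sizeof(grp));
	for (;;) {
		ssize_t n = recv(fd, buf, sizeof(buf), 0);

		if (n <= 0)
			break;
		printf("got %zd bytes of IPv6 mroute notifications\n", n);
	}
	close(fd);
	return 0;
}
#endif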
2445 
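/* Dump all MFC entries as RTM_NEWROUTE messages.  cb->args[] holds a
 * three-level resume cursor - table (t), hash bucket (h) and entry (e) -
 * so a dump that overflows its skb can continue where it stopped on the
 * next callback invocation.
 */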
2446 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2447 {
2448 	struct net *net = sock_net(skb->sk);
2449 	struct mr6_table *mrt;
2450 	struct mfc6_cache *mfc;
2451 	unsigned int t = 0, s_t;
2452 	unsigned int h = 0, s_h;
2453 	unsigned int e = 0, s_e;
2454 
2455 	s_t = cb->args[0];
2456 	s_h = cb->args[1];
2457 	s_e = cb->args[2];
2458 
2459 	read_lock(&mrt_lock);
2460 	ip6mr_for_each_table(mrt, net) {
2461 		if (t < s_t)
2462 			goto next_table;
2463 		if (t > s_t)
2464 			s_h = 0;
2465 		for (h = s_h; h < MFC6_LINES; h++) {
2466 			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2467 				if (e < s_e)
2468 					goto next_entry;
2469 				if (ip6mr_fill_mroute(mrt, skb,
2470 						      NETLINK_CB(cb->skb).portid,
2471 						      cb->nlh->nlmsg_seq,
2472 						      mfc, RTM_NEWROUTE,
2473 						      NLM_F_MULTI) < 0)
2474 					goto done;
2475 next_entry:
2476 				e++;
2477 			}
2478 			e = s_e = 0;
2479 		}
2480 		spin_lock_bh(&mfc_unres_lock);
2481 		list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
2482 			if (e < s_e)
2483 				goto next_entry2;
2484 			if (ip6mr_fill_mroute(mrt, skb,
2485 					      NETLINK_CB(cb->skb).portid,
2486 					      cb->nlh->nlmsg_seq,
2487 					      mfc, RTM_NEWROUTE,
2488 					      NLM_F_MULTI) < 0) {
2489 				spin_unlock_bh(&mfc_unres_lock);
2490 				goto done;
2491 			}
2492 next_entry2:
2493 			e++;
2494 		}
2495 		spin_unlock_bh(&mfc_unres_lock);
2496 		e = s_e = 0;
2497 		s_h = 0;
2498 next_table:
2499 		t++;
2500 	}
2501 done:
2502 	read_unlock(&mrt_lock);
2503 
2504 	cb->args[2] = e;
2505 	cb->args[1] = h;
2506 	cb->args[0] = t;
2507 
2508 	return skb->len;
2509 }
2510