xref: /openbmc/linux/net/ipv6/ip6mr.c (revision 034f90b3)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <asm/uaccess.h>
20 #include <linux/types.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
23 #include <linux/timer.h>
24 #include <linux/mm.h>
25 #include <linux/kernel.h>
26 #include <linux/fcntl.h>
27 #include <linux/stat.h>
28 #include <linux/socket.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/inetdevice.h>
32 #include <linux/proc_fs.h>
33 #include <linux/seq_file.h>
34 #include <linux/init.h>
35 #include <linux/slab.h>
36 #include <linux/compat.h>
37 #include <net/protocol.h>
38 #include <linux/skbuff.h>
39 #include <net/sock.h>
40 #include <net/raw.h>
41 #include <linux/notifier.h>
42 #include <linux/if_arp.h>
43 #include <net/checksum.h>
44 #include <net/netlink.h>
45 #include <net/fib_rules.h>
46 
47 #include <net/ipv6.h>
48 #include <net/ip6_route.h>
49 #include <linux/mroute6.h>
50 #include <linux/pim.h>
51 #include <net/addrconf.h>
52 #include <linux/netfilter_ipv6.h>
53 #include <linux/export.h>
54 #include <net/ip6_checksum.h>
55 #include <linux/netconf.h>
56 
/*
 * State of one IPv6 multicast routing table: its MIF (multicast interface)
 * slots, the resolved and unresolved forwarding caches and the control
 * socket that receives cache reports.
 */
struct mr6_table {
	struct list_head	list;		/* linkage in net->ipv6.mr6_tables */
#ifdef CONFIG_NET_NS
	struct net		*net;		/* owning namespace (read_pnet/write_pnet) */
#endif
	u32			id;		/* table identifier */
	struct sock		*mroute6_sk;	/* socket cache reports are queued to */
	struct timer_list	ipmr_expire_timer;	/* fires ipmr_expire_process() */
	struct list_head	mfc6_unres_queue;	/* unresolved cache entries */
	struct list_head	mfc6_cache_array[MFC6_LINES];	/* resolved entries, by hash line */
	struct mif_device	vif6_table[MAXMIFS];	/* MIF slots; in use when .dev != NULL */
	int			maxvif;		/* highest in-use MIF index + 1 */
	atomic_t		cache_resolve_queue_len;
	bool			mroute_do_assert;
	bool			mroute_do_pim;
#ifdef CONFIG_IPV6_PIMSM_V2
	int			mroute_reg_vif_num;	/* index of the register MIF, -1 if none */
#endif
};
76 
/* fib rule for this family; carries nothing beyond the generic rule. */
struct ip6mr_rule {
	struct fib_rule		common;
};
80 
/* Result slot handed to fib_rules_lookup(); filled by ip6mr_rule_action(). */
struct ip6mr_result {
	struct mr6_table	*mrt;
};
84 
/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

/* True when MIF slot _idx of table _mrt has a device attached. */
#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

/* Slab cache that struct mfc6_cache entries are allocated from and freed to. */
static struct kmem_cache *mrt_cachep __read_mostly;

/* Forward declarations for helpers that are used before their definition. */
static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr6_table *mrt);

static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm);
static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr6_table *mrt);
static void ipmr_expire_process(unsigned long arg);
125 
126 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
127 #define ip6mr_for_each_table(mrt, net) \
128 	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
129 
130 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
131 {
132 	struct mr6_table *mrt;
133 
134 	ip6mr_for_each_table(mrt, net) {
135 		if (mrt->id == id)
136 			return mrt;
137 	}
138 	return NULL;
139 }
140 
141 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
142 			    struct mr6_table **mrt)
143 {
144 	int err;
145 	struct ip6mr_result res;
146 	struct fib_lookup_arg arg = {
147 		.result = &res,
148 		.flags = FIB_LOOKUP_NOREF,
149 	};
150 
151 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
152 			       flowi6_to_flowi(flp6), 0, &arg);
153 	if (err < 0)
154 		return err;
155 	*mrt = res.mrt;
156 	return 0;
157 }
158 
159 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
160 			     int flags, struct fib_lookup_arg *arg)
161 {
162 	struct ip6mr_result *res = arg->result;
163 	struct mr6_table *mrt;
164 
165 	switch (rule->action) {
166 	case FR_ACT_TO_TBL:
167 		break;
168 	case FR_ACT_UNREACHABLE:
169 		return -ENETUNREACH;
170 	case FR_ACT_PROHIBIT:
171 		return -EACCES;
172 	case FR_ACT_BLACKHOLE:
173 	default:
174 		return -EINVAL;
175 	}
176 
177 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
178 	if (mrt == NULL)
179 		return -EAGAIN;
180 	res->mrt = mrt;
181 	return 0;
182 }
183 
/* fib-rules match callback: every flow matches (always returns 1). */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}
188 
/* Netlink attribute policy: only the generic fib-rule attributes. */
static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};
192 
/* fib-rules configure callback: nothing family-specific to set up; returns 0. */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}
198 
/* fib-rules compare callback: no family-specific fields, so always returns 1. */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}
204 
/*
 * fib-rules fill callback: zero the prefix lengths and tos in the rule
 * header being dumped. Always returns 0.
 */
static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}
213 
/*
 * Template passed to fib_rules_register() for each namespace; wires the
 * ip6mr_rule_* callbacks above into the generic fib-rules framework.
 */
static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};
228 
229 static int __net_init ip6mr_rules_init(struct net *net)
230 {
231 	struct fib_rules_ops *ops;
232 	struct mr6_table *mrt;
233 	int err;
234 
235 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
236 	if (IS_ERR(ops))
237 		return PTR_ERR(ops);
238 
239 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
240 
241 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
242 	if (mrt == NULL) {
243 		err = -ENOMEM;
244 		goto err1;
245 	}
246 
247 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
248 	if (err < 0)
249 		goto err2;
250 
251 	net->ipv6.mr6_rules_ops = ops;
252 	return 0;
253 
254 err2:
255 	kfree(mrt);
256 err1:
257 	fib_rules_unregister(ops);
258 	return err;
259 }
260 
/*
 * Per-namespace teardown for the multiple-tables configuration: unlink and
 * free every table under rtnl_lock, then unregister the fib-rules ops.
 */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr6_table *mrt, *next;

	rtnl_lock();
	/* _safe variant: each entry is unlinked and freed inside the loop */
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	rtnl_unlock();
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
}
273 #else
/* Single-table build: "iterate" over the one table, if it exists. */
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

/* Single-table build: @id is ignored, the namespace's only table is returned. */
static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}
281 
/*
 * Single-table build: no rules to consult; store the namespace's table in
 * *@mrt and return 0. @flp6 is unused.
 */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}
288 
289 static int __net_init ip6mr_rules_init(struct net *net)
290 {
291 	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
292 	return net->ipv6.mrt6 ? 0 : -ENOMEM;
293 }
294 
/* Single-table build: free the namespace's table under rtnl_lock and clear the pointer. */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}
302 #endif
303 
304 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
305 {
306 	struct mr6_table *mrt;
307 	unsigned int i;
308 
309 	mrt = ip6mr_get_table(net, id);
310 	if (mrt != NULL)
311 		return mrt;
312 
313 	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
314 	if (mrt == NULL)
315 		return NULL;
316 	mrt->id = id;
317 	write_pnet(&mrt->net, net);
318 
319 	/* Forwarding cache */
320 	for (i = 0; i < MFC6_LINES; i++)
321 		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
322 
323 	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
324 
325 	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
326 		    (unsigned long)mrt);
327 
328 #ifdef CONFIG_IPV6_PIMSM_V2
329 	mrt->mroute_reg_vif_num = -1;
330 #endif
331 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
332 	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
333 #endif
334 	return mrt;
335 }
336 
337 static void ip6mr_free_table(struct mr6_table *mrt)
338 {
339 	del_timer(&mrt->ipmr_expire_timer);
340 	mroute_clean_tables(mrt);
341 	kfree(mrt);
342 }
343 
344 #ifdef CONFIG_PROC_FS
345 
/* seq_file cursor for the MFC cache dump. */
struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;		/* table being dumped */
	struct list_head *cache;	/* list currently walked; also tells stop() which lock is held */
	int ct;				/* current hash line while walking mfc6_cache_array */
};
352 
353 
/*
 * Position the MFC cursor on the @pos-th entry, counting the resolved
 * entries (hash lines in order) first and the unresolved queue after them.
 *
 * Locking is handed to the caller: on a hit in the resolved cache the
 * function returns with mrt_lock still read-held, on a hit in the
 * unresolved queue it returns with mfc_unres_lock still held, and
 * it->cache records which list the entry came from. When no entry is found
 * both locks have been dropped, it->cache is NULL and NULL is returned.
 */
static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
					   struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr6_table *mrt = it->mrt;
	struct mfc6_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;	/* mrt_lock stays held */
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc6_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;	/* mfc_unres_lock stays held */
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}
379 
380 /*
381  *	The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
382  */
383 
/* seq_file cursor for the MIF table dump. */
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;	/* table being dumped */
	int ct;			/* index of the current slot in vif6_table */
};
389 
390 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
391 					    struct ipmr_vif_iter *iter,
392 					    loff_t pos)
393 {
394 	struct mr6_table *mrt = iter->mrt;
395 
396 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
397 		if (!MIF_EXISTS(mrt, iter->ct))
398 			continue;
399 		if (pos-- == 0)
400 			return &mrt->vif6_table[iter->ct];
401 	}
402 	return NULL;
403 }
404 
/*
 * seq_file start for the MIF dump: bind the iterator to the default table,
 * take mrt_lock for reading and return either the header token (*pos == 0)
 * or the (*pos - 1)-th in-use MIF.
 *
 * NOTE(review): the -ENOENT return happens before read_lock(), while
 * ip6mr_vif_seq_stop() unlocks unconditionally — confirm whether stop can
 * run after a failed start.
 */
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}
422 
423 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
424 {
425 	struct ipmr_vif_iter *iter = seq->private;
426 	struct net *net = seq_file_net(seq);
427 	struct mr6_table *mrt = iter->mrt;
428 
429 	++*pos;
430 	if (v == SEQ_START_TOKEN)
431 		return ip6mr_vif_seq_idx(net, iter, 0);
432 
433 	while (++iter->ct < mrt->maxvif) {
434 		if (!MIF_EXISTS(mrt, iter->ct))
435 			continue;
436 		return &mrt->vif6_table[iter->ct];
437 	}
438 	return NULL;
439 }
440 
/* seq_file stop for the MIF dump: drop the read lock taken in start. */
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}
446 
447 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
448 {
449 	struct ipmr_vif_iter *iter = seq->private;
450 	struct mr6_table *mrt = iter->mrt;
451 
452 	if (v == SEQ_START_TOKEN) {
453 		seq_puts(seq,
454 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
455 	} else {
456 		const struct mif_device *vif = v;
457 		const char *name = vif->dev ? vif->dev->name : "none";
458 
459 		seq_printf(seq,
460 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
461 			   vif - mrt->vif6_table,
462 			   name, vif->bytes_in, vif->pkt_in,
463 			   vif->bytes_out, vif->pkt_out,
464 			   vif->flags);
465 	}
466 	return 0;
467 }
468 
/* seq_file callbacks for the MIF table dump. */
static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};
475 
/* open() for the MIF dump: namespace-aware seq_file with an ipmr_vif_iter as private data. */
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}
481 
/* File operations for the MIF dump file. */
static const struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
489 
490 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
491 {
492 	struct ipmr_mfc_iter *it = seq->private;
493 	struct net *net = seq_file_net(seq);
494 	struct mr6_table *mrt;
495 
496 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
497 	if (mrt == NULL)
498 		return ERR_PTR(-ENOENT);
499 
500 	it->mrt = mrt;
501 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
502 		: SEQ_START_TOKEN;
503 }
504 
/*
 * seq_file next for the MFC dump: advance *pos and return the entry that
 * follows @v, walking the resolved hash lines first and the unresolved
 * queue afterwards.
 *
 * The function also hands over locking: while it returns entries from the
 * resolved cache mrt_lock stays read-held; when the cache is exhausted it
 * drops mrt_lock and takes mfc_unres_lock before moving to the unresolved
 * queue; at the very end it drops mfc_unres_lock and clears it->cache.
 */
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	/* more entries on the list we are already walking? */
	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc6_cache, list);

	/* the unresolved queue is the last list; nothing follows it */
	if (it->cache == &mrt->mfc6_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);

	/* skip forward to the next non-empty hash line */
	while (++it->ct < MFC6_LINES) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc6_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc6_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc6_cache, list);

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}
547 
548 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
549 {
550 	struct ipmr_mfc_iter *it = seq->private;
551 	struct mr6_table *mrt = it->mrt;
552 
553 	if (it->cache == &mrt->mfc6_unres_queue)
554 		spin_unlock_bh(&mfc_unres_lock);
555 	else if (it->cache == mrt->mfc6_cache_array)
556 		read_unlock(&mrt_lock);
557 }
558 
559 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
560 {
561 	int n;
562 
563 	if (v == SEQ_START_TOKEN) {
564 		seq_puts(seq,
565 			 "Group                            "
566 			 "Origin                           "
567 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
568 	} else {
569 		const struct mfc6_cache *mfc = v;
570 		const struct ipmr_mfc_iter *it = seq->private;
571 		struct mr6_table *mrt = it->mrt;
572 
573 		seq_printf(seq, "%pI6 %pI6 %-3hd",
574 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
575 			   mfc->mf6c_parent);
576 
577 		if (it->cache != &mrt->mfc6_unres_queue) {
578 			seq_printf(seq, " %8lu %8lu %8lu",
579 				   mfc->mfc_un.res.pkt,
580 				   mfc->mfc_un.res.bytes,
581 				   mfc->mfc_un.res.wrong_if);
582 			for (n = mfc->mfc_un.res.minvif;
583 			     n < mfc->mfc_un.res.maxvif; n++) {
584 				if (MIF_EXISTS(mrt, n) &&
585 				    mfc->mfc_un.res.ttls[n] < 255)
586 					seq_printf(seq,
587 						   " %2d:%-3d",
588 						   n, mfc->mfc_un.res.ttls[n]);
589 			}
590 		} else {
591 			/* unresolved mfc_caches don't contain
592 			 * pkt, bytes and wrong_if values
593 			 */
594 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
595 		}
596 		seq_putc(seq, '\n');
597 	}
598 	return 0;
599 }
600 
/* seq_file callbacks for the MFC cache dump. */
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
607 
/* open() for the MFC dump: namespace-aware seq_file with an ipmr_mfc_iter as private data. */
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}
613 
/* File operations for the MFC dump file. */
static const struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
621 #endif
622 
623 #ifdef CONFIG_IPV6_PIMSM_V2
624 
/*
 * Protocol handler for incoming PIM packets.
 *
 * Accepts PIM register messages that are not null-registers, checks that
 * the encapsulated IPv6 packet is addressed to a multicast group and fits
 * in the skb, then strips the outer headers and re-injects the inner packet
 * through the table's register device. Anything else is dropped.
 *
 * Always returns 0; the skb is consumed in every case.
 */
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	/* need the PIM register header plus the inner IPv6 header in the linear area */
	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	/*
	 * Drop unless this is a non-null register message. The checksum is
	 * accepted if it verifies either over the PIM header alone or over
	 * the whole packet; only when both checks are non-zero is the
	 * packet dropped.
	 */
	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	/* take a reference on the register device while mrt_lock pins the slot */
	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	/* make the inner IPv6 packet the packet: pull everything in front of it */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
689 
/* inet6 protocol descriptor that routes received PIM packets to pim6_rcv(). */
static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};
693 
694 /* Service routines creating virtual interfaces: PIMREG */
695 
/*
 * Transmit handler of the register device: instead of sending the packet
 * anywhere, report it whole (MRT6MSG_WHOLEPKT) to the table's control
 * socket and free it.
 *
 * The skb is consumed on every path. Returns NETDEV_TX_OK, or the negative
 * error from the table lookup when no table could be resolved.
 */
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	/* ip6mr_cache_report() must be called under mrt_lock */
	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}
722 
/* Device operations of the register device: only transmit is implemented. */
static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};
726 
/* alloc_netdev() setup callback that configures a freshly allocated register device. */
static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	/* 1500 minus an IPv6 header and 8 more bytes — presumably the PIM register header; confirm */
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
736 
737 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
738 {
739 	struct net_device *dev;
740 	char name[IFNAMSIZ];
741 
742 	if (mrt->id == RT6_TABLE_DFLT)
743 		sprintf(name, "pim6reg");
744 	else
745 		sprintf(name, "pim6reg%u", mrt->id);
746 
747 	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
748 	if (dev == NULL)
749 		return NULL;
750 
751 	dev_net_set(dev, net);
752 
753 	if (register_netdevice(dev)) {
754 		free_netdev(dev);
755 		return NULL;
756 	}
757 	dev->iflink = 0;
758 
759 	if (dev_open(dev))
760 		goto failure;
761 
762 	dev_hold(dev);
763 	return dev;
764 
765 failure:
766 	/* allow the register to be completed before unregistering. */
767 	rtnl_unlock();
768 	rtnl_lock();
769 
770 	unregister_netdevice(dev);
771 	return NULL;
772 }
773 #endif
774 
/*
 *	Delete a VIF entry
 *
 *	Detaches the device from slot @vifi of @mrt, shrinks maxvif if the
 *	highest slot was removed, undoes the allmulti and mc_forwarding
 *	changes on the device and drops the slot's device reference.
 *	Register devices are queued for unregistration on @head.
 *
 *	Returns 0, or -EADDRNOTAVAIL when @vifi is out of range or the slot
 *	is empty.
 */

static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
{
	struct mif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif6_table[vifi];

	/* clear the slot under the write lock so readers never see a half-removed MIF */
	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	/* removed the top slot: lower maxvif to just above the next in-use slot */
	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding--;
		inet6_netconf_notify_devconf(dev_net(dev),
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}
831 
/* Return cache entry @c to the mrt_cachep slab cache. */
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}
836 
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.

   Decrements the table's cache_resolve_queue_len, drains the entry's
   queue and frees the entry. A queued skb whose IPv6 version field is 0
   is treated as a pending netlink request: it is rewritten into an
   NLMSG_ERROR carrying -ETIMEDOUT and unicast back to its sender. All
   other skbs are freed.
 */

static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			/* the netlink header sits right behind the dummy IPv6 header */
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}
862 
863 
/* Timer process for all the unresolved queue.
 *
 * Destroys every unresolved entry whose expiry time has passed (after
 * sending an RTM_DELROUTE event for it) and, if entries remain, re-arms the
 * timer for the earliest remaining expiry, capped at 10 seconds. The only
 * caller in view, ipmr_expire_process(), invokes it with mfc_unres_lock
 * held.
 */

static void ipmr_do_expire_process(struct mr6_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mfc6_cache *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}
889 
890 static void ipmr_expire_process(unsigned long arg)
891 {
892 	struct mr6_table *mrt = (struct mr6_table *)arg;
893 
894 	if (!spin_trylock(&mfc_unres_lock)) {
895 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
896 		return;
897 	}
898 
899 	if (!list_empty(&mrt->mfc6_unres_queue))
900 		ipmr_do_expire_process(mrt);
901 
902 	spin_unlock(&mfc_unres_lock);
903 }
904 
905 /* Fill oifs list. It is called under write locked mrt_lock. */
906 
907 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
908 				    unsigned char *ttls)
909 {
910 	int vifi;
911 
912 	cache->mfc_un.res.minvif = MAXMIFS;
913 	cache->mfc_un.res.maxvif = 0;
914 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
915 
916 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
917 		if (MIF_EXISTS(mrt, vifi) &&
918 		    ttls[vifi] && ttls[vifi] < 255) {
919 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
920 			if (cache->mfc_un.res.minvif > vifi)
921 				cache->mfc_un.res.minvif = vifi;
922 			if (cache->mfc_un.res.maxvif <= vifi)
923 				cache->mfc_un.res.maxvif = vifi + 1;
924 		}
925 	}
926 }
927 
/*
 * Attach a device to MIF slot vifc->mif6c_mifi of @mrt.
 *
 * For MIFF_REGISTER a new register device is created (only one per table);
 * for flags == 0 the device with index vifc->mif6c_pifi is looked up. The
 * device is put into allmulti mode, its mc_forwarding counter is raised and
 * the slot is published under mrt_lock. @mrtsock == 0 marks the MIF as
 * VIFF_STATIC.
 *
 * Returns 0 or a negative errno (-EADDRINUSE, -ENOBUFS, -EADDRNOTAVAIL,
 * -EINVAL, or the error from dev_set_allmulti()).
 *
 * NOTE(review): vifi indexes vif6_table without a range check here —
 * presumably the caller validates mif6c_mifi; confirm.
 */
static int mif6_add(struct net *net, struct mr6_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (MIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding++;
		inet6_netconf_notify_devconf(dev_net(dev),
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	/* setting v->dev is what makes MIF_EXISTS() true for this slot */
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
1011 
1012 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
1013 					   const struct in6_addr *origin,
1014 					   const struct in6_addr *mcastgrp)
1015 {
1016 	int line = MFC6_HASH(mcastgrp, origin);
1017 	struct mfc6_cache *c;
1018 
1019 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1020 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1021 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1022 			return c;
1023 	}
1024 	return NULL;
1025 }
1026 
1027 /* Look for a (*,*,oif) entry */
1028 static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
1029 						      mifi_t mifi)
1030 {
1031 	int line = MFC6_HASH(&in6addr_any, &in6addr_any);
1032 	struct mfc6_cache *c;
1033 
1034 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1035 		if (ipv6_addr_any(&c->mf6c_origin) &&
1036 		    ipv6_addr_any(&c->mf6c_mcastgrp) &&
1037 		    (c->mfc_un.res.ttls[mifi] < 255))
1038 			return c;
1039 
1040 	return NULL;
1041 }
1042 
/*
 * Look for a (*,G) entry for @mcastgrp that covers @mifi.
 *
 * A (*,G) entry qualifies when its own ttl for @mifi is below 255, or when
 * the (*,*) entry reachable through its parent MIF has @mifi as an output.
 * If no (*,G) entry qualifies, or @mcastgrp is the unspecified address, the
 * result of the (*,*,@mifi) lookup is returned instead (possibly NULL).
 */
static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	int line = MFC6_HASH(mcastgrp, &in6addr_any);
	struct mfc6_cache *c, *proxy;

	if (ipv6_addr_any(mcastgrp))
		goto skip;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
		if (ipv6_addr_any(&c->mf6c_origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
			if (c->mfc_un.res.ttls[mifi] < 255)
				return c;

			/* It's ok if the mifi is part of the static tree */
			proxy = ip6mr_cache_find_any_parent(mrt,
							    c->mf6c_parent);
			if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
				return c;
		}

skip:
	return ip6mr_cache_find_any_parent(mrt, mifi);
}
1070 
1071 /*
1072  *	Allocate a multicast cache entry
1073  */
1074 static struct mfc6_cache *ip6mr_cache_alloc(void)
1075 {
1076 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1077 	if (c == NULL)
1078 		return NULL;
1079 	c->mfc_un.res.minvif = MAXMIFS;
1080 	return c;
1081 }
1082 
1083 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1084 {
1085 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1086 	if (c == NULL)
1087 		return NULL;
1088 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
1089 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
1090 	return c;
1091 }
1092 
/*
 *	A cache entry has gone into a resolved state from queued
 *
 *	Drains the skbs queued on the unresolved entry @uc and handles each
 *	one using the resolved entry @c. An skb whose IPv6 version field is
 *	0 is a pending netlink request: it is answered with the route
 *	filled in from @c, or with an NLMSG_ERROR carrying -EMSGSIZE when
 *	filling fails. Every other skb is forwarded through
 *	ip6_mr_forward().
 */

static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			/* the netlink header sits right behind the dummy IPv6 header */
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			ip6_mr_forward(net, mrt, skb, c);
	}
}
1123 
/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 *
 *	Builds a struct mrt6msg describing @pkt and queues it on the table's
 *	control socket. For MRT6MSG_WHOLEPKT the message header is put in
 *	front of a copy of the whole packet; for every other @assert value
 *	only a copy of the packet's IPv6 header followed by the message
 *	header is sent. @pkt itself is not consumed.
 *
 *	Returns the result of sock_queue_rcv_skb(), -ENOBUFS when no skb
 *	could be allocated, or -EINVAL when the table has no control socket.
 */

static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	/* whole-packet report: copy pkt with enough headroom for the message header */
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		/* the register MIF is reported here, not the @mifi argument */
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	/* no control socket on this table: discard the report */
	if (mrt->mroute6_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}
1219 
1220 /*
1221  *	Queue a packet for resolution. It gets locked cache entry!
1222  */
1223 
1224 static int
1225 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1226 {
1227 	bool found = false;
1228 	int err;
1229 	struct mfc6_cache *c;
1230 
1231 	spin_lock_bh(&mfc_unres_lock);
1232 	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1233 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1234 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1235 			found = true;
1236 			break;
1237 		}
1238 	}
1239 
1240 	if (!found) {
1241 		/*
1242 		 *	Create a new entry if allowable
1243 		 */
1244 
1245 		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1246 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
1247 			spin_unlock_bh(&mfc_unres_lock);
1248 
1249 			kfree_skb(skb);
1250 			return -ENOBUFS;
1251 		}
1252 
1253 		/*
1254 		 *	Fill in the new cache entry
1255 		 */
1256 		c->mf6c_parent = -1;
1257 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1258 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1259 
1260 		/*
1261 		 *	Reflect first query at pim6sd
1262 		 */
1263 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1264 		if (err < 0) {
1265 			/* If the report failed throw the cache entry
1266 			   out - Brad Parker
1267 			 */
1268 			spin_unlock_bh(&mfc_unres_lock);
1269 
1270 			ip6mr_cache_free(c);
1271 			kfree_skb(skb);
1272 			return err;
1273 		}
1274 
1275 		atomic_inc(&mrt->cache_resolve_queue_len);
1276 		list_add(&c->list, &mrt->mfc6_unres_queue);
1277 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1278 
1279 		ipmr_do_expire_process(mrt);
1280 	}
1281 
1282 	/*
1283 	 *	See if we can append the packet
1284 	 */
1285 	if (c->mfc_un.unres.unresolved.qlen > 3) {
1286 		kfree_skb(skb);
1287 		err = -ENOBUFS;
1288 	} else {
1289 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1290 		err = 0;
1291 	}
1292 
1293 	spin_unlock_bh(&mfc_unres_lock);
1294 	return err;
1295 }
1296 
1297 /*
1298  *	MFC6 cache manipulation by user space
1299  */
1300 
1301 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
1302 			    int parent)
1303 {
1304 	int line;
1305 	struct mfc6_cache *c, *next;
1306 
1307 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1308 
1309 	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1310 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1311 		    ipv6_addr_equal(&c->mf6c_mcastgrp,
1312 				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1313 		    (parent == -1 || parent == c->mf6c_parent)) {
1314 			write_lock_bh(&mrt_lock);
1315 			list_del(&c->list);
1316 			write_unlock_bh(&mrt_lock);
1317 
1318 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1319 			ip6mr_cache_free(c);
1320 			return 0;
1321 		}
1322 	}
1323 	return -ENOENT;
1324 }
1325 
1326 static int ip6mr_device_event(struct notifier_block *this,
1327 			      unsigned long event, void *ptr)
1328 {
1329 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1330 	struct net *net = dev_net(dev);
1331 	struct mr6_table *mrt;
1332 	struct mif_device *v;
1333 	int ct;
1334 	LIST_HEAD(list);
1335 
1336 	if (event != NETDEV_UNREGISTER)
1337 		return NOTIFY_DONE;
1338 
1339 	ip6mr_for_each_table(mrt, net) {
1340 		v = &mrt->vif6_table[0];
1341 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1342 			if (v->dev == dev)
1343 				mif6_delete(mrt, ct, &list);
1344 		}
1345 	}
1346 	unregister_netdevice_many(&list);
1347 
1348 	return NOTIFY_DONE;
1349 }
1350 
1351 static struct notifier_block ip6_mr_notifier = {
1352 	.notifier_call = ip6mr_device_event
1353 };
1354 
1355 /*
1356  *	Setup for IP multicast routing
1357  */
1358 
1359 static int __net_init ip6mr_net_init(struct net *net)
1360 {
1361 	int err;
1362 
1363 	err = ip6mr_rules_init(net);
1364 	if (err < 0)
1365 		goto fail;
1366 
1367 #ifdef CONFIG_PROC_FS
1368 	err = -ENOMEM;
1369 	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
1370 		goto proc_vif_fail;
1371 	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
1372 		goto proc_cache_fail;
1373 #endif
1374 
1375 	return 0;
1376 
1377 #ifdef CONFIG_PROC_FS
1378 proc_cache_fail:
1379 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1380 proc_vif_fail:
1381 	ip6mr_rules_exit(net);
1382 #endif
1383 fail:
1384 	return err;
1385 }
1386 
1387 static void __net_exit ip6mr_net_exit(struct net *net)
1388 {
1389 #ifdef CONFIG_PROC_FS
1390 	remove_proc_entry("ip6_mr_cache", net->proc_net);
1391 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1392 #endif
1393 	ip6mr_rules_exit(net);
1394 }
1395 
1396 static struct pernet_operations ip6mr_net_ops = {
1397 	.init = ip6mr_net_init,
1398 	.exit = ip6mr_net_exit,
1399 };
1400 
1401 int __init ip6_mr_init(void)
1402 {
1403 	int err;
1404 
1405 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1406 				       sizeof(struct mfc6_cache),
1407 				       0, SLAB_HWCACHE_ALIGN,
1408 				       NULL);
1409 	if (!mrt_cachep)
1410 		return -ENOMEM;
1411 
1412 	err = register_pernet_subsys(&ip6mr_net_ops);
1413 	if (err)
1414 		goto reg_pernet_fail;
1415 
1416 	err = register_netdevice_notifier(&ip6_mr_notifier);
1417 	if (err)
1418 		goto reg_notif_fail;
1419 #ifdef CONFIG_IPV6_PIMSM_V2
1420 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1421 		pr_err("%s: can't add PIM protocol\n", __func__);
1422 		err = -EAGAIN;
1423 		goto add_proto_fail;
1424 	}
1425 #endif
1426 	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
1427 		      ip6mr_rtm_dumproute, NULL);
1428 	return 0;
1429 #ifdef CONFIG_IPV6_PIMSM_V2
1430 add_proto_fail:
1431 	unregister_netdevice_notifier(&ip6_mr_notifier);
1432 #endif
1433 reg_notif_fail:
1434 	unregister_pernet_subsys(&ip6mr_net_ops);
1435 reg_pernet_fail:
1436 	kmem_cache_destroy(mrt_cachep);
1437 	return err;
1438 }
1439 
1440 void ip6_mr_cleanup(void)
1441 {
1442 	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
1443 #ifdef CONFIG_IPV6_PIMSM_V2
1444 	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1445 #endif
1446 	unregister_netdevice_notifier(&ip6_mr_notifier);
1447 	unregister_pernet_subsys(&ip6mr_net_ops);
1448 	kmem_cache_destroy(mrt_cachep);
1449 }
1450 
1451 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1452 			 struct mf6cctl *mfc, int mrtsock, int parent)
1453 {
1454 	bool found = false;
1455 	int line;
1456 	struct mfc6_cache *uc, *c;
1457 	unsigned char ttls[MAXMIFS];
1458 	int i;
1459 
1460 	if (mfc->mf6cc_parent >= MAXMIFS)
1461 		return -ENFILE;
1462 
1463 	memset(ttls, 255, MAXMIFS);
1464 	for (i = 0; i < MAXMIFS; i++) {
1465 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1466 			ttls[i] = 1;
1467 
1468 	}
1469 
1470 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1471 
1472 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1473 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1474 		    ipv6_addr_equal(&c->mf6c_mcastgrp,
1475 				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1476 		    (parent == -1 || parent == mfc->mf6cc_parent)) {
1477 			found = true;
1478 			break;
1479 		}
1480 	}
1481 
1482 	if (found) {
1483 		write_lock_bh(&mrt_lock);
1484 		c->mf6c_parent = mfc->mf6cc_parent;
1485 		ip6mr_update_thresholds(mrt, c, ttls);
1486 		if (!mrtsock)
1487 			c->mfc_flags |= MFC_STATIC;
1488 		write_unlock_bh(&mrt_lock);
1489 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1490 		return 0;
1491 	}
1492 
1493 	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1494 	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1495 		return -EINVAL;
1496 
1497 	c = ip6mr_cache_alloc();
1498 	if (c == NULL)
1499 		return -ENOMEM;
1500 
1501 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1502 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1503 	c->mf6c_parent = mfc->mf6cc_parent;
1504 	ip6mr_update_thresholds(mrt, c, ttls);
1505 	if (!mrtsock)
1506 		c->mfc_flags |= MFC_STATIC;
1507 
1508 	write_lock_bh(&mrt_lock);
1509 	list_add(&c->list, &mrt->mfc6_cache_array[line]);
1510 	write_unlock_bh(&mrt_lock);
1511 
1512 	/*
1513 	 *	Check to see if we resolved a queued list. If so we
1514 	 *	need to send on the frames and tidy up.
1515 	 */
1516 	found = false;
1517 	spin_lock_bh(&mfc_unres_lock);
1518 	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1519 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1520 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1521 			list_del(&uc->list);
1522 			atomic_dec(&mrt->cache_resolve_queue_len);
1523 			found = true;
1524 			break;
1525 		}
1526 	}
1527 	if (list_empty(&mrt->mfc6_unres_queue))
1528 		del_timer(&mrt->ipmr_expire_timer);
1529 	spin_unlock_bh(&mfc_unres_lock);
1530 
1531 	if (found) {
1532 		ip6mr_cache_resolve(net, mrt, uc, c);
1533 		ip6mr_cache_free(uc);
1534 	}
1535 	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1536 	return 0;
1537 }
1538 
1539 /*
1540  *	Close the multicast socket, and clear the vif tables etc
1541  */
1542 
1543 static void mroute_clean_tables(struct mr6_table *mrt)
1544 {
1545 	int i;
1546 	LIST_HEAD(list);
1547 	struct mfc6_cache *c, *next;
1548 
1549 	/*
1550 	 *	Shut down all active vif entries
1551 	 */
1552 	for (i = 0; i < mrt->maxvif; i++) {
1553 		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1554 			mif6_delete(mrt, i, &list);
1555 	}
1556 	unregister_netdevice_many(&list);
1557 
1558 	/*
1559 	 *	Wipe the cache
1560 	 */
1561 	for (i = 0; i < MFC6_LINES; i++) {
1562 		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1563 			if (c->mfc_flags & MFC_STATIC)
1564 				continue;
1565 			write_lock_bh(&mrt_lock);
1566 			list_del(&c->list);
1567 			write_unlock_bh(&mrt_lock);
1568 
1569 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1570 			ip6mr_cache_free(c);
1571 		}
1572 	}
1573 
1574 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1575 		spin_lock_bh(&mfc_unres_lock);
1576 		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1577 			list_del(&c->list);
1578 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1579 			ip6mr_destroy_unres(mrt, c);
1580 		}
1581 		spin_unlock_bh(&mfc_unres_lock);
1582 	}
1583 }
1584 
1585 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1586 {
1587 	int err = 0;
1588 	struct net *net = sock_net(sk);
1589 
1590 	rtnl_lock();
1591 	write_lock_bh(&mrt_lock);
1592 	if (likely(mrt->mroute6_sk == NULL)) {
1593 		mrt->mroute6_sk = sk;
1594 		net->ipv6.devconf_all->mc_forwarding++;
1595 		inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
1596 					     NETCONFA_IFINDEX_ALL,
1597 					     net->ipv6.devconf_all);
1598 	}
1599 	else
1600 		err = -EADDRINUSE;
1601 	write_unlock_bh(&mrt_lock);
1602 
1603 	rtnl_unlock();
1604 
1605 	return err;
1606 }
1607 
1608 int ip6mr_sk_done(struct sock *sk)
1609 {
1610 	int err = -EACCES;
1611 	struct net *net = sock_net(sk);
1612 	struct mr6_table *mrt;
1613 
1614 	rtnl_lock();
1615 	ip6mr_for_each_table(mrt, net) {
1616 		if (sk == mrt->mroute6_sk) {
1617 			write_lock_bh(&mrt_lock);
1618 			mrt->mroute6_sk = NULL;
1619 			net->ipv6.devconf_all->mc_forwarding--;
1620 			inet6_netconf_notify_devconf(net,
1621 						     NETCONFA_MC_FORWARDING,
1622 						     NETCONFA_IFINDEX_ALL,
1623 						     net->ipv6.devconf_all);
1624 			write_unlock_bh(&mrt_lock);
1625 
1626 			mroute_clean_tables(mrt);
1627 			err = 0;
1628 			break;
1629 		}
1630 	}
1631 	rtnl_unlock();
1632 
1633 	return err;
1634 }
1635 
1636 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1637 {
1638 	struct mr6_table *mrt;
1639 	struct flowi6 fl6 = {
1640 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
1641 		.flowi6_oif	= skb->dev->ifindex,
1642 		.flowi6_mark	= skb->mark,
1643 	};
1644 
1645 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1646 		return NULL;
1647 
1648 	return mrt->mroute6_sk;
1649 }
1650 
1651 /*
1652  *	Socket options and virtual interface manipulation. The whole
1653  *	virtual interface system is a complete heap, but unfortunately
1654  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1655  *	MOSPF/PIM router set up we can clean this up.
1656  */
1657 
1658 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1659 {
1660 	int ret, parent = 0;
1661 	struct mif6ctl vif;
1662 	struct mf6cctl mfc;
1663 	mifi_t mifi;
1664 	struct net *net = sock_net(sk);
1665 	struct mr6_table *mrt;
1666 
1667 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1668 	if (mrt == NULL)
1669 		return -ENOENT;
1670 
1671 	if (optname != MRT6_INIT) {
1672 		if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
1673 			return -EACCES;
1674 	}
1675 
1676 	switch (optname) {
1677 	case MRT6_INIT:
1678 		if (sk->sk_type != SOCK_RAW ||
1679 		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1680 			return -EOPNOTSUPP;
1681 		if (optlen < sizeof(int))
1682 			return -EINVAL;
1683 
1684 		return ip6mr_sk_init(mrt, sk);
1685 
1686 	case MRT6_DONE:
1687 		return ip6mr_sk_done(sk);
1688 
1689 	case MRT6_ADD_MIF:
1690 		if (optlen < sizeof(vif))
1691 			return -EINVAL;
1692 		if (copy_from_user(&vif, optval, sizeof(vif)))
1693 			return -EFAULT;
1694 		if (vif.mif6c_mifi >= MAXMIFS)
1695 			return -ENFILE;
1696 		rtnl_lock();
1697 		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1698 		rtnl_unlock();
1699 		return ret;
1700 
1701 	case MRT6_DEL_MIF:
1702 		if (optlen < sizeof(mifi_t))
1703 			return -EINVAL;
1704 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1705 			return -EFAULT;
1706 		rtnl_lock();
1707 		ret = mif6_delete(mrt, mifi, NULL);
1708 		rtnl_unlock();
1709 		return ret;
1710 
1711 	/*
1712 	 *	Manipulate the forwarding caches. These live
1713 	 *	in a sort of kernel/user symbiosis.
1714 	 */
1715 	case MRT6_ADD_MFC:
1716 	case MRT6_DEL_MFC:
1717 		parent = -1;
1718 	case MRT6_ADD_MFC_PROXY:
1719 	case MRT6_DEL_MFC_PROXY:
1720 		if (optlen < sizeof(mfc))
1721 			return -EINVAL;
1722 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1723 			return -EFAULT;
1724 		if (parent == 0)
1725 			parent = mfc.mf6cc_parent;
1726 		rtnl_lock();
1727 		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1728 			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1729 		else
1730 			ret = ip6mr_mfc_add(net, mrt, &mfc,
1731 					    sk == mrt->mroute6_sk, parent);
1732 		rtnl_unlock();
1733 		return ret;
1734 
1735 	/*
1736 	 *	Control PIM assert (to activate pim will activate assert)
1737 	 */
1738 	case MRT6_ASSERT:
1739 	{
1740 		int v;
1741 
1742 		if (optlen != sizeof(v))
1743 			return -EINVAL;
1744 		if (get_user(v, (int __user *)optval))
1745 			return -EFAULT;
1746 		mrt->mroute_do_assert = v;
1747 		return 0;
1748 	}
1749 
1750 #ifdef CONFIG_IPV6_PIMSM_V2
1751 	case MRT6_PIM:
1752 	{
1753 		int v;
1754 
1755 		if (optlen != sizeof(v))
1756 			return -EINVAL;
1757 		if (get_user(v, (int __user *)optval))
1758 			return -EFAULT;
1759 		v = !!v;
1760 		rtnl_lock();
1761 		ret = 0;
1762 		if (v != mrt->mroute_do_pim) {
1763 			mrt->mroute_do_pim = v;
1764 			mrt->mroute_do_assert = v;
1765 		}
1766 		rtnl_unlock();
1767 		return ret;
1768 	}
1769 
1770 #endif
1771 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1772 	case MRT6_TABLE:
1773 	{
1774 		u32 v;
1775 
1776 		if (optlen != sizeof(u32))
1777 			return -EINVAL;
1778 		if (get_user(v, (u32 __user *)optval))
1779 			return -EFAULT;
1780 		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1781 		if (v != RT_TABLE_DEFAULT && v >= 100000000)
1782 			return -EINVAL;
1783 		if (sk == mrt->mroute6_sk)
1784 			return -EBUSY;
1785 
1786 		rtnl_lock();
1787 		ret = 0;
1788 		if (!ip6mr_new_table(net, v))
1789 			ret = -ENOMEM;
1790 		raw6_sk(sk)->ip6mr_table = v;
1791 		rtnl_unlock();
1792 		return ret;
1793 	}
1794 #endif
1795 	/*
1796 	 *	Spurious command, or MRT6_VERSION which you cannot
1797 	 *	set.
1798 	 */
1799 	default:
1800 		return -ENOPROTOOPT;
1801 	}
1802 }
1803 
1804 /*
1805  *	Getsock opt support for the multicast routing system.
1806  */
1807 
1808 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1809 			  int __user *optlen)
1810 {
1811 	int olr;
1812 	int val;
1813 	struct net *net = sock_net(sk);
1814 	struct mr6_table *mrt;
1815 
1816 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1817 	if (mrt == NULL)
1818 		return -ENOENT;
1819 
1820 	switch (optname) {
1821 	case MRT6_VERSION:
1822 		val = 0x0305;
1823 		break;
1824 #ifdef CONFIG_IPV6_PIMSM_V2
1825 	case MRT6_PIM:
1826 		val = mrt->mroute_do_pim;
1827 		break;
1828 #endif
1829 	case MRT6_ASSERT:
1830 		val = mrt->mroute_do_assert;
1831 		break;
1832 	default:
1833 		return -ENOPROTOOPT;
1834 	}
1835 
1836 	if (get_user(olr, optlen))
1837 		return -EFAULT;
1838 
1839 	olr = min_t(int, olr, sizeof(int));
1840 	if (olr < 0)
1841 		return -EINVAL;
1842 
1843 	if (put_user(olr, optlen))
1844 		return -EFAULT;
1845 	if (copy_to_user(optval, &val, olr))
1846 		return -EFAULT;
1847 	return 0;
1848 }
1849 
1850 /*
1851  *	The IP multicast ioctl support routines.
1852  */
1853 
1854 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1855 {
1856 	struct sioc_sg_req6 sr;
1857 	struct sioc_mif_req6 vr;
1858 	struct mif_device *vif;
1859 	struct mfc6_cache *c;
1860 	struct net *net = sock_net(sk);
1861 	struct mr6_table *mrt;
1862 
1863 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1864 	if (mrt == NULL)
1865 		return -ENOENT;
1866 
1867 	switch (cmd) {
1868 	case SIOCGETMIFCNT_IN6:
1869 		if (copy_from_user(&vr, arg, sizeof(vr)))
1870 			return -EFAULT;
1871 		if (vr.mifi >= mrt->maxvif)
1872 			return -EINVAL;
1873 		read_lock(&mrt_lock);
1874 		vif = &mrt->vif6_table[vr.mifi];
1875 		if (MIF_EXISTS(mrt, vr.mifi)) {
1876 			vr.icount = vif->pkt_in;
1877 			vr.ocount = vif->pkt_out;
1878 			vr.ibytes = vif->bytes_in;
1879 			vr.obytes = vif->bytes_out;
1880 			read_unlock(&mrt_lock);
1881 
1882 			if (copy_to_user(arg, &vr, sizeof(vr)))
1883 				return -EFAULT;
1884 			return 0;
1885 		}
1886 		read_unlock(&mrt_lock);
1887 		return -EADDRNOTAVAIL;
1888 	case SIOCGETSGCNT_IN6:
1889 		if (copy_from_user(&sr, arg, sizeof(sr)))
1890 			return -EFAULT;
1891 
1892 		read_lock(&mrt_lock);
1893 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1894 		if (c) {
1895 			sr.pktcnt = c->mfc_un.res.pkt;
1896 			sr.bytecnt = c->mfc_un.res.bytes;
1897 			sr.wrong_if = c->mfc_un.res.wrong_if;
1898 			read_unlock(&mrt_lock);
1899 
1900 			if (copy_to_user(arg, &sr, sizeof(sr)))
1901 				return -EFAULT;
1902 			return 0;
1903 		}
1904 		read_unlock(&mrt_lock);
1905 		return -EADDRNOTAVAIL;
1906 	default:
1907 		return -ENOIOCTLCMD;
1908 	}
1909 }
1910 
1911 #ifdef CONFIG_COMPAT
1912 struct compat_sioc_sg_req6 {
1913 	struct sockaddr_in6 src;
1914 	struct sockaddr_in6 grp;
1915 	compat_ulong_t pktcnt;
1916 	compat_ulong_t bytecnt;
1917 	compat_ulong_t wrong_if;
1918 };
1919 
1920 struct compat_sioc_mif_req6 {
1921 	mifi_t	mifi;
1922 	compat_ulong_t icount;
1923 	compat_ulong_t ocount;
1924 	compat_ulong_t ibytes;
1925 	compat_ulong_t obytes;
1926 };
1927 
1928 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1929 {
1930 	struct compat_sioc_sg_req6 sr;
1931 	struct compat_sioc_mif_req6 vr;
1932 	struct mif_device *vif;
1933 	struct mfc6_cache *c;
1934 	struct net *net = sock_net(sk);
1935 	struct mr6_table *mrt;
1936 
1937 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1938 	if (mrt == NULL)
1939 		return -ENOENT;
1940 
1941 	switch (cmd) {
1942 	case SIOCGETMIFCNT_IN6:
1943 		if (copy_from_user(&vr, arg, sizeof(vr)))
1944 			return -EFAULT;
1945 		if (vr.mifi >= mrt->maxvif)
1946 			return -EINVAL;
1947 		read_lock(&mrt_lock);
1948 		vif = &mrt->vif6_table[vr.mifi];
1949 		if (MIF_EXISTS(mrt, vr.mifi)) {
1950 			vr.icount = vif->pkt_in;
1951 			vr.ocount = vif->pkt_out;
1952 			vr.ibytes = vif->bytes_in;
1953 			vr.obytes = vif->bytes_out;
1954 			read_unlock(&mrt_lock);
1955 
1956 			if (copy_to_user(arg, &vr, sizeof(vr)))
1957 				return -EFAULT;
1958 			return 0;
1959 		}
1960 		read_unlock(&mrt_lock);
1961 		return -EADDRNOTAVAIL;
1962 	case SIOCGETSGCNT_IN6:
1963 		if (copy_from_user(&sr, arg, sizeof(sr)))
1964 			return -EFAULT;
1965 
1966 		read_lock(&mrt_lock);
1967 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1968 		if (c) {
1969 			sr.pktcnt = c->mfc_un.res.pkt;
1970 			sr.bytecnt = c->mfc_un.res.bytes;
1971 			sr.wrong_if = c->mfc_un.res.wrong_if;
1972 			read_unlock(&mrt_lock);
1973 
1974 			if (copy_to_user(arg, &sr, sizeof(sr)))
1975 				return -EFAULT;
1976 			return 0;
1977 		}
1978 		read_unlock(&mrt_lock);
1979 		return -EADDRNOTAVAIL;
1980 	default:
1981 		return -ENOIOCTLCMD;
1982 	}
1983 }
1984 #endif
1985 
1986 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1987 {
1988 	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1989 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1990 	IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1991 			 IPSTATS_MIB_OUTOCTETS, skb->len);
1992 	return dst_output(skb);
1993 }
1994 
1995 /*
1996  *	Processing handlers for ip6mr_forward
1997  */
1998 
1999 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
2000 			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
2001 {
2002 	struct ipv6hdr *ipv6h;
2003 	struct mif_device *vif = &mrt->vif6_table[vifi];
2004 	struct net_device *dev;
2005 	struct dst_entry *dst;
2006 	struct flowi6 fl6;
2007 
2008 	if (vif->dev == NULL)
2009 		goto out_free;
2010 
2011 #ifdef CONFIG_IPV6_PIMSM_V2
2012 	if (vif->flags & MIFF_REGISTER) {
2013 		vif->pkt_out++;
2014 		vif->bytes_out += skb->len;
2015 		vif->dev->stats.tx_bytes += skb->len;
2016 		vif->dev->stats.tx_packets++;
2017 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2018 		goto out_free;
2019 	}
2020 #endif
2021 
2022 	ipv6h = ipv6_hdr(skb);
2023 
2024 	fl6 = (struct flowi6) {
2025 		.flowi6_oif = vif->link,
2026 		.daddr = ipv6h->daddr,
2027 	};
2028 
2029 	dst = ip6_route_output(net, NULL, &fl6);
2030 	if (dst->error) {
2031 		dst_release(dst);
2032 		goto out_free;
2033 	}
2034 
2035 	skb_dst_drop(skb);
2036 	skb_dst_set(skb, dst);
2037 
2038 	/*
2039 	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2040 	 * not only before forwarding, but after forwarding on all output
2041 	 * interfaces. It is clear, if mrouter runs a multicasting
2042 	 * program, it should receive packets not depending to what interface
2043 	 * program is joined.
2044 	 * If we will not make it, the program will have to join on all
2045 	 * interfaces. On the other hand, multihoming host (or router, but
2046 	 * not mrouter) cannot join to more than one interface - it will
2047 	 * result in receiving multiple packets.
2048 	 */
2049 	dev = vif->dev;
2050 	skb->dev = dev;
2051 	vif->pkt_out++;
2052 	vif->bytes_out += skb->len;
2053 
2054 	/* We are about to write */
2055 	/* XXX: extension headers? */
2056 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2057 		goto out_free;
2058 
2059 	ipv6h = ipv6_hdr(skb);
2060 	ipv6h->hop_limit--;
2061 
2062 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2063 
2064 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
2065 		       ip6mr_forward2_finish);
2066 
2067 out_free:
2068 	kfree_skb(skb);
2069 	return 0;
2070 }
2071 
2072 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2073 {
2074 	int ct;
2075 
2076 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2077 		if (mrt->vif6_table[ct].dev == dev)
2078 			break;
2079 	}
2080 	return ct;
2081 }
2082 
2083 static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
2084 			   struct sk_buff *skb, struct mfc6_cache *cache)
2085 {
2086 	int psend = -1;
2087 	int vif, ct;
2088 	int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2089 
2090 	vif = cache->mf6c_parent;
2091 	cache->mfc_un.res.pkt++;
2092 	cache->mfc_un.res.bytes += skb->len;
2093 
2094 	if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
2095 		struct mfc6_cache *cache_proxy;
2096 
2097 		/* For an (*,G) entry, we only check that the incoming
2098 		 * interface is part of the static tree.
2099 		 */
2100 		cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
2101 		if (cache_proxy &&
2102 		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
2103 			goto forward;
2104 	}
2105 
2106 	/*
2107 	 * Wrong interface: drop packet and (maybe) send PIM assert.
2108 	 */
2109 	if (mrt->vif6_table[vif].dev != skb->dev) {
2110 		cache->mfc_un.res.wrong_if++;
2111 
2112 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2113 		    /* pimsm uses asserts, when switching from RPT to SPT,
2114 		       so that we cannot check that packet arrived on an oif.
2115 		       It is bad, but otherwise we would need to move pretty
2116 		       large chunk of pimd to kernel. Ough... --ANK
2117 		     */
2118 		    (mrt->mroute_do_pim ||
2119 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
2120 		    time_after(jiffies,
2121 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2122 			cache->mfc_un.res.last_assert = jiffies;
2123 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2124 		}
2125 		goto dont_forward;
2126 	}
2127 
2128 forward:
2129 	mrt->vif6_table[vif].pkt_in++;
2130 	mrt->vif6_table[vif].bytes_in += skb->len;
2131 
2132 	/*
2133 	 *	Forward the frame
2134 	 */
2135 	if (ipv6_addr_any(&cache->mf6c_origin) &&
2136 	    ipv6_addr_any(&cache->mf6c_mcastgrp)) {
2137 		if (true_vifi >= 0 &&
2138 		    true_vifi != cache->mf6c_parent &&
2139 		    ipv6_hdr(skb)->hop_limit >
2140 				cache->mfc_un.res.ttls[cache->mf6c_parent]) {
2141 			/* It's an (*,*) entry and the packet is not coming from
2142 			 * the upstream: forward the packet to the upstream
2143 			 * only.
2144 			 */
2145 			psend = cache->mf6c_parent;
2146 			goto last_forward;
2147 		}
2148 		goto dont_forward;
2149 	}
2150 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2151 		/* For (*,G) entry, don't forward to the incoming interface */
2152 		if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
2153 		    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2154 			if (psend != -1) {
2155 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2156 				if (skb2)
2157 					ip6mr_forward2(net, mrt, skb2, cache, psend);
2158 			}
2159 			psend = ct;
2160 		}
2161 	}
2162 last_forward:
2163 	if (psend != -1) {
2164 		ip6mr_forward2(net, mrt, skb, cache, psend);
2165 		return;
2166 	}
2167 
2168 dont_forward:
2169 	kfree_skb(skb);
2170 }
2171 
2172 
2173 /*
2174  *	Multicast packets for forwarding arrive here
2175  */
2176 
2177 int ip6_mr_input(struct sk_buff *skb)
2178 {
2179 	struct mfc6_cache *cache;
2180 	struct net *net = dev_net(skb->dev);
2181 	struct mr6_table *mrt;
2182 	struct flowi6 fl6 = {
2183 		.flowi6_iif	= skb->dev->ifindex,
2184 		.flowi6_mark	= skb->mark,
2185 	};
2186 	int err;
2187 
2188 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2189 	if (err < 0) {
2190 		kfree_skb(skb);
2191 		return err;
2192 	}
2193 
2194 	read_lock(&mrt_lock);
2195 	cache = ip6mr_cache_find(mrt,
2196 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2197 	if (cache == NULL) {
2198 		int vif = ip6mr_find_vif(mrt, skb->dev);
2199 
2200 		if (vif >= 0)
2201 			cache = ip6mr_cache_find_any(mrt,
2202 						     &ipv6_hdr(skb)->daddr,
2203 						     vif);
2204 	}
2205 
2206 	/*
2207 	 *	No usable cache entry
2208 	 */
2209 	if (cache == NULL) {
2210 		int vif;
2211 
2212 		vif = ip6mr_find_vif(mrt, skb->dev);
2213 		if (vif >= 0) {
2214 			int err = ip6mr_cache_unresolved(mrt, vif, skb);
2215 			read_unlock(&mrt_lock);
2216 
2217 			return err;
2218 		}
2219 		read_unlock(&mrt_lock);
2220 		kfree_skb(skb);
2221 		return -ENODEV;
2222 	}
2223 
2224 	ip6_mr_forward(net, mrt, skb, cache);
2225 
2226 	read_unlock(&mrt_lock);
2227 
2228 	return 0;
2229 }
2230 
2231 
2232 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2233 			       struct mfc6_cache *c, struct rtmsg *rtm)
2234 {
2235 	int ct;
2236 	struct rtnexthop *nhp;
2237 	struct nlattr *mp_attr;
2238 	struct rta_mfc_stats mfcs;
2239 
2240 	/* If cache is unresolved, don't try to parse IIF and OIF */
2241 	if (c->mf6c_parent >= MAXMIFS)
2242 		return -ENOENT;
2243 
2244 	if (MIF_EXISTS(mrt, c->mf6c_parent) &&
2245 	    nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
2246 		return -EMSGSIZE;
2247 	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
2248 	if (mp_attr == NULL)
2249 		return -EMSGSIZE;
2250 
2251 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2252 		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2253 			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
2254 			if (nhp == NULL) {
2255 				nla_nest_cancel(skb, mp_attr);
2256 				return -EMSGSIZE;
2257 			}
2258 
2259 			nhp->rtnh_flags = 0;
2260 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2261 			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2262 			nhp->rtnh_len = sizeof(*nhp);
2263 		}
2264 	}
2265 
2266 	nla_nest_end(skb, mp_attr);
2267 
2268 	mfcs.mfcs_packets = c->mfc_un.res.pkt;
2269 	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2270 	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
2271 	if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
2272 		return -EMSGSIZE;
2273 
2274 	rtm->rtm_type = RTN_MULTICAST;
2275 	return 1;
2276 }
2277 
2278 int ip6mr_get_route(struct net *net,
2279 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2280 {
2281 	int err;
2282 	struct mr6_table *mrt;
2283 	struct mfc6_cache *cache;
2284 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2285 
2286 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2287 	if (mrt == NULL)
2288 		return -ENOENT;
2289 
2290 	read_lock(&mrt_lock);
2291 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2292 	if (!cache && skb->dev) {
2293 		int vif = ip6mr_find_vif(mrt, skb->dev);
2294 
2295 		if (vif >= 0)
2296 			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2297 						     vif);
2298 	}
2299 
2300 	if (!cache) {
2301 		struct sk_buff *skb2;
2302 		struct ipv6hdr *iph;
2303 		struct net_device *dev;
2304 		int vif;
2305 
2306 		if (nowait) {
2307 			read_unlock(&mrt_lock);
2308 			return -EAGAIN;
2309 		}
2310 
2311 		dev = skb->dev;
2312 		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2313 			read_unlock(&mrt_lock);
2314 			return -ENODEV;
2315 		}
2316 
2317 		/* really correct? */
2318 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2319 		if (!skb2) {
2320 			read_unlock(&mrt_lock);
2321 			return -ENOMEM;
2322 		}
2323 
2324 		skb_reset_transport_header(skb2);
2325 
2326 		skb_put(skb2, sizeof(struct ipv6hdr));
2327 		skb_reset_network_header(skb2);
2328 
2329 		iph = ipv6_hdr(skb2);
2330 		iph->version = 0;
2331 		iph->priority = 0;
2332 		iph->flow_lbl[0] = 0;
2333 		iph->flow_lbl[1] = 0;
2334 		iph->flow_lbl[2] = 0;
2335 		iph->payload_len = 0;
2336 		iph->nexthdr = IPPROTO_NONE;
2337 		iph->hop_limit = 0;
2338 		iph->saddr = rt->rt6i_src.addr;
2339 		iph->daddr = rt->rt6i_dst.addr;
2340 
2341 		err = ip6mr_cache_unresolved(mrt, vif, skb2);
2342 		read_unlock(&mrt_lock);
2343 
2344 		return err;
2345 	}
2346 
2347 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2348 		cache->mfc_flags |= MFC_NOTIFY;
2349 
2350 	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2351 	read_unlock(&mrt_lock);
2352 	return err;
2353 }
2354 
2355 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2356 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2357 			     int flags)
2358 {
2359 	struct nlmsghdr *nlh;
2360 	struct rtmsg *rtm;
2361 	int err;
2362 
2363 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2364 	if (nlh == NULL)
2365 		return -EMSGSIZE;
2366 
2367 	rtm = nlmsg_data(nlh);
2368 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2369 	rtm->rtm_dst_len  = 128;
2370 	rtm->rtm_src_len  = 128;
2371 	rtm->rtm_tos      = 0;
2372 	rtm->rtm_table    = mrt->id;
2373 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2374 		goto nla_put_failure;
2375 	rtm->rtm_type = RTN_MULTICAST;
2376 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2377 	if (c->mfc_flags & MFC_STATIC)
2378 		rtm->rtm_protocol = RTPROT_STATIC;
2379 	else
2380 		rtm->rtm_protocol = RTPROT_MROUTED;
2381 	rtm->rtm_flags    = 0;
2382 
2383 	if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) ||
2384 	    nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp))
2385 		goto nla_put_failure;
2386 	err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
2387 	/* do not break the dump if cache is unresolved */
2388 	if (err < 0 && err != -ENOENT)
2389 		goto nla_put_failure;
2390 
2391 	nlmsg_end(skb, nlh);
2392 	return 0;
2393 
2394 nla_put_failure:
2395 	nlmsg_cancel(skb, nlh);
2396 	return -EMSGSIZE;
2397 }
2398 
2399 static int mr6_msgsize(bool unresolved, int maxvif)
2400 {
2401 	size_t len =
2402 		NLMSG_ALIGN(sizeof(struct rtmsg))
2403 		+ nla_total_size(4)	/* RTA_TABLE */
2404 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2405 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2406 		;
2407 
2408 	if (!unresolved)
2409 		len = len
2410 		      + nla_total_size(4)	/* RTA_IIF */
2411 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2412 		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2413 						/* RTA_MFC_STATS */
2414 		      + nla_total_size(sizeof(struct rta_mfc_stats))
2415 		;
2416 
2417 	return len;
2418 }
2419 
2420 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
2421 			      int cmd)
2422 {
2423 	struct net *net = read_pnet(&mrt->net);
2424 	struct sk_buff *skb;
2425 	int err = -ENOBUFS;
2426 
2427 	skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
2428 			GFP_ATOMIC);
2429 	if (skb == NULL)
2430 		goto errout;
2431 
2432 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2433 	if (err < 0)
2434 		goto errout;
2435 
2436 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2437 	return;
2438 
2439 errout:
2440 	kfree_skb(skb);
2441 	if (err < 0)
2442 		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2443 }
2444 
2445 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2446 {
2447 	struct net *net = sock_net(skb->sk);
2448 	struct mr6_table *mrt;
2449 	struct mfc6_cache *mfc;
2450 	unsigned int t = 0, s_t;
2451 	unsigned int h = 0, s_h;
2452 	unsigned int e = 0, s_e;
2453 
2454 	s_t = cb->args[0];
2455 	s_h = cb->args[1];
2456 	s_e = cb->args[2];
2457 
2458 	read_lock(&mrt_lock);
2459 	ip6mr_for_each_table(mrt, net) {
2460 		if (t < s_t)
2461 			goto next_table;
2462 		if (t > s_t)
2463 			s_h = 0;
2464 		for (h = s_h; h < MFC6_LINES; h++) {
2465 			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2466 				if (e < s_e)
2467 					goto next_entry;
2468 				if (ip6mr_fill_mroute(mrt, skb,
2469 						      NETLINK_CB(cb->skb).portid,
2470 						      cb->nlh->nlmsg_seq,
2471 						      mfc, RTM_NEWROUTE,
2472 						      NLM_F_MULTI) < 0)
2473 					goto done;
2474 next_entry:
2475 				e++;
2476 			}
2477 			e = s_e = 0;
2478 		}
2479 		spin_lock_bh(&mfc_unres_lock);
2480 		list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
2481 			if (e < s_e)
2482 				goto next_entry2;
2483 			if (ip6mr_fill_mroute(mrt, skb,
2484 					      NETLINK_CB(cb->skb).portid,
2485 					      cb->nlh->nlmsg_seq,
2486 					      mfc, RTM_NEWROUTE,
2487 					      NLM_F_MULTI) < 0) {
2488 				spin_unlock_bh(&mfc_unres_lock);
2489 				goto done;
2490 			}
2491 next_entry2:
2492 			e++;
2493 		}
2494 		spin_unlock_bh(&mfc_unres_lock);
2495 		e = s_e = 0;
2496 		s_h = 0;
2497 next_table:
2498 		t++;
2499 	}
2500 done:
2501 	read_unlock(&mrt_lock);
2502 
2503 	cb->args[2] = e;
2504 	cb->args[1] = h;
2505 	cb->args[0] = t;
2506 
2507 	return skb->len;
2508 }
2509