xref: /openbmc/linux/net/ipv6/ip6mr.c (revision afb46f79)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <asm/uaccess.h>
20 #include <linux/types.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
23 #include <linux/timer.h>
24 #include <linux/mm.h>
25 #include <linux/kernel.h>
26 #include <linux/fcntl.h>
27 #include <linux/stat.h>
28 #include <linux/socket.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/inetdevice.h>
32 #include <linux/proc_fs.h>
33 #include <linux/seq_file.h>
34 #include <linux/init.h>
35 #include <linux/slab.h>
36 #include <linux/compat.h>
37 #include <net/protocol.h>
38 #include <linux/skbuff.h>
39 #include <net/sock.h>
40 #include <net/raw.h>
41 #include <linux/notifier.h>
42 #include <linux/if_arp.h>
43 #include <net/checksum.h>
44 #include <net/netlink.h>
45 #include <net/fib_rules.h>
46 
47 #include <net/ipv6.h>
48 #include <net/ip6_route.h>
49 #include <linux/mroute6.h>
50 #include <linux/pim.h>
51 #include <net/addrconf.h>
52 #include <linux/netfilter_ipv6.h>
53 #include <linux/export.h>
54 #include <net/ip6_checksum.h>
55 #include <linux/netconf.h>
56 
/*
 * State of one IPv6 multicast routing table: its multicast interfaces
 * (MIFs), the hash lines of resolved cache entries and the queue of
 * entries still waiting to be resolved.
 */
57 struct mr6_table {
	/* Linkage on net->ipv6.mr6_tables (multiple-tables build) */
58 	struct list_head	list;
59 #ifdef CONFIG_NET_NS
	/* Owning network namespace; accessed with read_pnet()/write_pnet() */
60 	struct net		*net;
61 #endif
	/* Table id, matched by ip6mr_get_table() */
62 	u32			id;
	/* Socket that receives the reports built by ip6mr_cache_report() */
63 	struct sock		*mroute6_sk;
	/* Timer whose handler is ipmr_expire_process() */
64 	struct timer_list	ipmr_expire_timer;
	/* Unresolved entries; walked under mfc_unres_lock */
65 	struct list_head	mfc6_unres_queue;
	/* Resolved entries, one list per MFC6_HASH() line */
66 	struct list_head	mfc6_cache_array[MFC6_LINES];
	/* Multicast interfaces, indexed by MIF number */
67 	struct mif_device	vif6_table[MAXMIFS];
	/* One past the highest MIF index currently in use */
68 	int			maxvif;
	/* Decremented when an unresolved entry is destroyed;
	 * presumably the length of mfc6_unres_queue -- confirm
	 */
69 	atomic_t		cache_resolve_queue_len;
	/* NOTE(review): not used in the part of the file visible here;
	 * presumably set through the MRT6_ASSERT / MRT6_PIM socket options
	 */
70 	bool			mroute_do_assert;
71 	bool			mroute_do_pim;
72 #ifdef CONFIG_IPV6_PIMSM_V2
	/* MIF index of the PIM register interface, -1 when there is none */
73 	int			mroute_reg_vif_num;
74 #endif
75 };
76 
/* fib rule for this family; it carries nothing beyond the generic part. */
77 struct ip6mr_rule {
78 	struct fib_rule		common;
79 };
80 
/* Result slot of a rules lookup: the table chosen by ip6mr_rule_action(). */
81 struct ip6mr_result {
82 	struct mr6_table	*mrt;
83 };
84 
85 /* Big lock, protecting vif table, mrt cache and mroute socket state.
86    Note that the changes are semaphored via rtnl_lock.
87  */
88 
/* Readers take it with read_lock(); MIF table updates use write_lock_bh(). */
89 static DEFINE_RWLOCK(mrt_lock);
90 
91 /*
92  *	Multicast router control variables
93  */
94 
/* True when MIF slot _idx of table _mrt has a device attached. */
95 #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
96 
97 /* Special spinlock for queue of unresolved entries */
/* Taken with spin_lock_bh() from process context and spin_trylock() from
 * the expire timer.
 */
98 static DEFINE_SPINLOCK(mfc_unres_lock);
99 
100 /* We return to original Alan's scheme. Hash table of resolved
101    entries is changed only in process context and protected
102    with weak lock mrt_lock. Queue of unresolved entries is protected
103    with strong spinlock mfc_unres_lock.
104 
105    In this case data path is free of exclusive locks at all.
106  */
107 
/* Slab cache from which struct mfc6_cache entries are allocated and freed. */
108 static struct kmem_cache *mrt_cachep __read_mostly;
109 
/* Forward declarations of helpers that are used before their definition. */
110 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
111 static void ip6mr_free_table(struct mr6_table *mrt);
112
113 static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
114 			   struct sk_buff *skb, struct mfc6_cache *cache);
115 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
116 			      mifi_t mifi, int assert);
117 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
118 			       struct mfc6_cache *c, struct rtmsg *rtm);
119 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
120 			      int cmd);
121 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
122 			       struct netlink_callback *cb);
123 static void mroute_clean_tables(struct mr6_table *mrt);
124 static void ipmr_expire_process(unsigned long arg);
125 
126 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/* Multiple-tables build: walk every table on the per-netns list. */
127 #define ip6mr_for_each_table(mrt, net) \
128 	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
129 
130 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
131 {
132 	struct mr6_table *mrt;
133 
134 	ip6mr_for_each_table(mrt, net) {
135 		if (mrt->id == id)
136 			return mrt;
137 	}
138 	return NULL;
139 }
140 
141 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
142 			    struct mr6_table **mrt)
143 {
144 	int err;
145 	struct ip6mr_result res;
146 	struct fib_lookup_arg arg = {
147 		.result = &res,
148 		.flags = FIB_LOOKUP_NOREF,
149 	};
150 
151 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
152 			       flowi6_to_flowi(flp6), 0, &arg);
153 	if (err < 0)
154 		return err;
155 	*mrt = res.mrt;
156 	return 0;
157 }
158 
159 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
160 			     int flags, struct fib_lookup_arg *arg)
161 {
162 	struct ip6mr_result *res = arg->result;
163 	struct mr6_table *mrt;
164 
165 	switch (rule->action) {
166 	case FR_ACT_TO_TBL:
167 		break;
168 	case FR_ACT_UNREACHABLE:
169 		return -ENETUNREACH;
170 	case FR_ACT_PROHIBIT:
171 		return -EACCES;
172 	case FR_ACT_BLACKHOLE:
173 	default:
174 		return -EINVAL;
175 	}
176 
177 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
178 	if (mrt == NULL)
179 		return -EAGAIN;
180 	res->mrt = mrt;
181 	return 0;
182 }
183 
/* fib_rules ->match hook: always reports a match (returns 1). */
184 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
185 {
186 	return 1;
187 }
188 
/* Netlink attribute policy: only the generic fib-rule attributes. */
189 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
190 	FRA_GENERIC_POLICY,
191 };
192 
/* fib_rules ->configure hook: nothing family-specific to parse; returns 0. */
193 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
194 				struct fib_rule_hdr *frh, struct nlattr **tb)
195 {
196 	return 0;
197 }
198 
/* fib_rules ->compare hook: no family-specific fields, so always returns 1. */
199 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
200 			      struct nlattr **tb)
201 {
202 	return 1;
203 }
204 
/*
 * fib_rules ->fill hook: zeroes the prefix lengths and tos in the rule
 * header being dumped and returns 0.
 */
205 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
206 			   struct fib_rule_hdr *frh)
207 {
208 	frh->dst_len = 0;
209 	frh->src_len = 0;
210 	frh->tos     = 0;
211 	return 0;
212 }
213 
/*
 * Template handed to fib_rules_register() by ip6mr_rules_init(); wires the
 * ip6mr_rule_* hooks above into the RTNL_FAMILY_IP6MR rule family.
 */
214 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
215 	.family		= RTNL_FAMILY_IP6MR,
216 	.rule_size	= sizeof(struct ip6mr_rule),
217 	.addr_size	= sizeof(struct in6_addr),
218 	.action		= ip6mr_rule_action,
219 	.match		= ip6mr_rule_match,
220 	.configure	= ip6mr_rule_configure,
221 	.compare	= ip6mr_rule_compare,
222 	.default_pref	= fib_default_rule_pref,
223 	.fill		= ip6mr_rule_fill,
224 	.nlgroup	= RTNLGRP_IPV6_RULE,
225 	.policy		= ip6mr_rule_policy,
226 	.owner		= THIS_MODULE,
227 };
228 
229 static int __net_init ip6mr_rules_init(struct net *net)
230 {
231 	struct fib_rules_ops *ops;
232 	struct mr6_table *mrt;
233 	int err;
234 
235 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
236 	if (IS_ERR(ops))
237 		return PTR_ERR(ops);
238 
239 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
240 
241 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
242 	if (mrt == NULL) {
243 		err = -ENOMEM;
244 		goto err1;
245 	}
246 
247 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
248 	if (err < 0)
249 		goto err2;
250 
251 	net->ipv6.mr6_rules_ops = ops;
252 	return 0;
253 
254 err2:
255 	kfree(mrt);
256 err1:
257 	fib_rules_unregister(ops);
258 	return err;
259 }
260 
261 static void __net_exit ip6mr_rules_exit(struct net *net)
262 {
263 	struct mr6_table *mrt, *next;
264 
265 	rtnl_lock();
266 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
267 		list_del(&mrt->list);
268 		ip6mr_free_table(mrt);
269 	}
270 	rtnl_unlock();
271 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
272 }
273 #else
/* Single-table build: "iterate" over the one table (at most one pass). */
274 #define ip6mr_for_each_table(mrt, net) \
275 	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
276 
/* Single-table build: every id maps to the namespace's only table. */
277 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
278 {
279 	return net->ipv6.mrt6;
280 }
281 
/*
 * Single-table build: no rules to consult; *mrt is set to the namespace's
 * only table and 0 is returned.
 */
282 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
283 			    struct mr6_table **mrt)
284 {
285 	*mrt = net->ipv6.mrt6;
286 	return 0;
287 }
288 
289 static int __net_init ip6mr_rules_init(struct net *net)
290 {
291 	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
292 	return net->ipv6.mrt6 ? 0 : -ENOMEM;
293 }
294 
/*
 * Per-namespace teardown for the single-table build: free the only table
 * under RTNL and clear the namespace pointer.
 */
295 static void __net_exit ip6mr_rules_exit(struct net *net)
296 {
297 	rtnl_lock();
298 	ip6mr_free_table(net->ipv6.mrt6);
299 	net->ipv6.mrt6 = NULL;
300 	rtnl_unlock();
301 }
302 #endif
303 
304 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
305 {
306 	struct mr6_table *mrt;
307 	unsigned int i;
308 
309 	mrt = ip6mr_get_table(net, id);
310 	if (mrt != NULL)
311 		return mrt;
312 
313 	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
314 	if (mrt == NULL)
315 		return NULL;
316 	mrt->id = id;
317 	write_pnet(&mrt->net, net);
318 
319 	/* Forwarding cache */
320 	for (i = 0; i < MFC6_LINES; i++)
321 		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
322 
323 	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
324 
325 	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
326 		    (unsigned long)mrt);
327 
328 #ifdef CONFIG_IPV6_PIMSM_V2
329 	mrt->mroute_reg_vif_num = -1;
330 #endif
331 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
332 	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
333 #endif
334 	return mrt;
335 }
336 
337 static void ip6mr_free_table(struct mr6_table *mrt)
338 {
339 	del_timer(&mrt->ipmr_expire_timer);
340 	mroute_clean_tables(mrt);
341 	kfree(mrt);
342 }
343 
344 #ifdef CONFIG_PROC_FS
345 
/* seq_file iterator state for the mfc cache listing. */
346 struct ipmr_mfc_iter {
347 	struct seq_net_private p;
	/* Table being dumped; set by ipmr_mfc_seq_start() */
348 	struct mr6_table *mrt;
	/* List the current entry is on: a hash line, the unresolved queue,
	 * or NULL; also tells ipmr_mfc_seq_stop() which lock is held
	 */
349 	struct list_head *cache;
	/* Index of the hash line being walked */
350 	int ct;
351 };
352 
353 
/*
 * Position the iterator on the pos'th cache entry (0-based), counting the
 * resolved hash lines first and the unresolved queue after them.
 *
 * Locking: returns with mrt_lock read-held when the entry comes from a
 * hash line, with mfc_unres_lock held (taken with spin_lock_bh) when it
 * comes from the unresolved queue, and with neither held when NULL is
 * returned.  it->cache records the list the entry was found on and is
 * NULL when nothing was found.
 */
354 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
355 					   struct ipmr_mfc_iter *it, loff_t pos)
356 {
357 	struct mr6_table *mrt = it->mrt;
358 	struct mfc6_cache *mfc;
359
360 	read_lock(&mrt_lock);
361 	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
362 		it->cache = &mrt->mfc6_cache_array[it->ct];
363 		list_for_each_entry(mfc, it->cache, list)
			/* deliberately returns with mrt_lock still held */
364 			if (pos-- == 0)
365 				return mfc;
366 	}
367 	read_unlock(&mrt_lock);
368
369 	spin_lock_bh(&mfc_unres_lock);
370 	it->cache = &mrt->mfc6_unres_queue;
371 	list_for_each_entry(mfc, it->cache, list)
		/* deliberately returns with mfc_unres_lock still held */
372 		if (pos-- == 0)
373 			return mfc;
374 	spin_unlock_bh(&mfc_unres_lock);
375
376 	it->cache = NULL;
377 	return NULL;
378 }
379 
380 /*
381  *	The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
382  */
383 
/* seq_file iterator state for the MIF listing. */
384 struct ipmr_vif_iter {
385 	struct seq_net_private p;
	/* Table being dumped; set by ip6mr_vif_seq_start() */
386 	struct mr6_table *mrt;
	/* Index into mrt->vif6_table of the current position */
387 	int ct;
388 };
389 
390 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
391 					    struct ipmr_vif_iter *iter,
392 					    loff_t pos)
393 {
394 	struct mr6_table *mrt = iter->mrt;
395 
396 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
397 		if (!MIF_EXISTS(mrt, iter->ct))
398 			continue;
399 		if (pos-- == 0)
400 			return &mrt->vif6_table[iter->ct];
401 	}
402 	return NULL;
403 }
404 
405 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
406 	__acquires(mrt_lock)
407 {
408 	struct ipmr_vif_iter *iter = seq->private;
409 	struct net *net = seq_file_net(seq);
410 	struct mr6_table *mrt;
411 
412 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
413 	if (mrt == NULL)
414 		return ERR_PTR(-ENOENT);
415 
416 	iter->mrt = mrt;
417 
418 	read_lock(&mrt_lock);
419 	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
420 		: SEQ_START_TOKEN;
421 }
422 
423 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
424 {
425 	struct ipmr_vif_iter *iter = seq->private;
426 	struct net *net = seq_file_net(seq);
427 	struct mr6_table *mrt = iter->mrt;
428 
429 	++*pos;
430 	if (v == SEQ_START_TOKEN)
431 		return ip6mr_vif_seq_idx(net, iter, 0);
432 
433 	while (++iter->ct < mrt->maxvif) {
434 		if (!MIF_EXISTS(mrt, iter->ct))
435 			continue;
436 		return &mrt->vif6_table[iter->ct];
437 	}
438 	return NULL;
439 }
440 
/* seq_file ->stop for the MIF listing: unconditionally drops mrt_lock. */
441 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
442 	__releases(mrt_lock)
443 {
444 	read_unlock(&mrt_lock);
445 }
446 
447 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
448 {
449 	struct ipmr_vif_iter *iter = seq->private;
450 	struct mr6_table *mrt = iter->mrt;
451 
452 	if (v == SEQ_START_TOKEN) {
453 		seq_puts(seq,
454 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
455 	} else {
456 		const struct mif_device *vif = v;
457 		const char *name = vif->dev ? vif->dev->name : "none";
458 
459 		seq_printf(seq,
460 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
461 			   vif - mrt->vif6_table,
462 			   name, vif->bytes_in, vif->pkt_in,
463 			   vif->bytes_out, vif->pkt_out,
464 			   vif->flags);
465 	}
466 	return 0;
467 }
468 
/* seq_file iterator callbacks for the MIF listing. */
469 static const struct seq_operations ip6mr_vif_seq_ops = {
470 	.start = ip6mr_vif_seq_start,
471 	.next  = ip6mr_vif_seq_next,
472 	.stop  = ip6mr_vif_seq_stop,
473 	.show  = ip6mr_vif_seq_show,
474 };
475 
/* open() handler: per-netns seq_file with an ipmr_vif_iter as private data. */
476 static int ip6mr_vif_open(struct inode *inode, struct file *file)
477 {
478 	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
479 			    sizeof(struct ipmr_vif_iter));
480 }
481 
/* File operations for the MIF listing file. */
482 static const struct file_operations ip6mr_vif_fops = {
483 	.owner	 = THIS_MODULE,
484 	.open    = ip6mr_vif_open,
485 	.read    = seq_read,
486 	.llseek  = seq_lseek,
487 	.release = seq_release_net,
488 };
489 
490 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
491 {
492 	struct ipmr_mfc_iter *it = seq->private;
493 	struct net *net = seq_file_net(seq);
494 	struct mr6_table *mrt;
495 
496 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
497 	if (mrt == NULL)
498 		return ERR_PTR(-ENOENT);
499 
500 	it->mrt = mrt;
501 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
502 		: SEQ_START_TOKEN;
503 }
504 
/*
 * seq_file ->next for the mfc cache listing: advance *pos and return the
 * entry following v, or NULL when both the hash lines and the unresolved
 * queue are exhausted.
 *
 * Locking follows it->cache: while walking hash lines mrt_lock stays
 * read-held; when the last line is exhausted it is dropped and
 * mfc_unres_lock is taken for the unresolved queue; at the end of the
 * list mfc_unres_lock is dropped and it->cache is cleared.
 */
505 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
506 {
507 	struct mfc6_cache *mfc = v;
508 	struct ipmr_mfc_iter *it = seq->private;
509 	struct net *net = seq_file_net(seq);
510 	struct mr6_table *mrt = it->mrt;
511
512 	++*pos;
513
514 	if (v == SEQ_START_TOKEN)
515 		return ipmr_mfc_seq_idx(net, seq->private, 0);
516
	/* more entries on the list we are currently walking */
517 	if (mfc->list.next != it->cache)
518 		return list_entry(mfc->list.next, struct mfc6_cache, list);
519
520 	if (it->cache == &mrt->mfc6_unres_queue)
521 		goto end_of_list;
522
523 	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
524
	/* move on to the next non-empty hash line */
525 	while (++it->ct < MFC6_LINES) {
526 		it->cache = &mrt->mfc6_cache_array[it->ct];
527 		if (list_empty(it->cache))
528 			continue;
529 		return list_first_entry(it->cache, struct mfc6_cache, list);
530 	}
531
532 	/* exhausted cache_array, show unresolved */
533 	read_unlock(&mrt_lock);
534 	it->cache = &mrt->mfc6_unres_queue;
535 	it->ct = 0;
536
537 	spin_lock_bh(&mfc_unres_lock);
538 	if (!list_empty(it->cache))
539 		return list_first_entry(it->cache, struct mfc6_cache, list);
540
541  end_of_list:
542 	spin_unlock_bh(&mfc_unres_lock);
543 	it->cache = NULL;
544
545 	return NULL;
546 }
547 
548 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
549 {
550 	struct ipmr_mfc_iter *it = seq->private;
551 	struct mr6_table *mrt = it->mrt;
552 
553 	if (it->cache == &mrt->mfc6_unres_queue)
554 		spin_unlock_bh(&mfc_unres_lock);
555 	else if (it->cache == mrt->mfc6_cache_array)
556 		read_unlock(&mrt_lock);
557 }
558 
559 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
560 {
561 	int n;
562 
563 	if (v == SEQ_START_TOKEN) {
564 		seq_puts(seq,
565 			 "Group                            "
566 			 "Origin                           "
567 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
568 	} else {
569 		const struct mfc6_cache *mfc = v;
570 		const struct ipmr_mfc_iter *it = seq->private;
571 		struct mr6_table *mrt = it->mrt;
572 
573 		seq_printf(seq, "%pI6 %pI6 %-3hd",
574 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
575 			   mfc->mf6c_parent);
576 
577 		if (it->cache != &mrt->mfc6_unres_queue) {
578 			seq_printf(seq, " %8lu %8lu %8lu",
579 				   mfc->mfc_un.res.pkt,
580 				   mfc->mfc_un.res.bytes,
581 				   mfc->mfc_un.res.wrong_if);
582 			for (n = mfc->mfc_un.res.minvif;
583 			     n < mfc->mfc_un.res.maxvif; n++) {
584 				if (MIF_EXISTS(mrt, n) &&
585 				    mfc->mfc_un.res.ttls[n] < 255)
586 					seq_printf(seq,
587 						   " %2d:%-3d",
588 						   n, mfc->mfc_un.res.ttls[n]);
589 			}
590 		} else {
591 			/* unresolved mfc_caches don't contain
592 			 * pkt, bytes and wrong_if values
593 			 */
594 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
595 		}
596 		seq_putc(seq, '\n');
597 	}
598 	return 0;
599 }
600 
/* seq_file iterator callbacks for the mfc cache listing. */
601 static const struct seq_operations ipmr_mfc_seq_ops = {
602 	.start = ipmr_mfc_seq_start,
603 	.next  = ipmr_mfc_seq_next,
604 	.stop  = ipmr_mfc_seq_stop,
605 	.show  = ipmr_mfc_seq_show,
606 };
607 
/* open() handler: per-netns seq_file with an ipmr_mfc_iter as private data. */
608 static int ipmr_mfc_open(struct inode *inode, struct file *file)
609 {
610 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
611 			    sizeof(struct ipmr_mfc_iter));
612 }
613 
/* File operations for the mfc cache listing file. */
614 static const struct file_operations ip6mr_mfc_fops = {
615 	.owner	 = THIS_MODULE,
616 	.open    = ipmr_mfc_open,
617 	.read    = seq_read,
618 	.llseek  = seq_lseek,
619 	.release = seq_release_net,
620 };
621 #endif
622 
623 #ifdef CONFIG_IPV6_PIMSM_V2
624 
/*
 * Protocol handler for IPPROTO_PIM (installed through pim6_protocol).
 *
 * Accepts PIM Register messages only: validates the PIM header and the
 * encapsulated IPv6 header, strips the outer headers and re-injects the
 * inner multicast packet through netif_rx() as if it had been received
 * on the table's register device.  Every other packet is dropped.
 *
 * Always returns 0; the skb is consumed on all paths.
 */
625 static int pim6_rcv(struct sk_buff *skb)
626 {
627 	struct pimreghdr *pim;
628 	struct ipv6hdr   *encap;
629 	struct net_device  *reg_dev = NULL;
630 	struct net *net = dev_net(skb->dev);
631 	struct mr6_table *mrt;
632 	struct flowi6 fl6 = {
633 		.flowi6_iif	= skb->dev->ifindex,
634 		.flowi6_mark	= skb->mark,
635 	};
636 	int reg_vif_num;
637
	/* need the PIM register header plus the inner IPv6 header */
638 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
639 		goto drop;
640
641 	pim = (struct pimreghdr *)skb_transport_header(skb);
	/*
	 * Drop anything that is not a Register, is a Null-Register, or
	 * fails both checksum variants: the one covering only the PIM
	 * header (with the IPv6 pseudo-header) and the one covering the
	 * whole packet.
	 */
642 	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
643 	    (pim->flags & PIM_NULL_REGISTER) ||
644 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
645 			     sizeof(*pim), IPPROTO_PIM,
646 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
647 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
648 		goto drop;
649
650 	/* check if the inner packet is destined to mcast group */
651 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
652 				   sizeof(*pim));
653
654 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
655 	    encap->payload_len == 0 ||
656 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
657 		goto drop;
658
659 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
660 		goto drop;
	/* NOTE(review): read before mrt_lock is taken below -- confirm that
	 * this is intended
	 */
661 	reg_vif_num = mrt->mroute_reg_vif_num;
662
	/* pin the register device so it stays valid after the unlock */
663 	read_lock(&mrt_lock);
664 	if (reg_vif_num >= 0)
665 		reg_dev = mrt->vif6_table[reg_vif_num].dev;
666 	if (reg_dev)
667 		dev_hold(reg_dev);
668 	read_unlock(&mrt_lock);
669
670 	if (reg_dev == NULL)
671 		goto drop;
672
	/* make the encapsulated packet the new network-layer payload */
673 	skb->mac_header = skb->network_header;
674 	skb_pull(skb, (u8 *)encap - skb->data);
675 	skb_reset_network_header(skb);
676 	skb->protocol = htons(ETH_P_IPV6);
677 	skb->ip_summed = CHECKSUM_NONE;
678
679 	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
680
681 	netif_rx(skb);
682
683 	dev_put(reg_dev);
684 	return 0;
685  drop:
686 	kfree_skb(skb);
687 	return 0;
688 }
689 
/* inet6 protocol descriptor whose receive handler is pim6_rcv(). */
690 static const struct inet6_protocol pim6_protocol = {
691 	.handler	=	pim6_rcv,
692 };
693 
694 /* Service routines creating virtual interfaces: PIMREG */
695 
696 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
697 				      struct net_device *dev)
698 {
699 	struct net *net = dev_net(dev);
700 	struct mr6_table *mrt;
701 	struct flowi6 fl6 = {
702 		.flowi6_oif	= dev->ifindex,
703 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
704 		.flowi6_mark	= skb->mark,
705 	};
706 	int err;
707 
708 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
709 	if (err < 0) {
710 		kfree_skb(skb);
711 		return err;
712 	}
713 
714 	read_lock(&mrt_lock);
715 	dev->stats.tx_bytes += skb->len;
716 	dev->stats.tx_packets++;
717 	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
718 	read_unlock(&mrt_lock);
719 	kfree_skb(skb);
720 	return NETDEV_TX_OK;
721 }
722 
/* Device operations of the PIM register device: transmit hook only. */
723 static const struct net_device_ops reg_vif_netdev_ops = {
724 	.ndo_start_xmit	= reg_vif_xmit,
725 };
726 
/*
 * alloc_netdev() setup callback for the PIM register device (see
 * ip6mr_reg_vif()).
 */
727 static void reg_vif_setup(struct net_device *dev)
728 {
729 	dev->type		= ARPHRD_PIMREG;
	/* NOTE(review): presumably leaves room for the outer IPv6 header and
	 * the 8-byte PIM register header -- confirm
	 */
730 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
731 	dev->flags		= IFF_NOARP;
732 	dev->netdev_ops		= &reg_vif_netdev_ops;
733 	dev->destructor		= free_netdev;
734 	dev->features		|= NETIF_F_NETNS_LOCAL;
735 }
736 
737 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
738 {
739 	struct net_device *dev;
740 	char name[IFNAMSIZ];
741 
742 	if (mrt->id == RT6_TABLE_DFLT)
743 		sprintf(name, "pim6reg");
744 	else
745 		sprintf(name, "pim6reg%u", mrt->id);
746 
747 	dev = alloc_netdev(0, name, reg_vif_setup);
748 	if (dev == NULL)
749 		return NULL;
750 
751 	dev_net_set(dev, net);
752 
753 	if (register_netdevice(dev)) {
754 		free_netdev(dev);
755 		return NULL;
756 	}
757 	dev->iflink = 0;
758 
759 	if (dev_open(dev))
760 		goto failure;
761 
762 	dev_hold(dev);
763 	return dev;
764 
765 failure:
766 	/* allow the register to be completed before unregistering. */
767 	rtnl_unlock();
768 	rtnl_lock();
769 
770 	unregister_netdevice(dev);
771 	return NULL;
772 }
773 #endif
774 
775 /*
776  *	Delete a VIF entry
777  */
778 
779 static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
780 {
781 	struct mif_device *v;
782 	struct net_device *dev;
783 	struct inet6_dev *in6_dev;
784 
785 	if (vifi < 0 || vifi >= mrt->maxvif)
786 		return -EADDRNOTAVAIL;
787 
788 	v = &mrt->vif6_table[vifi];
789 
790 	write_lock_bh(&mrt_lock);
791 	dev = v->dev;
792 	v->dev = NULL;
793 
794 	if (!dev) {
795 		write_unlock_bh(&mrt_lock);
796 		return -EADDRNOTAVAIL;
797 	}
798 
799 #ifdef CONFIG_IPV6_PIMSM_V2
800 	if (vifi == mrt->mroute_reg_vif_num)
801 		mrt->mroute_reg_vif_num = -1;
802 #endif
803 
804 	if (vifi + 1 == mrt->maxvif) {
805 		int tmp;
806 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
807 			if (MIF_EXISTS(mrt, tmp))
808 				break;
809 		}
810 		mrt->maxvif = tmp + 1;
811 	}
812 
813 	write_unlock_bh(&mrt_lock);
814 
815 	dev_set_allmulti(dev, -1);
816 
817 	in6_dev = __in6_dev_get(dev);
818 	if (in6_dev) {
819 		in6_dev->cnf.mc_forwarding--;
820 		inet6_netconf_notify_devconf(dev_net(dev),
821 					     NETCONFA_MC_FORWARDING,
822 					     dev->ifindex, &in6_dev->cnf);
823 	}
824 
825 	if (v->flags & MIFF_REGISTER)
826 		unregister_netdevice_queue(dev, head);
827 
828 	dev_put(dev);
829 	return 0;
830 }
831 
/* Return a cache entry to the mrt_cachep slab cache. */
832 static inline void ip6mr_cache_free(struct mfc6_cache *c)
833 {
834 	kmem_cache_free(mrt_cachep, c);
835 }
836 
837 /* Destroy an unresolved cache entry, killing queued skbs
838    and reporting error to netlink readers.
839  */
840 
841 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
842 {
843 	struct net *net = read_pnet(&mrt->net);
844 	struct sk_buff *skb;
845 
846 	atomic_dec(&mrt->cache_resolve_queue_len);
847 
848 	while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
849 		if (ipv6_hdr(skb)->version == 0) {
850 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
851 			nlh->nlmsg_type = NLMSG_ERROR;
852 			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
853 			skb_trim(skb, nlh->nlmsg_len);
854 			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
855 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
856 		} else
857 			kfree_skb(skb);
858 	}
859 
860 	ip6mr_cache_free(c);
861 }
862 
863 
864 /* Timer process for all the unresolved queue. */
865 
866 static void ipmr_do_expire_process(struct mr6_table *mrt)
867 {
868 	unsigned long now = jiffies;
869 	unsigned long expires = 10 * HZ;
870 	struct mfc6_cache *c, *next;
871 
872 	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
873 		if (time_after(c->mfc_un.unres.expires, now)) {
874 			/* not yet... */
875 			unsigned long interval = c->mfc_un.unres.expires - now;
876 			if (interval < expires)
877 				expires = interval;
878 			continue;
879 		}
880 
881 		list_del(&c->list);
882 		mr6_netlink_event(mrt, c, RTM_DELROUTE);
883 		ip6mr_destroy_unres(mrt, c);
884 	}
885 
886 	if (!list_empty(&mrt->mfc6_unres_queue))
887 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
888 }
889 
890 static void ipmr_expire_process(unsigned long arg)
891 {
892 	struct mr6_table *mrt = (struct mr6_table *)arg;
893 
894 	if (!spin_trylock(&mfc_unres_lock)) {
895 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
896 		return;
897 	}
898 
899 	if (!list_empty(&mrt->mfc6_unres_queue))
900 		ipmr_do_expire_process(mrt);
901 
902 	spin_unlock(&mfc_unres_lock);
903 }
904 
905 /* Fill oifs list. It is called under write locked mrt_lock. */
906 
907 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
908 				    unsigned char *ttls)
909 {
910 	int vifi;
911 
912 	cache->mfc_un.res.minvif = MAXMIFS;
913 	cache->mfc_un.res.maxvif = 0;
914 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
915 
916 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
917 		if (MIF_EXISTS(mrt, vifi) &&
918 		    ttls[vifi] && ttls[vifi] < 255) {
919 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
920 			if (cache->mfc_un.res.minvif > vifi)
921 				cache->mfc_un.res.minvif = vifi;
922 			if (cache->mfc_un.res.maxvif <= vifi)
923 				cache->mfc_un.res.maxvif = vifi + 1;
924 		}
925 	}
926 }
927 
/*
 * Add a multicast interface to a table.
 *
 * @vifc describes the MIF: mif6c_flags selects either a regular device
 * (0, looked up by mif6c_pifi) or, with PIM-SM support, the register
 * device (MIFF_REGISTER, created here).  @mrtsock is zero when the
 * request does not come from the routing socket, in which case the MIF
 * is marked VIFF_STATIC.
 *
 * Returns 0 on success; -EADDRINUSE when the slot (or the register MIF)
 * is already taken, -ENOBUFS when the register device cannot be created,
 * -EADDRNOTAVAIL when the device index is unknown, -EINVAL for
 * unsupported flags, or the error from dev_set_allmulti().
 */
928 static int mif6_add(struct net *net, struct mr6_table *mrt,
929 		    struct mif6ctl *vifc, int mrtsock)
930 {
931 	int vifi = vifc->mif6c_mifi;
	/* NOTE(review): vifi indexes vif6_table without a range check in
	 * this function -- presumably validated by the caller; confirm
	 */
932 	struct mif_device *v = &mrt->vif6_table[vifi];
933 	struct net_device *dev;
934 	struct inet6_dev *in6_dev;
935 	int err;
936
937 	/* Is vif busy ? */
938 	if (MIF_EXISTS(mrt, vifi))
939 		return -EADDRINUSE;
940
941 	switch (vifc->mif6c_flags) {
942 #ifdef CONFIG_IPV6_PIMSM_V2
943 	case MIFF_REGISTER:
944 		/*
945 		 * Special Purpose VIF in PIM
946 		 * All the packets will be sent to the daemon
947 		 */
948 		if (mrt->mroute_reg_vif_num >= 0)
949 			return -EADDRINUSE;
950 		dev = ip6mr_reg_vif(net, mrt);
951 		if (!dev)
952 			return -ENOBUFS;
953 		err = dev_set_allmulti(dev, 1);
954 		if (err) {
955 			unregister_netdevice(dev);
			/* drop the reference taken by ip6mr_reg_vif() */
956 			dev_put(dev);
957 			return err;
958 		}
959 		break;
960 #endif
961 	case 0:
962 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
963 		if (!dev)
964 			return -EADDRNOTAVAIL;
965 		err = dev_set_allmulti(dev, 1);
966 		if (err) {
967 			dev_put(dev);
968 			return err;
969 		}
970 		break;
971 	default:
972 		return -EINVAL;
973 	}
974
	/* count this MIF in the device's mc_forwarding setting and notify */
975 	in6_dev = __in6_dev_get(dev);
976 	if (in6_dev) {
977 		in6_dev->cnf.mc_forwarding++;
978 		inet6_netconf_notify_devconf(dev_net(dev),
979 					     NETCONFA_MC_FORWARDING,
980 					     dev->ifindex, &in6_dev->cnf);
981 	}
982
983 	/*
984 	 *	Fill in the VIF structures
985 	 */
986 	v->rate_limit = vifc->vifc_rate_limit;
987 	v->flags = vifc->mif6c_flags;
988 	if (!mrtsock)
989 		v->flags |= VIFF_STATIC;
990 	v->threshold = vifc->vifc_threshold;
991 	v->bytes_in = 0;
992 	v->bytes_out = 0;
993 	v->pkt_in = 0;
994 	v->pkt_out = 0;
995 	v->link = dev->ifindex;
996 	if (v->flags & MIFF_REGISTER)
997 		v->link = dev->iflink;
998
999 	/* And finish update writing critical data */
1000 	write_lock_bh(&mrt_lock);
	/* setting v->dev is what makes MIF_EXISTS() true for this slot */
1001 	v->dev = dev;
1002 #ifdef CONFIG_IPV6_PIMSM_V2
1003 	if (v->flags & MIFF_REGISTER)
1004 		mrt->mroute_reg_vif_num = vifi;
1005 #endif
1006 	if (vifi + 1 > mrt->maxvif)
1007 		mrt->maxvif = vifi + 1;
1008 	write_unlock_bh(&mrt_lock);
1009 	return 0;
1010 }
1011 
1012 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
1013 					   const struct in6_addr *origin,
1014 					   const struct in6_addr *mcastgrp)
1015 {
1016 	int line = MFC6_HASH(mcastgrp, origin);
1017 	struct mfc6_cache *c;
1018 
1019 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1020 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1021 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1022 			return c;
1023 	}
1024 	return NULL;
1025 }
1026 
1027 /* Look for a (*,*,oif) entry */
1028 static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
1029 						      mifi_t mifi)
1030 {
1031 	int line = MFC6_HASH(&in6addr_any, &in6addr_any);
1032 	struct mfc6_cache *c;
1033 
1034 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1035 		if (ipv6_addr_any(&c->mf6c_origin) &&
1036 		    ipv6_addr_any(&c->mf6c_mcastgrp) &&
1037 		    (c->mfc_un.res.ttls[mifi] < 255))
1038 			return c;
1039 
1040 	return NULL;
1041 }
1042 
1043 /* Look for a (*,G) entry */
1044 static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
1045 					       struct in6_addr *mcastgrp,
1046 					       mifi_t mifi)
1047 {
1048 	int line = MFC6_HASH(mcastgrp, &in6addr_any);
1049 	struct mfc6_cache *c, *proxy;
1050 
1051 	if (ipv6_addr_any(mcastgrp))
1052 		goto skip;
1053 
1054 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1055 		if (ipv6_addr_any(&c->mf6c_origin) &&
1056 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
1057 			if (c->mfc_un.res.ttls[mifi] < 255)
1058 				return c;
1059 
1060 			/* It's ok if the mifi is part of the static tree */
1061 			proxy = ip6mr_cache_find_any_parent(mrt,
1062 							    c->mf6c_parent);
1063 			if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
1064 				return c;
1065 		}
1066 
1067 skip:
1068 	return ip6mr_cache_find_any_parent(mrt, mifi);
1069 }
1070 
1071 /*
1072  *	Allocate a multicast cache entry
1073  */
1074 static struct mfc6_cache *ip6mr_cache_alloc(void)
1075 {
1076 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1077 	if (c == NULL)
1078 		return NULL;
1079 	c->mfc_un.res.minvif = MAXMIFS;
1080 	return c;
1081 }
1082 
1083 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1084 {
1085 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1086 	if (c == NULL)
1087 		return NULL;
1088 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
1089 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
1090 	return c;
1091 }
1092 
1093 /*
1094  *	A cache entry has gone into a resolved state from queued
1095  */
1096 
1097 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1098 				struct mfc6_cache *uc, struct mfc6_cache *c)
1099 {
1100 	struct sk_buff *skb;
1101 
1102 	/*
1103 	 *	Play the pending entries through our router
1104 	 */
1105 
1106 	while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1107 		if (ipv6_hdr(skb)->version == 0) {
1108 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1109 
1110 			if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
1111 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1112 			} else {
1113 				nlh->nlmsg_type = NLMSG_ERROR;
1114 				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1115 				skb_trim(skb, nlh->nlmsg_len);
1116 				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1117 			}
1118 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1119 		} else
1120 			ip6_mr_forward(net, mrt, skb, c);
1121 	}
1122 }
1123 
1124 /*
1125  *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1126  *	expects the following bizarre scheme.
1127  *
1128  *	Called under mrt_lock.
1129  */
1130 
/*
 * Build a struct mrt6msg report about @pkt and queue it on the table's
 * routing socket (mrt->mroute6_sk).
 *
 * @mifi:   MIF number stored in the report (the MRT6MSG_WHOLEPKT branch
 *          stores mrt->mroute_reg_vif_num instead)
 * @assert: message type; MRT6MSG_WHOLEPKT makes the report carry a copy
 *          of the whole packet, every other type carries only a copy of
 *          the IPv6 header followed by the mrt6msg
 *
 * @pkt itself is not consumed.  Returns the result of
 * sock_queue_rcv_skb(), -ENOBUFS when no skb could be allocated, or
 * -EINVAL when the table has no routing socket.
 */
1131 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1132 			      mifi_t mifi, int assert)
1133 {
1134 	struct sk_buff *skb;
1135 	struct mrt6msg *msg;
1136 	int ret;
1137
1138 #ifdef CONFIG_IPV6_PIMSM_V2
	/* whole-packet report: copy pkt with headroom for the mrt6msg */
1139 	if (assert == MRT6MSG_WHOLEPKT)
1140 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1141 						+sizeof(*msg));
1142 	else
1143 #endif
1144 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1145
1146 	if (!skb)
1147 		return -ENOBUFS;
1148
1149 	/* I suppose that internal messages
1150 	 * do not require checksums */
1151
1152 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1153
1154 #ifdef CONFIG_IPV6_PIMSM_V2
1155 	if (assert == MRT6MSG_WHOLEPKT) {
1156 		/* Ugly, but we have no choice with this interface.
1157 		   Duplicate old header, fix length etc.
1158 		   And all this only to mangle msg->im6_msgtype and
1159 		   to set msg->im6_mbz to "mbz" :-)
1160 		 */
1161 		skb_push(skb, -skb_network_offset(pkt));
1162
		/* the mrt6msg goes in front of the copied packet */
1163 		skb_push(skb, sizeof(*msg));
1164 		skb_reset_transport_header(skb);
1165 		msg = (struct mrt6msg *)skb_transport_header(skb);
1166 		msg->im6_mbz = 0;
1167 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1168 		msg->im6_mif = mrt->mroute_reg_vif_num;
1169 		msg->im6_pad = 0;
1170 		msg->im6_src = ipv6_hdr(pkt)->saddr;
1171 		msg->im6_dst = ipv6_hdr(pkt)->daddr;
1172
1173 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1174 	} else
1175 #endif
1176 	{
1177 	/*
1178 	 *	Copy the IP header
1179 	 */
1180
1181 	skb_put(skb, sizeof(struct ipv6hdr));
1182 	skb_reset_network_header(skb);
1183 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1184
1185 	/*
1186 	 *	Add our header
1187 	 */
1188 	skb_put(skb, sizeof(*msg));
1189 	skb_reset_transport_header(skb);
1190 	msg = (struct mrt6msg *)skb_transport_header(skb);
1191
1192 	msg->im6_mbz = 0;
1193 	msg->im6_msgtype = assert;
1194 	msg->im6_mif = mifi;
1195 	msg->im6_pad = 0;
1196 	msg->im6_src = ipv6_hdr(pkt)->saddr;
1197 	msg->im6_dst = ipv6_hdr(pkt)->daddr;
1198
1199 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1200 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1201 	}
1202
	/* no routing socket to deliver to: discard the report */
1203 	if (mrt->mroute6_sk == NULL) {
1204 		kfree_skb(skb);
1205 		return -EINVAL;
1206 	}
1207
1208 	/*
1209 	 *	Deliver to user space multicast routing algorithms
1210 	 */
1211 	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1212 	if (ret < 0) {
1213 		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1214 		kfree_skb(skb);
1215 	}
1216
1217 	return ret;
1218 }
1219 
1220 /*
1221  *	Queue a packet for resolution. It gets locked cache entry!
1222  */
1223 
1224 static int
1225 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1226 {
1227 	bool found = false;
1228 	int err;
1229 	struct mfc6_cache *c;
1230 
1231 	spin_lock_bh(&mfc_unres_lock);
1232 	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1233 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1234 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1235 			found = true;
1236 			break;
1237 		}
1238 	}
1239 
1240 	if (!found) {
1241 		/*
1242 		 *	Create a new entry if allowable
1243 		 */
1244 
1245 		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1246 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
1247 			spin_unlock_bh(&mfc_unres_lock);
1248 
1249 			kfree_skb(skb);
1250 			return -ENOBUFS;
1251 		}
1252 
1253 		/*
1254 		 *	Fill in the new cache entry
1255 		 */
1256 		c->mf6c_parent = -1;
1257 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1258 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1259 
1260 		/*
1261 		 *	Reflect first query at pim6sd
1262 		 */
1263 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1264 		if (err < 0) {
1265 			/* If the report failed throw the cache entry
1266 			   out - Brad Parker
1267 			 */
1268 			spin_unlock_bh(&mfc_unres_lock);
1269 
1270 			ip6mr_cache_free(c);
1271 			kfree_skb(skb);
1272 			return err;
1273 		}
1274 
1275 		atomic_inc(&mrt->cache_resolve_queue_len);
1276 		list_add(&c->list, &mrt->mfc6_unres_queue);
1277 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1278 
1279 		ipmr_do_expire_process(mrt);
1280 	}
1281 
1282 	/*
1283 	 *	See if we can append the packet
1284 	 */
1285 	if (c->mfc_un.unres.unresolved.qlen > 3) {
1286 		kfree_skb(skb);
1287 		err = -ENOBUFS;
1288 	} else {
1289 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1290 		err = 0;
1291 	}
1292 
1293 	spin_unlock_bh(&mfc_unres_lock);
1294 	return err;
1295 }
1296 
1297 /*
1298  *	MFC6 cache manipulation by user space
1299  */
1300 
1301 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
1302 			    int parent)
1303 {
1304 	int line;
1305 	struct mfc6_cache *c, *next;
1306 
1307 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1308 
1309 	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1310 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1311 		    ipv6_addr_equal(&c->mf6c_mcastgrp,
1312 				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1313 		    (parent == -1 || parent == c->mf6c_parent)) {
1314 			write_lock_bh(&mrt_lock);
1315 			list_del(&c->list);
1316 			write_unlock_bh(&mrt_lock);
1317 
1318 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1319 			ip6mr_cache_free(c);
1320 			return 0;
1321 		}
1322 	}
1323 	return -ENOENT;
1324 }
1325 
1326 static int ip6mr_device_event(struct notifier_block *this,
1327 			      unsigned long event, void *ptr)
1328 {
1329 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1330 	struct net *net = dev_net(dev);
1331 	struct mr6_table *mrt;
1332 	struct mif_device *v;
1333 	int ct;
1334 	LIST_HEAD(list);
1335 
1336 	if (event != NETDEV_UNREGISTER)
1337 		return NOTIFY_DONE;
1338 
1339 	ip6mr_for_each_table(mrt, net) {
1340 		v = &mrt->vif6_table[0];
1341 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1342 			if (v->dev == dev)
1343 				mif6_delete(mrt, ct, &list);
1344 		}
1345 	}
1346 	unregister_netdevice_many(&list);
1347 
1348 	return NOTIFY_DONE;
1349 }
1350 
/* Netdevice notifier block; registered in ip6_mr_init() */
static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
1354 
1355 /*
1356  *	Setup for IP multicast routing
1357  */
1358 
1359 static int __net_init ip6mr_net_init(struct net *net)
1360 {
1361 	int err;
1362 
1363 	err = ip6mr_rules_init(net);
1364 	if (err < 0)
1365 		goto fail;
1366 
1367 #ifdef CONFIG_PROC_FS
1368 	err = -ENOMEM;
1369 	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
1370 		goto proc_vif_fail;
1371 	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
1372 		goto proc_cache_fail;
1373 #endif
1374 
1375 	return 0;
1376 
1377 #ifdef CONFIG_PROC_FS
1378 proc_cache_fail:
1379 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1380 proc_vif_fail:
1381 	ip6mr_rules_exit(net);
1382 #endif
1383 fail:
1384 	return err;
1385 }
1386 
/*
 * Per-namespace teardown: removes the proc entries (when CONFIG_PROC_FS is
 * set) and then calls ip6mr_rules_exit(), the reverse of ip6mr_net_init().
 */
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
}
1395 
/* Per-network-namespace hooks; registered in ip6_mr_init() */
static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};
1400 
/*
 * Subsystem init: creates the mfc6_cache slab, registers the pernet
 * operations and the netdevice notifier, adds the PIM protocol handler
 * (CONFIG_IPV6_PIMSM_V2 only) and registers the RTM_GETROUTE dump handler.
 *
 * Returns 0 on success or a negative errno; on failure every step already
 * completed is undone in reverse order via the labels at the bottom.
 */
int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	/* Return value is not checked here */
	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
		      ip6mr_rtm_dumproute, NULL);
	return 0;
#ifdef CONFIG_IPV6_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}
1439 
1440 void ip6_mr_cleanup(void)
1441 {
1442 	unregister_netdevice_notifier(&ip6_mr_notifier);
1443 	unregister_pernet_subsys(&ip6mr_net_ops);
1444 	kmem_cache_destroy(mrt_cachep);
1445 }
1446 
1447 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1448 			 struct mf6cctl *mfc, int mrtsock, int parent)
1449 {
1450 	bool found = false;
1451 	int line;
1452 	struct mfc6_cache *uc, *c;
1453 	unsigned char ttls[MAXMIFS];
1454 	int i;
1455 
1456 	if (mfc->mf6cc_parent >= MAXMIFS)
1457 		return -ENFILE;
1458 
1459 	memset(ttls, 255, MAXMIFS);
1460 	for (i = 0; i < MAXMIFS; i++) {
1461 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1462 			ttls[i] = 1;
1463 
1464 	}
1465 
1466 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1467 
1468 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1469 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1470 		    ipv6_addr_equal(&c->mf6c_mcastgrp,
1471 				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1472 		    (parent == -1 || parent == mfc->mf6cc_parent)) {
1473 			found = true;
1474 			break;
1475 		}
1476 	}
1477 
1478 	if (found) {
1479 		write_lock_bh(&mrt_lock);
1480 		c->mf6c_parent = mfc->mf6cc_parent;
1481 		ip6mr_update_thresholds(mrt, c, ttls);
1482 		if (!mrtsock)
1483 			c->mfc_flags |= MFC_STATIC;
1484 		write_unlock_bh(&mrt_lock);
1485 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1486 		return 0;
1487 	}
1488 
1489 	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1490 	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1491 		return -EINVAL;
1492 
1493 	c = ip6mr_cache_alloc();
1494 	if (c == NULL)
1495 		return -ENOMEM;
1496 
1497 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1498 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1499 	c->mf6c_parent = mfc->mf6cc_parent;
1500 	ip6mr_update_thresholds(mrt, c, ttls);
1501 	if (!mrtsock)
1502 		c->mfc_flags |= MFC_STATIC;
1503 
1504 	write_lock_bh(&mrt_lock);
1505 	list_add(&c->list, &mrt->mfc6_cache_array[line]);
1506 	write_unlock_bh(&mrt_lock);
1507 
1508 	/*
1509 	 *	Check to see if we resolved a queued list. If so we
1510 	 *	need to send on the frames and tidy up.
1511 	 */
1512 	found = false;
1513 	spin_lock_bh(&mfc_unres_lock);
1514 	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1515 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1516 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1517 			list_del(&uc->list);
1518 			atomic_dec(&mrt->cache_resolve_queue_len);
1519 			found = true;
1520 			break;
1521 		}
1522 	}
1523 	if (list_empty(&mrt->mfc6_unres_queue))
1524 		del_timer(&mrt->ipmr_expire_timer);
1525 	spin_unlock_bh(&mfc_unres_lock);
1526 
1527 	if (found) {
1528 		ip6mr_cache_resolve(net, mrt, uc, c);
1529 		ip6mr_cache_free(uc);
1530 	}
1531 	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1532 	return 0;
1533 }
1534 
1535 /*
1536  *	Close the multicast socket, and clear the vif tables etc
1537  */
1538 
1539 static void mroute_clean_tables(struct mr6_table *mrt)
1540 {
1541 	int i;
1542 	LIST_HEAD(list);
1543 	struct mfc6_cache *c, *next;
1544 
1545 	/*
1546 	 *	Shut down all active vif entries
1547 	 */
1548 	for (i = 0; i < mrt->maxvif; i++) {
1549 		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1550 			mif6_delete(mrt, i, &list);
1551 	}
1552 	unregister_netdevice_many(&list);
1553 
1554 	/*
1555 	 *	Wipe the cache
1556 	 */
1557 	for (i = 0; i < MFC6_LINES; i++) {
1558 		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1559 			if (c->mfc_flags & MFC_STATIC)
1560 				continue;
1561 			write_lock_bh(&mrt_lock);
1562 			list_del(&c->list);
1563 			write_unlock_bh(&mrt_lock);
1564 
1565 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1566 			ip6mr_cache_free(c);
1567 		}
1568 	}
1569 
1570 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1571 		spin_lock_bh(&mfc_unres_lock);
1572 		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1573 			list_del(&c->list);
1574 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1575 			ip6mr_destroy_unres(mrt, c);
1576 		}
1577 		spin_unlock_bh(&mfc_unres_lock);
1578 	}
1579 }
1580 
1581 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1582 {
1583 	int err = 0;
1584 	struct net *net = sock_net(sk);
1585 
1586 	rtnl_lock();
1587 	write_lock_bh(&mrt_lock);
1588 	if (likely(mrt->mroute6_sk == NULL)) {
1589 		mrt->mroute6_sk = sk;
1590 		net->ipv6.devconf_all->mc_forwarding++;
1591 		inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
1592 					     NETCONFA_IFINDEX_ALL,
1593 					     net->ipv6.devconf_all);
1594 	}
1595 	else
1596 		err = -EADDRINUSE;
1597 	write_unlock_bh(&mrt_lock);
1598 
1599 	rtnl_unlock();
1600 
1601 	return err;
1602 }
1603 
1604 int ip6mr_sk_done(struct sock *sk)
1605 {
1606 	int err = -EACCES;
1607 	struct net *net = sock_net(sk);
1608 	struct mr6_table *mrt;
1609 
1610 	rtnl_lock();
1611 	ip6mr_for_each_table(mrt, net) {
1612 		if (sk == mrt->mroute6_sk) {
1613 			write_lock_bh(&mrt_lock);
1614 			mrt->mroute6_sk = NULL;
1615 			net->ipv6.devconf_all->mc_forwarding--;
1616 			inet6_netconf_notify_devconf(net,
1617 						     NETCONFA_MC_FORWARDING,
1618 						     NETCONFA_IFINDEX_ALL,
1619 						     net->ipv6.devconf_all);
1620 			write_unlock_bh(&mrt_lock);
1621 
1622 			mroute_clean_tables(mrt);
1623 			err = 0;
1624 			break;
1625 		}
1626 	}
1627 	rtnl_unlock();
1628 
1629 	return err;
1630 }
1631 
1632 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1633 {
1634 	struct mr6_table *mrt;
1635 	struct flowi6 fl6 = {
1636 		.flowi6_iif	= skb->skb_iif,
1637 		.flowi6_oif	= skb->dev->ifindex,
1638 		.flowi6_mark	= skb->mark,
1639 	};
1640 
1641 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1642 		return NULL;
1643 
1644 	return mrt->mroute6_sk;
1645 }
1646 
1647 /*
1648  *	Socket options and virtual interface manipulation. The whole
1649  *	virtual interface system is a complete heap, but unfortunately
1650  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1651  *	MOSPF/PIM router set up we can clean this up.
1652  */
1653 
1654 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1655 {
1656 	int ret, parent = 0;
1657 	struct mif6ctl vif;
1658 	struct mf6cctl mfc;
1659 	mifi_t mifi;
1660 	struct net *net = sock_net(sk);
1661 	struct mr6_table *mrt;
1662 
1663 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1664 	if (mrt == NULL)
1665 		return -ENOENT;
1666 
1667 	if (optname != MRT6_INIT) {
1668 		if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
1669 			return -EACCES;
1670 	}
1671 
1672 	switch (optname) {
1673 	case MRT6_INIT:
1674 		if (sk->sk_type != SOCK_RAW ||
1675 		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1676 			return -EOPNOTSUPP;
1677 		if (optlen < sizeof(int))
1678 			return -EINVAL;
1679 
1680 		return ip6mr_sk_init(mrt, sk);
1681 
1682 	case MRT6_DONE:
1683 		return ip6mr_sk_done(sk);
1684 
1685 	case MRT6_ADD_MIF:
1686 		if (optlen < sizeof(vif))
1687 			return -EINVAL;
1688 		if (copy_from_user(&vif, optval, sizeof(vif)))
1689 			return -EFAULT;
1690 		if (vif.mif6c_mifi >= MAXMIFS)
1691 			return -ENFILE;
1692 		rtnl_lock();
1693 		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1694 		rtnl_unlock();
1695 		return ret;
1696 
1697 	case MRT6_DEL_MIF:
1698 		if (optlen < sizeof(mifi_t))
1699 			return -EINVAL;
1700 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1701 			return -EFAULT;
1702 		rtnl_lock();
1703 		ret = mif6_delete(mrt, mifi, NULL);
1704 		rtnl_unlock();
1705 		return ret;
1706 
1707 	/*
1708 	 *	Manipulate the forwarding caches. These live
1709 	 *	in a sort of kernel/user symbiosis.
1710 	 */
1711 	case MRT6_ADD_MFC:
1712 	case MRT6_DEL_MFC:
1713 		parent = -1;
1714 	case MRT6_ADD_MFC_PROXY:
1715 	case MRT6_DEL_MFC_PROXY:
1716 		if (optlen < sizeof(mfc))
1717 			return -EINVAL;
1718 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1719 			return -EFAULT;
1720 		if (parent == 0)
1721 			parent = mfc.mf6cc_parent;
1722 		rtnl_lock();
1723 		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1724 			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1725 		else
1726 			ret = ip6mr_mfc_add(net, mrt, &mfc,
1727 					    sk == mrt->mroute6_sk, parent);
1728 		rtnl_unlock();
1729 		return ret;
1730 
1731 	/*
1732 	 *	Control PIM assert (to activate pim will activate assert)
1733 	 */
1734 	case MRT6_ASSERT:
1735 	{
1736 		int v;
1737 
1738 		if (optlen != sizeof(v))
1739 			return -EINVAL;
1740 		if (get_user(v, (int __user *)optval))
1741 			return -EFAULT;
1742 		mrt->mroute_do_assert = v;
1743 		return 0;
1744 	}
1745 
1746 #ifdef CONFIG_IPV6_PIMSM_V2
1747 	case MRT6_PIM:
1748 	{
1749 		int v;
1750 
1751 		if (optlen != sizeof(v))
1752 			return -EINVAL;
1753 		if (get_user(v, (int __user *)optval))
1754 			return -EFAULT;
1755 		v = !!v;
1756 		rtnl_lock();
1757 		ret = 0;
1758 		if (v != mrt->mroute_do_pim) {
1759 			mrt->mroute_do_pim = v;
1760 			mrt->mroute_do_assert = v;
1761 		}
1762 		rtnl_unlock();
1763 		return ret;
1764 	}
1765 
1766 #endif
1767 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1768 	case MRT6_TABLE:
1769 	{
1770 		u32 v;
1771 
1772 		if (optlen != sizeof(u32))
1773 			return -EINVAL;
1774 		if (get_user(v, (u32 __user *)optval))
1775 			return -EFAULT;
1776 		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1777 		if (v != RT_TABLE_DEFAULT && v >= 100000000)
1778 			return -EINVAL;
1779 		if (sk == mrt->mroute6_sk)
1780 			return -EBUSY;
1781 
1782 		rtnl_lock();
1783 		ret = 0;
1784 		if (!ip6mr_new_table(net, v))
1785 			ret = -ENOMEM;
1786 		raw6_sk(sk)->ip6mr_table = v;
1787 		rtnl_unlock();
1788 		return ret;
1789 	}
1790 #endif
1791 	/*
1792 	 *	Spurious command, or MRT6_VERSION which you cannot
1793 	 *	set.
1794 	 */
1795 	default:
1796 		return -ENOPROTOOPT;
1797 	}
1798 }
1799 
1800 /*
1801  *	Getsock opt support for the multicast routing system.
1802  */
1803 
1804 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1805 			  int __user *optlen)
1806 {
1807 	int olr;
1808 	int val;
1809 	struct net *net = sock_net(sk);
1810 	struct mr6_table *mrt;
1811 
1812 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1813 	if (mrt == NULL)
1814 		return -ENOENT;
1815 
1816 	switch (optname) {
1817 	case MRT6_VERSION:
1818 		val = 0x0305;
1819 		break;
1820 #ifdef CONFIG_IPV6_PIMSM_V2
1821 	case MRT6_PIM:
1822 		val = mrt->mroute_do_pim;
1823 		break;
1824 #endif
1825 	case MRT6_ASSERT:
1826 		val = mrt->mroute_do_assert;
1827 		break;
1828 	default:
1829 		return -ENOPROTOOPT;
1830 	}
1831 
1832 	if (get_user(olr, optlen))
1833 		return -EFAULT;
1834 
1835 	olr = min_t(int, olr, sizeof(int));
1836 	if (olr < 0)
1837 		return -EINVAL;
1838 
1839 	if (put_user(olr, optlen))
1840 		return -EFAULT;
1841 	if (copy_to_user(optval, &val, olr))
1842 		return -EFAULT;
1843 	return 0;
1844 }
1845 
1846 /*
1847  *	The IP multicast ioctl support routines.
1848  */
1849 
1850 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1851 {
1852 	struct sioc_sg_req6 sr;
1853 	struct sioc_mif_req6 vr;
1854 	struct mif_device *vif;
1855 	struct mfc6_cache *c;
1856 	struct net *net = sock_net(sk);
1857 	struct mr6_table *mrt;
1858 
1859 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1860 	if (mrt == NULL)
1861 		return -ENOENT;
1862 
1863 	switch (cmd) {
1864 	case SIOCGETMIFCNT_IN6:
1865 		if (copy_from_user(&vr, arg, sizeof(vr)))
1866 			return -EFAULT;
1867 		if (vr.mifi >= mrt->maxvif)
1868 			return -EINVAL;
1869 		read_lock(&mrt_lock);
1870 		vif = &mrt->vif6_table[vr.mifi];
1871 		if (MIF_EXISTS(mrt, vr.mifi)) {
1872 			vr.icount = vif->pkt_in;
1873 			vr.ocount = vif->pkt_out;
1874 			vr.ibytes = vif->bytes_in;
1875 			vr.obytes = vif->bytes_out;
1876 			read_unlock(&mrt_lock);
1877 
1878 			if (copy_to_user(arg, &vr, sizeof(vr)))
1879 				return -EFAULT;
1880 			return 0;
1881 		}
1882 		read_unlock(&mrt_lock);
1883 		return -EADDRNOTAVAIL;
1884 	case SIOCGETSGCNT_IN6:
1885 		if (copy_from_user(&sr, arg, sizeof(sr)))
1886 			return -EFAULT;
1887 
1888 		read_lock(&mrt_lock);
1889 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1890 		if (c) {
1891 			sr.pktcnt = c->mfc_un.res.pkt;
1892 			sr.bytecnt = c->mfc_un.res.bytes;
1893 			sr.wrong_if = c->mfc_un.res.wrong_if;
1894 			read_unlock(&mrt_lock);
1895 
1896 			if (copy_to_user(arg, &sr, sizeof(sr)))
1897 				return -EFAULT;
1898 			return 0;
1899 		}
1900 		read_unlock(&mrt_lock);
1901 		return -EADDRNOTAVAIL;
1902 	default:
1903 		return -ENOIOCTLCMD;
1904 	}
1905 }
1906 
1907 #ifdef CONFIG_COMPAT
/*
 * Request layout used by ip6mr_compat_ioctl() for SIOCGETSGCNT_IN6; the
 * counters are compat_ulong_t.
 */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};
1915 
/*
 * Request layout used by ip6mr_compat_ioctl() for SIOCGETMIFCNT_IN6; the
 * counters are compat_ulong_t.
 */
struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};
1923 
1924 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1925 {
1926 	struct compat_sioc_sg_req6 sr;
1927 	struct compat_sioc_mif_req6 vr;
1928 	struct mif_device *vif;
1929 	struct mfc6_cache *c;
1930 	struct net *net = sock_net(sk);
1931 	struct mr6_table *mrt;
1932 
1933 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1934 	if (mrt == NULL)
1935 		return -ENOENT;
1936 
1937 	switch (cmd) {
1938 	case SIOCGETMIFCNT_IN6:
1939 		if (copy_from_user(&vr, arg, sizeof(vr)))
1940 			return -EFAULT;
1941 		if (vr.mifi >= mrt->maxvif)
1942 			return -EINVAL;
1943 		read_lock(&mrt_lock);
1944 		vif = &mrt->vif6_table[vr.mifi];
1945 		if (MIF_EXISTS(mrt, vr.mifi)) {
1946 			vr.icount = vif->pkt_in;
1947 			vr.ocount = vif->pkt_out;
1948 			vr.ibytes = vif->bytes_in;
1949 			vr.obytes = vif->bytes_out;
1950 			read_unlock(&mrt_lock);
1951 
1952 			if (copy_to_user(arg, &vr, sizeof(vr)))
1953 				return -EFAULT;
1954 			return 0;
1955 		}
1956 		read_unlock(&mrt_lock);
1957 		return -EADDRNOTAVAIL;
1958 	case SIOCGETSGCNT_IN6:
1959 		if (copy_from_user(&sr, arg, sizeof(sr)))
1960 			return -EFAULT;
1961 
1962 		read_lock(&mrt_lock);
1963 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1964 		if (c) {
1965 			sr.pktcnt = c->mfc_un.res.pkt;
1966 			sr.bytecnt = c->mfc_un.res.bytes;
1967 			sr.wrong_if = c->mfc_un.res.wrong_if;
1968 			read_unlock(&mrt_lock);
1969 
1970 			if (copy_to_user(arg, &sr, sizeof(sr)))
1971 				return -EFAULT;
1972 			return 0;
1973 		}
1974 		read_unlock(&mrt_lock);
1975 		return -EADDRNOTAVAIL;
1976 	default:
1977 		return -ENOIOCTLCMD;
1978 	}
1979 }
1980 #endif
1981 
1982 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1983 {
1984 	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1985 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1986 	IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1987 			 IPSTATS_MIB_OUTOCTETS, skb->len);
1988 	return dst_output(skb);
1989 }
1990 
1991 /*
1992  *	Processing handlers for ip6mr_forward
1993  */
1994 
/*
 * Send one copy of skb out through mif 'vifi' of table mrt.
 *
 * Consumes skb on every path: it is either passed on through the
 * NF_INET_FORWARD hook or freed. For a register mif
 * (CONFIG_IPV6_PIMSM_V2) the packet is not transmitted but reported to the
 * daemon as MRT6MSG_WHOLEPKT. Returns the NF_HOOK() result, or 0 when the
 * packet was dropped or reported.
 */
static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	/* mif slot has no device attached */
	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		/* Report result is ignored; the skb is freed either way */
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	/* Route towards the group address, pinned to the mif's link */
	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	/* Replace whatever dst the skb carried with the output route */
	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	/* Note: counters are bumped before skb_cow() can still fail below */
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	/* Re-read the header pointer after skb_cow(), then decrement */
	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
2067 
2068 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2069 {
2070 	int ct;
2071 
2072 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2073 		if (mrt->vif6_table[ct].dev == dev)
2074 			break;
2075 	}
2076 	return ct;
2077 }
2078 
/*
 * Forward skb according to the resolved cache entry 'cache'.
 *
 * Updates the entry's and the parent mif's counters, checks that the
 * packet arrived on the expected interface (possibly reporting
 * MRT6MSG_WRONGMIF to the daemon), and sends a copy out of every mif whose
 * threshold is below the packet's hop limit. Consumes skb: the last copy
 * uses skb itself, earlier copies are clones; if nothing is sent, skb is
 * freed.
 */
static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache)
{
	/* psend: mif still waiting to be sent to, -1 if none */
	int psend = -1;
	int vif, ct;
	/* mif the packet actually arrived on, -1 if none */
	int true_vifi = ip6mr_find_vif(mrt, skb->dev);

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
			goto forward;
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif6_table[vif].dev != skb->dev) {
		cache->mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			/* Rate-limited via last_assert / MFC_ASSERT_THRESH */
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

forward:
	mrt->vif6_table[vif].pkt_in++;
	mrt->vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&cache->mf6c_origin) &&
	    ipv6_addr_any(&cache->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != cache->mf6c_parent &&
		    ipv6_hdr(skb)->hop_limit >
				cache->mfc_un.res.ttls[cache->mf6c_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = cache->mf6c_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	/*
	 * Walk the output mifs from high to low. Sending is delayed by one
	 * match so that the final match can use skb itself instead of a clone.
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2, cache, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, cache, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}
2167 
2168 
2169 /*
2170  *	Multicast packets for forwarding arrive here
2171  */
2172 
2173 int ip6_mr_input(struct sk_buff *skb)
2174 {
2175 	struct mfc6_cache *cache;
2176 	struct net *net = dev_net(skb->dev);
2177 	struct mr6_table *mrt;
2178 	struct flowi6 fl6 = {
2179 		.flowi6_iif	= skb->dev->ifindex,
2180 		.flowi6_mark	= skb->mark,
2181 	};
2182 	int err;
2183 
2184 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2185 	if (err < 0) {
2186 		kfree_skb(skb);
2187 		return err;
2188 	}
2189 
2190 	read_lock(&mrt_lock);
2191 	cache = ip6mr_cache_find(mrt,
2192 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2193 	if (cache == NULL) {
2194 		int vif = ip6mr_find_vif(mrt, skb->dev);
2195 
2196 		if (vif >= 0)
2197 			cache = ip6mr_cache_find_any(mrt,
2198 						     &ipv6_hdr(skb)->daddr,
2199 						     vif);
2200 	}
2201 
2202 	/*
2203 	 *	No usable cache entry
2204 	 */
2205 	if (cache == NULL) {
2206 		int vif;
2207 
2208 		vif = ip6mr_find_vif(mrt, skb->dev);
2209 		if (vif >= 0) {
2210 			int err = ip6mr_cache_unresolved(mrt, vif, skb);
2211 			read_unlock(&mrt_lock);
2212 
2213 			return err;
2214 		}
2215 		read_unlock(&mrt_lock);
2216 		kfree_skb(skb);
2217 		return -ENODEV;
2218 	}
2219 
2220 	ip6_mr_forward(net, mrt, skb, cache);
2221 
2222 	read_unlock(&mrt_lock);
2223 
2224 	return 0;
2225 }
2226 
2227 
2228 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2229 			       struct mfc6_cache *c, struct rtmsg *rtm)
2230 {
2231 	int ct;
2232 	struct rtnexthop *nhp;
2233 	struct nlattr *mp_attr;
2234 	struct rta_mfc_stats mfcs;
2235 
2236 	/* If cache is unresolved, don't try to parse IIF and OIF */
2237 	if (c->mf6c_parent >= MAXMIFS)
2238 		return -ENOENT;
2239 
2240 	if (MIF_EXISTS(mrt, c->mf6c_parent) &&
2241 	    nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
2242 		return -EMSGSIZE;
2243 	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
2244 	if (mp_attr == NULL)
2245 		return -EMSGSIZE;
2246 
2247 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2248 		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2249 			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
2250 			if (nhp == NULL) {
2251 				nla_nest_cancel(skb, mp_attr);
2252 				return -EMSGSIZE;
2253 			}
2254 
2255 			nhp->rtnh_flags = 0;
2256 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2257 			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2258 			nhp->rtnh_len = sizeof(*nhp);
2259 		}
2260 	}
2261 
2262 	nla_nest_end(skb, mp_attr);
2263 
2264 	mfcs.mfcs_packets = c->mfc_un.res.pkt;
2265 	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2266 	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
2267 	if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
2268 		return -EMSGSIZE;
2269 
2270 	rtm->rtm_type = RTN_MULTICAST;
2271 	return 1;
2272 }
2273 
/*
 * Fill multicast route information for the route attached to skb into the
 * netlink message being built in skb.
 *
 * Looks up the cache entry for the route's (src, dst) in the default
 * table, falling back to a wildcard entry for skb->dev's mif. If an entry
 * exists, its attributes are added via __ip6mr_fill_mroute(). If not, and
 * nowait is zero, a placeholder skb (IPv6 header with version 0) is queued
 * as an unresolved request.
 *
 * Returns the __ip6mr_fill_mroute() result, the
 * ip6mr_cache_unresolved() result, or -ENOENT (no table), -EAGAIN (no
 * entry and nowait set), -ENODEV (no usable device/mif), -ENOMEM.
 */
int ip6mr_get_route(struct net *net,
		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mr6_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		/*
		 * NOTE(review): skb2 holds only an IPv6 header, yet
		 * ip6mr_cache_resolve() pulls that header and treats what
		 * follows as a struct nlmsghdr, and reads NETLINK_CB().portid
		 * from this skb. Nothing here copies the request or the
		 * portid into skb2 - confirm whether this path can work.
		 */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		/* version 0 marks this skb as a netlink placeholder */
		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		/* Ownership of skb2 passes to ip6mr_cache_unresolved() */
		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	/* Caller asked for notifications: remember that on the entry */
	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
2350 
2351 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2352 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2353 			     int flags)
2354 {
2355 	struct nlmsghdr *nlh;
2356 	struct rtmsg *rtm;
2357 	int err;
2358 
2359 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2360 	if (nlh == NULL)
2361 		return -EMSGSIZE;
2362 
2363 	rtm = nlmsg_data(nlh);
2364 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2365 	rtm->rtm_dst_len  = 128;
2366 	rtm->rtm_src_len  = 128;
2367 	rtm->rtm_tos      = 0;
2368 	rtm->rtm_table    = mrt->id;
2369 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2370 		goto nla_put_failure;
2371 	rtm->rtm_type = RTN_MULTICAST;
2372 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2373 	if (c->mfc_flags & MFC_STATIC)
2374 		rtm->rtm_protocol = RTPROT_STATIC;
2375 	else
2376 		rtm->rtm_protocol = RTPROT_MROUTED;
2377 	rtm->rtm_flags    = 0;
2378 
2379 	if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) ||
2380 	    nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp))
2381 		goto nla_put_failure;
2382 	err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
2383 	/* do not break the dump if cache is unresolved */
2384 	if (err < 0 && err != -ENOENT)
2385 		goto nla_put_failure;
2386 
2387 	return nlmsg_end(skb, nlh);
2388 
2389 nla_put_failure:
2390 	nlmsg_cancel(skb, nlh);
2391 	return -EMSGSIZE;
2392 }
2393 
2394 static int mr6_msgsize(bool unresolved, int maxvif)
2395 {
2396 	size_t len =
2397 		NLMSG_ALIGN(sizeof(struct rtmsg))
2398 		+ nla_total_size(4)	/* RTA_TABLE */
2399 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2400 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2401 		;
2402 
2403 	if (!unresolved)
2404 		len = len
2405 		      + nla_total_size(4)	/* RTA_IIF */
2406 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2407 		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2408 						/* RTA_MFC_STATS */
2409 		      + nla_total_size(sizeof(struct rta_mfc_stats))
2410 		;
2411 
2412 	return len;
2413 }
2414 
2415 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
2416 			      int cmd)
2417 {
2418 	struct net *net = read_pnet(&mrt->net);
2419 	struct sk_buff *skb;
2420 	int err = -ENOBUFS;
2421 
2422 	skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
2423 			GFP_ATOMIC);
2424 	if (skb == NULL)
2425 		goto errout;
2426 
2427 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2428 	if (err < 0)
2429 		goto errout;
2430 
2431 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2432 	return;
2433 
2434 errout:
2435 	kfree_skb(skb);
2436 	if (err < 0)
2437 		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2438 }
2439 
2440 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2441 {
2442 	struct net *net = sock_net(skb->sk);
2443 	struct mr6_table *mrt;
2444 	struct mfc6_cache *mfc;
2445 	unsigned int t = 0, s_t;
2446 	unsigned int h = 0, s_h;
2447 	unsigned int e = 0, s_e;
2448 
2449 	s_t = cb->args[0];
2450 	s_h = cb->args[1];
2451 	s_e = cb->args[2];
2452 
2453 	read_lock(&mrt_lock);
2454 	ip6mr_for_each_table(mrt, net) {
2455 		if (t < s_t)
2456 			goto next_table;
2457 		if (t > s_t)
2458 			s_h = 0;
2459 		for (h = s_h; h < MFC6_LINES; h++) {
2460 			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2461 				if (e < s_e)
2462 					goto next_entry;
2463 				if (ip6mr_fill_mroute(mrt, skb,
2464 						      NETLINK_CB(cb->skb).portid,
2465 						      cb->nlh->nlmsg_seq,
2466 						      mfc, RTM_NEWROUTE,
2467 						      NLM_F_MULTI) < 0)
2468 					goto done;
2469 next_entry:
2470 				e++;
2471 			}
2472 			e = s_e = 0;
2473 		}
2474 		spin_lock_bh(&mfc_unres_lock);
2475 		list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
2476 			if (e < s_e)
2477 				goto next_entry2;
2478 			if (ip6mr_fill_mroute(mrt, skb,
2479 					      NETLINK_CB(cb->skb).portid,
2480 					      cb->nlh->nlmsg_seq,
2481 					      mfc, RTM_NEWROUTE,
2482 					      NLM_F_MULTI) < 0) {
2483 				spin_unlock_bh(&mfc_unres_lock);
2484 				goto done;
2485 			}
2486 next_entry2:
2487 			e++;
2488 		}
2489 		spin_unlock_bh(&mfc_unres_lock);
2490 		e = s_e = 0;
2491 		s_h = 0;
2492 next_table:
2493 		t++;
2494 	}
2495 done:
2496 	read_unlock(&mrt_lock);
2497 
2498 	cb->args[2] = e;
2499 	cb->args[1] = h;
2500 	cb->args[0] = t;
2501 
2502 	return skb->len;
2503 }
2504