xref: /openbmc/linux/net/ipv6/ip6mr.c (revision 80ecbd24)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <asm/uaccess.h>
20 #include <linux/types.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
23 #include <linux/timer.h>
24 #include <linux/mm.h>
25 #include <linux/kernel.h>
26 #include <linux/fcntl.h>
27 #include <linux/stat.h>
28 #include <linux/socket.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/inetdevice.h>
32 #include <linux/proc_fs.h>
33 #include <linux/seq_file.h>
34 #include <linux/init.h>
35 #include <linux/slab.h>
36 #include <linux/compat.h>
37 #include <net/protocol.h>
38 #include <linux/skbuff.h>
39 #include <net/sock.h>
40 #include <net/raw.h>
41 #include <linux/notifier.h>
42 #include <linux/if_arp.h>
43 #include <net/checksum.h>
44 #include <net/netlink.h>
45 #include <net/fib_rules.h>
46 
47 #include <net/ipv6.h>
48 #include <net/ip6_route.h>
49 #include <linux/mroute6.h>
50 #include <linux/pim.h>
51 #include <net/addrconf.h>
52 #include <linux/netfilter_ipv6.h>
53 #include <linux/export.h>
54 #include <net/ip6_checksum.h>
55 #include <linux/netconf.h>
56 
57 struct mr6_table {
58 	struct list_head	list;
59 #ifdef CONFIG_NET_NS
60 	struct net		*net;
61 #endif
62 	u32			id;
63 	struct sock		*mroute6_sk;
64 	struct timer_list	ipmr_expire_timer;
65 	struct list_head	mfc6_unres_queue;
66 	struct list_head	mfc6_cache_array[MFC6_LINES];
67 	struct mif_device	vif6_table[MAXMIFS];
68 	int			maxvif;
69 	atomic_t		cache_resolve_queue_len;
70 	bool			mroute_do_assert;
71 	bool			mroute_do_pim;
72 #ifdef CONFIG_IPV6_PIMSM_V2
73 	int			mroute_reg_vif_num;
74 #endif
75 };
76 
77 struct ip6mr_rule {
78 	struct fib_rule		common;
79 };
80 
/* Rule lookup result; ip6mr_rule_action() stores the selected table here. */
struct ip6mr_result {
	struct mr6_table	*mrt;
};
84 
85 /* Big lock, protecting vif table, mrt cache and mroute socket state.
86    Note that the changes are semaphored via rtnl_lock.
87  */
88 
89 static DEFINE_RWLOCK(mrt_lock);
90 
91 /*
92  *	Multicast router control variables
93  */
94 
95 #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
96 
97 /* Special spinlock for queue of unresolved entries */
98 static DEFINE_SPINLOCK(mfc_unres_lock);
99 
100 /* We return to original Alan's scheme. Hash table of resolved
101    entries is changed only in process context and protected
102    with weak lock mrt_lock. Queue of unresolved entries is protected
103    with strong spinlock mfc_unres_lock.
104 
105    In this case data path is free of exclusive locks at all.
106  */
107 
108 static struct kmem_cache *mrt_cachep __read_mostly;
109 
110 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
111 static void ip6mr_free_table(struct mr6_table *mrt);
112 
113 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
114 			  struct sk_buff *skb, struct mfc6_cache *cache);
115 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
116 			      mifi_t mifi, int assert);
117 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
118 			       struct mfc6_cache *c, struct rtmsg *rtm);
119 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
120 			      int cmd);
121 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
122 			       struct netlink_callback *cb);
123 static void mroute_clean_tables(struct mr6_table *mrt);
124 static void ipmr_expire_process(unsigned long arg);
125 
126 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/*
 * Iterate over every multicast routing table of @net.  The namespace
 * argument is parenthesized so that any pointer expression may be passed.
 */
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &(net)->ipv6.mr6_tables, list)
129 
130 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
131 {
132 	struct mr6_table *mrt;
133 
134 	ip6mr_for_each_table(mrt, net) {
135 		if (mrt->id == id)
136 			return mrt;
137 	}
138 	return NULL;
139 }
140 
141 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
142 			    struct mr6_table **mrt)
143 {
144 	struct ip6mr_result res;
145 	struct fib_lookup_arg arg = { .result = &res, };
146 	int err;
147 
148 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
149 			       flowi6_to_flowi(flp6), 0, &arg);
150 	if (err < 0)
151 		return err;
152 	*mrt = res.mrt;
153 	return 0;
154 }
155 
156 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
157 			     int flags, struct fib_lookup_arg *arg)
158 {
159 	struct ip6mr_result *res = arg->result;
160 	struct mr6_table *mrt;
161 
162 	switch (rule->action) {
163 	case FR_ACT_TO_TBL:
164 		break;
165 	case FR_ACT_UNREACHABLE:
166 		return -ENETUNREACH;
167 	case FR_ACT_PROHIBIT:
168 		return -EACCES;
169 	case FR_ACT_BLACKHOLE:
170 	default:
171 		return -EINVAL;
172 	}
173 
174 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
175 	if (mrt == NULL)
176 		return -EAGAIN;
177 	res->mrt = mrt;
178 	return 0;
179 }
180 
/* fib_rules match callback: every flow matches, so always returns 1. */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp,
			    int flags)
{
	const int matched = 1;

	return matched;
}
185 
186 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
187 	FRA_GENERIC_POLICY,
188 };
189 
/* fib_rules configure callback: nothing rule-specific to set up; returns 0. */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh,
				struct nlattr **tb)
{
	const int ok = 0;

	return ok;
}
195 
/* fib_rules compare callback: no private fields to compare; returns 1. */
static int ip6mr_rule_compare(struct fib_rule *rule,
			      struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	const int same = 1;

	return same;
}
201 
202 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
203 			   struct fib_rule_hdr *frh)
204 {
205 	frh->dst_len = 0;
206 	frh->src_len = 0;
207 	frh->tos     = 0;
208 	return 0;
209 }
210 
211 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
212 	.family		= RTNL_FAMILY_IP6MR,
213 	.rule_size	= sizeof(struct ip6mr_rule),
214 	.addr_size	= sizeof(struct in6_addr),
215 	.action		= ip6mr_rule_action,
216 	.match		= ip6mr_rule_match,
217 	.configure	= ip6mr_rule_configure,
218 	.compare	= ip6mr_rule_compare,
219 	.default_pref	= fib_default_rule_pref,
220 	.fill		= ip6mr_rule_fill,
221 	.nlgroup	= RTNLGRP_IPV6_RULE,
222 	.policy		= ip6mr_rule_policy,
223 	.owner		= THIS_MODULE,
224 };
225 
226 static int __net_init ip6mr_rules_init(struct net *net)
227 {
228 	struct fib_rules_ops *ops;
229 	struct mr6_table *mrt;
230 	int err;
231 
232 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
233 	if (IS_ERR(ops))
234 		return PTR_ERR(ops);
235 
236 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
237 
238 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
239 	if (mrt == NULL) {
240 		err = -ENOMEM;
241 		goto err1;
242 	}
243 
244 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
245 	if (err < 0)
246 		goto err2;
247 
248 	net->ipv6.mr6_rules_ops = ops;
249 	return 0;
250 
251 err2:
252 	kfree(mrt);
253 err1:
254 	fib_rules_unregister(ops);
255 	return err;
256 }
257 
258 static void __net_exit ip6mr_rules_exit(struct net *net)
259 {
260 	struct mr6_table *mrt, *next;
261 
262 	rtnl_lock();
263 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
264 		list_del(&mrt->list);
265 		ip6mr_free_table(mrt);
266 	}
267 	rtnl_unlock();
268 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
269 }
270 #else
/*
 * Single-table configuration: "iterate" over the one table of @net, running
 * the body at most once.  Both arguments are parenthesized so that any
 * lvalue / pointer expression may be passed.
 */
#define ip6mr_for_each_table(mrt, net) \
	for ((mrt) = (net)->ipv6.mrt6; (mrt); (mrt) = NULL)
273 
274 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
275 {
276 	return net->ipv6.mrt6;
277 }
278 
279 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
280 			    struct mr6_table **mrt)
281 {
282 	*mrt = net->ipv6.mrt6;
283 	return 0;
284 }
285 
286 static int __net_init ip6mr_rules_init(struct net *net)
287 {
288 	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
289 	return net->ipv6.mrt6 ? 0 : -ENOMEM;
290 }
291 
292 static void __net_exit ip6mr_rules_exit(struct net *net)
293 {
294 	rtnl_lock();
295 	ip6mr_free_table(net->ipv6.mrt6);
296 	net->ipv6.mrt6 = NULL;
297 	rtnl_unlock();
298 }
299 #endif
300 
301 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
302 {
303 	struct mr6_table *mrt;
304 	unsigned int i;
305 
306 	mrt = ip6mr_get_table(net, id);
307 	if (mrt != NULL)
308 		return mrt;
309 
310 	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
311 	if (mrt == NULL)
312 		return NULL;
313 	mrt->id = id;
314 	write_pnet(&mrt->net, net);
315 
316 	/* Forwarding cache */
317 	for (i = 0; i < MFC6_LINES; i++)
318 		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
319 
320 	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
321 
322 	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
323 		    (unsigned long)mrt);
324 
325 #ifdef CONFIG_IPV6_PIMSM_V2
326 	mrt->mroute_reg_vif_num = -1;
327 #endif
328 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
329 	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
330 #endif
331 	return mrt;
332 }
333 
334 static void ip6mr_free_table(struct mr6_table *mrt)
335 {
336 	del_timer(&mrt->ipmr_expire_timer);
337 	mroute_clean_tables(mrt);
338 	kfree(mrt);
339 }
340 
341 #ifdef CONFIG_PROC_FS
342 
343 struct ipmr_mfc_iter {
344 	struct seq_net_private p;
345 	struct mr6_table *mrt;
346 	struct list_head *cache;
347 	int ct;
348 };
349 
350 
351 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
352 					   struct ipmr_mfc_iter *it, loff_t pos)
353 {
354 	struct mr6_table *mrt = it->mrt;
355 	struct mfc6_cache *mfc;
356 
357 	read_lock(&mrt_lock);
358 	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
359 		it->cache = &mrt->mfc6_cache_array[it->ct];
360 		list_for_each_entry(mfc, it->cache, list)
361 			if (pos-- == 0)
362 				return mfc;
363 	}
364 	read_unlock(&mrt_lock);
365 
366 	spin_lock_bh(&mfc_unres_lock);
367 	it->cache = &mrt->mfc6_unres_queue;
368 	list_for_each_entry(mfc, it->cache, list)
369 		if (pos-- == 0)
370 			return mfc;
371 	spin_unlock_bh(&mfc_unres_lock);
372 
373 	it->cache = NULL;
374 	return NULL;
375 }
376 
/*
 *	The /proc interfaces to multicast routing:
 *	/proc/ip6_mr_cache and /proc/ip6_mr_vif
 */
380 
381 struct ipmr_vif_iter {
382 	struct seq_net_private p;
383 	struct mr6_table *mrt;
384 	int ct;
385 };
386 
387 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
388 					    struct ipmr_vif_iter *iter,
389 					    loff_t pos)
390 {
391 	struct mr6_table *mrt = iter->mrt;
392 
393 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
394 		if (!MIF_EXISTS(mrt, iter->ct))
395 			continue;
396 		if (pos-- == 0)
397 			return &mrt->vif6_table[iter->ct];
398 	}
399 	return NULL;
400 }
401 
402 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
403 	__acquires(mrt_lock)
404 {
405 	struct ipmr_vif_iter *iter = seq->private;
406 	struct net *net = seq_file_net(seq);
407 	struct mr6_table *mrt;
408 
409 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
410 	if (mrt == NULL)
411 		return ERR_PTR(-ENOENT);
412 
413 	iter->mrt = mrt;
414 
415 	read_lock(&mrt_lock);
416 	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
417 		: SEQ_START_TOKEN;
418 }
419 
420 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
421 {
422 	struct ipmr_vif_iter *iter = seq->private;
423 	struct net *net = seq_file_net(seq);
424 	struct mr6_table *mrt = iter->mrt;
425 
426 	++*pos;
427 	if (v == SEQ_START_TOKEN)
428 		return ip6mr_vif_seq_idx(net, iter, 0);
429 
430 	while (++iter->ct < mrt->maxvif) {
431 		if (!MIF_EXISTS(mrt, iter->ct))
432 			continue;
433 		return &mrt->vif6_table[iter->ct];
434 	}
435 	return NULL;
436 }
437 
438 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
439 	__releases(mrt_lock)
440 {
441 	read_unlock(&mrt_lock);
442 }
443 
444 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
445 {
446 	struct ipmr_vif_iter *iter = seq->private;
447 	struct mr6_table *mrt = iter->mrt;
448 
449 	if (v == SEQ_START_TOKEN) {
450 		seq_puts(seq,
451 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
452 	} else {
453 		const struct mif_device *vif = v;
454 		const char *name = vif->dev ? vif->dev->name : "none";
455 
456 		seq_printf(seq,
457 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
458 			   vif - mrt->vif6_table,
459 			   name, vif->bytes_in, vif->pkt_in,
460 			   vif->bytes_out, vif->pkt_out,
461 			   vif->flags);
462 	}
463 	return 0;
464 }
465 
466 static const struct seq_operations ip6mr_vif_seq_ops = {
467 	.start = ip6mr_vif_seq_start,
468 	.next  = ip6mr_vif_seq_next,
469 	.stop  = ip6mr_vif_seq_stop,
470 	.show  = ip6mr_vif_seq_show,
471 };
472 
473 static int ip6mr_vif_open(struct inode *inode, struct file *file)
474 {
475 	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
476 			    sizeof(struct ipmr_vif_iter));
477 }
478 
479 static const struct file_operations ip6mr_vif_fops = {
480 	.owner	 = THIS_MODULE,
481 	.open    = ip6mr_vif_open,
482 	.read    = seq_read,
483 	.llseek  = seq_lseek,
484 	.release = seq_release_net,
485 };
486 
487 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
488 {
489 	struct ipmr_mfc_iter *it = seq->private;
490 	struct net *net = seq_file_net(seq);
491 	struct mr6_table *mrt;
492 
493 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
494 	if (mrt == NULL)
495 		return ERR_PTR(-ENOENT);
496 
497 	it->mrt = mrt;
498 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
499 		: SEQ_START_TOKEN;
500 }
501 
502 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
503 {
504 	struct mfc6_cache *mfc = v;
505 	struct ipmr_mfc_iter *it = seq->private;
506 	struct net *net = seq_file_net(seq);
507 	struct mr6_table *mrt = it->mrt;
508 
509 	++*pos;
510 
511 	if (v == SEQ_START_TOKEN)
512 		return ipmr_mfc_seq_idx(net, seq->private, 0);
513 
514 	if (mfc->list.next != it->cache)
515 		return list_entry(mfc->list.next, struct mfc6_cache, list);
516 
517 	if (it->cache == &mrt->mfc6_unres_queue)
518 		goto end_of_list;
519 
520 	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
521 
522 	while (++it->ct < MFC6_LINES) {
523 		it->cache = &mrt->mfc6_cache_array[it->ct];
524 		if (list_empty(it->cache))
525 			continue;
526 		return list_first_entry(it->cache, struct mfc6_cache, list);
527 	}
528 
529 	/* exhausted cache_array, show unresolved */
530 	read_unlock(&mrt_lock);
531 	it->cache = &mrt->mfc6_unres_queue;
532 	it->ct = 0;
533 
534 	spin_lock_bh(&mfc_unres_lock);
535 	if (!list_empty(it->cache))
536 		return list_first_entry(it->cache, struct mfc6_cache, list);
537 
538  end_of_list:
539 	spin_unlock_bh(&mfc_unres_lock);
540 	it->cache = NULL;
541 
542 	return NULL;
543 }
544 
545 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
546 {
547 	struct ipmr_mfc_iter *it = seq->private;
548 	struct mr6_table *mrt = it->mrt;
549 
550 	if (it->cache == &mrt->mfc6_unres_queue)
551 		spin_unlock_bh(&mfc_unres_lock);
552 	else if (it->cache == mrt->mfc6_cache_array)
553 		read_unlock(&mrt_lock);
554 }
555 
556 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
557 {
558 	int n;
559 
560 	if (v == SEQ_START_TOKEN) {
561 		seq_puts(seq,
562 			 "Group                            "
563 			 "Origin                           "
564 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
565 	} else {
566 		const struct mfc6_cache *mfc = v;
567 		const struct ipmr_mfc_iter *it = seq->private;
568 		struct mr6_table *mrt = it->mrt;
569 
570 		seq_printf(seq, "%pI6 %pI6 %-3hd",
571 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
572 			   mfc->mf6c_parent);
573 
574 		if (it->cache != &mrt->mfc6_unres_queue) {
575 			seq_printf(seq, " %8lu %8lu %8lu",
576 				   mfc->mfc_un.res.pkt,
577 				   mfc->mfc_un.res.bytes,
578 				   mfc->mfc_un.res.wrong_if);
579 			for (n = mfc->mfc_un.res.minvif;
580 			     n < mfc->mfc_un.res.maxvif; n++) {
581 				if (MIF_EXISTS(mrt, n) &&
582 				    mfc->mfc_un.res.ttls[n] < 255)
583 					seq_printf(seq,
584 						   " %2d:%-3d",
585 						   n, mfc->mfc_un.res.ttls[n]);
586 			}
587 		} else {
588 			/* unresolved mfc_caches don't contain
589 			 * pkt, bytes and wrong_if values
590 			 */
591 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
592 		}
593 		seq_putc(seq, '\n');
594 	}
595 	return 0;
596 }
597 
598 static const struct seq_operations ipmr_mfc_seq_ops = {
599 	.start = ipmr_mfc_seq_start,
600 	.next  = ipmr_mfc_seq_next,
601 	.stop  = ipmr_mfc_seq_stop,
602 	.show  = ipmr_mfc_seq_show,
603 };
604 
605 static int ipmr_mfc_open(struct inode *inode, struct file *file)
606 {
607 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
608 			    sizeof(struct ipmr_mfc_iter));
609 }
610 
611 static const struct file_operations ip6mr_mfc_fops = {
612 	.owner	 = THIS_MODULE,
613 	.open    = ipmr_mfc_open,
614 	.read    = seq_read,
615 	.llseek  = seq_lseek,
616 	.release = seq_release_net,
617 };
618 #endif
619 
620 #ifdef CONFIG_IPV6_PIMSM_V2
621 
622 static int pim6_rcv(struct sk_buff *skb)
623 {
624 	struct pimreghdr *pim;
625 	struct ipv6hdr   *encap;
626 	struct net_device  *reg_dev = NULL;
627 	struct net *net = dev_net(skb->dev);
628 	struct mr6_table *mrt;
629 	struct flowi6 fl6 = {
630 		.flowi6_iif	= skb->dev->ifindex,
631 		.flowi6_mark	= skb->mark,
632 	};
633 	int reg_vif_num;
634 
635 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
636 		goto drop;
637 
638 	pim = (struct pimreghdr *)skb_transport_header(skb);
639 	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
640 	    (pim->flags & PIM_NULL_REGISTER) ||
641 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
642 			     sizeof(*pim), IPPROTO_PIM,
643 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
644 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
645 		goto drop;
646 
647 	/* check if the inner packet is destined to mcast group */
648 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
649 				   sizeof(*pim));
650 
651 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
652 	    encap->payload_len == 0 ||
653 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
654 		goto drop;
655 
656 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
657 		goto drop;
658 	reg_vif_num = mrt->mroute_reg_vif_num;
659 
660 	read_lock(&mrt_lock);
661 	if (reg_vif_num >= 0)
662 		reg_dev = mrt->vif6_table[reg_vif_num].dev;
663 	if (reg_dev)
664 		dev_hold(reg_dev);
665 	read_unlock(&mrt_lock);
666 
667 	if (reg_dev == NULL)
668 		goto drop;
669 
670 	skb->mac_header = skb->network_header;
671 	skb_pull(skb, (u8 *)encap - skb->data);
672 	skb_reset_network_header(skb);
673 	skb->protocol = htons(ETH_P_IPV6);
674 	skb->ip_summed = CHECKSUM_NONE;
675 	skb->pkt_type = PACKET_HOST;
676 
677 	skb_tunnel_rx(skb, reg_dev);
678 
679 	netif_rx(skb);
680 
681 	dev_put(reg_dev);
682 	return 0;
683  drop:
684 	kfree_skb(skb);
685 	return 0;
686 }
687 
688 static const struct inet6_protocol pim6_protocol = {
689 	.handler	=	pim6_rcv,
690 };
691 
692 /* Service routines creating virtual interfaces: PIMREG */
693 
694 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
695 				      struct net_device *dev)
696 {
697 	struct net *net = dev_net(dev);
698 	struct mr6_table *mrt;
699 	struct flowi6 fl6 = {
700 		.flowi6_oif	= dev->ifindex,
701 		.flowi6_iif	= skb->skb_iif,
702 		.flowi6_mark	= skb->mark,
703 	};
704 	int err;
705 
706 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
707 	if (err < 0) {
708 		kfree_skb(skb);
709 		return err;
710 	}
711 
712 	read_lock(&mrt_lock);
713 	dev->stats.tx_bytes += skb->len;
714 	dev->stats.tx_packets++;
715 	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
716 	read_unlock(&mrt_lock);
717 	kfree_skb(skb);
718 	return NETDEV_TX_OK;
719 }
720 
721 static const struct net_device_ops reg_vif_netdev_ops = {
722 	.ndo_start_xmit	= reg_vif_xmit,
723 };
724 
725 static void reg_vif_setup(struct net_device *dev)
726 {
727 	dev->type		= ARPHRD_PIMREG;
728 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
729 	dev->flags		= IFF_NOARP;
730 	dev->netdev_ops		= &reg_vif_netdev_ops;
731 	dev->destructor		= free_netdev;
732 	dev->features		|= NETIF_F_NETNS_LOCAL;
733 }
734 
735 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
736 {
737 	struct net_device *dev;
738 	char name[IFNAMSIZ];
739 
740 	if (mrt->id == RT6_TABLE_DFLT)
741 		sprintf(name, "pim6reg");
742 	else
743 		sprintf(name, "pim6reg%u", mrt->id);
744 
745 	dev = alloc_netdev(0, name, reg_vif_setup);
746 	if (dev == NULL)
747 		return NULL;
748 
749 	dev_net_set(dev, net);
750 
751 	if (register_netdevice(dev)) {
752 		free_netdev(dev);
753 		return NULL;
754 	}
755 	dev->iflink = 0;
756 
757 	if (dev_open(dev))
758 		goto failure;
759 
760 	dev_hold(dev);
761 	return dev;
762 
763 failure:
764 	/* allow the register to be completed before unregistering. */
765 	rtnl_unlock();
766 	rtnl_lock();
767 
768 	unregister_netdevice(dev);
769 	return NULL;
770 }
771 #endif
772 
773 /*
774  *	Delete a VIF entry
775  */
776 
777 static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
778 {
779 	struct mif_device *v;
780 	struct net_device *dev;
781 	struct inet6_dev *in6_dev;
782 
783 	if (vifi < 0 || vifi >= mrt->maxvif)
784 		return -EADDRNOTAVAIL;
785 
786 	v = &mrt->vif6_table[vifi];
787 
788 	write_lock_bh(&mrt_lock);
789 	dev = v->dev;
790 	v->dev = NULL;
791 
792 	if (!dev) {
793 		write_unlock_bh(&mrt_lock);
794 		return -EADDRNOTAVAIL;
795 	}
796 
797 #ifdef CONFIG_IPV6_PIMSM_V2
798 	if (vifi == mrt->mroute_reg_vif_num)
799 		mrt->mroute_reg_vif_num = -1;
800 #endif
801 
802 	if (vifi + 1 == mrt->maxvif) {
803 		int tmp;
804 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
805 			if (MIF_EXISTS(mrt, tmp))
806 				break;
807 		}
808 		mrt->maxvif = tmp + 1;
809 	}
810 
811 	write_unlock_bh(&mrt_lock);
812 
813 	dev_set_allmulti(dev, -1);
814 
815 	in6_dev = __in6_dev_get(dev);
816 	if (in6_dev) {
817 		in6_dev->cnf.mc_forwarding--;
818 		inet6_netconf_notify_devconf(dev_net(dev),
819 					     NETCONFA_MC_FORWARDING,
820 					     dev->ifindex, &in6_dev->cnf);
821 	}
822 
823 	if (v->flags & MIFF_REGISTER)
824 		unregister_netdevice_queue(dev, head);
825 
826 	dev_put(dev);
827 	return 0;
828 }
829 
830 static inline void ip6mr_cache_free(struct mfc6_cache *c)
831 {
832 	kmem_cache_free(mrt_cachep, c);
833 }
834 
835 /* Destroy an unresolved cache entry, killing queued skbs
836    and reporting error to netlink readers.
837  */
838 
839 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
840 {
841 	struct net *net = read_pnet(&mrt->net);
842 	struct sk_buff *skb;
843 
844 	atomic_dec(&mrt->cache_resolve_queue_len);
845 
846 	while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
847 		if (ipv6_hdr(skb)->version == 0) {
848 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
849 			nlh->nlmsg_type = NLMSG_ERROR;
850 			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
851 			skb_trim(skb, nlh->nlmsg_len);
852 			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
853 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
854 		} else
855 			kfree_skb(skb);
856 	}
857 
858 	ip6mr_cache_free(c);
859 }
860 
861 
862 /* Timer process for all the unresolved queue. */
863 
864 static void ipmr_do_expire_process(struct mr6_table *mrt)
865 {
866 	unsigned long now = jiffies;
867 	unsigned long expires = 10 * HZ;
868 	struct mfc6_cache *c, *next;
869 
870 	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
871 		if (time_after(c->mfc_un.unres.expires, now)) {
872 			/* not yet... */
873 			unsigned long interval = c->mfc_un.unres.expires - now;
874 			if (interval < expires)
875 				expires = interval;
876 			continue;
877 		}
878 
879 		list_del(&c->list);
880 		mr6_netlink_event(mrt, c, RTM_DELROUTE);
881 		ip6mr_destroy_unres(mrt, c);
882 	}
883 
884 	if (!list_empty(&mrt->mfc6_unres_queue))
885 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
886 }
887 
888 static void ipmr_expire_process(unsigned long arg)
889 {
890 	struct mr6_table *mrt = (struct mr6_table *)arg;
891 
892 	if (!spin_trylock(&mfc_unres_lock)) {
893 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
894 		return;
895 	}
896 
897 	if (!list_empty(&mrt->mfc6_unres_queue))
898 		ipmr_do_expire_process(mrt);
899 
900 	spin_unlock(&mfc_unres_lock);
901 }
902 
903 /* Fill oifs list. It is called under write locked mrt_lock. */
904 
905 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
906 				    unsigned char *ttls)
907 {
908 	int vifi;
909 
910 	cache->mfc_un.res.minvif = MAXMIFS;
911 	cache->mfc_un.res.maxvif = 0;
912 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
913 
914 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
915 		if (MIF_EXISTS(mrt, vifi) &&
916 		    ttls[vifi] && ttls[vifi] < 255) {
917 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
918 			if (cache->mfc_un.res.minvif > vifi)
919 				cache->mfc_un.res.minvif = vifi;
920 			if (cache->mfc_un.res.maxvif <= vifi)
921 				cache->mfc_un.res.maxvif = vifi + 1;
922 		}
923 	}
924 }
925 
926 static int mif6_add(struct net *net, struct mr6_table *mrt,
927 		    struct mif6ctl *vifc, int mrtsock)
928 {
929 	int vifi = vifc->mif6c_mifi;
930 	struct mif_device *v = &mrt->vif6_table[vifi];
931 	struct net_device *dev;
932 	struct inet6_dev *in6_dev;
933 	int err;
934 
935 	/* Is vif busy ? */
936 	if (MIF_EXISTS(mrt, vifi))
937 		return -EADDRINUSE;
938 
939 	switch (vifc->mif6c_flags) {
940 #ifdef CONFIG_IPV6_PIMSM_V2
941 	case MIFF_REGISTER:
942 		/*
943 		 * Special Purpose VIF in PIM
944 		 * All the packets will be sent to the daemon
945 		 */
946 		if (mrt->mroute_reg_vif_num >= 0)
947 			return -EADDRINUSE;
948 		dev = ip6mr_reg_vif(net, mrt);
949 		if (!dev)
950 			return -ENOBUFS;
951 		err = dev_set_allmulti(dev, 1);
952 		if (err) {
953 			unregister_netdevice(dev);
954 			dev_put(dev);
955 			return err;
956 		}
957 		break;
958 #endif
959 	case 0:
960 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
961 		if (!dev)
962 			return -EADDRNOTAVAIL;
963 		err = dev_set_allmulti(dev, 1);
964 		if (err) {
965 			dev_put(dev);
966 			return err;
967 		}
968 		break;
969 	default:
970 		return -EINVAL;
971 	}
972 
973 	in6_dev = __in6_dev_get(dev);
974 	if (in6_dev) {
975 		in6_dev->cnf.mc_forwarding++;
976 		inet6_netconf_notify_devconf(dev_net(dev),
977 					     NETCONFA_MC_FORWARDING,
978 					     dev->ifindex, &in6_dev->cnf);
979 	}
980 
981 	/*
982 	 *	Fill in the VIF structures
983 	 */
984 	v->rate_limit = vifc->vifc_rate_limit;
985 	v->flags = vifc->mif6c_flags;
986 	if (!mrtsock)
987 		v->flags |= VIFF_STATIC;
988 	v->threshold = vifc->vifc_threshold;
989 	v->bytes_in = 0;
990 	v->bytes_out = 0;
991 	v->pkt_in = 0;
992 	v->pkt_out = 0;
993 	v->link = dev->ifindex;
994 	if (v->flags & MIFF_REGISTER)
995 		v->link = dev->iflink;
996 
997 	/* And finish update writing critical data */
998 	write_lock_bh(&mrt_lock);
999 	v->dev = dev;
1000 #ifdef CONFIG_IPV6_PIMSM_V2
1001 	if (v->flags & MIFF_REGISTER)
1002 		mrt->mroute_reg_vif_num = vifi;
1003 #endif
1004 	if (vifi + 1 > mrt->maxvif)
1005 		mrt->maxvif = vifi + 1;
1006 	write_unlock_bh(&mrt_lock);
1007 	return 0;
1008 }
1009 
1010 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
1011 					   const struct in6_addr *origin,
1012 					   const struct in6_addr *mcastgrp)
1013 {
1014 	int line = MFC6_HASH(mcastgrp, origin);
1015 	struct mfc6_cache *c;
1016 
1017 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1018 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1019 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1020 			return c;
1021 	}
1022 	return NULL;
1023 }
1024 
1025 /* Look for a (*,*,oif) entry */
1026 static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
1027 						      mifi_t mifi)
1028 {
1029 	int line = MFC6_HASH(&in6addr_any, &in6addr_any);
1030 	struct mfc6_cache *c;
1031 
1032 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1033 		if (ipv6_addr_any(&c->mf6c_origin) &&
1034 		    ipv6_addr_any(&c->mf6c_mcastgrp) &&
1035 		    (c->mfc_un.res.ttls[mifi] < 255))
1036 			return c;
1037 
1038 	return NULL;
1039 }
1040 
1041 /* Look for a (*,G) entry */
1042 static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
1043 					       struct in6_addr *mcastgrp,
1044 					       mifi_t mifi)
1045 {
1046 	int line = MFC6_HASH(mcastgrp, &in6addr_any);
1047 	struct mfc6_cache *c, *proxy;
1048 
1049 	if (ipv6_addr_any(mcastgrp))
1050 		goto skip;
1051 
1052 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1053 		if (ipv6_addr_any(&c->mf6c_origin) &&
1054 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
1055 			if (c->mfc_un.res.ttls[mifi] < 255)
1056 				return c;
1057 
1058 			/* It's ok if the mifi is part of the static tree */
1059 			proxy = ip6mr_cache_find_any_parent(mrt,
1060 							    c->mf6c_parent);
1061 			if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
1062 				return c;
1063 		}
1064 
1065 skip:
1066 	return ip6mr_cache_find_any_parent(mrt, mifi);
1067 }
1068 
1069 /*
1070  *	Allocate a multicast cache entry
1071  */
1072 static struct mfc6_cache *ip6mr_cache_alloc(void)
1073 {
1074 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1075 	if (c == NULL)
1076 		return NULL;
1077 	c->mfc_un.res.minvif = MAXMIFS;
1078 	return c;
1079 }
1080 
1081 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1082 {
1083 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1084 	if (c == NULL)
1085 		return NULL;
1086 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
1087 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
1088 	return c;
1089 }
1090 
1091 /*
1092  *	A cache entry has gone into a resolved state from queued
1093  */
1094 
1095 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1096 				struct mfc6_cache *uc, struct mfc6_cache *c)
1097 {
1098 	struct sk_buff *skb;
1099 
1100 	/*
1101 	 *	Play the pending entries through our router
1102 	 */
1103 
1104 	while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1105 		if (ipv6_hdr(skb)->version == 0) {
1106 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1107 
1108 			if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
1109 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1110 			} else {
1111 				nlh->nlmsg_type = NLMSG_ERROR;
1112 				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1113 				skb_trim(skb, nlh->nlmsg_len);
1114 				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1115 			}
1116 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1117 		} else
1118 			ip6_mr_forward(net, mrt, skb, c);
1119 	}
1120 }
1121 
1122 /*
1123  *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1124  *	expects the following bizarre scheme.
1125  *
1126  *	Called under mrt_lock.
1127  */
1128 
1129 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1130 			      mifi_t mifi, int assert)
1131 {
1132 	struct sk_buff *skb;
1133 	struct mrt6msg *msg;
1134 	int ret;
1135 
1136 #ifdef CONFIG_IPV6_PIMSM_V2
1137 	if (assert == MRT6MSG_WHOLEPKT)
1138 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1139 						+sizeof(*msg));
1140 	else
1141 #endif
1142 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1143 
1144 	if (!skb)
1145 		return -ENOBUFS;
1146 
1147 	/* I suppose that internal messages
1148 	 * do not require checksums */
1149 
1150 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1151 
1152 #ifdef CONFIG_IPV6_PIMSM_V2
1153 	if (assert == MRT6MSG_WHOLEPKT) {
1154 		/* Ugly, but we have no choice with this interface.
1155 		   Duplicate old header, fix length etc.
1156 		   And all this only to mangle msg->im6_msgtype and
1157 		   to set msg->im6_mbz to "mbz" :-)
1158 		 */
1159 		skb_push(skb, -skb_network_offset(pkt));
1160 
1161 		skb_push(skb, sizeof(*msg));
1162 		skb_reset_transport_header(skb);
1163 		msg = (struct mrt6msg *)skb_transport_header(skb);
1164 		msg->im6_mbz = 0;
1165 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1166 		msg->im6_mif = mrt->mroute_reg_vif_num;
1167 		msg->im6_pad = 0;
1168 		msg->im6_src = ipv6_hdr(pkt)->saddr;
1169 		msg->im6_dst = ipv6_hdr(pkt)->daddr;
1170 
1171 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1172 	} else
1173 #endif
1174 	{
1175 	/*
1176 	 *	Copy the IP header
1177 	 */
1178 
1179 	skb_put(skb, sizeof(struct ipv6hdr));
1180 	skb_reset_network_header(skb);
1181 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1182 
1183 	/*
1184 	 *	Add our header
1185 	 */
1186 	skb_put(skb, sizeof(*msg));
1187 	skb_reset_transport_header(skb);
1188 	msg = (struct mrt6msg *)skb_transport_header(skb);
1189 
1190 	msg->im6_mbz = 0;
1191 	msg->im6_msgtype = assert;
1192 	msg->im6_mif = mifi;
1193 	msg->im6_pad = 0;
1194 	msg->im6_src = ipv6_hdr(pkt)->saddr;
1195 	msg->im6_dst = ipv6_hdr(pkt)->daddr;
1196 
1197 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1198 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1199 	}
1200 
1201 	if (mrt->mroute6_sk == NULL) {
1202 		kfree_skb(skb);
1203 		return -EINVAL;
1204 	}
1205 
1206 	/*
1207 	 *	Deliver to user space multicast routing algorithms
1208 	 */
1209 	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1210 	if (ret < 0) {
1211 		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1212 		kfree_skb(skb);
1213 	}
1214 
1215 	return ret;
1216 }
1217 
1218 /*
1219  *	Queue a packet for resolution. It gets locked cache entry!
1220  */
1221 
1222 static int
1223 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1224 {
1225 	bool found = false;
1226 	int err;
1227 	struct mfc6_cache *c;
1228 
1229 	spin_lock_bh(&mfc_unres_lock);
1230 	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1231 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1232 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1233 			found = true;
1234 			break;
1235 		}
1236 	}
1237 
1238 	if (!found) {
1239 		/*
1240 		 *	Create a new entry if allowable
1241 		 */
1242 
1243 		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1244 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
1245 			spin_unlock_bh(&mfc_unres_lock);
1246 
1247 			kfree_skb(skb);
1248 			return -ENOBUFS;
1249 		}
1250 
1251 		/*
1252 		 *	Fill in the new cache entry
1253 		 */
1254 		c->mf6c_parent = -1;
1255 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1256 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1257 
1258 		/*
1259 		 *	Reflect first query at pim6sd
1260 		 */
1261 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1262 		if (err < 0) {
1263 			/* If the report failed throw the cache entry
1264 			   out - Brad Parker
1265 			 */
1266 			spin_unlock_bh(&mfc_unres_lock);
1267 
1268 			ip6mr_cache_free(c);
1269 			kfree_skb(skb);
1270 			return err;
1271 		}
1272 
1273 		atomic_inc(&mrt->cache_resolve_queue_len);
1274 		list_add(&c->list, &mrt->mfc6_unres_queue);
1275 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1276 
1277 		ipmr_do_expire_process(mrt);
1278 	}
1279 
1280 	/*
1281 	 *	See if we can append the packet
1282 	 */
1283 	if (c->mfc_un.unres.unresolved.qlen > 3) {
1284 		kfree_skb(skb);
1285 		err = -ENOBUFS;
1286 	} else {
1287 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1288 		err = 0;
1289 	}
1290 
1291 	spin_unlock_bh(&mfc_unres_lock);
1292 	return err;
1293 }
1294 
1295 /*
1296  *	MFC6 cache manipulation by user space
1297  */
1298 
1299 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
1300 			    int parent)
1301 {
1302 	int line;
1303 	struct mfc6_cache *c, *next;
1304 
1305 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1306 
1307 	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1308 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1309 		    ipv6_addr_equal(&c->mf6c_mcastgrp,
1310 				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1311 		    (parent == -1 || parent == c->mf6c_parent)) {
1312 			write_lock_bh(&mrt_lock);
1313 			list_del(&c->list);
1314 			write_unlock_bh(&mrt_lock);
1315 
1316 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1317 			ip6mr_cache_free(c);
1318 			return 0;
1319 		}
1320 	}
1321 	return -ENOENT;
1322 }
1323 
1324 static int ip6mr_device_event(struct notifier_block *this,
1325 			      unsigned long event, void *ptr)
1326 {
1327 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1328 	struct net *net = dev_net(dev);
1329 	struct mr6_table *mrt;
1330 	struct mif_device *v;
1331 	int ct;
1332 	LIST_HEAD(list);
1333 
1334 	if (event != NETDEV_UNREGISTER)
1335 		return NOTIFY_DONE;
1336 
1337 	ip6mr_for_each_table(mrt, net) {
1338 		v = &mrt->vif6_table[0];
1339 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1340 			if (v->dev == dev)
1341 				mif6_delete(mrt, ct, &list);
1342 		}
1343 	}
1344 	unregister_netdevice_many(&list);
1345 
1346 	return NOTIFY_DONE;
1347 }
1348 
1349 static struct notifier_block ip6_mr_notifier = {
1350 	.notifier_call = ip6mr_device_event
1351 };
1352 
1353 /*
1354  *	Setup for IP multicast routing
1355  */
1356 
1357 static int __net_init ip6mr_net_init(struct net *net)
1358 {
1359 	int err;
1360 
1361 	err = ip6mr_rules_init(net);
1362 	if (err < 0)
1363 		goto fail;
1364 
1365 #ifdef CONFIG_PROC_FS
1366 	err = -ENOMEM;
1367 	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
1368 		goto proc_vif_fail;
1369 	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
1370 		goto proc_cache_fail;
1371 #endif
1372 
1373 	return 0;
1374 
1375 #ifdef CONFIG_PROC_FS
1376 proc_cache_fail:
1377 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1378 proc_vif_fail:
1379 	ip6mr_rules_exit(net);
1380 #endif
1381 fail:
1382 	return err;
1383 }
1384 
1385 static void __net_exit ip6mr_net_exit(struct net *net)
1386 {
1387 #ifdef CONFIG_PROC_FS
1388 	remove_proc_entry("ip6_mr_cache", net->proc_net);
1389 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1390 #endif
1391 	ip6mr_rules_exit(net);
1392 }
1393 
1394 static struct pernet_operations ip6mr_net_ops = {
1395 	.init = ip6mr_net_init,
1396 	.exit = ip6mr_net_exit,
1397 };
1398 
1399 int __init ip6_mr_init(void)
1400 {
1401 	int err;
1402 
1403 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1404 				       sizeof(struct mfc6_cache),
1405 				       0, SLAB_HWCACHE_ALIGN,
1406 				       NULL);
1407 	if (!mrt_cachep)
1408 		return -ENOMEM;
1409 
1410 	err = register_pernet_subsys(&ip6mr_net_ops);
1411 	if (err)
1412 		goto reg_pernet_fail;
1413 
1414 	err = register_netdevice_notifier(&ip6_mr_notifier);
1415 	if (err)
1416 		goto reg_notif_fail;
1417 #ifdef CONFIG_IPV6_PIMSM_V2
1418 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1419 		pr_err("%s: can't add PIM protocol\n", __func__);
1420 		err = -EAGAIN;
1421 		goto add_proto_fail;
1422 	}
1423 #endif
1424 	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
1425 		      ip6mr_rtm_dumproute, NULL);
1426 	return 0;
1427 #ifdef CONFIG_IPV6_PIMSM_V2
1428 add_proto_fail:
1429 	unregister_netdevice_notifier(&ip6_mr_notifier);
1430 #endif
1431 reg_notif_fail:
1432 	unregister_pernet_subsys(&ip6mr_net_ops);
1433 reg_pernet_fail:
1434 	kmem_cache_destroy(mrt_cachep);
1435 	return err;
1436 }
1437 
1438 void ip6_mr_cleanup(void)
1439 {
1440 	unregister_netdevice_notifier(&ip6_mr_notifier);
1441 	unregister_pernet_subsys(&ip6mr_net_ops);
1442 	kmem_cache_destroy(mrt_cachep);
1443 }
1444 
1445 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1446 			 struct mf6cctl *mfc, int mrtsock, int parent)
1447 {
1448 	bool found = false;
1449 	int line;
1450 	struct mfc6_cache *uc, *c;
1451 	unsigned char ttls[MAXMIFS];
1452 	int i;
1453 
1454 	if (mfc->mf6cc_parent >= MAXMIFS)
1455 		return -ENFILE;
1456 
1457 	memset(ttls, 255, MAXMIFS);
1458 	for (i = 0; i < MAXMIFS; i++) {
1459 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1460 			ttls[i] = 1;
1461 
1462 	}
1463 
1464 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1465 
1466 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1467 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1468 		    ipv6_addr_equal(&c->mf6c_mcastgrp,
1469 				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1470 		    (parent == -1 || parent == mfc->mf6cc_parent)) {
1471 			found = true;
1472 			break;
1473 		}
1474 	}
1475 
1476 	if (found) {
1477 		write_lock_bh(&mrt_lock);
1478 		c->mf6c_parent = mfc->mf6cc_parent;
1479 		ip6mr_update_thresholds(mrt, c, ttls);
1480 		if (!mrtsock)
1481 			c->mfc_flags |= MFC_STATIC;
1482 		write_unlock_bh(&mrt_lock);
1483 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1484 		return 0;
1485 	}
1486 
1487 	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1488 	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1489 		return -EINVAL;
1490 
1491 	c = ip6mr_cache_alloc();
1492 	if (c == NULL)
1493 		return -ENOMEM;
1494 
1495 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1496 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1497 	c->mf6c_parent = mfc->mf6cc_parent;
1498 	ip6mr_update_thresholds(mrt, c, ttls);
1499 	if (!mrtsock)
1500 		c->mfc_flags |= MFC_STATIC;
1501 
1502 	write_lock_bh(&mrt_lock);
1503 	list_add(&c->list, &mrt->mfc6_cache_array[line]);
1504 	write_unlock_bh(&mrt_lock);
1505 
1506 	/*
1507 	 *	Check to see if we resolved a queued list. If so we
1508 	 *	need to send on the frames and tidy up.
1509 	 */
1510 	found = false;
1511 	spin_lock_bh(&mfc_unres_lock);
1512 	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1513 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1514 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1515 			list_del(&uc->list);
1516 			atomic_dec(&mrt->cache_resolve_queue_len);
1517 			found = true;
1518 			break;
1519 		}
1520 	}
1521 	if (list_empty(&mrt->mfc6_unres_queue))
1522 		del_timer(&mrt->ipmr_expire_timer);
1523 	spin_unlock_bh(&mfc_unres_lock);
1524 
1525 	if (found) {
1526 		ip6mr_cache_resolve(net, mrt, uc, c);
1527 		ip6mr_cache_free(uc);
1528 	}
1529 	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1530 	return 0;
1531 }
1532 
1533 /*
1534  *	Close the multicast socket, and clear the vif tables etc
1535  */
1536 
1537 static void mroute_clean_tables(struct mr6_table *mrt)
1538 {
1539 	int i;
1540 	LIST_HEAD(list);
1541 	struct mfc6_cache *c, *next;
1542 
1543 	/*
1544 	 *	Shut down all active vif entries
1545 	 */
1546 	for (i = 0; i < mrt->maxvif; i++) {
1547 		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1548 			mif6_delete(mrt, i, &list);
1549 	}
1550 	unregister_netdevice_many(&list);
1551 
1552 	/*
1553 	 *	Wipe the cache
1554 	 */
1555 	for (i = 0; i < MFC6_LINES; i++) {
1556 		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1557 			if (c->mfc_flags & MFC_STATIC)
1558 				continue;
1559 			write_lock_bh(&mrt_lock);
1560 			list_del(&c->list);
1561 			write_unlock_bh(&mrt_lock);
1562 
1563 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1564 			ip6mr_cache_free(c);
1565 		}
1566 	}
1567 
1568 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1569 		spin_lock_bh(&mfc_unres_lock);
1570 		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1571 			list_del(&c->list);
1572 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1573 			ip6mr_destroy_unres(mrt, c);
1574 		}
1575 		spin_unlock_bh(&mfc_unres_lock);
1576 	}
1577 }
1578 
1579 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1580 {
1581 	int err = 0;
1582 	struct net *net = sock_net(sk);
1583 
1584 	rtnl_lock();
1585 	write_lock_bh(&mrt_lock);
1586 	if (likely(mrt->mroute6_sk == NULL)) {
1587 		mrt->mroute6_sk = sk;
1588 		net->ipv6.devconf_all->mc_forwarding++;
1589 		inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
1590 					     NETCONFA_IFINDEX_ALL,
1591 					     net->ipv6.devconf_all);
1592 	}
1593 	else
1594 		err = -EADDRINUSE;
1595 	write_unlock_bh(&mrt_lock);
1596 
1597 	rtnl_unlock();
1598 
1599 	return err;
1600 }
1601 
1602 int ip6mr_sk_done(struct sock *sk)
1603 {
1604 	int err = -EACCES;
1605 	struct net *net = sock_net(sk);
1606 	struct mr6_table *mrt;
1607 
1608 	rtnl_lock();
1609 	ip6mr_for_each_table(mrt, net) {
1610 		if (sk == mrt->mroute6_sk) {
1611 			write_lock_bh(&mrt_lock);
1612 			mrt->mroute6_sk = NULL;
1613 			net->ipv6.devconf_all->mc_forwarding--;
1614 			inet6_netconf_notify_devconf(net,
1615 						     NETCONFA_MC_FORWARDING,
1616 						     NETCONFA_IFINDEX_ALL,
1617 						     net->ipv6.devconf_all);
1618 			write_unlock_bh(&mrt_lock);
1619 
1620 			mroute_clean_tables(mrt);
1621 			err = 0;
1622 			break;
1623 		}
1624 	}
1625 	rtnl_unlock();
1626 
1627 	return err;
1628 }
1629 
1630 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1631 {
1632 	struct mr6_table *mrt;
1633 	struct flowi6 fl6 = {
1634 		.flowi6_iif	= skb->skb_iif,
1635 		.flowi6_oif	= skb->dev->ifindex,
1636 		.flowi6_mark	= skb->mark,
1637 	};
1638 
1639 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1640 		return NULL;
1641 
1642 	return mrt->mroute6_sk;
1643 }
1644 
1645 /*
1646  *	Socket options and virtual interface manipulation. The whole
1647  *	virtual interface system is a complete heap, but unfortunately
1648  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1649  *	MOSPF/PIM router set up we can clean this up.
1650  */
1651 
1652 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1653 {
1654 	int ret, parent = 0;
1655 	struct mif6ctl vif;
1656 	struct mf6cctl mfc;
1657 	mifi_t mifi;
1658 	struct net *net = sock_net(sk);
1659 	struct mr6_table *mrt;
1660 
1661 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1662 	if (mrt == NULL)
1663 		return -ENOENT;
1664 
1665 	if (optname != MRT6_INIT) {
1666 		if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
1667 			return -EACCES;
1668 	}
1669 
1670 	switch (optname) {
1671 	case MRT6_INIT:
1672 		if (sk->sk_type != SOCK_RAW ||
1673 		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1674 			return -EOPNOTSUPP;
1675 		if (optlen < sizeof(int))
1676 			return -EINVAL;
1677 
1678 		return ip6mr_sk_init(mrt, sk);
1679 
1680 	case MRT6_DONE:
1681 		return ip6mr_sk_done(sk);
1682 
1683 	case MRT6_ADD_MIF:
1684 		if (optlen < sizeof(vif))
1685 			return -EINVAL;
1686 		if (copy_from_user(&vif, optval, sizeof(vif)))
1687 			return -EFAULT;
1688 		if (vif.mif6c_mifi >= MAXMIFS)
1689 			return -ENFILE;
1690 		rtnl_lock();
1691 		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1692 		rtnl_unlock();
1693 		return ret;
1694 
1695 	case MRT6_DEL_MIF:
1696 		if (optlen < sizeof(mifi_t))
1697 			return -EINVAL;
1698 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1699 			return -EFAULT;
1700 		rtnl_lock();
1701 		ret = mif6_delete(mrt, mifi, NULL);
1702 		rtnl_unlock();
1703 		return ret;
1704 
1705 	/*
1706 	 *	Manipulate the forwarding caches. These live
1707 	 *	in a sort of kernel/user symbiosis.
1708 	 */
1709 	case MRT6_ADD_MFC:
1710 	case MRT6_DEL_MFC:
1711 		parent = -1;
1712 	case MRT6_ADD_MFC_PROXY:
1713 	case MRT6_DEL_MFC_PROXY:
1714 		if (optlen < sizeof(mfc))
1715 			return -EINVAL;
1716 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1717 			return -EFAULT;
1718 		if (parent == 0)
1719 			parent = mfc.mf6cc_parent;
1720 		rtnl_lock();
1721 		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1722 			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1723 		else
1724 			ret = ip6mr_mfc_add(net, mrt, &mfc,
1725 					    sk == mrt->mroute6_sk, parent);
1726 		rtnl_unlock();
1727 		return ret;
1728 
1729 	/*
1730 	 *	Control PIM assert (to activate pim will activate assert)
1731 	 */
1732 	case MRT6_ASSERT:
1733 	{
1734 		int v;
1735 
1736 		if (optlen != sizeof(v))
1737 			return -EINVAL;
1738 		if (get_user(v, (int __user *)optval))
1739 			return -EFAULT;
1740 		mrt->mroute_do_assert = v;
1741 		return 0;
1742 	}
1743 
1744 #ifdef CONFIG_IPV6_PIMSM_V2
1745 	case MRT6_PIM:
1746 	{
1747 		int v;
1748 
1749 		if (optlen != sizeof(v))
1750 			return -EINVAL;
1751 		if (get_user(v, (int __user *)optval))
1752 			return -EFAULT;
1753 		v = !!v;
1754 		rtnl_lock();
1755 		ret = 0;
1756 		if (v != mrt->mroute_do_pim) {
1757 			mrt->mroute_do_pim = v;
1758 			mrt->mroute_do_assert = v;
1759 		}
1760 		rtnl_unlock();
1761 		return ret;
1762 	}
1763 
1764 #endif
1765 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1766 	case MRT6_TABLE:
1767 	{
1768 		u32 v;
1769 
1770 		if (optlen != sizeof(u32))
1771 			return -EINVAL;
1772 		if (get_user(v, (u32 __user *)optval))
1773 			return -EFAULT;
1774 		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1775 		if (v != RT_TABLE_DEFAULT && v >= 100000000)
1776 			return -EINVAL;
1777 		if (sk == mrt->mroute6_sk)
1778 			return -EBUSY;
1779 
1780 		rtnl_lock();
1781 		ret = 0;
1782 		if (!ip6mr_new_table(net, v))
1783 			ret = -ENOMEM;
1784 		raw6_sk(sk)->ip6mr_table = v;
1785 		rtnl_unlock();
1786 		return ret;
1787 	}
1788 #endif
1789 	/*
1790 	 *	Spurious command, or MRT6_VERSION which you cannot
1791 	 *	set.
1792 	 */
1793 	default:
1794 		return -ENOPROTOOPT;
1795 	}
1796 }
1797 
1798 /*
1799  *	Getsock opt support for the multicast routing system.
1800  */
1801 
1802 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1803 			  int __user *optlen)
1804 {
1805 	int olr;
1806 	int val;
1807 	struct net *net = sock_net(sk);
1808 	struct mr6_table *mrt;
1809 
1810 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1811 	if (mrt == NULL)
1812 		return -ENOENT;
1813 
1814 	switch (optname) {
1815 	case MRT6_VERSION:
1816 		val = 0x0305;
1817 		break;
1818 #ifdef CONFIG_IPV6_PIMSM_V2
1819 	case MRT6_PIM:
1820 		val = mrt->mroute_do_pim;
1821 		break;
1822 #endif
1823 	case MRT6_ASSERT:
1824 		val = mrt->mroute_do_assert;
1825 		break;
1826 	default:
1827 		return -ENOPROTOOPT;
1828 	}
1829 
1830 	if (get_user(olr, optlen))
1831 		return -EFAULT;
1832 
1833 	olr = min_t(int, olr, sizeof(int));
1834 	if (olr < 0)
1835 		return -EINVAL;
1836 
1837 	if (put_user(olr, optlen))
1838 		return -EFAULT;
1839 	if (copy_to_user(optval, &val, olr))
1840 		return -EFAULT;
1841 	return 0;
1842 }
1843 
1844 /*
1845  *	The IP multicast ioctl support routines.
1846  */
1847 
1848 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1849 {
1850 	struct sioc_sg_req6 sr;
1851 	struct sioc_mif_req6 vr;
1852 	struct mif_device *vif;
1853 	struct mfc6_cache *c;
1854 	struct net *net = sock_net(sk);
1855 	struct mr6_table *mrt;
1856 
1857 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1858 	if (mrt == NULL)
1859 		return -ENOENT;
1860 
1861 	switch (cmd) {
1862 	case SIOCGETMIFCNT_IN6:
1863 		if (copy_from_user(&vr, arg, sizeof(vr)))
1864 			return -EFAULT;
1865 		if (vr.mifi >= mrt->maxvif)
1866 			return -EINVAL;
1867 		read_lock(&mrt_lock);
1868 		vif = &mrt->vif6_table[vr.mifi];
1869 		if (MIF_EXISTS(mrt, vr.mifi)) {
1870 			vr.icount = vif->pkt_in;
1871 			vr.ocount = vif->pkt_out;
1872 			vr.ibytes = vif->bytes_in;
1873 			vr.obytes = vif->bytes_out;
1874 			read_unlock(&mrt_lock);
1875 
1876 			if (copy_to_user(arg, &vr, sizeof(vr)))
1877 				return -EFAULT;
1878 			return 0;
1879 		}
1880 		read_unlock(&mrt_lock);
1881 		return -EADDRNOTAVAIL;
1882 	case SIOCGETSGCNT_IN6:
1883 		if (copy_from_user(&sr, arg, sizeof(sr)))
1884 			return -EFAULT;
1885 
1886 		read_lock(&mrt_lock);
1887 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1888 		if (c) {
1889 			sr.pktcnt = c->mfc_un.res.pkt;
1890 			sr.bytecnt = c->mfc_un.res.bytes;
1891 			sr.wrong_if = c->mfc_un.res.wrong_if;
1892 			read_unlock(&mrt_lock);
1893 
1894 			if (copy_to_user(arg, &sr, sizeof(sr)))
1895 				return -EFAULT;
1896 			return 0;
1897 		}
1898 		read_unlock(&mrt_lock);
1899 		return -EADDRNOTAVAIL;
1900 	default:
1901 		return -ENOIOCTLCMD;
1902 	}
1903 }
1904 
#ifdef CONFIG_COMPAT
/* Compat layout of the SIOCGETSGCNT_IN6 request (compat_ulong_t counters). */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

/* Compat layout of the SIOCGETMIFCNT_IN6 request (compat_ulong_t counters). */
struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

/*
 *	Compat counterpart of ip6mr_ioctl(): same commands and return
 *	values, but the request structures use compat_ulong_t counters.
 */
int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct net *net = sock_net(sk);
	struct compat_sioc_mif_req6 vr;
	struct compat_sioc_sg_req6 sr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;

		read_lock(&mrt_lock);
		if (!MIF_EXISTS(mrt, vr.mifi)) {
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		}
		vif = &mrt->vif6_table[vr.mifi];
		vr.icount = vif->pkt_in;
		vr.ocount = vif->pkt_out;
		vr.ibytes = vif->bytes_in;
		vr.obytes = vif->bytes_out;
		read_unlock(&mrt_lock);

		return copy_to_user(arg, &vr, sizeof(vr)) ? -EFAULT : 0;

	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (!c) {
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		}
		sr.pktcnt = c->mfc_un.res.pkt;
		sr.bytecnt = c->mfc_un.res.bytes;
		sr.wrong_if = c->mfc_un.res.wrong_if;
		read_unlock(&mrt_lock);

		return copy_to_user(arg, &sr, sizeof(sr)) ? -EFAULT : 0;

	default:
		return -ENOIOCTLCMD;
	}
}
#endif
1979 
1980 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1981 {
1982 	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1983 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1984 	IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1985 			 IPSTATS_MIB_OUTOCTETS, skb->len);
1986 	return dst_output(skb);
1987 }
1988 
1989 /*
1990  *	Processing handlers for ip6mr_forward
1991  */
1992 
1993 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1994 			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1995 {
1996 	struct ipv6hdr *ipv6h;
1997 	struct mif_device *vif = &mrt->vif6_table[vifi];
1998 	struct net_device *dev;
1999 	struct dst_entry *dst;
2000 	struct flowi6 fl6;
2001 
2002 	if (vif->dev == NULL)
2003 		goto out_free;
2004 
2005 #ifdef CONFIG_IPV6_PIMSM_V2
2006 	if (vif->flags & MIFF_REGISTER) {
2007 		vif->pkt_out++;
2008 		vif->bytes_out += skb->len;
2009 		vif->dev->stats.tx_bytes += skb->len;
2010 		vif->dev->stats.tx_packets++;
2011 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2012 		goto out_free;
2013 	}
2014 #endif
2015 
2016 	ipv6h = ipv6_hdr(skb);
2017 
2018 	fl6 = (struct flowi6) {
2019 		.flowi6_oif = vif->link,
2020 		.daddr = ipv6h->daddr,
2021 	};
2022 
2023 	dst = ip6_route_output(net, NULL, &fl6);
2024 	if (dst->error) {
2025 		dst_release(dst);
2026 		goto out_free;
2027 	}
2028 
2029 	skb_dst_drop(skb);
2030 	skb_dst_set(skb, dst);
2031 
2032 	/*
2033 	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2034 	 * not only before forwarding, but after forwarding on all output
2035 	 * interfaces. It is clear, if mrouter runs a multicasting
2036 	 * program, it should receive packets not depending to what interface
2037 	 * program is joined.
2038 	 * If we will not make it, the program will have to join on all
2039 	 * interfaces. On the other hand, multihoming host (or router, but
2040 	 * not mrouter) cannot join to more than one interface - it will
2041 	 * result in receiving multiple packets.
2042 	 */
2043 	dev = vif->dev;
2044 	skb->dev = dev;
2045 	vif->pkt_out++;
2046 	vif->bytes_out += skb->len;
2047 
2048 	/* We are about to write */
2049 	/* XXX: extension headers? */
2050 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2051 		goto out_free;
2052 
2053 	ipv6h = ipv6_hdr(skb);
2054 	ipv6h->hop_limit--;
2055 
2056 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2057 
2058 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
2059 		       ip6mr_forward2_finish);
2060 
2061 out_free:
2062 	kfree_skb(skb);
2063 	return 0;
2064 }
2065 
2066 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2067 {
2068 	int ct;
2069 
2070 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2071 		if (mrt->vif6_table[ct].dev == dev)
2072 			break;
2073 	}
2074 	return ct;
2075 }
2076 
2077 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
2078 			  struct sk_buff *skb, struct mfc6_cache *cache)
2079 {
2080 	int psend = -1;
2081 	int vif, ct;
2082 	int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2083 
2084 	vif = cache->mf6c_parent;
2085 	cache->mfc_un.res.pkt++;
2086 	cache->mfc_un.res.bytes += skb->len;
2087 
2088 	if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
2089 		struct mfc6_cache *cache_proxy;
2090 
2091 		/* For an (*,G) entry, we only check that the incomming
2092 		 * interface is part of the static tree.
2093 		 */
2094 		cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
2095 		if (cache_proxy &&
2096 		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
2097 			goto forward;
2098 	}
2099 
2100 	/*
2101 	 * Wrong interface: drop packet and (maybe) send PIM assert.
2102 	 */
2103 	if (mrt->vif6_table[vif].dev != skb->dev) {
2104 		cache->mfc_un.res.wrong_if++;
2105 
2106 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2107 		    /* pimsm uses asserts, when switching from RPT to SPT,
2108 		       so that we cannot check that packet arrived on an oif.
2109 		       It is bad, but otherwise we would need to move pretty
2110 		       large chunk of pimd to kernel. Ough... --ANK
2111 		     */
2112 		    (mrt->mroute_do_pim ||
2113 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
2114 		    time_after(jiffies,
2115 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2116 			cache->mfc_un.res.last_assert = jiffies;
2117 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2118 		}
2119 		goto dont_forward;
2120 	}
2121 
2122 forward:
2123 	mrt->vif6_table[vif].pkt_in++;
2124 	mrt->vif6_table[vif].bytes_in += skb->len;
2125 
2126 	/*
2127 	 *	Forward the frame
2128 	 */
2129 	if (ipv6_addr_any(&cache->mf6c_origin) &&
2130 	    ipv6_addr_any(&cache->mf6c_mcastgrp)) {
2131 		if (true_vifi >= 0 &&
2132 		    true_vifi != cache->mf6c_parent &&
2133 		    ipv6_hdr(skb)->hop_limit >
2134 				cache->mfc_un.res.ttls[cache->mf6c_parent]) {
2135 			/* It's an (*,*) entry and the packet is not coming from
2136 			 * the upstream: forward the packet to the upstream
2137 			 * only.
2138 			 */
2139 			psend = cache->mf6c_parent;
2140 			goto last_forward;
2141 		}
2142 		goto dont_forward;
2143 	}
2144 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2145 		/* For (*,G) entry, don't forward to the incoming interface */
2146 		if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
2147 		    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2148 			if (psend != -1) {
2149 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2150 				if (skb2)
2151 					ip6mr_forward2(net, mrt, skb2, cache, psend);
2152 			}
2153 			psend = ct;
2154 		}
2155 	}
2156 last_forward:
2157 	if (psend != -1) {
2158 		ip6mr_forward2(net, mrt, skb, cache, psend);
2159 		return 0;
2160 	}
2161 
2162 dont_forward:
2163 	kfree_skb(skb);
2164 	return 0;
2165 }
2166 
2167 
2168 /*
2169  *	Multicast packets for forwarding arrive here
2170  */
2171 
2172 int ip6_mr_input(struct sk_buff *skb)
2173 {
2174 	struct mfc6_cache *cache;
2175 	struct net *net = dev_net(skb->dev);
2176 	struct mr6_table *mrt;
2177 	struct flowi6 fl6 = {
2178 		.flowi6_iif	= skb->dev->ifindex,
2179 		.flowi6_mark	= skb->mark,
2180 	};
2181 	int err;
2182 
2183 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2184 	if (err < 0) {
2185 		kfree_skb(skb);
2186 		return err;
2187 	}
2188 
2189 	read_lock(&mrt_lock);
2190 	cache = ip6mr_cache_find(mrt,
2191 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2192 	if (cache == NULL) {
2193 		int vif = ip6mr_find_vif(mrt, skb->dev);
2194 
2195 		if (vif >= 0)
2196 			cache = ip6mr_cache_find_any(mrt,
2197 						     &ipv6_hdr(skb)->daddr,
2198 						     vif);
2199 	}
2200 
2201 	/*
2202 	 *	No usable cache entry
2203 	 */
2204 	if (cache == NULL) {
2205 		int vif;
2206 
2207 		vif = ip6mr_find_vif(mrt, skb->dev);
2208 		if (vif >= 0) {
2209 			int err = ip6mr_cache_unresolved(mrt, vif, skb);
2210 			read_unlock(&mrt_lock);
2211 
2212 			return err;
2213 		}
2214 		read_unlock(&mrt_lock);
2215 		kfree_skb(skb);
2216 		return -ENODEV;
2217 	}
2218 
2219 	ip6_mr_forward(net, mrt, skb, cache);
2220 
2221 	read_unlock(&mrt_lock);
2222 
2223 	return 0;
2224 }
2225 
2226 
2227 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2228 			       struct mfc6_cache *c, struct rtmsg *rtm)
2229 {
2230 	int ct;
2231 	struct rtnexthop *nhp;
2232 	struct nlattr *mp_attr;
2233 	struct rta_mfc_stats mfcs;
2234 
2235 	/* If cache is unresolved, don't try to parse IIF and OIF */
2236 	if (c->mf6c_parent >= MAXMIFS)
2237 		return -ENOENT;
2238 
2239 	if (MIF_EXISTS(mrt, c->mf6c_parent) &&
2240 	    nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
2241 		return -EMSGSIZE;
2242 	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
2243 	if (mp_attr == NULL)
2244 		return -EMSGSIZE;
2245 
2246 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2247 		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2248 			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
2249 			if (nhp == NULL) {
2250 				nla_nest_cancel(skb, mp_attr);
2251 				return -EMSGSIZE;
2252 			}
2253 
2254 			nhp->rtnh_flags = 0;
2255 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2256 			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2257 			nhp->rtnh_len = sizeof(*nhp);
2258 		}
2259 	}
2260 
2261 	nla_nest_end(skb, mp_attr);
2262 
2263 	mfcs.mfcs_packets = c->mfc_un.res.pkt;
2264 	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2265 	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
2266 	if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
2267 		return -EMSGSIZE;
2268 
2269 	rtm->rtm_type = RTN_MULTICAST;
2270 	return 1;
2271 }
2272 
2273 int ip6mr_get_route(struct net *net,
2274 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2275 {
2276 	int err;
2277 	struct mr6_table *mrt;
2278 	struct mfc6_cache *cache;
2279 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2280 
2281 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2282 	if (mrt == NULL)
2283 		return -ENOENT;
2284 
2285 	read_lock(&mrt_lock);
2286 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2287 	if (!cache && skb->dev) {
2288 		int vif = ip6mr_find_vif(mrt, skb->dev);
2289 
2290 		if (vif >= 0)
2291 			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2292 						     vif);
2293 	}
2294 
2295 	if (!cache) {
2296 		struct sk_buff *skb2;
2297 		struct ipv6hdr *iph;
2298 		struct net_device *dev;
2299 		int vif;
2300 
2301 		if (nowait) {
2302 			read_unlock(&mrt_lock);
2303 			return -EAGAIN;
2304 		}
2305 
2306 		dev = skb->dev;
2307 		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2308 			read_unlock(&mrt_lock);
2309 			return -ENODEV;
2310 		}
2311 
2312 		/* really correct? */
2313 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2314 		if (!skb2) {
2315 			read_unlock(&mrt_lock);
2316 			return -ENOMEM;
2317 		}
2318 
2319 		skb_reset_transport_header(skb2);
2320 
2321 		skb_put(skb2, sizeof(struct ipv6hdr));
2322 		skb_reset_network_header(skb2);
2323 
2324 		iph = ipv6_hdr(skb2);
2325 		iph->version = 0;
2326 		iph->priority = 0;
2327 		iph->flow_lbl[0] = 0;
2328 		iph->flow_lbl[1] = 0;
2329 		iph->flow_lbl[2] = 0;
2330 		iph->payload_len = 0;
2331 		iph->nexthdr = IPPROTO_NONE;
2332 		iph->hop_limit = 0;
2333 		iph->saddr = rt->rt6i_src.addr;
2334 		iph->daddr = rt->rt6i_dst.addr;
2335 
2336 		err = ip6mr_cache_unresolved(mrt, vif, skb2);
2337 		read_unlock(&mrt_lock);
2338 
2339 		return err;
2340 	}
2341 
2342 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2343 		cache->mfc_flags |= MFC_NOTIFY;
2344 
2345 	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2346 	read_unlock(&mrt_lock);
2347 	return err;
2348 }
2349 
2350 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2351 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd)
2352 {
2353 	struct nlmsghdr *nlh;
2354 	struct rtmsg *rtm;
2355 	int err;
2356 
2357 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI);
2358 	if (nlh == NULL)
2359 		return -EMSGSIZE;
2360 
2361 	rtm = nlmsg_data(nlh);
2362 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2363 	rtm->rtm_dst_len  = 128;
2364 	rtm->rtm_src_len  = 128;
2365 	rtm->rtm_tos      = 0;
2366 	rtm->rtm_table    = mrt->id;
2367 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2368 		goto nla_put_failure;
2369 	rtm->rtm_type = RTN_MULTICAST;
2370 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2371 	if (c->mfc_flags & MFC_STATIC)
2372 		rtm->rtm_protocol = RTPROT_STATIC;
2373 	else
2374 		rtm->rtm_protocol = RTPROT_MROUTED;
2375 	rtm->rtm_flags    = 0;
2376 
2377 	if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) ||
2378 	    nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp))
2379 		goto nla_put_failure;
2380 	err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
2381 	/* do not break the dump if cache is unresolved */
2382 	if (err < 0 && err != -ENOENT)
2383 		goto nla_put_failure;
2384 
2385 	return nlmsg_end(skb, nlh);
2386 
2387 nla_put_failure:
2388 	nlmsg_cancel(skb, nlh);
2389 	return -EMSGSIZE;
2390 }
2391 
2392 static int mr6_msgsize(bool unresolved, int maxvif)
2393 {
2394 	size_t len =
2395 		NLMSG_ALIGN(sizeof(struct rtmsg))
2396 		+ nla_total_size(4)	/* RTA_TABLE */
2397 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2398 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2399 		;
2400 
2401 	if (!unresolved)
2402 		len = len
2403 		      + nla_total_size(4)	/* RTA_IIF */
2404 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2405 		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2406 						/* RTA_MFC_STATS */
2407 		      + nla_total_size(sizeof(struct rta_mfc_stats))
2408 		;
2409 
2410 	return len;
2411 }
2412 
2413 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
2414 			      int cmd)
2415 {
2416 	struct net *net = read_pnet(&mrt->net);
2417 	struct sk_buff *skb;
2418 	int err = -ENOBUFS;
2419 
2420 	skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
2421 			GFP_ATOMIC);
2422 	if (skb == NULL)
2423 		goto errout;
2424 
2425 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd);
2426 	if (err < 0)
2427 		goto errout;
2428 
2429 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2430 	return;
2431 
2432 errout:
2433 	kfree_skb(skb);
2434 	if (err < 0)
2435 		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2436 }
2437 
2438 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2439 {
2440 	struct net *net = sock_net(skb->sk);
2441 	struct mr6_table *mrt;
2442 	struct mfc6_cache *mfc;
2443 	unsigned int t = 0, s_t;
2444 	unsigned int h = 0, s_h;
2445 	unsigned int e = 0, s_e;
2446 
2447 	s_t = cb->args[0];
2448 	s_h = cb->args[1];
2449 	s_e = cb->args[2];
2450 
2451 	read_lock(&mrt_lock);
2452 	ip6mr_for_each_table(mrt, net) {
2453 		if (t < s_t)
2454 			goto next_table;
2455 		if (t > s_t)
2456 			s_h = 0;
2457 		for (h = s_h; h < MFC6_LINES; h++) {
2458 			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2459 				if (e < s_e)
2460 					goto next_entry;
2461 				if (ip6mr_fill_mroute(mrt, skb,
2462 						      NETLINK_CB(cb->skb).portid,
2463 						      cb->nlh->nlmsg_seq,
2464 						      mfc, RTM_NEWROUTE) < 0)
2465 					goto done;
2466 next_entry:
2467 				e++;
2468 			}
2469 			e = s_e = 0;
2470 		}
2471 		spin_lock_bh(&mfc_unres_lock);
2472 		list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
2473 			if (e < s_e)
2474 				goto next_entry2;
2475 			if (ip6mr_fill_mroute(mrt, skb,
2476 					      NETLINK_CB(cb->skb).portid,
2477 					      cb->nlh->nlmsg_seq,
2478 					      mfc, RTM_NEWROUTE) < 0) {
2479 				spin_unlock_bh(&mfc_unres_lock);
2480 				goto done;
2481 			}
2482 next_entry2:
2483 			e++;
2484 		}
2485 		spin_unlock_bh(&mfc_unres_lock);
2486 		e = s_e = 0;
2487 		s_h = 0;
2488 next_table:
2489 		t++;
2490 	}
2491 done:
2492 	read_unlock(&mrt_lock);
2493 
2494 	cb->args[2] = e;
2495 	cb->args[1] = h;
2496 	cb->args[0] = t;
2497 
2498 	return skb->len;
2499 }
2500