xref: /openbmc/linux/net/ipv6/ip6mr.c (revision 32981ea5)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <asm/uaccess.h>
20 #include <linux/types.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
23 #include <linux/timer.h>
24 #include <linux/mm.h>
25 #include <linux/kernel.h>
26 #include <linux/fcntl.h>
27 #include <linux/stat.h>
28 #include <linux/socket.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/inetdevice.h>
32 #include <linux/proc_fs.h>
33 #include <linux/seq_file.h>
34 #include <linux/init.h>
35 #include <linux/slab.h>
36 #include <linux/compat.h>
37 #include <net/protocol.h>
38 #include <linux/skbuff.h>
39 #include <net/sock.h>
40 #include <net/raw.h>
41 #include <linux/notifier.h>
42 #include <linux/if_arp.h>
43 #include <net/checksum.h>
44 #include <net/netlink.h>
45 #include <net/fib_rules.h>
46 
47 #include <net/ipv6.h>
48 #include <net/ip6_route.h>
49 #include <linux/mroute6.h>
50 #include <linux/pim.h>
51 #include <net/addrconf.h>
52 #include <linux/netfilter_ipv6.h>
53 #include <linux/export.h>
54 #include <net/ip6_checksum.h>
55 #include <linux/netconf.h>
56 
57 struct mr6_table {
58 	struct list_head	list;
59 	possible_net_t		net;
60 	u32			id;
61 	struct sock		*mroute6_sk;
62 	struct timer_list	ipmr_expire_timer;
63 	struct list_head	mfc6_unres_queue;
64 	struct list_head	mfc6_cache_array[MFC6_LINES];
65 	struct mif_device	vif6_table[MAXMIFS];
66 	int			maxvif;
67 	atomic_t		cache_resolve_queue_len;
68 	bool			mroute_do_assert;
69 	bool			mroute_do_pim;
70 #ifdef CONFIG_IPV6_PIMSM_V2
71 	int			mroute_reg_vif_num;
72 #endif
73 };
74 
75 struct ip6mr_rule {
76 	struct fib_rule		common;
77 };
78 
79 struct ip6mr_result {
80 	struct mr6_table	*mrt;
81 };
82 
83 /* Big lock, protecting vif table, mrt cache and mroute socket state.
84    Note that the changes are serialized via rtnl_lock.
85  */
86 
87 static DEFINE_RWLOCK(mrt_lock);
88 
89 /*
90  *	Multicast router control variables
91  */
92 
93 #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
94 
95 /* Special spinlock for queue of unresolved entries */
96 static DEFINE_SPINLOCK(mfc_unres_lock);
97 
98 /* We revert to Alan's original scheme. The hash table of resolved
99    entries is changed only in process context and protected
100    with the weak lock mrt_lock. The queue of unresolved entries is
101    protected with the strong spinlock mfc_unres_lock.
102 
103    This way the data path is completely free of exclusive locks.
104  */
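/* Illustrative sketch (not part of the original file): the reader side
   that this scheme enables. Resolved-cache lookups on the data path only
   ever take the read side of mrt_lock, while the unresolved queue always
   pays for the spinlock:

	read_lock(&mrt_lock);
	c = ip6mr_cache_find(mrt, &saddr, &daddr);	(resolved entries;
	read_unlock(&mrt_lock);				 defined below)

	spin_lock_bh(&mfc_unres_lock);			(unresolved queue)
	...
	spin_unlock_bh(&mfc_unres_lock);
 */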
105 
106 static struct kmem_cache *mrt_cachep __read_mostly;
107 
108 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
109 static void ip6mr_free_table(struct mr6_table *mrt);
110 
111 static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
112 			   struct sk_buff *skb, struct mfc6_cache *cache);
113 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
114 			      mifi_t mifi, int assert);
115 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
116 			       struct mfc6_cache *c, struct rtmsg *rtm);
117 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
118 			      int cmd);
119 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
120 			       struct netlink_callback *cb);
121 static void mroute_clean_tables(struct mr6_table *mrt, bool all);
122 static void ipmr_expire_process(unsigned long arg);
123 
124 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
125 #define ip6mr_for_each_table(mrt, net) \
126 	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
127 
128 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
129 {
130 	struct mr6_table *mrt;
131 
132 	ip6mr_for_each_table(mrt, net) {
133 		if (mrt->id == id)
134 			return mrt;
135 	}
136 	return NULL;
137 }
138 
139 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
140 			    struct mr6_table **mrt)
141 {
142 	int err;
143 	struct ip6mr_result res;
144 	struct fib_lookup_arg arg = {
145 		.result = &res,
146 		.flags = FIB_LOOKUP_NOREF,
147 	};
148 
149 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
150 			       flowi6_to_flowi(flp6), 0, &arg);
151 	if (err < 0)
152 		return err;
153 	*mrt = res.mrt;
154 	return 0;
155 }
156 
157 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
158 			     int flags, struct fib_lookup_arg *arg)
159 {
160 	struct ip6mr_result *res = arg->result;
161 	struct mr6_table *mrt;
162 
163 	switch (rule->action) {
164 	case FR_ACT_TO_TBL:
165 		break;
166 	case FR_ACT_UNREACHABLE:
167 		return -ENETUNREACH;
168 	case FR_ACT_PROHIBIT:
169 		return -EACCES;
170 	case FR_ACT_BLACKHOLE:
171 	default:
172 		return -EINVAL;
173 	}
174 
175 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
176 	if (!mrt)
177 		return -EAGAIN;
178 	res->mrt = mrt;
179 	return 0;
180 }
181 
182 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
183 {
184 	return 1;
185 }
186 
187 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
188 	FRA_GENERIC_POLICY,
189 };
190 
191 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
192 				struct fib_rule_hdr *frh, struct nlattr **tb)
193 {
194 	return 0;
195 }
196 
197 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
198 			      struct nlattr **tb)
199 {
200 	return 1;
201 }
202 
203 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
204 			   struct fib_rule_hdr *frh)
205 {
206 	frh->dst_len = 0;
207 	frh->src_len = 0;
208 	frh->tos     = 0;
209 	return 0;
210 }
211 
212 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
213 	.family		= RTNL_FAMILY_IP6MR,
214 	.rule_size	= sizeof(struct ip6mr_rule),
215 	.addr_size	= sizeof(struct in6_addr),
216 	.action		= ip6mr_rule_action,
217 	.match		= ip6mr_rule_match,
218 	.configure	= ip6mr_rule_configure,
219 	.compare	= ip6mr_rule_compare,
220 	.fill		= ip6mr_rule_fill,
221 	.nlgroup	= RTNLGRP_IPV6_RULE,
222 	.policy		= ip6mr_rule_policy,
223 	.owner		= THIS_MODULE,
224 };
225 
226 static int __net_init ip6mr_rules_init(struct net *net)
227 {
228 	struct fib_rules_ops *ops;
229 	struct mr6_table *mrt;
230 	int err;
231 
232 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
233 	if (IS_ERR(ops))
234 		return PTR_ERR(ops);
235 
236 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
237 
238 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
239 	if (!mrt) {
240 		err = -ENOMEM;
241 		goto err1;
242 	}
243 
244 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
245 	if (err < 0)
246 		goto err2;
247 
248 	net->ipv6.mr6_rules_ops = ops;
249 	return 0;
250 
251 err2:
252 	ip6mr_free_table(mrt);
253 err1:
254 	fib_rules_unregister(ops);
255 	return err;
256 }
257 
258 static void __net_exit ip6mr_rules_exit(struct net *net)
259 {
260 	struct mr6_table *mrt, *next;
261 
262 	rtnl_lock();
263 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
264 		list_del(&mrt->list);
265 		ip6mr_free_table(mrt);
266 	}
267 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
268 	rtnl_unlock();
269 }
270 #else
271 #define ip6mr_for_each_table(mrt, net) \
272 	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
273 
274 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
275 {
276 	return net->ipv6.mrt6;
277 }
278 
279 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
280 			    struct mr6_table **mrt)
281 {
282 	*mrt = net->ipv6.mrt6;
283 	return 0;
284 }
285 
286 static int __net_init ip6mr_rules_init(struct net *net)
287 {
288 	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
289 	return net->ipv6.mrt6 ? 0 : -ENOMEM;
290 }
291 
292 static void __net_exit ip6mr_rules_exit(struct net *net)
293 {
294 	rtnl_lock();
295 	ip6mr_free_table(net->ipv6.mrt6);
296 	net->ipv6.mrt6 = NULL;
297 	rtnl_unlock();
298 }
299 #endif
300 
301 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
302 {
303 	struct mr6_table *mrt;
304 	unsigned int i;
305 
306 	mrt = ip6mr_get_table(net, id);
307 	if (mrt)
308 		return mrt;
309 
310 	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
311 	if (!mrt)
312 		return NULL;
313 	mrt->id = id;
314 	write_pnet(&mrt->net, net);
315 
316 	/* Forwarding cache */
317 	for (i = 0; i < MFC6_LINES; i++)
318 		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
319 
320 	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
321 
322 	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
323 		    (unsigned long)mrt);
324 
325 #ifdef CONFIG_IPV6_PIMSM_V2
326 	mrt->mroute_reg_vif_num = -1;
327 #endif
328 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
329 	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
330 #endif
331 	return mrt;
332 }
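/*
 *	Illustrative userspace sketch (requires
 *	CONFIG_IPV6_MROUTE_MULTIPLE_TABLES; names of the daemon's variables
 *	are assumptions): a daemon that wants its own table rather than
 *	RT6_TABLE_DFLT selects it with MRT6_TABLE *before* MRT6_INIT; see
 *	ip6_mroute_setsockopt() below, which returns -EBUSY once the socket
 *	is already the mroute socket.
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	uint32_t table = 42;	(any id; ip6mr_new_table() creates it)
 *	setsockopt(s, IPPROTO_IPV6, MRT6_TABLE, &table, sizeof(table));
 */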
333 
334 static void ip6mr_free_table(struct mr6_table *mrt)
335 {
336 	del_timer_sync(&mrt->ipmr_expire_timer);
337 	mroute_clean_tables(mrt, true);
338 	kfree(mrt);
339 }
340 
341 #ifdef CONFIG_PROC_FS
342 
343 struct ipmr_mfc_iter {
344 	struct seq_net_private p;
345 	struct mr6_table *mrt;
346 	struct list_head *cache;
347 	int ct;
348 };
349 
350 
351 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
352 					   struct ipmr_mfc_iter *it, loff_t pos)
353 {
354 	struct mr6_table *mrt = it->mrt;
355 	struct mfc6_cache *mfc;
356 
357 	read_lock(&mrt_lock);
358 	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
359 		it->cache = &mrt->mfc6_cache_array[it->ct];
360 		list_for_each_entry(mfc, it->cache, list)
361 			if (pos-- == 0)
362 				return mfc;
363 	}
364 	read_unlock(&mrt_lock);
365 
366 	spin_lock_bh(&mfc_unres_lock);
367 	it->cache = &mrt->mfc6_unres_queue;
368 	list_for_each_entry(mfc, it->cache, list)
369 		if (pos-- == 0)
370 			return mfc;
371 	spin_unlock_bh(&mfc_unres_lock);
372 
373 	it->cache = NULL;
374 	return NULL;
375 }
376 
377 /*
378  *	The /proc interfaces to multicast routing: /proc/ip6_mr_cache and /proc/ip6_mr_vif
379  */
380 
381 struct ipmr_vif_iter {
382 	struct seq_net_private p;
383 	struct mr6_table *mrt;
384 	int ct;
385 };
386 
387 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
388 					    struct ipmr_vif_iter *iter,
389 					    loff_t pos)
390 {
391 	struct mr6_table *mrt = iter->mrt;
392 
393 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
394 		if (!MIF_EXISTS(mrt, iter->ct))
395 			continue;
396 		if (pos-- == 0)
397 			return &mrt->vif6_table[iter->ct];
398 	}
399 	return NULL;
400 }
401 
402 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
403 	__acquires(mrt_lock)
404 {
405 	struct ipmr_vif_iter *iter = seq->private;
406 	struct net *net = seq_file_net(seq);
407 	struct mr6_table *mrt;
408 
409 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
410 	if (!mrt)
411 		return ERR_PTR(-ENOENT);
412 
413 	iter->mrt = mrt;
414 
415 	read_lock(&mrt_lock);
416 	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
417 		: SEQ_START_TOKEN;
418 }
419 
420 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
421 {
422 	struct ipmr_vif_iter *iter = seq->private;
423 	struct net *net = seq_file_net(seq);
424 	struct mr6_table *mrt = iter->mrt;
425 
426 	++*pos;
427 	if (v == SEQ_START_TOKEN)
428 		return ip6mr_vif_seq_idx(net, iter, 0);
429 
430 	while (++iter->ct < mrt->maxvif) {
431 		if (!MIF_EXISTS(mrt, iter->ct))
432 			continue;
433 		return &mrt->vif6_table[iter->ct];
434 	}
435 	return NULL;
436 }
437 
438 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
439 	__releases(mrt_lock)
440 {
441 	read_unlock(&mrt_lock);
442 }
443 
444 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
445 {
446 	struct ipmr_vif_iter *iter = seq->private;
447 	struct mr6_table *mrt = iter->mrt;
448 
449 	if (v == SEQ_START_TOKEN) {
450 		seq_puts(seq,
451 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
452 	} else {
453 		const struct mif_device *vif = v;
454 		const char *name = vif->dev ? vif->dev->name : "none";
455 
456 		seq_printf(seq,
457 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
458 			   vif - mrt->vif6_table,
459 			   name, vif->bytes_in, vif->pkt_in,
460 			   vif->bytes_out, vif->pkt_out,
461 			   vif->flags);
462 	}
463 	return 0;
464 }
465 
466 static const struct seq_operations ip6mr_vif_seq_ops = {
467 	.start = ip6mr_vif_seq_start,
468 	.next  = ip6mr_vif_seq_next,
469 	.stop  = ip6mr_vif_seq_stop,
470 	.show  = ip6mr_vif_seq_show,
471 };
472 
473 static int ip6mr_vif_open(struct inode *inode, struct file *file)
474 {
475 	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
476 			    sizeof(struct ipmr_vif_iter));
477 }
478 
479 static const struct file_operations ip6mr_vif_fops = {
480 	.owner	 = THIS_MODULE,
481 	.open    = ip6mr_vif_open,
482 	.read    = seq_read,
483 	.llseek  = seq_lseek,
484 	.release = seq_release_net,
485 };
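/*
 *	For reference, ip6mr_vif_seq_show() above renders
 *	/proc/net/ip6_mr_vif in this shape (row values illustrative):
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0          14480     120      14480     120 00000
 *	 1 pim6reg           0       0          0       0 00001
 */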
486 
487 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
488 {
489 	struct ipmr_mfc_iter *it = seq->private;
490 	struct net *net = seq_file_net(seq);
491 	struct mr6_table *mrt;
492 
493 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
494 	if (!mrt)
495 		return ERR_PTR(-ENOENT);
496 
497 	it->mrt = mrt;
498 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
499 		: SEQ_START_TOKEN;
500 }
501 
502 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
503 {
504 	struct mfc6_cache *mfc = v;
505 	struct ipmr_mfc_iter *it = seq->private;
506 	struct net *net = seq_file_net(seq);
507 	struct mr6_table *mrt = it->mrt;
508 
509 	++*pos;
510 
511 	if (v == SEQ_START_TOKEN)
512 		return ipmr_mfc_seq_idx(net, seq->private, 0);
513 
514 	if (mfc->list.next != it->cache)
515 		return list_entry(mfc->list.next, struct mfc6_cache, list);
516 
517 	if (it->cache == &mrt->mfc6_unres_queue)
518 		goto end_of_list;
519 
520 	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
521 
522 	while (++it->ct < MFC6_LINES) {
523 		it->cache = &mrt->mfc6_cache_array[it->ct];
524 		if (list_empty(it->cache))
525 			continue;
526 		return list_first_entry(it->cache, struct mfc6_cache, list);
527 	}
528 
529 	/* exhausted cache_array, show unresolved */
530 	read_unlock(&mrt_lock);
531 	it->cache = &mrt->mfc6_unres_queue;
532 	it->ct = 0;
533 
534 	spin_lock_bh(&mfc_unres_lock);
535 	if (!list_empty(it->cache))
536 		return list_first_entry(it->cache, struct mfc6_cache, list);
537 
538  end_of_list:
539 	spin_unlock_bh(&mfc_unres_lock);
540 	it->cache = NULL;
541 
542 	return NULL;
543 }
544 
545 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
546 {
547 	struct ipmr_mfc_iter *it = seq->private;
548 	struct mr6_table *mrt = it->mrt;
549 
550 	if (it->cache == &mrt->mfc6_unres_queue)
551 		spin_unlock_bh(&mfc_unres_lock);
552 	else if (it->cache == &mrt->mfc6_cache_array[it->ct])
553 		read_unlock(&mrt_lock);
554 }
555 
556 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
557 {
558 	int n;
559 
560 	if (v == SEQ_START_TOKEN) {
561 		seq_puts(seq,
562 			 "Group                            "
563 			 "Origin                           "
564 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
565 	} else {
566 		const struct mfc6_cache *mfc = v;
567 		const struct ipmr_mfc_iter *it = seq->private;
568 		struct mr6_table *mrt = it->mrt;
569 
570 		seq_printf(seq, "%pI6 %pI6 %-3hd",
571 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
572 			   mfc->mf6c_parent);
573 
574 		if (it->cache != &mrt->mfc6_unres_queue) {
575 			seq_printf(seq, " %8lu %8lu %8lu",
576 				   mfc->mfc_un.res.pkt,
577 				   mfc->mfc_un.res.bytes,
578 				   mfc->mfc_un.res.wrong_if);
579 			for (n = mfc->mfc_un.res.minvif;
580 			     n < mfc->mfc_un.res.maxvif; n++) {
581 				if (MIF_EXISTS(mrt, n) &&
582 				    mfc->mfc_un.res.ttls[n] < 255)
583 					seq_printf(seq,
584 						   " %2d:%-3d",
585 						   n, mfc->mfc_un.res.ttls[n]);
586 			}
587 		} else {
588 			/* unresolved mfc_caches don't contain
589 			 * pkt, bytes and wrong_if values
590 			 */
591 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
592 		}
593 		seq_putc(seq, '\n');
594 	}
595 	return 0;
596 }
597 
598 static const struct seq_operations ipmr_mfc_seq_ops = {
599 	.start = ipmr_mfc_seq_start,
600 	.next  = ipmr_mfc_seq_next,
601 	.stop  = ipmr_mfc_seq_stop,
602 	.show  = ipmr_mfc_seq_show,
603 };
604 
605 static int ipmr_mfc_open(struct inode *inode, struct file *file)
606 {
607 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
608 			    sizeof(struct ipmr_mfc_iter));
609 }
610 
611 static const struct file_operations ip6mr_mfc_fops = {
612 	.owner	 = THIS_MODULE,
613 	.open    = ipmr_mfc_open,
614 	.read    = seq_read,
615 	.llseek  = seq_lseek,
616 	.release = seq_release_net,
617 };
618 #endif
619 
620 #ifdef CONFIG_IPV6_PIMSM_V2
621 
622 static int pim6_rcv(struct sk_buff *skb)
623 {
624 	struct pimreghdr *pim;
625 	struct ipv6hdr   *encap;
626 	struct net_device  *reg_dev = NULL;
627 	struct net *net = dev_net(skb->dev);
628 	struct mr6_table *mrt;
629 	struct flowi6 fl6 = {
630 		.flowi6_iif	= skb->dev->ifindex,
631 		.flowi6_mark	= skb->mark,
632 	};
633 	int reg_vif_num;
634 
635 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
636 		goto drop;
637 
638 	pim = (struct pimreghdr *)skb_transport_header(skb);
639 	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
640 	    (pim->flags & PIM_NULL_REGISTER) ||
641 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
642 			     sizeof(*pim), IPPROTO_PIM,
643 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
644 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
645 		goto drop;
646 
647 	/* check if the inner packet is destined to a multicast group */
648 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
649 				   sizeof(*pim));
650 
651 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
652 	    encap->payload_len == 0 ||
653 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
654 		goto drop;
655 
656 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
657 		goto drop;
658 	reg_vif_num = mrt->mroute_reg_vif_num;
659 
660 	read_lock(&mrt_lock);
661 	if (reg_vif_num >= 0)
662 		reg_dev = mrt->vif6_table[reg_vif_num].dev;
663 	if (reg_dev)
664 		dev_hold(reg_dev);
665 	read_unlock(&mrt_lock);
666 
667 	if (!reg_dev)
668 		goto drop;
669 
670 	skb->mac_header = skb->network_header;
671 	skb_pull(skb, (u8 *)encap - skb->data);
672 	skb_reset_network_header(skb);
673 	skb->protocol = htons(ETH_P_IPV6);
674 	skb->ip_summed = CHECKSUM_NONE;
675 
676 	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
677 
678 	netif_rx(skb);
679 
680 	dev_put(reg_dev);
681 	return 0;
682  drop:
683 	kfree_skb(skb);
684 	return 0;
685 }
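/*
 *	Register packet layout handled above; the skb's transport header
 *	points at the PIM header on entry:
 *
 *	+-------------+----------------------+---------------------------+
 *	| outer IPv6  | PIM register header  | inner (encapsulated) IPv6 |
 *	| header      | (*pim)               | packet (*encap)           |
 *	+-------------+----------------------+---------------------------+
 *
 *	After validation the inner packet is re-injected on the pim6reg
 *	device via skb_tunnel_rx() and netif_rx().
 */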
686 
687 static const struct inet6_protocol pim6_protocol = {
688 	.handler	=	pim6_rcv,
689 };
690 
691 /* Service routines creating virtual interfaces: PIMREG */
692 
693 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
694 				      struct net_device *dev)
695 {
696 	struct net *net = dev_net(dev);
697 	struct mr6_table *mrt;
698 	struct flowi6 fl6 = {
699 		.flowi6_oif	= dev->ifindex,
700 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
701 		.flowi6_mark	= skb->mark,
702 	};
703 	int err;
704 
705 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
706 	if (err < 0) {
707 		kfree_skb(skb);
708 		return err;
709 	}
710 
711 	read_lock(&mrt_lock);
712 	dev->stats.tx_bytes += skb->len;
713 	dev->stats.tx_packets++;
714 	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
715 	read_unlock(&mrt_lock);
716 	kfree_skb(skb);
717 	return NETDEV_TX_OK;
718 }
719 
720 static int reg_vif_get_iflink(const struct net_device *dev)
721 {
722 	return 0;
723 }
724 
725 static const struct net_device_ops reg_vif_netdev_ops = {
726 	.ndo_start_xmit	= reg_vif_xmit,
727 	.ndo_get_iflink = reg_vif_get_iflink,
728 };
729 
730 static void reg_vif_setup(struct net_device *dev)
731 {
732 	dev->type		= ARPHRD_PIMREG;
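	/* room for the outer IPv6 header plus the 8-byte PIM register header */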
733 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
734 	dev->flags		= IFF_NOARP;
735 	dev->netdev_ops		= &reg_vif_netdev_ops;
736 	dev->destructor		= free_netdev;
737 	dev->features		|= NETIF_F_NETNS_LOCAL;
738 }
739 
740 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
741 {
742 	struct net_device *dev;
743 	char name[IFNAMSIZ];
744 
745 	if (mrt->id == RT6_TABLE_DFLT)
746 		sprintf(name, "pim6reg");
747 	else
748 		sprintf(name, "pim6reg%u", mrt->id);
749 
750 	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
751 	if (!dev)
752 		return NULL;
753 
754 	dev_net_set(dev, net);
755 
756 	if (register_netdevice(dev)) {
757 		free_netdev(dev);
758 		return NULL;
759 	}
760 
761 	if (dev_open(dev))
762 		goto failure;
763 
764 	dev_hold(dev);
765 	return dev;
766 
767 failure:
768 	unregister_netdevice(dev);
769 	return NULL;
770 }
771 #endif
772 
773 /*
774  *	Delete a VIF entry
775  */
776 
777 static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
778 {
779 	struct mif_device *v;
780 	struct net_device *dev;
781 	struct inet6_dev *in6_dev;
782 
783 	if (vifi < 0 || vifi >= mrt->maxvif)
784 		return -EADDRNOTAVAIL;
785 
786 	v = &mrt->vif6_table[vifi];
787 
788 	write_lock_bh(&mrt_lock);
789 	dev = v->dev;
790 	v->dev = NULL;
791 
792 	if (!dev) {
793 		write_unlock_bh(&mrt_lock);
794 		return -EADDRNOTAVAIL;
795 	}
796 
797 #ifdef CONFIG_IPV6_PIMSM_V2
798 	if (vifi == mrt->mroute_reg_vif_num)
799 		mrt->mroute_reg_vif_num = -1;
800 #endif
801 
802 	if (vifi + 1 == mrt->maxvif) {
803 		int tmp;
804 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
805 			if (MIF_EXISTS(mrt, tmp))
806 				break;
807 		}
808 		mrt->maxvif = tmp + 1;
809 	}
810 
811 	write_unlock_bh(&mrt_lock);
812 
813 	dev_set_allmulti(dev, -1);
814 
815 	in6_dev = __in6_dev_get(dev);
816 	if (in6_dev) {
817 		in6_dev->cnf.mc_forwarding--;
818 		inet6_netconf_notify_devconf(dev_net(dev),
819 					     NETCONFA_MC_FORWARDING,
820 					     dev->ifindex, &in6_dev->cnf);
821 	}
822 
823 	if (v->flags & MIFF_REGISTER)
824 		unregister_netdevice_queue(dev, head);
825 
826 	dev_put(dev);
827 	return 0;
828 }
829 
830 static inline void ip6mr_cache_free(struct mfc6_cache *c)
831 {
832 	kmem_cache_free(mrt_cachep, c);
833 }
834 
835 /* Destroy an unresolved cache entry, killing queued skbs
836    and reporting error to netlink readers.
837  */
838 
839 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
840 {
841 	struct net *net = read_pnet(&mrt->net);
842 	struct sk_buff *skb;
843 
844 	atomic_dec(&mrt->cache_resolve_queue_len);
845 
846 	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
847 		if (ipv6_hdr(skb)->version == 0) {
848 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
849 			nlh->nlmsg_type = NLMSG_ERROR;
850 			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
851 			skb_trim(skb, nlh->nlmsg_len);
852 			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
853 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
854 		} else
855 			kfree_skb(skb);
856 	}
857 
858 	ip6mr_cache_free(c);
859 }
860 
861 
862 /* Timer handler for the unresolved queue. */
863 
864 static void ipmr_do_expire_process(struct mr6_table *mrt)
865 {
866 	unsigned long now = jiffies;
867 	unsigned long expires = 10 * HZ;
868 	struct mfc6_cache *c, *next;
869 
870 	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
871 		if (time_after(c->mfc_un.unres.expires, now)) {
872 			/* not yet... */
873 			unsigned long interval = c->mfc_un.unres.expires - now;
874 			if (interval < expires)
875 				expires = interval;
876 			continue;
877 		}
878 
879 		list_del(&c->list);
880 		mr6_netlink_event(mrt, c, RTM_DELROUTE);
881 		ip6mr_destroy_unres(mrt, c);
882 	}
883 
884 	if (!list_empty(&mrt->mfc6_unres_queue))
885 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
886 }
887 
888 static void ipmr_expire_process(unsigned long arg)
889 {
890 	struct mr6_table *mrt = (struct mr6_table *)arg;
891 
892 	if (!spin_trylock(&mfc_unres_lock)) {
893 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
894 		return;
895 	}
896 
897 	if (!list_empty(&mrt->mfc6_unres_queue))
898 		ipmr_do_expire_process(mrt);
899 
900 	spin_unlock(&mfc_unres_lock);
901 }
902 
903 /* Fill the oifs list. Called with mrt_lock held for writing. */
904 
905 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
906 				    unsigned char *ttls)
907 {
908 	int vifi;
909 
910 	cache->mfc_un.res.minvif = MAXMIFS;
911 	cache->mfc_un.res.maxvif = 0;
912 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
913 
914 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
915 		if (MIF_EXISTS(mrt, vifi) &&
916 		    ttls[vifi] && ttls[vifi] < 255) {
917 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
918 			if (cache->mfc_un.res.minvif > vifi)
919 				cache->mfc_un.res.minvif = vifi;
920 			if (cache->mfc_un.res.maxvif <= vifi)
921 				cache->mfc_un.res.maxvif = vifi + 1;
922 		}
923 	}
924 }
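/*
 * Worked example: with ttls = { 255, 1, 255, 3, 255, ... } and mifs 1 and 3
 * present, the loop above leaves res.ttls[1] = 1 and res.ttls[3] = 3 (all
 * else 255), res.minvif = 1 and res.maxvif = 4 (one past the last valid
 * mif), so the forwarding loop can scan [minvif, maxvif) and skip 255s.
 */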
925 
926 static int mif6_add(struct net *net, struct mr6_table *mrt,
927 		    struct mif6ctl *vifc, int mrtsock)
928 {
929 	int vifi = vifc->mif6c_mifi;
930 	struct mif_device *v = &mrt->vif6_table[vifi];
931 	struct net_device *dev;
932 	struct inet6_dev *in6_dev;
933 	int err;
934 
935 	/* Is the vif busy? */
936 	if (MIF_EXISTS(mrt, vifi))
937 		return -EADDRINUSE;
938 
939 	switch (vifc->mif6c_flags) {
940 #ifdef CONFIG_IPV6_PIMSM_V2
941 	case MIFF_REGISTER:
942 		/*
943 		 * Special-purpose VIF in PIM:
944 		 * all packets will be sent to the daemon.
945 		 */
946 		if (mrt->mroute_reg_vif_num >= 0)
947 			return -EADDRINUSE;
948 		dev = ip6mr_reg_vif(net, mrt);
949 		if (!dev)
950 			return -ENOBUFS;
951 		err = dev_set_allmulti(dev, 1);
952 		if (err) {
953 			unregister_netdevice(dev);
954 			dev_put(dev);
955 			return err;
956 		}
957 		break;
958 #endif
959 	case 0:
960 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
961 		if (!dev)
962 			return -EADDRNOTAVAIL;
963 		err = dev_set_allmulti(dev, 1);
964 		if (err) {
965 			dev_put(dev);
966 			return err;
967 		}
968 		break;
969 	default:
970 		return -EINVAL;
971 	}
972 
973 	in6_dev = __in6_dev_get(dev);
974 	if (in6_dev) {
975 		in6_dev->cnf.mc_forwarding++;
976 		inet6_netconf_notify_devconf(dev_net(dev),
977 					     NETCONFA_MC_FORWARDING,
978 					     dev->ifindex, &in6_dev->cnf);
979 	}
980 
981 	/*
982 	 *	Fill in the VIF structures
983 	 */
984 	v->rate_limit = vifc->vifc_rate_limit;
985 	v->flags = vifc->mif6c_flags;
986 	if (!mrtsock)
987 		v->flags |= VIFF_STATIC;
988 	v->threshold = vifc->vifc_threshold;
989 	v->bytes_in = 0;
990 	v->bytes_out = 0;
991 	v->pkt_in = 0;
992 	v->pkt_out = 0;
993 	v->link = dev->ifindex;
994 	if (v->flags & MIFF_REGISTER)
995 		v->link = dev_get_iflink(dev);
996 
997 	/* And finish update writing critical data */
998 	write_lock_bh(&mrt_lock);
999 	v->dev = dev;
1000 #ifdef CONFIG_IPV6_PIMSM_V2
1001 	if (v->flags & MIFF_REGISTER)
1002 		mrt->mroute_reg_vif_num = vifi;
1003 #endif
1004 	if (vifi + 1 > mrt->maxvif)
1005 		mrt->maxvif = vifi + 1;
1006 	write_unlock_bh(&mrt_lock);
1007 	return 0;
1008 }
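/*
 *	Illustrative userspace counterpart (struct mif6ctl comes from
 *	<linux/mroute6.h>; the interface name is an assumption): a daemon
 *	registers a physical interface as a mif roughly like this, and
 *	mif6_add() above services the call:
 *
 *	struct mif6ctl vc = {
 *		.mif6c_mifi	 = 0,		(slot in vif6_table)
 *		.mif6c_flags	 = 0,		(or MIFF_REGISTER)
 *		.vifc_threshold	 = 1,
 *		.mif6c_pifi	 = if_nametoindex("eth0"),
 *		.vifc_rate_limit = 0,
 *	};
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &vc, sizeof(vc));
 */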
1009 
1010 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
1011 					   const struct in6_addr *origin,
1012 					   const struct in6_addr *mcastgrp)
1013 {
1014 	int line = MFC6_HASH(mcastgrp, origin);
1015 	struct mfc6_cache *c;
1016 
1017 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1018 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1019 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1020 			return c;
1021 	}
1022 	return NULL;
1023 }
1024 
1025 /* Look for a (*,*,oif) entry */
1026 static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
1027 						      mifi_t mifi)
1028 {
1029 	int line = MFC6_HASH(&in6addr_any, &in6addr_any);
1030 	struct mfc6_cache *c;
1031 
1032 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1033 		if (ipv6_addr_any(&c->mf6c_origin) &&
1034 		    ipv6_addr_any(&c->mf6c_mcastgrp) &&
1035 		    (c->mfc_un.res.ttls[mifi] < 255))
1036 			return c;
1037 
1038 	return NULL;
1039 }
1040 
1041 /* Look for a (*,G) entry */
1042 static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
1043 					       struct in6_addr *mcastgrp,
1044 					       mifi_t mifi)
1045 {
1046 	int line = MFC6_HASH(mcastgrp, &in6addr_any);
1047 	struct mfc6_cache *c, *proxy;
1048 
1049 	if (ipv6_addr_any(mcastgrp))
1050 		goto skip;
1051 
1052 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1053 		if (ipv6_addr_any(&c->mf6c_origin) &&
1054 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
1055 			if (c->mfc_un.res.ttls[mifi] < 255)
1056 				return c;
1057 
1058 			/* It's ok if the mifi is part of the static tree */
1059 			proxy = ip6mr_cache_find_any_parent(mrt,
1060 							    c->mf6c_parent);
1061 			if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
1062 				return c;
1063 		}
1064 
1065 skip:
1066 	return ip6mr_cache_find_any_parent(mrt, mifi);
1067 }
1068 
1069 /*
1070  *	Allocate a multicast cache entry
1071  */
1072 static struct mfc6_cache *ip6mr_cache_alloc(void)
1073 {
1074 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1075 	if (!c)
1076 		return NULL;
1077 	c->mfc_un.res.minvif = MAXMIFS;
1078 	return c;
1079 }
1080 
1081 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1082 {
1083 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1084 	if (!c)
1085 		return NULL;
1086 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
1087 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
1088 	return c;
1089 }
1090 
1091 /*
1092  *	A cache entry has gone from queued into a resolved state
1093  */
1094 
1095 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1096 				struct mfc6_cache *uc, struct mfc6_cache *c)
1097 {
1098 	struct sk_buff *skb;
1099 
1100 	/*
1101 	 *	Play the pending entries through our router
1102 	 */
1103 
1104 	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1105 		if (ipv6_hdr(skb)->version == 0) {
1106 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1107 
1108 			if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
1109 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1110 			} else {
1111 				nlh->nlmsg_type = NLMSG_ERROR;
1112 				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1113 				skb_trim(skb, nlh->nlmsg_len);
1114 				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1115 			}
1116 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1117 		} else
1118 			ip6_mr_forward(net, mrt, skb, c);
1119 	}
1120 }
1121 
1122 /*
1123  *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1124  *	expects the following bizarre scheme.
1125  *
1126  *	Called under mrt_lock.
1127  */
1128 
1129 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1130 			      mifi_t mifi, int assert)
1131 {
1132 	struct sk_buff *skb;
1133 	struct mrt6msg *msg;
1134 	int ret;
1135 
1136 #ifdef CONFIG_IPV6_PIMSM_V2
1137 	if (assert == MRT6MSG_WHOLEPKT)
1138 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1139 						+sizeof(*msg));
1140 	else
1141 #endif
1142 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1143 
1144 	if (!skb)
1145 		return -ENOBUFS;
1146 
1147 	/* I suppose that internal messages
1148 	 * do not require checksums */
1149 
1150 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1151 
1152 #ifdef CONFIG_IPV6_PIMSM_V2
1153 	if (assert == MRT6MSG_WHOLEPKT) {
1154 		/* Ugly, but we have no choice with this interface.
1155 		   Duplicate old header, fix length etc.
1156 		   And all this only to mangle msg->im6_msgtype and
1157 		   to set msg->im6_mbz to "mbz" :-)
1158 		 */
1159 		skb_push(skb, -skb_network_offset(pkt));
1160 
1161 		skb_push(skb, sizeof(*msg));
1162 		skb_reset_transport_header(skb);
1163 		msg = (struct mrt6msg *)skb_transport_header(skb);
1164 		msg->im6_mbz = 0;
1165 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1166 		msg->im6_mif = mrt->mroute_reg_vif_num;
1167 		msg->im6_pad = 0;
1168 		msg->im6_src = ipv6_hdr(pkt)->saddr;
1169 		msg->im6_dst = ipv6_hdr(pkt)->daddr;
1170 
1171 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1172 	} else
1173 #endif
1174 	{
1175 	/*
1176 	 *	Copy the IPv6 header
1177 	 */
1178 
1179 	skb_put(skb, sizeof(struct ipv6hdr));
1180 	skb_reset_network_header(skb);
1181 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1182 
1183 	/*
1184 	 *	Add our header
1185 	 */
1186 	skb_put(skb, sizeof(*msg));
1187 	skb_reset_transport_header(skb);
1188 	msg = (struct mrt6msg *)skb_transport_header(skb);
1189 
1190 	msg->im6_mbz = 0;
1191 	msg->im6_msgtype = assert;
1192 	msg->im6_mif = mifi;
1193 	msg->im6_pad = 0;
1194 	msg->im6_src = ipv6_hdr(pkt)->saddr;
1195 	msg->im6_dst = ipv6_hdr(pkt)->daddr;
1196 
1197 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1198 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1199 	}
1200 
1201 	if (!mrt->mroute6_sk) {
1202 		kfree_skb(skb);
1203 		return -EINVAL;
1204 	}
1205 
1206 	/*
1207 	 *	Deliver to user space multicast routing algorithms
1208 	 */
1209 	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1210 	if (ret < 0) {
1211 		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1212 		kfree_skb(skb);
1213 	}
1214 
1215 	return ret;
1216 }
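/*
 *	Sketch of the userspace side (illustrative; the offset follows from
 *	the skb built above, and resolve_route() is a hypothetical daemon
 *	helper, not a kernel API): for MRT6MSG_NOCACHE the daemon reads a
 *	copy of the triggering packet's IPv6 header followed by the mrt6msg
 *	off the same raw ICMPv6 socket:
 *
 *	char buf[8192];
 *	ssize_t n = read(s, buf, sizeof(buf));
 *	struct mrt6msg *m = (struct mrt6msg *)(buf + sizeof(struct ip6_hdr));
 *	if (n > 0 && m->im6_mbz == 0 && m->im6_msgtype == MRT6MSG_NOCACHE)
 *		resolve_route(&m->im6_src, &m->im6_dst, m->im6_mif);
 */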
1217 
1218 /*
1219  *	Queue a packet for resolution. The packet is queued on a locked cache entry!
1220  */
1221 
1222 static int
1223 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1224 {
1225 	bool found = false;
1226 	int err;
1227 	struct mfc6_cache *c;
1228 
1229 	spin_lock_bh(&mfc_unres_lock);
1230 	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1231 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1232 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1233 			found = true;
1234 			break;
1235 		}
1236 	}
1237 
1238 	if (!found) {
1239 		/*
1240 		 *	Create a new entry if allowable
1241 		 */
1242 
1243 		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1244 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
1245 			spin_unlock_bh(&mfc_unres_lock);
1246 
1247 			kfree_skb(skb);
1248 			return -ENOBUFS;
1249 		}
1250 
1251 		/*
1252 		 *	Fill in the new cache entry
1253 		 */
1254 		c->mf6c_parent = -1;
1255 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1256 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1257 
1258 		/*
1259 		 *	Reflect first query at pim6sd
1260 		 */
1261 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1262 		if (err < 0) {
1263 			/* If the report failed throw the cache entry
1264 			   out - Brad Parker
1265 			 */
1266 			spin_unlock_bh(&mfc_unres_lock);
1267 
1268 			ip6mr_cache_free(c);
1269 			kfree_skb(skb);
1270 			return err;
1271 		}
1272 
1273 		atomic_inc(&mrt->cache_resolve_queue_len);
1274 		list_add(&c->list, &mrt->mfc6_unres_queue);
1275 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1276 
1277 		ipmr_do_expire_process(mrt);
1278 	}
1279 
1280 	/*
1281 	 *	See if we can append the packet
1282 	 */
1283 	if (c->mfc_un.unres.unresolved.qlen > 3) {
1284 		kfree_skb(skb);
1285 		err = -ENOBUFS;
1286 	} else {
1287 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1288 		err = 0;
1289 	}
1290 
1291 	spin_unlock_bh(&mfc_unres_lock);
1292 	return err;
1293 }
1294 
1295 /*
1296  *	MFC6 cache manipulation by user space
1297  */
1298 
1299 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
1300 			    int parent)
1301 {
1302 	int line;
1303 	struct mfc6_cache *c, *next;
1304 
1305 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1306 
1307 	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1308 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1309 		    ipv6_addr_equal(&c->mf6c_mcastgrp,
1310 				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1311 		    (parent == -1 || parent == c->mf6c_parent)) {
1312 			write_lock_bh(&mrt_lock);
1313 			list_del(&c->list);
1314 			write_unlock_bh(&mrt_lock);
1315 
1316 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1317 			ip6mr_cache_free(c);
1318 			return 0;
1319 		}
1320 	}
1321 	return -ENOENT;
1322 }
1323 
1324 static int ip6mr_device_event(struct notifier_block *this,
1325 			      unsigned long event, void *ptr)
1326 {
1327 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1328 	struct net *net = dev_net(dev);
1329 	struct mr6_table *mrt;
1330 	struct mif_device *v;
1331 	int ct;
1332 	LIST_HEAD(list);
1333 
1334 	if (event != NETDEV_UNREGISTER)
1335 		return NOTIFY_DONE;
1336 
1337 	ip6mr_for_each_table(mrt, net) {
1338 		v = &mrt->vif6_table[0];
1339 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1340 			if (v->dev == dev)
1341 				mif6_delete(mrt, ct, &list);
1342 		}
1343 	}
1344 	unregister_netdevice_many(&list);
1345 
1346 	return NOTIFY_DONE;
1347 }
1348 
1349 static struct notifier_block ip6_mr_notifier = {
1350 	.notifier_call = ip6mr_device_event
1351 };
1352 
1353 /*
1354  *	Setup for IPv6 multicast routing
1355  */
1356 
1357 static int __net_init ip6mr_net_init(struct net *net)
1358 {
1359 	int err;
1360 
1361 	err = ip6mr_rules_init(net);
1362 	if (err < 0)
1363 		goto fail;
1364 
1365 #ifdef CONFIG_PROC_FS
1366 	err = -ENOMEM;
1367 	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
1368 		goto proc_vif_fail;
1369 	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
1370 		goto proc_cache_fail;
1371 #endif
1372 
1373 	return 0;
1374 
1375 #ifdef CONFIG_PROC_FS
1376 proc_cache_fail:
1377 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1378 proc_vif_fail:
1379 	ip6mr_rules_exit(net);
1380 #endif
1381 fail:
1382 	return err;
1383 }
1384 
1385 static void __net_exit ip6mr_net_exit(struct net *net)
1386 {
1387 #ifdef CONFIG_PROC_FS
1388 	remove_proc_entry("ip6_mr_cache", net->proc_net);
1389 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1390 #endif
1391 	ip6mr_rules_exit(net);
1392 }
1393 
1394 static struct pernet_operations ip6mr_net_ops = {
1395 	.init = ip6mr_net_init,
1396 	.exit = ip6mr_net_exit,
1397 };
1398 
1399 int __init ip6_mr_init(void)
1400 {
1401 	int err;
1402 
1403 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1404 				       sizeof(struct mfc6_cache),
1405 				       0, SLAB_HWCACHE_ALIGN,
1406 				       NULL);
1407 	if (!mrt_cachep)
1408 		return -ENOMEM;
1409 
1410 	err = register_pernet_subsys(&ip6mr_net_ops);
1411 	if (err)
1412 		goto reg_pernet_fail;
1413 
1414 	err = register_netdevice_notifier(&ip6_mr_notifier);
1415 	if (err)
1416 		goto reg_notif_fail;
1417 #ifdef CONFIG_IPV6_PIMSM_V2
1418 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1419 		pr_err("%s: can't add PIM protocol\n", __func__);
1420 		err = -EAGAIN;
1421 		goto add_proto_fail;
1422 	}
1423 #endif
1424 	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
1425 		      ip6mr_rtm_dumproute, NULL);
1426 	return 0;
1427 #ifdef CONFIG_IPV6_PIMSM_V2
1428 add_proto_fail:
1429 	unregister_netdevice_notifier(&ip6_mr_notifier);
1430 #endif
1431 reg_notif_fail:
1432 	unregister_pernet_subsys(&ip6mr_net_ops);
1433 reg_pernet_fail:
1434 	kmem_cache_destroy(mrt_cachep);
1435 	return err;
1436 }
1437 
1438 void ip6_mr_cleanup(void)
1439 {
1440 	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
1441 #ifdef CONFIG_IPV6_PIMSM_V2
1442 	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1443 #endif
1444 	unregister_netdevice_notifier(&ip6_mr_notifier);
1445 	unregister_pernet_subsys(&ip6mr_net_ops);
1446 	kmem_cache_destroy(mrt_cachep);
1447 }
1448 
1449 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1450 			 struct mf6cctl *mfc, int mrtsock, int parent)
1451 {
1452 	bool found = false;
1453 	int line;
1454 	struct mfc6_cache *uc, *c;
1455 	unsigned char ttls[MAXMIFS];
1456 	int i;
1457 
1458 	if (mfc->mf6cc_parent >= MAXMIFS)
1459 		return -ENFILE;
1460 
1461 	memset(ttls, 255, MAXMIFS);
1462 	for (i = 0; i < MAXMIFS; i++) {
1463 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1464 			ttls[i] = 1;
1465 
1466 	}
1467 
1468 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1469 
1470 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1471 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1472 		    ipv6_addr_equal(&c->mf6c_mcastgrp,
1473 				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1474 		    (parent == -1 || parent == mfc->mf6cc_parent)) {
1475 			found = true;
1476 			break;
1477 		}
1478 	}
1479 
1480 	if (found) {
1481 		write_lock_bh(&mrt_lock);
1482 		c->mf6c_parent = mfc->mf6cc_parent;
1483 		ip6mr_update_thresholds(mrt, c, ttls);
1484 		if (!mrtsock)
1485 			c->mfc_flags |= MFC_STATIC;
1486 		write_unlock_bh(&mrt_lock);
1487 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1488 		return 0;
1489 	}
1490 
1491 	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1492 	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1493 		return -EINVAL;
1494 
1495 	c = ip6mr_cache_alloc();
1496 	if (!c)
1497 		return -ENOMEM;
1498 
1499 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1500 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1501 	c->mf6c_parent = mfc->mf6cc_parent;
1502 	ip6mr_update_thresholds(mrt, c, ttls);
1503 	if (!mrtsock)
1504 		c->mfc_flags |= MFC_STATIC;
1505 
1506 	write_lock_bh(&mrt_lock);
1507 	list_add(&c->list, &mrt->mfc6_cache_array[line]);
1508 	write_unlock_bh(&mrt_lock);
1509 
1510 	/*
1511 	 *	Check to see if we resolved a queued entry. If so, we
1512 	 *	need to send the queued frames on and tidy up.
1513 	 */
1514 	found = false;
1515 	spin_lock_bh(&mfc_unres_lock);
1516 	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1517 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1518 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1519 			list_del(&uc->list);
1520 			atomic_dec(&mrt->cache_resolve_queue_len);
1521 			found = true;
1522 			break;
1523 		}
1524 	}
1525 	if (list_empty(&mrt->mfc6_unres_queue))
1526 		del_timer(&mrt->ipmr_expire_timer);
1527 	spin_unlock_bh(&mfc_unres_lock);
1528 
1529 	if (found) {
1530 		ip6mr_cache_resolve(net, mrt, uc, c);
1531 		ip6mr_cache_free(uc);
1532 	}
1533 	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1534 	return 0;
1535 }
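/*
 *	Illustrative userspace counterpart (addresses are examples):
 *	installing the (S,G) entry that ip6mr_mfc_add() services.
 *	struct mf6cctl and IF_SET() come from <linux/mroute6.h>:
 *
 *	struct mf6cctl mc;
 *	memset(&mc, 0, sizeof(mc));
 *	inet_pton(AF_INET6, "2001:db8::1", &mc.mf6cc_origin.sin6_addr);
 *	inet_pton(AF_INET6, "ff3e::4242", &mc.mf6cc_mcastgrp.sin6_addr);
 *	mc.mf6cc_parent = 0;			(incoming mif)
 *	IF_SET(1, &mc.mf6cc_ifset);		(forward out of mif 1)
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MFC, &mc, sizeof(mc));
 */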
1536 
1537 /*
1538  *	Close the multicast socket, and clear the vif tables, etc.
1539  */
1540 
1541 static void mroute_clean_tables(struct mr6_table *mrt, bool all)
1542 {
1543 	int i;
1544 	LIST_HEAD(list);
1545 	struct mfc6_cache *c, *next;
1546 
1547 	/*
1548 	 *	Shut down all active vif entries
1549 	 */
1550 	for (i = 0; i < mrt->maxvif; i++) {
1551 		if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
1552 			continue;
1553 		mif6_delete(mrt, i, &list);
1554 	}
1555 	unregister_netdevice_many(&list);
1556 
1557 	/*
1558 	 *	Wipe the cache
1559 	 */
1560 	for (i = 0; i < MFC6_LINES; i++) {
1561 		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1562 			if (!all && (c->mfc_flags & MFC_STATIC))
1563 				continue;
1564 			write_lock_bh(&mrt_lock);
1565 			list_del(&c->list);
1566 			write_unlock_bh(&mrt_lock);
1567 
1568 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1569 			ip6mr_cache_free(c);
1570 		}
1571 	}
1572 
1573 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1574 		spin_lock_bh(&mfc_unres_lock);
1575 		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1576 			list_del(&c->list);
1577 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1578 			ip6mr_destroy_unres(mrt, c);
1579 		}
1580 		spin_unlock_bh(&mfc_unres_lock);
1581 	}
1582 }
1583 
1584 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1585 {
1586 	int err = 0;
1587 	struct net *net = sock_net(sk);
1588 
1589 	rtnl_lock();
1590 	write_lock_bh(&mrt_lock);
1591 	if (likely(mrt->mroute6_sk == NULL)) {
1592 		mrt->mroute6_sk = sk;
1593 		net->ipv6.devconf_all->mc_forwarding++;
1594 		inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
1595 					     NETCONFA_IFINDEX_ALL,
1596 					     net->ipv6.devconf_all);
1597 	} else {
1598 		err = -EADDRINUSE;
1599 	}
1600 	write_unlock_bh(&mrt_lock);
1601 
1602 	rtnl_unlock();
1603 
1604 	return err;
1605 }
1606 
1607 int ip6mr_sk_done(struct sock *sk)
1608 {
1609 	int err = -EACCES;
1610 	struct net *net = sock_net(sk);
1611 	struct mr6_table *mrt;
1612 
1613 	rtnl_lock();
1614 	ip6mr_for_each_table(mrt, net) {
1615 		if (sk == mrt->mroute6_sk) {
1616 			write_lock_bh(&mrt_lock);
1617 			mrt->mroute6_sk = NULL;
1618 			net->ipv6.devconf_all->mc_forwarding--;
1619 			inet6_netconf_notify_devconf(net,
1620 						     NETCONFA_MC_FORWARDING,
1621 						     NETCONFA_IFINDEX_ALL,
1622 						     net->ipv6.devconf_all);
1623 			write_unlock_bh(&mrt_lock);
1624 
1625 			mroute_clean_tables(mrt, false);
1626 			err = 0;
1627 			break;
1628 		}
1629 	}
1630 	rtnl_unlock();
1631 
1632 	return err;
1633 }
1634 
1635 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1636 {
1637 	struct mr6_table *mrt;
1638 	struct flowi6 fl6 = {
1639 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
1640 		.flowi6_oif	= skb->dev->ifindex,
1641 		.flowi6_mark	= skb->mark,
1642 	};
1643 
1644 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1645 		return NULL;
1646 
1647 	return mrt->mroute6_sk;
1648 }
1649 
1650 /*
1651  *	Socket options and virtual interface manipulation. The whole
1652  *	virtual interface system is a complete heap, but unfortunately
1653  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1654  *	MOSPF/PIM router set up we can clean this up.
1655  */
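/*
 *	Minimal userspace sketch of the lifecycle (assumes enough privilege
 *	to open a raw ICMPv6 socket); ip6mr_sk_init()/ip6mr_sk_done() above
 *	are driven by exactly this pair:
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int one = 1;
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *	... add mifs and MFC entries, service upcalls ...
 *	setsockopt(s, IPPROTO_IPV6, MRT6_DONE, NULL, 0);
 */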
1656 
1657 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1658 {
1659 	int ret, parent = 0;
1660 	struct mif6ctl vif;
1661 	struct mf6cctl mfc;
1662 	mifi_t mifi;
1663 	struct net *net = sock_net(sk);
1664 	struct mr6_table *mrt;
1665 
1666 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1667 	if (!mrt)
1668 		return -ENOENT;
1669 
1670 	if (optname != MRT6_INIT) {
1671 		if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
1672 			return -EACCES;
1673 	}
1674 
1675 	switch (optname) {
1676 	case MRT6_INIT:
1677 		if (sk->sk_type != SOCK_RAW ||
1678 		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1679 			return -EOPNOTSUPP;
1680 		if (optlen < sizeof(int))
1681 			return -EINVAL;
1682 
1683 		return ip6mr_sk_init(mrt, sk);
1684 
1685 	case MRT6_DONE:
1686 		return ip6mr_sk_done(sk);
1687 
1688 	case MRT6_ADD_MIF:
1689 		if (optlen < sizeof(vif))
1690 			return -EINVAL;
1691 		if (copy_from_user(&vif, optval, sizeof(vif)))
1692 			return -EFAULT;
1693 		if (vif.mif6c_mifi >= MAXMIFS)
1694 			return -ENFILE;
1695 		rtnl_lock();
1696 		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1697 		rtnl_unlock();
1698 		return ret;
1699 
1700 	case MRT6_DEL_MIF:
1701 		if (optlen < sizeof(mifi_t))
1702 			return -EINVAL;
1703 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1704 			return -EFAULT;
1705 		rtnl_lock();
1706 		ret = mif6_delete(mrt, mifi, NULL);
1707 		rtnl_unlock();
1708 		return ret;
1709 
1710 	/*
1711 	 *	Manipulate the forwarding caches. These live
1712 	 *	in a sort of kernel/user symbiosis.
1713 	 */
1714 	case MRT6_ADD_MFC:
1715 	case MRT6_DEL_MFC:
1716 		parent = -1;
1717 	case MRT6_ADD_MFC_PROXY:
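		/* fall through */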
1718 	case MRT6_DEL_MFC_PROXY:
1719 		if (optlen < sizeof(mfc))
1720 			return -EINVAL;
1721 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1722 			return -EFAULT;
1723 		if (parent == 0)
1724 			parent = mfc.mf6cc_parent;
1725 		rtnl_lock();
1726 		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1727 			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1728 		else
1729 			ret = ip6mr_mfc_add(net, mrt, &mfc,
1730 					    sk == mrt->mroute6_sk, parent);
1731 		rtnl_unlock();
1732 		return ret;
1733 
1734 	/*
1735 	 *	Control PIM assert (activating PIM also activates assert)
1736 	 */
1737 	case MRT6_ASSERT:
1738 	{
1739 		int v;
1740 
1741 		if (optlen != sizeof(v))
1742 			return -EINVAL;
1743 		if (get_user(v, (int __user *)optval))
1744 			return -EFAULT;
1745 		mrt->mroute_do_assert = v;
1746 		return 0;
1747 	}
1748 
1749 #ifdef CONFIG_IPV6_PIMSM_V2
1750 	case MRT6_PIM:
1751 	{
1752 		int v;
1753 
1754 		if (optlen != sizeof(v))
1755 			return -EINVAL;
1756 		if (get_user(v, (int __user *)optval))
1757 			return -EFAULT;
1758 		v = !!v;
1759 		rtnl_lock();
1760 		ret = 0;
1761 		if (v != mrt->mroute_do_pim) {
1762 			mrt->mroute_do_pim = v;
1763 			mrt->mroute_do_assert = v;
1764 		}
1765 		rtnl_unlock();
1766 		return ret;
1767 	}
1768 
1769 #endif
1770 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1771 	case MRT6_TABLE:
1772 	{
1773 		u32 v;
1774 
1775 		if (optlen != sizeof(u32))
1776 			return -EINVAL;
1777 		if (get_user(v, (u32 __user *)optval))
1778 			return -EFAULT;
1779 		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1780 		if (v != RT_TABLE_DEFAULT && v >= 100000000)
1781 			return -EINVAL;
1782 		if (sk == mrt->mroute6_sk)
1783 			return -EBUSY;
1784 
1785 		rtnl_lock();
1786 		ret = 0;
1787 		if (!ip6mr_new_table(net, v))
1788 			ret = -ENOMEM;
1789 		raw6_sk(sk)->ip6mr_table = v;
1790 		rtnl_unlock();
1791 		return ret;
1792 	}
1793 #endif
1794 	/*
1795 	 *	Spurious command, or MRT6_VERSION which you cannot
1796 	 *	set.
1797 	 */
1798 	default:
1799 		return -ENOPROTOOPT;
1800 	}
1801 }
1802 
1803 /*
1804  *	Getsockopt support for the multicast routing system.
1805  */
1806 
1807 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1808 			  int __user *optlen)
1809 {
1810 	int olr;
1811 	int val;
1812 	struct net *net = sock_net(sk);
1813 	struct mr6_table *mrt;
1814 
1815 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1816 	if (!mrt)
1817 		return -ENOENT;
1818 
1819 	switch (optname) {
1820 	case MRT6_VERSION:
1821 		val = 0x0305;
1822 		break;
1823 #ifdef CONFIG_IPV6_PIMSM_V2
1824 	case MRT6_PIM:
1825 		val = mrt->mroute_do_pim;
1826 		break;
1827 #endif
1828 	case MRT6_ASSERT:
1829 		val = mrt->mroute_do_assert;
1830 		break;
1831 	default:
1832 		return -ENOPROTOOPT;
1833 	}
1834 
1835 	if (get_user(olr, optlen))
1836 		return -EFAULT;
1837 
1838 	olr = min_t(int, olr, sizeof(int));
1839 	if (olr < 0)
1840 		return -EINVAL;
1841 
1842 	if (put_user(olr, optlen))
1843 		return -EFAULT;
1844 	if (copy_to_user(optval, &val, olr))
1845 		return -EFAULT;
1846 	return 0;
1847 }
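/*
 *	Example: querying the API version exposed above.
 *
 *	int ver;
 *	socklen_t len = sizeof(ver);
 *	getsockopt(s, IPPROTO_IPV6, MRT6_VERSION, &ver, &len);
 *	(ver is now 0x0305)
 */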
1848 
1849 /*
1850  *	The IPv6 multicast ioctl support routines.
1851  */
1852 
1853 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1854 {
1855 	struct sioc_sg_req6 sr;
1856 	struct sioc_mif_req6 vr;
1857 	struct mif_device *vif;
1858 	struct mfc6_cache *c;
1859 	struct net *net = sock_net(sk);
1860 	struct mr6_table *mrt;
1861 
1862 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1863 	if (!mrt)
1864 		return -ENOENT;
1865 
1866 	switch (cmd) {
1867 	case SIOCGETMIFCNT_IN6:
1868 		if (copy_from_user(&vr, arg, sizeof(vr)))
1869 			return -EFAULT;
1870 		if (vr.mifi >= mrt->maxvif)
1871 			return -EINVAL;
1872 		read_lock(&mrt_lock);
1873 		vif = &mrt->vif6_table[vr.mifi];
1874 		if (MIF_EXISTS(mrt, vr.mifi)) {
1875 			vr.icount = vif->pkt_in;
1876 			vr.ocount = vif->pkt_out;
1877 			vr.ibytes = vif->bytes_in;
1878 			vr.obytes = vif->bytes_out;
1879 			read_unlock(&mrt_lock);
1880 
1881 			if (copy_to_user(arg, &vr, sizeof(vr)))
1882 				return -EFAULT;
1883 			return 0;
1884 		}
1885 		read_unlock(&mrt_lock);
1886 		return -EADDRNOTAVAIL;
1887 	case SIOCGETSGCNT_IN6:
1888 		if (copy_from_user(&sr, arg, sizeof(sr)))
1889 			return -EFAULT;
1890 
1891 		read_lock(&mrt_lock);
1892 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1893 		if (c) {
1894 			sr.pktcnt = c->mfc_un.res.pkt;
1895 			sr.bytecnt = c->mfc_un.res.bytes;
1896 			sr.wrong_if = c->mfc_un.res.wrong_if;
1897 			read_unlock(&mrt_lock);
1898 
1899 			if (copy_to_user(arg, &sr, sizeof(sr)))
1900 				return -EFAULT;
1901 			return 0;
1902 		}
1903 		read_unlock(&mrt_lock);
1904 		return -EADDRNOTAVAIL;
1905 	default:
1906 		return -ENOIOCTLCMD;
1907 	}
1908 }
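/*
 *	Illustrative use of the counters above:
 *
 *	struct sioc_mif_req6 mr = { .mifi = 0 };
 *	if (ioctl(s, SIOCGETMIFCNT_IN6, &mr) == 0)
 *		printf("mif0: %lu pkts in, %lu pkts out\n",
 *		       mr.icount, mr.ocount);
 */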
1909 
1910 #ifdef CONFIG_COMPAT
1911 struct compat_sioc_sg_req6 {
1912 	struct sockaddr_in6 src;
1913 	struct sockaddr_in6 grp;
1914 	compat_ulong_t pktcnt;
1915 	compat_ulong_t bytecnt;
1916 	compat_ulong_t wrong_if;
1917 };
1918 
1919 struct compat_sioc_mif_req6 {
1920 	mifi_t	mifi;
1921 	compat_ulong_t icount;
1922 	compat_ulong_t ocount;
1923 	compat_ulong_t ibytes;
1924 	compat_ulong_t obytes;
1925 };
1926 
1927 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1928 {
1929 	struct compat_sioc_sg_req6 sr;
1930 	struct compat_sioc_mif_req6 vr;
1931 	struct mif_device *vif;
1932 	struct mfc6_cache *c;
1933 	struct net *net = sock_net(sk);
1934 	struct mr6_table *mrt;
1935 
1936 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1937 	if (!mrt)
1938 		return -ENOENT;
1939 
1940 	switch (cmd) {
1941 	case SIOCGETMIFCNT_IN6:
1942 		if (copy_from_user(&vr, arg, sizeof(vr)))
1943 			return -EFAULT;
1944 		if (vr.mifi >= mrt->maxvif)
1945 			return -EINVAL;
1946 		read_lock(&mrt_lock);
1947 		vif = &mrt->vif6_table[vr.mifi];
1948 		if (MIF_EXISTS(mrt, vr.mifi)) {
1949 			vr.icount = vif->pkt_in;
1950 			vr.ocount = vif->pkt_out;
1951 			vr.ibytes = vif->bytes_in;
1952 			vr.obytes = vif->bytes_out;
1953 			read_unlock(&mrt_lock);
1954 
1955 			if (copy_to_user(arg, &vr, sizeof(vr)))
1956 				return -EFAULT;
1957 			return 0;
1958 		}
1959 		read_unlock(&mrt_lock);
1960 		return -EADDRNOTAVAIL;
1961 	case SIOCGETSGCNT_IN6:
1962 		if (copy_from_user(&sr, arg, sizeof(sr)))
1963 			return -EFAULT;
1964 
1965 		read_lock(&mrt_lock);
1966 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1967 		if (c) {
1968 			sr.pktcnt = c->mfc_un.res.pkt;
1969 			sr.bytecnt = c->mfc_un.res.bytes;
1970 			sr.wrong_if = c->mfc_un.res.wrong_if;
1971 			read_unlock(&mrt_lock);
1972 
1973 			if (copy_to_user(arg, &sr, sizeof(sr)))
1974 				return -EFAULT;
1975 			return 0;
1976 		}
1977 		read_unlock(&mrt_lock);
1978 		return -EADDRNOTAVAIL;
1979 	default:
1980 		return -ENOIOCTLCMD;
1981 	}
1982 }
1983 #endif
1984 
1985 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
1986 {
1987 	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1988 			IPSTATS_MIB_OUTFORWDATAGRAMS);
1989 	__IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
1990 			IPSTATS_MIB_OUTOCTETS, skb->len);
1991 	return dst_output(net, sk, skb);
1992 }
1993 
1994 /*
1995  *	Processing handlers for ip6mr_forward
1996  */
1997 
1998 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1999 			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
2000 {
2001 	struct ipv6hdr *ipv6h;
2002 	struct mif_device *vif = &mrt->vif6_table[vifi];
2003 	struct net_device *dev;
2004 	struct dst_entry *dst;
2005 	struct flowi6 fl6;
2006 
2007 	if (!vif->dev)
2008 		goto out_free;
2009 
2010 #ifdef CONFIG_IPV6_PIMSM_V2
2011 	if (vif->flags & MIFF_REGISTER) {
2012 		vif->pkt_out++;
2013 		vif->bytes_out += skb->len;
2014 		vif->dev->stats.tx_bytes += skb->len;
2015 		vif->dev->stats.tx_packets++;
2016 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2017 		goto out_free;
2018 	}
2019 #endif
2020 
2021 	ipv6h = ipv6_hdr(skb);
2022 
2023 	fl6 = (struct flowi6) {
2024 		.flowi6_oif = vif->link,
2025 		.daddr = ipv6h->daddr,
2026 	};
2027 
2028 	dst = ip6_route_output(net, NULL, &fl6);
2029 	if (dst->error) {
2030 		dst_release(dst);
2031 		goto out_free;
2032 	}
2033 
2034 	skb_dst_drop(skb);
2035 	skb_dst_set(skb, dst);
2036 
2037 	/*
2038 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
2039 	 * not only before forwarding, but also after forwarding on all output
2040 	 * interfaces. Clearly, if the mrouter runs a multicast
2041 	 * program, that program should receive packets regardless of which
2042 	 * interface it joined on.
2043 	 * If we don't do this, the program would have to join on all
2044 	 * interfaces. On the other hand, a multihomed host (or router, but
2045 	 * not mrouter) cannot join on more than one interface - that would
2046 	 * result in receiving multiple copies of the packet.
2047 	 */
2048 	dev = vif->dev;
2049 	skb->dev = dev;
2050 	vif->pkt_out++;
2051 	vif->bytes_out += skb->len;
2052 
2053 	/* We are about to write to the IPv6 header, so make the skb writable */
2054 	/* XXX: what about extension headers? */
2055 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2056 		goto out_free;
2057 
2058 	ipv6h = ipv6_hdr(skb);
2059 	ipv6h->hop_limit--;
2060 
2061 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2062 
2063 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2064 		       net, NULL, skb, skb->dev, dev,
2065 		       ip6mr_forward2_finish);
2066 
2067 out_free:
2068 	kfree_skb(skb);
2069 	return 0;
2070 }
2071 
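/* Map a net_device back to its mif index; returns -1 when the device
 * is not a configured mif.
 */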
2072 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2073 {
2074 	int ct;
2075 
2076 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2077 		if (mrt->vif6_table[ct].dev == dev)
2078 			break;
2079 	}
2080 	return ct;
2081 }
2082 
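/* Decide where a cached flow goes.  The RPF check compares the arrival
 * device with the cached parent mif; on a mismatch the packet is dropped
 * and, if the daemon enabled asserts, a rate-limited MRT6MSG_WRONGMIF
 * notification is sent up.  On a match the packet is cloned out to every
 * oif whose TTL threshold is below the packet's hop limit.
 */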
2083 static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
2084 			   struct sk_buff *skb, struct mfc6_cache *cache)
2085 {
2086 	int psend = -1;
2087 	int vif, ct;
2088 	int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2089 
2090 	vif = cache->mf6c_parent;
2091 	cache->mfc_un.res.pkt++;
2092 	cache->mfc_un.res.bytes += skb->len;
2093 
2094 	if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
2095 		struct mfc6_cache *cache_proxy;
2096 
2097 		/* For an (*,G) entry, we only check that the incoming
2098 		 * interface is part of the static tree.
2099 		 */
2100 		cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
2101 		if (cache_proxy &&
2102 		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
2103 			goto forward;
2104 	}
2105 
2106 	/*
2107 	 * Wrong interface: drop packet and (maybe) send PIM assert.
2108 	 */
2109 	if (mrt->vif6_table[vif].dev != skb->dev) {
2110 		cache->mfc_un.res.wrong_if++;
2111 
2112 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2113 		    /* PIM-SM uses asserts when switching from the RPT to the
2114 		       SPT, so we cannot check that the packet arrived on an
2115 		       oif. That is bad, but otherwise we would need to move a
2116 		       pretty large chunk of pimd into the kernel. Ugh... --ANK
2117 		     */
2118 		    (mrt->mroute_do_pim ||
2119 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
2120 		    time_after(jiffies,
2121 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2122 			cache->mfc_un.res.last_assert = jiffies;
2123 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2124 		}
2125 		goto dont_forward;
2126 	}
2127 
2128 forward:
2129 	mrt->vif6_table[vif].pkt_in++;
2130 	mrt->vif6_table[vif].bytes_in += skb->len;
2131 
2132 	/*
2133 	 *	Forward the frame
2134 	 */
2135 	if (ipv6_addr_any(&cache->mf6c_origin) &&
2136 	    ipv6_addr_any(&cache->mf6c_mcastgrp)) {
2137 		if (true_vifi >= 0 &&
2138 		    true_vifi != cache->mf6c_parent &&
2139 		    ipv6_hdr(skb)->hop_limit >
2140 				cache->mfc_un.res.ttls[cache->mf6c_parent]) {
2141 			/* It's an (*,*) entry and the packet is not coming from
2142 			 * the upstream: forward the packet to the upstream
2143 			 * only.
2144 			 */
2145 			psend = cache->mf6c_parent;
2146 			goto last_forward;
2147 		}
2148 		goto dont_forward;
2149 	}
2150 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2151 		/* For (*,G) entry, don't forward to the incoming interface */
2152 		if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
2153 		    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2154 			if (psend != -1) {
2155 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2156 				if (skb2)
2157 					ip6mr_forward2(net, mrt, skb2, cache, psend);
2158 			}
2159 			psend = ct;
2160 		}
2161 	}
2162 last_forward:
2163 	if (psend != -1) {
2164 		ip6mr_forward2(net, mrt, skb, cache, psend);
2165 		return;
2166 	}
2167 
2168 dont_forward:
2169 	kfree_skb(skb);
2170 }
2171 
2172 
2173 /*
2174  *	Multicast packets for forwarding arrive here
2175  */
2176 
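/* Entered from ip6_mc_input() for multicast packets that may need
 * forwarding.  Flows without a usable cache entry are queued towards
 * the daemon via ip6mr_cache_unresolved(); resolved ones go through
 * ip6_mr_forward().
 */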
2177 int ip6_mr_input(struct sk_buff *skb)
2178 {
2179 	struct mfc6_cache *cache;
2180 	struct net *net = dev_net(skb->dev);
2181 	struct mr6_table *mrt;
2182 	struct flowi6 fl6 = {
2183 		.flowi6_iif	= skb->dev->ifindex,
2184 		.flowi6_mark	= skb->mark,
2185 	};
2186 	int err;
2187 
2188 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2189 	if (err < 0) {
2190 		kfree_skb(skb);
2191 		return err;
2192 	}
2193 
2194 	read_lock(&mrt_lock);
2195 	cache = ip6mr_cache_find(mrt,
2196 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2197 	if (!cache) {
2198 		int vif = ip6mr_find_vif(mrt, skb->dev);
2199 
2200 		if (vif >= 0)
2201 			cache = ip6mr_cache_find_any(mrt,
2202 						     &ipv6_hdr(skb)->daddr,
2203 						     vif);
2204 	}
2205 
2206 	/*
2207 	 *	No usable cache entry
2208 	 */
2209 	if (!cache) {
2210 		int vif;
2211 
2212 		vif = ip6mr_find_vif(mrt, skb->dev);
2213 		if (vif >= 0) {
2214 			err = ip6mr_cache_unresolved(mrt, vif, skb);
2215 			read_unlock(&mrt_lock);
2216 
2217 			return err;
2218 		}
2219 		read_unlock(&mrt_lock);
2220 		kfree_skb(skb);
2221 		return -ENODEV;
2222 	}
2223 
2224 	ip6_mr_forward(net, mrt, skb, cache);
2225 
2226 	read_unlock(&mrt_lock);
2227 
2228 	return 0;
2229 }
2230 
2231 
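/* Fill the route attributes for one cache entry: RTA_IIF for the parent
 * mif, one rtnexthop per oif with a TTL threshold below 255 inside
 * RTA_MULTIPATH, and the packet/byte/wrong_if counters as RTA_MFC_STATS.
 * Unresolved entries (parent >= MAXMIFS) yield -ENOENT so callers can
 * tell them apart.
 */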
2232 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2233 			       struct mfc6_cache *c, struct rtmsg *rtm)
2234 {
2235 	int ct;
2236 	struct rtnexthop *nhp;
2237 	struct nlattr *mp_attr;
2238 	struct rta_mfc_stats mfcs;
2239 
2240 	/* If cache is unresolved, don't try to parse IIF and OIF */
2241 	if (c->mf6c_parent >= MAXMIFS)
2242 		return -ENOENT;
2243 
2244 	if (MIF_EXISTS(mrt, c->mf6c_parent) &&
2245 	    nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
2246 		return -EMSGSIZE;
2247 	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
2248 	if (!mp_attr)
2249 		return -EMSGSIZE;
2250 
2251 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2252 		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2253 			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
2254 			if (!nhp) {
2255 				nla_nest_cancel(skb, mp_attr);
2256 				return -EMSGSIZE;
2257 			}
2258 
2259 			nhp->rtnh_flags = 0;
2260 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2261 			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2262 			nhp->rtnh_len = sizeof(*nhp);
2263 		}
2264 	}
2265 
2266 	nla_nest_end(skb, mp_attr);
2267 
2268 	mfcs.mfcs_packets = c->mfc_un.res.pkt;
2269 	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2270 	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
2271 	if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) < 0)
2272 		return -EMSGSIZE;
2273 
2274 	rtm->rtm_type = RTN_MULTICAST;
2275 	return 1;
2276 }
2277 
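/* Report a multicast route on behalf of an RTM_GETROUTE lookup.  If no
 * cache entry exists yet and the caller can wait, a minimal skb carrying
 * only a synthetic IPv6 header is queued as unresolved so the daemon
 * gets a chance to install the route.
 */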
2278 int ip6mr_get_route(struct net *net,
2279 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2280 {
2281 	int err;
2282 	struct mr6_table *mrt;
2283 	struct mfc6_cache *cache;
2284 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2285 
2286 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2287 	if (!mrt)
2288 		return -ENOENT;
2289 
2290 	read_lock(&mrt_lock);
2291 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2292 	if (!cache && skb->dev) {
2293 		int vif = ip6mr_find_vif(mrt, skb->dev);
2294 
2295 		if (vif >= 0)
2296 			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2297 						     vif);
2298 	}
2299 
2300 	if (!cache) {
2301 		struct sk_buff *skb2;
2302 		struct ipv6hdr *iph;
2303 		struct net_device *dev;
2304 		int vif;
2305 
2306 		if (nowait) {
2307 			read_unlock(&mrt_lock);
2308 			return -EAGAIN;
2309 		}
2310 
2311 		dev = skb->dev;
2312 		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2313 			read_unlock(&mrt_lock);
2314 			return -ENODEV;
2315 		}
2316 
2317 		/* really correct? */
2318 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2319 		if (!skb2) {
2320 			read_unlock(&mrt_lock);
2321 			return -ENOMEM;
2322 		}
2323 
2324 		skb_reset_transport_header(skb2);
2325 
2326 		skb_put(skb2, sizeof(struct ipv6hdr));
2327 		skb_reset_network_header(skb2);
2328 
2329 		iph = ipv6_hdr(skb2);
2330 		iph->version = 0;
2331 		iph->priority = 0;
2332 		iph->flow_lbl[0] = 0;
2333 		iph->flow_lbl[1] = 0;
2334 		iph->flow_lbl[2] = 0;
2335 		iph->payload_len = 0;
2336 		iph->nexthdr = IPPROTO_NONE;
2337 		iph->hop_limit = 0;
2338 		iph->saddr = rt->rt6i_src.addr;
2339 		iph->daddr = rt->rt6i_dst.addr;
2340 
2341 		err = ip6mr_cache_unresolved(mrt, vif, skb2);
2342 		read_unlock(&mrt_lock);
2343 
2344 		return err;
2345 	}
2346 
2347 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2348 		cache->mfc_flags |= MFC_NOTIFY;
2349 
2350 	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2351 	read_unlock(&mrt_lock);
2352 	return err;
2353 }
2354 
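/* Build one RTM_NEWROUTE/RTM_DELROUTE message for a cache entry; shared
 * by the dump path and by mr6_netlink_event() notifications.
 */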
2355 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2356 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2357 			     int flags)
2358 {
2359 	struct nlmsghdr *nlh;
2360 	struct rtmsg *rtm;
2361 	int err;
2362 
2363 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2364 	if (!nlh)
2365 		return -EMSGSIZE;
2366 
2367 	rtm = nlmsg_data(nlh);
2368 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2369 	rtm->rtm_dst_len  = 128;
2370 	rtm->rtm_src_len  = 128;
2371 	rtm->rtm_tos      = 0;
2372 	rtm->rtm_table    = mrt->id;
2373 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2374 		goto nla_put_failure;
2375 	rtm->rtm_type     = RTN_MULTICAST;
2376 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2377 	if (c->mfc_flags & MFC_STATIC)
2378 		rtm->rtm_protocol = RTPROT_STATIC;
2379 	else
2380 		rtm->rtm_protocol = RTPROT_MROUTED;
2381 	rtm->rtm_flags    = 0;
2382 
2383 	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2384 	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2385 		goto nla_put_failure;
2386 	err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
2387 	/* do not break the dump if cache is unresolved */
2388 	if (err < 0 && err != -ENOENT)
2389 		goto nla_put_failure;
2390 
2391 	nlmsg_end(skb, nlh);
2392 	return 0;
2393 
2394 nla_put_failure:
2395 	nlmsg_cancel(skb, nlh);
2396 	return -EMSGSIZE;
2397 }
2398 
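/* Upper bound on the netlink message size for one cache entry, so that
 * the notification skb can be allocated up front.
 */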
2399 static int mr6_msgsize(bool unresolved, int maxvif)
2400 {
2401 	size_t len =
2402 		NLMSG_ALIGN(sizeof(struct rtmsg))
2403 		+ nla_total_size(4)	/* RTA_TABLE */
2404 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2405 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2406 		;
2407 
2408 	if (!unresolved)
2409 		len = len
2410 		      + nla_total_size(4)	/* RTA_IIF */
2411 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2412 		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2413 						/* RTA_MFC_STATS */
2414 		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2415 		;
2416 
2417 	return len;
2418 }
2419 
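/* Notify RTNLGRP_IPV6_MROUTE listeners that a cache entry was added or
 * deleted (cmd is RTM_NEWROUTE or RTM_DELROUTE).
 */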
2420 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
2421 			      int cmd)
2422 {
2423 	struct net *net = read_pnet(&mrt->net);
2424 	struct sk_buff *skb;
2425 	int err = -ENOBUFS;
2426 
2427 	skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
2428 			GFP_ATOMIC);
2429 	if (!skb)
2430 		goto errout;
2431 
2432 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2433 	if (err < 0)
2434 		goto errout;
2435 
2436 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2437 	return;
2438 
2439 errout:
2440 	kfree_skb(skb);
2441 	if (err < 0)
2442 		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2443 }
2444 
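/* rtnetlink dump callback: walk every table, every hash chain and the
 * unresolved queue, resuming from the table/bucket/entry position the
 * previous invocation saved in cb->args[].
 */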
2445 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2446 {
2447 	struct net *net = sock_net(skb->sk);
2448 	struct mr6_table *mrt;
2449 	struct mfc6_cache *mfc;
2450 	unsigned int t = 0, s_t;
2451 	unsigned int h = 0, s_h;
2452 	unsigned int e = 0, s_e;
2453 
2454 	s_t = cb->args[0];
2455 	s_h = cb->args[1];
2456 	s_e = cb->args[2];
2457 
2458 	read_lock(&mrt_lock);
2459 	ip6mr_for_each_table(mrt, net) {
2460 		if (t < s_t)
2461 			goto next_table;
2462 		if (t > s_t)
2463 			s_h = 0;
2464 		for (h = s_h; h < MFC6_LINES; h++) {
2465 			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2466 				if (e < s_e)
2467 					goto next_entry;
2468 				if (ip6mr_fill_mroute(mrt, skb,
2469 						      NETLINK_CB(cb->skb).portid,
2470 						      cb->nlh->nlmsg_seq,
2471 						      mfc, RTM_NEWROUTE,
2472 						      NLM_F_MULTI) < 0)
2473 					goto done;
2474 next_entry:
2475 				e++;
2476 			}
2477 			e = s_e = 0;
2478 		}
2479 		spin_lock_bh(&mfc_unres_lock);
2480 		list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
2481 			if (e < s_e)
2482 				goto next_entry2;
2483 			if (ip6mr_fill_mroute(mrt, skb,
2484 					      NETLINK_CB(cb->skb).portid,
2485 					      cb->nlh->nlmsg_seq,
2486 					      mfc, RTM_NEWROUTE,
2487 					      NLM_F_MULTI) < 0) {
2488 				spin_unlock_bh(&mfc_unres_lock);
2489 				goto done;
2490 			}
2491 next_entry2:
2492 			e++;
2493 		}
2494 		spin_unlock_bh(&mfc_unres_lock);
2495 		e = s_e = 0;
2496 		s_h = 0;
2497 next_table:
2498 		t++;
2499 	}
2500 done:
2501 	read_unlock(&mrt_lock);
2502 
2503 	cb->args[2] = e;
2504 	cb->args[1] = h;
2505 	cb->args[0] = t;
2506 
2507 	return skb->len;
2508 }
2509