xref: /openbmc/linux/net/ipv6/ip6mr.c (revision 63159f29be1df7f93563a8a0f78c5e65fc844ed6)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <asm/uaccess.h>
20 #include <linux/types.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
23 #include <linux/timer.h>
24 #include <linux/mm.h>
25 #include <linux/kernel.h>
26 #include <linux/fcntl.h>
27 #include <linux/stat.h>
28 #include <linux/socket.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/inetdevice.h>
32 #include <linux/proc_fs.h>
33 #include <linux/seq_file.h>
34 #include <linux/init.h>
35 #include <linux/slab.h>
36 #include <linux/compat.h>
37 #include <net/protocol.h>
38 #include <linux/skbuff.h>
39 #include <net/sock.h>
40 #include <net/raw.h>
41 #include <linux/notifier.h>
42 #include <linux/if_arp.h>
43 #include <net/checksum.h>
44 #include <net/netlink.h>
45 #include <net/fib_rules.h>
46 
47 #include <net/ipv6.h>
48 #include <net/ip6_route.h>
49 #include <linux/mroute6.h>
50 #include <linux/pim.h>
51 #include <net/addrconf.h>
52 #include <linux/netfilter_ipv6.h>
53 #include <linux/export.h>
54 #include <net/ip6_checksum.h>
55 #include <linux/netconf.h>
56 
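/*
 * Editor's summary of the structure below: one mr6_table holds all
 * per-table multicast routing state.  mroute6_sk is the control socket
 * of the user-space daemon, vif6_table the registered interfaces,
 * mfc6_cache_array the hash of resolved (S,G) entries, and
 * mfc6_unres_queue the entries still waiting for a user-space decision.
 */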
57 struct mr6_table {
58 	struct list_head	list;
59 	possible_net_t		net;
60 	u32			id;
61 	struct sock		*mroute6_sk;
62 	struct timer_list	ipmr_expire_timer;
63 	struct list_head	mfc6_unres_queue;
64 	struct list_head	mfc6_cache_array[MFC6_LINES];
65 	struct mif_device	vif6_table[MAXMIFS];
66 	int			maxvif;
67 	atomic_t		cache_resolve_queue_len;
68 	bool			mroute_do_assert;
69 	bool			mroute_do_pim;
70 #ifdef CONFIG_IPV6_PIMSM_V2
71 	int			mroute_reg_vif_num;
72 #endif
73 };
74 
75 struct ip6mr_rule {
76 	struct fib_rule		common;
77 };
78 
79 struct ip6mr_result {
80 	struct mr6_table	*mrt;
81 };
82 
83 /* Big lock, protecting the vif table, the mrt cache and the mroute socket state.
84    Note that changes are serialized via rtnl_lock.
85  */
86 
87 static DEFINE_RWLOCK(mrt_lock);
88 
89 /*
90  *	Multicast router control variables
91  */
92 
93 #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
94 
95 /* Special spinlock for queue of unresolved entries */
96 static DEFINE_SPINLOCK(mfc_unres_lock);
97 
98 /* We return to Alan's original scheme. The hash table of resolved
99    entries is changed only in process context and is protected
100    by the weak rwlock mrt_lock. The queue of unresolved entries is
101    protected by the strong spinlock mfc_unres_lock.
102 
103    This way the data path is entirely free of exclusive locks.
104  */
105 
106 static struct kmem_cache *mrt_cachep __read_mostly;
107 
108 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
109 static void ip6mr_free_table(struct mr6_table *mrt);
110 
111 static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
112 			   struct sk_buff *skb, struct mfc6_cache *cache);
113 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
114 			      mifi_t mifi, int assert);
115 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
116 			       struct mfc6_cache *c, struct rtmsg *rtm);
117 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
118 			      int cmd);
119 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
120 			       struct netlink_callback *cb);
121 static void mroute_clean_tables(struct mr6_table *mrt);
122 static void ipmr_expire_process(unsigned long arg);
123 
124 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
125 #define ip6mr_for_each_table(mrt, net) \
126 	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
127 
128 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
129 {
130 	struct mr6_table *mrt;
131 
132 	ip6mr_for_each_table(mrt, net) {
133 		if (mrt->id == id)
134 			return mrt;
135 	}
136 	return NULL;
137 }
138 
139 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
140 			    struct mr6_table **mrt)
141 {
142 	int err;
143 	struct ip6mr_result res;
144 	struct fib_lookup_arg arg = {
145 		.result = &res,
146 		.flags = FIB_LOOKUP_NOREF,
147 	};
148 
149 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
150 			       flowi6_to_flowi(flp6), 0, &arg);
151 	if (err < 0)
152 		return err;
153 	*mrt = res.mrt;
154 	return 0;
155 }
156 
157 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
158 			     int flags, struct fib_lookup_arg *arg)
159 {
160 	struct ip6mr_result *res = arg->result;
161 	struct mr6_table *mrt;
162 
163 	switch (rule->action) {
164 	case FR_ACT_TO_TBL:
165 		break;
166 	case FR_ACT_UNREACHABLE:
167 		return -ENETUNREACH;
168 	case FR_ACT_PROHIBIT:
169 		return -EACCES;
170 	case FR_ACT_BLACKHOLE:
171 	default:
172 		return -EINVAL;
173 	}
174 
175 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
176 	if (!mrt)
177 		return -EAGAIN;
178 	res->mrt = mrt;
179 	return 0;
180 }
181 
182 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
183 {
184 	return 1;
185 }
186 
187 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
188 	FRA_GENERIC_POLICY,
189 };
190 
191 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
192 				struct fib_rule_hdr *frh, struct nlattr **tb)
193 {
194 	return 0;
195 }
196 
197 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
198 			      struct nlattr **tb)
199 {
200 	return 1;
201 }
202 
203 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
204 			   struct fib_rule_hdr *frh)
205 {
206 	frh->dst_len = 0;
207 	frh->src_len = 0;
208 	frh->tos     = 0;
209 	return 0;
210 }
211 
212 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
213 	.family		= RTNL_FAMILY_IP6MR,
214 	.rule_size	= sizeof(struct ip6mr_rule),
215 	.addr_size	= sizeof(struct in6_addr),
216 	.action		= ip6mr_rule_action,
217 	.match		= ip6mr_rule_match,
218 	.configure	= ip6mr_rule_configure,
219 	.compare	= ip6mr_rule_compare,
220 	.default_pref	= fib_default_rule_pref,
221 	.fill		= ip6mr_rule_fill,
222 	.nlgroup	= RTNLGRP_IPV6_RULE,
223 	.policy		= ip6mr_rule_policy,
224 	.owner		= THIS_MODULE,
225 };
226 
227 static int __net_init ip6mr_rules_init(struct net *net)
228 {
229 	struct fib_rules_ops *ops;
230 	struct mr6_table *mrt;
231 	int err;
232 
233 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
234 	if (IS_ERR(ops))
235 		return PTR_ERR(ops);
236 
237 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
238 
239 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
240 	if (!mrt) {
241 		err = -ENOMEM;
242 		goto err1;
243 	}
244 
245 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
246 	if (err < 0)
247 		goto err2;
248 
249 	net->ipv6.mr6_rules_ops = ops;
250 	return 0;
251 
252 err2:
253 	kfree(mrt);
254 err1:
255 	fib_rules_unregister(ops);
256 	return err;
257 }
258 
259 static void __net_exit ip6mr_rules_exit(struct net *net)
260 {
261 	struct mr6_table *mrt, *next;
262 
263 	rtnl_lock();
264 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
265 		list_del(&mrt->list);
266 		ip6mr_free_table(mrt);
267 	}
268 	rtnl_unlock();
269 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
270 }
271 #else
272 #define ip6mr_for_each_table(mrt, net) \
273 	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
274 
275 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
276 {
277 	return net->ipv6.mrt6;
278 }
279 
280 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
281 			    struct mr6_table **mrt)
282 {
283 	*mrt = net->ipv6.mrt6;
284 	return 0;
285 }
286 
287 static int __net_init ip6mr_rules_init(struct net *net)
288 {
289 	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
290 	return net->ipv6.mrt6 ? 0 : -ENOMEM;
291 }
292 
293 static void __net_exit ip6mr_rules_exit(struct net *net)
294 {
295 	rtnl_lock();
296 	ip6mr_free_table(net->ipv6.mrt6);
297 	net->ipv6.mrt6 = NULL;
298 	rtnl_unlock();
299 }
300 #endif
301 
302 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
303 {
304 	struct mr6_table *mrt;
305 	unsigned int i;
306 
307 	mrt = ip6mr_get_table(net, id);
308 	if (mrt != NULL)
309 		return mrt;
310 
311 	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
312 	if (!mrt)
313 		return NULL;
314 	mrt->id = id;
315 	write_pnet(&mrt->net, net);
316 
317 	/* Forwarding cache */
318 	for (i = 0; i < MFC6_LINES; i++)
319 		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
320 
321 	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
322 
323 	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
324 		    (unsigned long)mrt);
325 
326 #ifdef CONFIG_IPV6_PIMSM_V2
327 	mrt->mroute_reg_vif_num = -1;
328 #endif
329 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
330 	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
331 #endif
332 	return mrt;
333 }
334 
335 static void ip6mr_free_table(struct mr6_table *mrt)
336 {
337 	del_timer(&mrt->ipmr_expire_timer);
338 	mroute_clean_tables(mrt);
339 	kfree(mrt);
340 }
341 
342 #ifdef CONFIG_PROC_FS
343 
344 struct ipmr_mfc_iter {
345 	struct seq_net_private p;
346 	struct mr6_table *mrt;
347 	struct list_head *cache;
348 	int ct;
349 };
350 
351 
352 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
353 					   struct ipmr_mfc_iter *it, loff_t pos)
354 {
355 	struct mr6_table *mrt = it->mrt;
356 	struct mfc6_cache *mfc;
357 
358 	read_lock(&mrt_lock);
359 	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
360 		it->cache = &mrt->mfc6_cache_array[it->ct];
361 		list_for_each_entry(mfc, it->cache, list)
362 			if (pos-- == 0)
363 				return mfc;
364 	}
365 	read_unlock(&mrt_lock);
366 
367 	spin_lock_bh(&mfc_unres_lock);
368 	it->cache = &mrt->mfc6_unres_queue;
369 	list_for_each_entry(mfc, it->cache, list)
370 		if (pos-- == 0)
371 			return mfc;
372 	spin_unlock_bh(&mfc_unres_lock);
373 
374 	it->cache = NULL;
375 	return NULL;
376 }
377 
378 /*
379  *	The /proc interfaces to multicast routing: /proc/ip6_mr_cache and /proc/ip6_mr_vif
380  */
381 
382 struct ipmr_vif_iter {
383 	struct seq_net_private p;
384 	struct mr6_table *mrt;
385 	int ct;
386 };
387 
388 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
389 					    struct ipmr_vif_iter *iter,
390 					    loff_t pos)
391 {
392 	struct mr6_table *mrt = iter->mrt;
393 
394 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
395 		if (!MIF_EXISTS(mrt, iter->ct))
396 			continue;
397 		if (pos-- == 0)
398 			return &mrt->vif6_table[iter->ct];
399 	}
400 	return NULL;
401 }
402 
403 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
404 	__acquires(mrt_lock)
405 {
406 	struct ipmr_vif_iter *iter = seq->private;
407 	struct net *net = seq_file_net(seq);
408 	struct mr6_table *mrt;
409 
410 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
411 	if (!mrt)
412 		return ERR_PTR(-ENOENT);
413 
414 	iter->mrt = mrt;
415 
416 	read_lock(&mrt_lock);
417 	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
418 		: SEQ_START_TOKEN;
419 }
420 
421 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
422 {
423 	struct ipmr_vif_iter *iter = seq->private;
424 	struct net *net = seq_file_net(seq);
425 	struct mr6_table *mrt = iter->mrt;
426 
427 	++*pos;
428 	if (v == SEQ_START_TOKEN)
429 		return ip6mr_vif_seq_idx(net, iter, 0);
430 
431 	while (++iter->ct < mrt->maxvif) {
432 		if (!MIF_EXISTS(mrt, iter->ct))
433 			continue;
434 		return &mrt->vif6_table[iter->ct];
435 	}
436 	return NULL;
437 }
438 
439 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
440 	__releases(mrt_lock)
441 {
442 	read_unlock(&mrt_lock);
443 }
444 
445 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
446 {
447 	struct ipmr_vif_iter *iter = seq->private;
448 	struct mr6_table *mrt = iter->mrt;
449 
450 	if (v == SEQ_START_TOKEN) {
451 		seq_puts(seq,
452 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
453 	} else {
454 		const struct mif_device *vif = v;
455 		const char *name = vif->dev ? vif->dev->name : "none";
456 
457 		seq_printf(seq,
458 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
459 			   vif - mrt->vif6_table,
460 			   name, vif->bytes_in, vif->pkt_in,
461 			   vif->bytes_out, vif->pkt_out,
462 			   vif->flags);
463 	}
464 	return 0;
465 }
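
/*
 * Illustrative only (not output captured from a real system): with the
 * header and format strings above, /proc/net/ip6_mr_vif renders
 * roughly as
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0             1500      10      1500      10 00000
 *	 1 pim6reg             0       0         0       0 00001
 */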
466 
467 static const struct seq_operations ip6mr_vif_seq_ops = {
468 	.start = ip6mr_vif_seq_start,
469 	.next  = ip6mr_vif_seq_next,
470 	.stop  = ip6mr_vif_seq_stop,
471 	.show  = ip6mr_vif_seq_show,
472 };
473 
474 static int ip6mr_vif_open(struct inode *inode, struct file *file)
475 {
476 	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
477 			    sizeof(struct ipmr_vif_iter));
478 }
479 
480 static const struct file_operations ip6mr_vif_fops = {
481 	.owner	 = THIS_MODULE,
482 	.open    = ip6mr_vif_open,
483 	.read    = seq_read,
484 	.llseek  = seq_lseek,
485 	.release = seq_release_net,
486 };
487 
488 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
489 {
490 	struct ipmr_mfc_iter *it = seq->private;
491 	struct net *net = seq_file_net(seq);
492 	struct mr6_table *mrt;
493 
494 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
495 	if (!mrt)
496 		return ERR_PTR(-ENOENT);
497 
498 	it->mrt = mrt;
499 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
500 		: SEQ_START_TOKEN;
501 }
502 
503 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
504 {
505 	struct mfc6_cache *mfc = v;
506 	struct ipmr_mfc_iter *it = seq->private;
507 	struct net *net = seq_file_net(seq);
508 	struct mr6_table *mrt = it->mrt;
509 
510 	++*pos;
511 
512 	if (v == SEQ_START_TOKEN)
513 		return ipmr_mfc_seq_idx(net, seq->private, 0);
514 
515 	if (mfc->list.next != it->cache)
516 		return list_entry(mfc->list.next, struct mfc6_cache, list);
517 
518 	if (it->cache == &mrt->mfc6_unres_queue)
519 		goto end_of_list;
520 
521 	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
522 
523 	while (++it->ct < MFC6_LINES) {
524 		it->cache = &mrt->mfc6_cache_array[it->ct];
525 		if (list_empty(it->cache))
526 			continue;
527 		return list_first_entry(it->cache, struct mfc6_cache, list);
528 	}
529 
530 	/* exhausted cache_array, show unresolved */
531 	read_unlock(&mrt_lock);
532 	it->cache = &mrt->mfc6_unres_queue;
533 	it->ct = 0;
534 
535 	spin_lock_bh(&mfc_unres_lock);
536 	if (!list_empty(it->cache))
537 		return list_first_entry(it->cache, struct mfc6_cache, list);
538 
539  end_of_list:
540 	spin_unlock_bh(&mfc_unres_lock);
541 	it->cache = NULL;
542 
543 	return NULL;
544 }
545 
546 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
547 {
548 	struct ipmr_mfc_iter *it = seq->private;
549 	struct mr6_table *mrt = it->mrt;
550 
551 	if (it->cache == &mrt->mfc6_unres_queue)
552 		spin_unlock_bh(&mfc_unres_lock);
553 	else if (it->cache == mrt->mfc6_cache_array)
554 		read_unlock(&mrt_lock);
555 }
556 
557 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
558 {
559 	int n;
560 
561 	if (v == SEQ_START_TOKEN) {
562 		seq_puts(seq,
563 			 "Group                            "
564 			 "Origin                           "
565 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
566 	} else {
567 		const struct mfc6_cache *mfc = v;
568 		const struct ipmr_mfc_iter *it = seq->private;
569 		struct mr6_table *mrt = it->mrt;
570 
571 		seq_printf(seq, "%pI6 %pI6 %-3hd",
572 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
573 			   mfc->mf6c_parent);
574 
575 		if (it->cache != &mrt->mfc6_unres_queue) {
576 			seq_printf(seq, " %8lu %8lu %8lu",
577 				   mfc->mfc_un.res.pkt,
578 				   mfc->mfc_un.res.bytes,
579 				   mfc->mfc_un.res.wrong_if);
580 			for (n = mfc->mfc_un.res.minvif;
581 			     n < mfc->mfc_un.res.maxvif; n++) {
582 				if (MIF_EXISTS(mrt, n) &&
583 				    mfc->mfc_un.res.ttls[n] < 255)
584 					seq_printf(seq,
585 						   " %2d:%-3d",
586 						   n, mfc->mfc_un.res.ttls[n]);
587 			}
588 		} else {
589 			/* unresolved mfc_caches don't contain
590 			 * pkt, bytes and wrong_if values
591 			 */
592 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
593 		}
594 		seq_putc(seq, '\n');
595 	}
596 	return 0;
597 }
598 
599 static const struct seq_operations ipmr_mfc_seq_ops = {
600 	.start = ipmr_mfc_seq_start,
601 	.next  = ipmr_mfc_seq_next,
602 	.stop  = ipmr_mfc_seq_stop,
603 	.show  = ipmr_mfc_seq_show,
604 };
605 
606 static int ipmr_mfc_open(struct inode *inode, struct file *file)
607 {
608 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
609 			    sizeof(struct ipmr_mfc_iter));
610 }
611 
612 static const struct file_operations ip6mr_mfc_fops = {
613 	.owner	 = THIS_MODULE,
614 	.open    = ipmr_mfc_open,
615 	.read    = seq_read,
616 	.llseek  = seq_lseek,
617 	.release = seq_release_net,
618 };
619 #endif
620 
621 #ifdef CONFIG_IPV6_PIMSM_V2
622 
623 static int pim6_rcv(struct sk_buff *skb)
624 {
625 	struct pimreghdr *pim;
626 	struct ipv6hdr   *encap;
627 	struct net_device  *reg_dev = NULL;
628 	struct net *net = dev_net(skb->dev);
629 	struct mr6_table *mrt;
630 	struct flowi6 fl6 = {
631 		.flowi6_iif	= skb->dev->ifindex,
632 		.flowi6_mark	= skb->mark,
633 	};
634 	int reg_vif_num;
635 
636 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
637 		goto drop;
638 
639 	pim = (struct pimreghdr *)skb_transport_header(skb);
640 	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
641 	    (pim->flags & PIM_NULL_REGISTER) ||
642 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
643 			     sizeof(*pim), IPPROTO_PIM,
644 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
645 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
646 		goto drop;
647 
648 	/* check if the inner packet is destined to a multicast group */
649 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
650 				   sizeof(*pim));
651 
652 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
653 	    encap->payload_len == 0 ||
654 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
655 		goto drop;
656 
657 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
658 		goto drop;
659 	reg_vif_num = mrt->mroute_reg_vif_num;
660 
661 	read_lock(&mrt_lock);
662 	if (reg_vif_num >= 0)
663 		reg_dev = mrt->vif6_table[reg_vif_num].dev;
664 	if (reg_dev)
665 		dev_hold(reg_dev);
666 	read_unlock(&mrt_lock);
667 
668 	if (!reg_dev)
669 		goto drop;
670 
671 	skb->mac_header = skb->network_header;
672 	skb_pull(skb, (u8 *)encap - skb->data);
673 	skb_reset_network_header(skb);
674 	skb->protocol = htons(ETH_P_IPV6);
675 	skb->ip_summed = CHECKSUM_NONE;
676 
677 	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
678 
679 	netif_rx(skb);
680 
681 	dev_put(reg_dev);
682 	return 0;
683  drop:
684 	kfree_skb(skb);
685 	return 0;
686 }
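
/*
 * Editor's note on the register format handled above: a PIM register
 * arriving on the wire is laid out as
 *
 *	[ outer IPv6 header, next header IPPROTO_PIM ]
 *	[ struct pimreghdr (8 bytes): type, flags, checksum ]
 *	[ inner, encapsulated IPv6 multicast packet ]
 *
 * pim6_rcv() validates the header and checksum, strips everything up to
 * the inner packet and re-injects it on the pim6reg device, so it is
 * routed as if it had arrived there.
 */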
687 
688 static const struct inet6_protocol pim6_protocol = {
689 	.handler	=	pim6_rcv,
690 };
691 
692 /* Service routines creating virtual interfaces: PIMREG */
693 
694 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
695 				      struct net_device *dev)
696 {
697 	struct net *net = dev_net(dev);
698 	struct mr6_table *mrt;
699 	struct flowi6 fl6 = {
700 		.flowi6_oif	= dev->ifindex,
701 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
702 		.flowi6_mark	= skb->mark,
703 	};
704 	int err;
705 
706 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
707 	if (err < 0) {
708 		kfree_skb(skb);
709 		return err;
710 	}
711 
712 	read_lock(&mrt_lock);
713 	dev->stats.tx_bytes += skb->len;
714 	dev->stats.tx_packets++;
715 	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
716 	read_unlock(&mrt_lock);
717 	kfree_skb(skb);
718 	return NETDEV_TX_OK;
719 }
720 
721 static const struct net_device_ops reg_vif_netdev_ops = {
722 	.ndo_start_xmit	= reg_vif_xmit,
723 };
724 
725 static void reg_vif_setup(struct net_device *dev)
726 {
727 	dev->type		= ARPHRD_PIMREG;
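	/* room on a 1500-byte link for the outer IPv6 header plus
	 * the 8-byte PIM register header
	 */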
728 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
729 	dev->flags		= IFF_NOARP;
730 	dev->netdev_ops		= &reg_vif_netdev_ops;
731 	dev->destructor		= free_netdev;
732 	dev->features		|= NETIF_F_NETNS_LOCAL;
733 }
734 
735 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
736 {
737 	struct net_device *dev;
738 	char name[IFNAMSIZ];
739 
740 	if (mrt->id == RT6_TABLE_DFLT)
741 		sprintf(name, "pim6reg");
742 	else
743 		sprintf(name, "pim6reg%u", mrt->id);
744 
745 	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
746 	if (!dev)
747 		return NULL;
748 
749 	dev_net_set(dev, net);
750 
751 	if (register_netdevice(dev)) {
752 		free_netdev(dev);
753 		return NULL;
754 	}
755 	dev->iflink = 0;
756 
757 	if (dev_open(dev))
758 		goto failure;
759 
760 	dev_hold(dev);
761 	return dev;
762 
763 failure:
764 	/* allow the register to be completed before unregistering. */
765 	rtnl_unlock();
766 	rtnl_lock();
767 
768 	unregister_netdevice(dev);
769 	return NULL;
770 }
771 #endif
772 
773 /*
774  *	Delete a VIF entry
775  */
776 
777 static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
778 {
779 	struct mif_device *v;
780 	struct net_device *dev;
781 	struct inet6_dev *in6_dev;
782 
783 	if (vifi < 0 || vifi >= mrt->maxvif)
784 		return -EADDRNOTAVAIL;
785 
786 	v = &mrt->vif6_table[vifi];
787 
788 	write_lock_bh(&mrt_lock);
789 	dev = v->dev;
790 	v->dev = NULL;
791 
792 	if (!dev) {
793 		write_unlock_bh(&mrt_lock);
794 		return -EADDRNOTAVAIL;
795 	}
796 
797 #ifdef CONFIG_IPV6_PIMSM_V2
798 	if (vifi == mrt->mroute_reg_vif_num)
799 		mrt->mroute_reg_vif_num = -1;
800 #endif
801 
802 	if (vifi + 1 == mrt->maxvif) {
803 		int tmp;
804 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
805 			if (MIF_EXISTS(mrt, tmp))
806 				break;
807 		}
808 		mrt->maxvif = tmp + 1;
809 	}
810 
811 	write_unlock_bh(&mrt_lock);
812 
813 	dev_set_allmulti(dev, -1);
814 
815 	in6_dev = __in6_dev_get(dev);
816 	if (in6_dev) {
817 		in6_dev->cnf.mc_forwarding--;
818 		inet6_netconf_notify_devconf(dev_net(dev),
819 					     NETCONFA_MC_FORWARDING,
820 					     dev->ifindex, &in6_dev->cnf);
821 	}
822 
823 	if (v->flags & MIFF_REGISTER)
824 		unregister_netdevice_queue(dev, head);
825 
826 	dev_put(dev);
827 	return 0;
828 }
829 
830 static inline void ip6mr_cache_free(struct mfc6_cache *c)
831 {
832 	kmem_cache_free(mrt_cachep, c);
833 }
834 
835 /* Destroy an unresolved cache entry, killing queued skbs
836    and reporting error to netlink readers.
837  */
838 
839 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
840 {
841 	struct net *net = read_pnet(&mrt->net);
842 	struct sk_buff *skb;
843 
844 	atomic_dec(&mrt->cache_resolve_queue_len);
845 
846 	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
847 		if (ipv6_hdr(skb)->version == 0) {
848 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
849 			nlh->nlmsg_type = NLMSG_ERROR;
850 			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
851 			skb_trim(skb, nlh->nlmsg_len);
852 			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
853 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
854 		} else
855 			kfree_skb(skb);
856 	}
857 
858 	ip6mr_cache_free(c);
859 }
860 
861 
862 /* Timer process for the unresolved queue. */
863 
864 static void ipmr_do_expire_process(struct mr6_table *mrt)
865 {
866 	unsigned long now = jiffies;
867 	unsigned long expires = 10 * HZ;
868 	struct mfc6_cache *c, *next;
869 
870 	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
871 		if (time_after(c->mfc_un.unres.expires, now)) {
872 			/* not yet... */
873 			unsigned long interval = c->mfc_un.unres.expires - now;
874 			if (interval < expires)
875 				expires = interval;
876 			continue;
877 		}
878 
879 		list_del(&c->list);
880 		mr6_netlink_event(mrt, c, RTM_DELROUTE);
881 		ip6mr_destroy_unres(mrt, c);
882 	}
883 
884 	if (!list_empty(&mrt->mfc6_unres_queue))
885 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
886 }
887 
888 static void ipmr_expire_process(unsigned long arg)
889 {
890 	struct mr6_table *mrt = (struct mr6_table *)arg;
891 
892 	if (!spin_trylock(&mfc_unres_lock)) {
893 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
894 		return;
895 	}
896 
897 	if (!list_empty(&mrt->mfc6_unres_queue))
898 		ipmr_do_expire_process(mrt);
899 
900 	spin_unlock(&mfc_unres_lock);
901 }
902 
903 /* Fill the oif list. Called with mrt_lock held for writing. */
904 
905 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
906 				    unsigned char *ttls)
907 {
908 	int vifi;
909 
910 	cache->mfc_un.res.minvif = MAXMIFS;
911 	cache->mfc_un.res.maxvif = 0;
912 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
913 
914 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
915 		if (MIF_EXISTS(mrt, vifi) &&
916 		    ttls[vifi] && ttls[vifi] < 255) {
917 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
918 			if (cache->mfc_un.res.minvif > vifi)
919 				cache->mfc_un.res.minvif = vifi;
920 			if (cache->mfc_un.res.maxvif <= vifi)
921 				cache->mfc_un.res.maxvif = vifi + 1;
922 		}
923 	}
924 }
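
/*
 * Worked example (editor's illustration): with maxvif == 4 and
 * ttls == { 0, 1, 255, 3 }, only vifs 1 and 3 qualify (nonzero and
 * below 255), so minvif becomes 1, maxvif becomes 4, and res.ttls
 * keeps 255 for vifs 0 and 2.
 */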
925 
926 static int mif6_add(struct net *net, struct mr6_table *mrt,
927 		    struct mif6ctl *vifc, int mrtsock)
928 {
929 	int vifi = vifc->mif6c_mifi;
930 	struct mif_device *v = &mrt->vif6_table[vifi];
931 	struct net_device *dev;
932 	struct inet6_dev *in6_dev;
933 	int err;
934 
935 	/* Is vif busy ? */
936 	if (MIF_EXISTS(mrt, vifi))
937 		return -EADDRINUSE;
938 
939 	switch (vifc->mif6c_flags) {
940 #ifdef CONFIG_IPV6_PIMSM_V2
941 	case MIFF_REGISTER:
942 		/*
943 		 * Special Purpose VIF in PIM
944 		 * All the packets will be sent to the daemon
945 		 */
946 		if (mrt->mroute_reg_vif_num >= 0)
947 			return -EADDRINUSE;
948 		dev = ip6mr_reg_vif(net, mrt);
949 		if (!dev)
950 			return -ENOBUFS;
951 		err = dev_set_allmulti(dev, 1);
952 		if (err) {
953 			unregister_netdevice(dev);
954 			dev_put(dev);
955 			return err;
956 		}
957 		break;
958 #endif
959 	case 0:
960 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
961 		if (!dev)
962 			return -EADDRNOTAVAIL;
963 		err = dev_set_allmulti(dev, 1);
964 		if (err) {
965 			dev_put(dev);
966 			return err;
967 		}
968 		break;
969 	default:
970 		return -EINVAL;
971 	}
972 
973 	in6_dev = __in6_dev_get(dev);
974 	if (in6_dev) {
975 		in6_dev->cnf.mc_forwarding++;
976 		inet6_netconf_notify_devconf(dev_net(dev),
977 					     NETCONFA_MC_FORWARDING,
978 					     dev->ifindex, &in6_dev->cnf);
979 	}
980 
981 	/*
982 	 *	Fill in the VIF structures
983 	 */
984 	v->rate_limit = vifc->vifc_rate_limit;
985 	v->flags = vifc->mif6c_flags;
986 	if (!mrtsock)
987 		v->flags |= VIFF_STATIC;
988 	v->threshold = vifc->vifc_threshold;
989 	v->bytes_in = 0;
990 	v->bytes_out = 0;
991 	v->pkt_in = 0;
992 	v->pkt_out = 0;
993 	v->link = dev->ifindex;
994 	if (v->flags & MIFF_REGISTER)
995 		v->link = dev->iflink;
996 
997 	/* And finish update writing critical data */
998 	write_lock_bh(&mrt_lock);
999 	v->dev = dev;
1000 #ifdef CONFIG_IPV6_PIMSM_V2
1001 	if (v->flags & MIFF_REGISTER)
1002 		mrt->mroute_reg_vif_num = vifi;
1003 #endif
1004 	if (vifi + 1 > mrt->maxvif)
1005 		mrt->maxvif = vifi + 1;
1006 	write_unlock_bh(&mrt_lock);
1007 	return 0;
1008 }
1009 
1010 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
1011 					   const struct in6_addr *origin,
1012 					   const struct in6_addr *mcastgrp)
1013 {
1014 	int line = MFC6_HASH(mcastgrp, origin);
1015 	struct mfc6_cache *c;
1016 
1017 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1018 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1019 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1020 			return c;
1021 	}
1022 	return NULL;
1023 }
1024 
1025 /* Look for a (*,*,oif) entry */
1026 static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
1027 						      mifi_t mifi)
1028 {
1029 	int line = MFC6_HASH(&in6addr_any, &in6addr_any);
1030 	struct mfc6_cache *c;
1031 
1032 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1033 		if (ipv6_addr_any(&c->mf6c_origin) &&
1034 		    ipv6_addr_any(&c->mf6c_mcastgrp) &&
1035 		    (c->mfc_un.res.ttls[mifi] < 255))
1036 			return c;
1037 
1038 	return NULL;
1039 }
1040 
1041 /* Look for a (*,G) entry */
1042 static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
1043 					       struct in6_addr *mcastgrp,
1044 					       mifi_t mifi)
1045 {
1046 	int line = MFC6_HASH(mcastgrp, &in6addr_any);
1047 	struct mfc6_cache *c, *proxy;
1048 
1049 	if (ipv6_addr_any(mcastgrp))
1050 		goto skip;
1051 
1052 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1053 		if (ipv6_addr_any(&c->mf6c_origin) &&
1054 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
1055 			if (c->mfc_un.res.ttls[mifi] < 255)
1056 				return c;
1057 
1058 			/* It's ok if the mifi is part of the static tree */
1059 			proxy = ip6mr_cache_find_any_parent(mrt,
1060 							    c->mf6c_parent);
1061 			if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
1062 				return c;
1063 		}
1064 
1065 skip:
1066 	return ip6mr_cache_find_any_parent(mrt, mifi);
1067 }
1068 
1069 /*
1070  *	Allocate a multicast cache entry
1071  */
1072 static struct mfc6_cache *ip6mr_cache_alloc(void)
1073 {
1074 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1075 	if (!c)
1076 		return NULL;
1077 	c->mfc_un.res.minvif = MAXMIFS;
1078 	return c;
1079 }
1080 
1081 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1082 {
1083 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1084 	if (!c)
1085 		return NULL;
1086 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
1087 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
1088 	return c;
1089 }
1090 
1091 /*
1092  *	A cache entry has gone into a resolved state from queued
1093  */
1094 
1095 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1096 				struct mfc6_cache *uc, struct mfc6_cache *c)
1097 {
1098 	struct sk_buff *skb;
1099 
1100 	/*
1101 	 *	Play the pending entries through our router
1102 	 */
1103 
1104 	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1105 		if (ipv6_hdr(skb)->version == 0) {
1106 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1107 
1108 			if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
1109 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1110 			} else {
1111 				nlh->nlmsg_type = NLMSG_ERROR;
1112 				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1113 				skb_trim(skb, nlh->nlmsg_len);
1114 				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1115 			}
1116 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1117 		} else
1118 			ip6_mr_forward(net, mrt, skb, c);
1119 	}
1120 }
1121 
1122 /*
1123  *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1124  *	expects the following bizarre scheme.
1125  *
1126  *	Called under mrt_lock.
1127  */
1128 
1129 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1130 			      mifi_t mifi, int assert)
1131 {
1132 	struct sk_buff *skb;
1133 	struct mrt6msg *msg;
1134 	int ret;
1135 
1136 #ifdef CONFIG_IPV6_PIMSM_V2
1137 	if (assert == MRT6MSG_WHOLEPKT)
1138 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1139 						+sizeof(*msg));
1140 	else
1141 #endif
1142 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1143 
1144 	if (!skb)
1145 		return -ENOBUFS;
1146 
1147 	/* I suppose that internal messages
1148 	 * do not require checksums */
1149 
1150 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1151 
1152 #ifdef CONFIG_IPV6_PIMSM_V2
1153 	if (assert == MRT6MSG_WHOLEPKT) {
1154 		/* Ugly, but we have no choice with this interface.
1155 		   Duplicate old header, fix length etc.
1156 		   And all this only to mangle msg->im6_msgtype and
1157 		   to set msg->im6_mbz to "mbz" :-)
1158 		 */
1159 		skb_push(skb, -skb_network_offset(pkt));
1160 
1161 		skb_push(skb, sizeof(*msg));
1162 		skb_reset_transport_header(skb);
1163 		msg = (struct mrt6msg *)skb_transport_header(skb);
1164 		msg->im6_mbz = 0;
1165 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1166 		msg->im6_mif = mrt->mroute_reg_vif_num;
1167 		msg->im6_pad = 0;
1168 		msg->im6_src = ipv6_hdr(pkt)->saddr;
1169 		msg->im6_dst = ipv6_hdr(pkt)->daddr;
1170 
1171 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1172 	} else
1173 #endif
1174 	{
1175 	/*
1176 	 *	Copy the IP header
1177 	 */
1178 
1179 	skb_put(skb, sizeof(struct ipv6hdr));
1180 	skb_reset_network_header(skb);
1181 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1182 
1183 	/*
1184 	 *	Add our header
1185 	 */
1186 	skb_put(skb, sizeof(*msg));
1187 	skb_reset_transport_header(skb);
1188 	msg = (struct mrt6msg *)skb_transport_header(skb);
1189 
1190 	msg->im6_mbz = 0;
1191 	msg->im6_msgtype = assert;
1192 	msg->im6_mif = mifi;
1193 	msg->im6_pad = 0;
1194 	msg->im6_src = ipv6_hdr(pkt)->saddr;
1195 	msg->im6_dst = ipv6_hdr(pkt)->daddr;
1196 
1197 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1198 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1199 	}
1200 
1201 	if (!mrt->mroute6_sk) {
1202 		kfree_skb(skb);
1203 		return -EINVAL;
1204 	}
1205 
1206 	/*
1207 	 *	Deliver to user space multicast routing algorithms
1208 	 */
1209 	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1210 	if (ret < 0) {
1211 		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1212 		kfree_skb(skb);
1213 	}
1214 
1215 	return ret;
1216 }
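
/*
 * Editor's sketch (not part of the kernel source): how a user-space
 * daemon such as pim6sd might consume the reports queued above.  The
 * descriptor is the raw ICMPv6 socket on which MRT6_INIT was issued;
 * the name "fd" is an assumption of this illustration.
 *
 *	#include <unistd.h>
 *	#include <linux/mroute6.h>
 *
 *	static void read_one_report(int fd)
 *	{
 *		char buf[8192];
 *		ssize_t n = read(fd, buf, sizeof(buf));
 *		struct mrt6msg *msg = (struct mrt6msg *)buf;
 *
 *		if (n < (ssize_t)sizeof(*msg) || msg->im6_mbz != 0)
 *			return;		// not an upcall
 *		switch (msg->im6_msgtype) {
 *		case MRT6MSG_NOCACHE:	// resolve (im6_src, im6_dst), install an MFC entry
 *			break;
 *		case MRT6MSG_WRONGMIF:	// consider a PIM assert for mif im6_mif
 *			break;
 *		case MRT6MSG_WHOLEPKT:	// PIM register payload follows the header
 *			break;
 *		}
 *	}
 */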
1217 
1218 /*
1219  *	Queue a packet for resolution; the unresolved cache entry is handled under mfc_unres_lock.
1220  */
1221 
1222 static int
1223 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1224 {
1225 	bool found = false;
1226 	int err;
1227 	struct mfc6_cache *c;
1228 
1229 	spin_lock_bh(&mfc_unres_lock);
1230 	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1231 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1232 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1233 			found = true;
1234 			break;
1235 		}
1236 	}
1237 
1238 	if (!found) {
1239 		/*
1240 		 *	Create a new entry if allowable
1241 		 */
1242 
1243 		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1244 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
1245 			spin_unlock_bh(&mfc_unres_lock);
1246 
1247 			kfree_skb(skb);
1248 			return -ENOBUFS;
1249 		}
1250 
1251 		/*
1252 		 *	Fill in the new cache entry
1253 		 */
1254 		c->mf6c_parent = -1;
1255 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1256 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1257 
1258 		/*
1259 		 *	Reflect first query at pim6sd
1260 		 */
1261 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1262 		if (err < 0) {
1263 			/* If the report failed, throw the cache entry
1264 			   out - Brad Parker
1265 			 */
1266 			spin_unlock_bh(&mfc_unres_lock);
1267 
1268 			ip6mr_cache_free(c);
1269 			kfree_skb(skb);
1270 			return err;
1271 		}
1272 
1273 		atomic_inc(&mrt->cache_resolve_queue_len);
1274 		list_add(&c->list, &mrt->mfc6_unres_queue);
1275 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1276 
1277 		ipmr_do_expire_process(mrt);
1278 	}
1279 
1280 	/*
1281 	 *	See if we can append the packet
1282 	 */
1283 	if (c->mfc_un.unres.unresolved.qlen > 3) {
1284 		kfree_skb(skb);
1285 		err = -ENOBUFS;
1286 	} else {
1287 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1288 		err = 0;
1289 	}
1290 
1291 	spin_unlock_bh(&mfc_unres_lock);
1292 	return err;
1293 }
1294 
1295 /*
1296  *	MFC6 cache manipulation by user space
1297  */
1298 
1299 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
1300 			    int parent)
1301 {
1302 	int line;
1303 	struct mfc6_cache *c, *next;
1304 
1305 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1306 
1307 	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1308 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1309 		    ipv6_addr_equal(&c->mf6c_mcastgrp,
1310 				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1311 		    (parent == -1 || parent == c->mf6c_parent)) {
1312 			write_lock_bh(&mrt_lock);
1313 			list_del(&c->list);
1314 			write_unlock_bh(&mrt_lock);
1315 
1316 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1317 			ip6mr_cache_free(c);
1318 			return 0;
1319 		}
1320 	}
1321 	return -ENOENT;
1322 }
1323 
1324 static int ip6mr_device_event(struct notifier_block *this,
1325 			      unsigned long event, void *ptr)
1326 {
1327 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1328 	struct net *net = dev_net(dev);
1329 	struct mr6_table *mrt;
1330 	struct mif_device *v;
1331 	int ct;
1332 	LIST_HEAD(list);
1333 
1334 	if (event != NETDEV_UNREGISTER)
1335 		return NOTIFY_DONE;
1336 
1337 	ip6mr_for_each_table(mrt, net) {
1338 		v = &mrt->vif6_table[0];
1339 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1340 			if (v->dev == dev)
1341 				mif6_delete(mrt, ct, &list);
1342 		}
1343 	}
1344 	unregister_netdevice_many(&list);
1345 
1346 	return NOTIFY_DONE;
1347 }
1348 
1349 static struct notifier_block ip6_mr_notifier = {
1350 	.notifier_call = ip6mr_device_event
1351 };
1352 
1353 /*
1354  *	Setup for IPv6 multicast routing
1355  */
1356 
1357 static int __net_init ip6mr_net_init(struct net *net)
1358 {
1359 	int err;
1360 
1361 	err = ip6mr_rules_init(net);
1362 	if (err < 0)
1363 		goto fail;
1364 
1365 #ifdef CONFIG_PROC_FS
1366 	err = -ENOMEM;
1367 	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
1368 		goto proc_vif_fail;
1369 	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
1370 		goto proc_cache_fail;
1371 #endif
1372 
1373 	return 0;
1374 
1375 #ifdef CONFIG_PROC_FS
1376 proc_cache_fail:
1377 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1378 proc_vif_fail:
1379 	ip6mr_rules_exit(net);
1380 #endif
1381 fail:
1382 	return err;
1383 }
1384 
1385 static void __net_exit ip6mr_net_exit(struct net *net)
1386 {
1387 #ifdef CONFIG_PROC_FS
1388 	remove_proc_entry("ip6_mr_cache", net->proc_net);
1389 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1390 #endif
1391 	ip6mr_rules_exit(net);
1392 }
1393 
1394 static struct pernet_operations ip6mr_net_ops = {
1395 	.init = ip6mr_net_init,
1396 	.exit = ip6mr_net_exit,
1397 };
1398 
1399 int __init ip6_mr_init(void)
1400 {
1401 	int err;
1402 
1403 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1404 				       sizeof(struct mfc6_cache),
1405 				       0, SLAB_HWCACHE_ALIGN,
1406 				       NULL);
1407 	if (!mrt_cachep)
1408 		return -ENOMEM;
1409 
1410 	err = register_pernet_subsys(&ip6mr_net_ops);
1411 	if (err)
1412 		goto reg_pernet_fail;
1413 
1414 	err = register_netdevice_notifier(&ip6_mr_notifier);
1415 	if (err)
1416 		goto reg_notif_fail;
1417 #ifdef CONFIG_IPV6_PIMSM_V2
1418 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1419 		pr_err("%s: can't add PIM protocol\n", __func__);
1420 		err = -EAGAIN;
1421 		goto add_proto_fail;
1422 	}
1423 #endif
1424 	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
1425 		      ip6mr_rtm_dumproute, NULL);
1426 	return 0;
1427 #ifdef CONFIG_IPV6_PIMSM_V2
1428 add_proto_fail:
1429 	unregister_netdevice_notifier(&ip6_mr_notifier);
1430 #endif
1431 reg_notif_fail:
1432 	unregister_pernet_subsys(&ip6mr_net_ops);
1433 reg_pernet_fail:
1434 	kmem_cache_destroy(mrt_cachep);
1435 	return err;
1436 }
1437 
1438 void ip6_mr_cleanup(void)
1439 {
1440 	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
1441 #ifdef CONFIG_IPV6_PIMSM_V2
1442 	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1443 #endif
1444 	unregister_netdevice_notifier(&ip6_mr_notifier);
1445 	unregister_pernet_subsys(&ip6mr_net_ops);
1446 	kmem_cache_destroy(mrt_cachep);
1447 }
1448 
1449 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1450 			 struct mf6cctl *mfc, int mrtsock, int parent)
1451 {
1452 	bool found = false;
1453 	int line;
1454 	struct mfc6_cache *uc, *c;
1455 	unsigned char ttls[MAXMIFS];
1456 	int i;
1457 
1458 	if (mfc->mf6cc_parent >= MAXMIFS)
1459 		return -ENFILE;
1460 
1461 	memset(ttls, 255, MAXMIFS);
1462 	for (i = 0; i < MAXMIFS; i++) {
1463 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1464 			ttls[i] = 1;
1465 
1466 	}
1467 
1468 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1469 
1470 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1471 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1472 		    ipv6_addr_equal(&c->mf6c_mcastgrp,
1473 				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1474 		    (parent == -1 || parent == mfc->mf6cc_parent)) {
1475 			found = true;
1476 			break;
1477 		}
1478 	}
1479 
1480 	if (found) {
1481 		write_lock_bh(&mrt_lock);
1482 		c->mf6c_parent = mfc->mf6cc_parent;
1483 		ip6mr_update_thresholds(mrt, c, ttls);
1484 		if (!mrtsock)
1485 			c->mfc_flags |= MFC_STATIC;
1486 		write_unlock_bh(&mrt_lock);
1487 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1488 		return 0;
1489 	}
1490 
1491 	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1492 	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1493 		return -EINVAL;
1494 
1495 	c = ip6mr_cache_alloc();
1496 	if (!c)
1497 		return -ENOMEM;
1498 
1499 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1500 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1501 	c->mf6c_parent = mfc->mf6cc_parent;
1502 	ip6mr_update_thresholds(mrt, c, ttls);
1503 	if (!mrtsock)
1504 		c->mfc_flags |= MFC_STATIC;
1505 
1506 	write_lock_bh(&mrt_lock);
1507 	list_add(&c->list, &mrt->mfc6_cache_array[line]);
1508 	write_unlock_bh(&mrt_lock);
1509 
1510 	/*
1511 	 *	Check to see if we resolved a queued list. If so we
1512 	 *	need to send on the frames and tidy up.
1513 	 */
1514 	found = false;
1515 	spin_lock_bh(&mfc_unres_lock);
1516 	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1517 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1518 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1519 			list_del(&uc->list);
1520 			atomic_dec(&mrt->cache_resolve_queue_len);
1521 			found = true;
1522 			break;
1523 		}
1524 	}
1525 	if (list_empty(&mrt->mfc6_unres_queue))
1526 		del_timer(&mrt->ipmr_expire_timer);
1527 	spin_unlock_bh(&mfc_unres_lock);
1528 
1529 	if (found) {
1530 		ip6mr_cache_resolve(net, mrt, uc, c);
1531 		ip6mr_cache_free(uc);
1532 	}
1533 	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1534 	return 0;
1535 }
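
/*
 * Editor's sketch (illustrative only): the mf6cctl that user space
 * passes down to reach ip6mr_mfc_add() above.  The variables source,
 * group and fd are assumptions of the example.
 *
 *	struct mf6cctl mfc;
 *
 *	memset(&mfc, 0, sizeof(mfc));
 *	mfc.mf6cc_origin.sin6_addr = source;
 *	mfc.mf6cc_mcastgrp.sin6_addr = group;
 *	mfc.mf6cc_parent = 0;			// incoming mif
 *	IF_SET(1, &mfc.mf6cc_ifset);		// forward out of mif 1
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_ADD_MFC, &mfc, sizeof(mfc));
 */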
1536 
1537 /*
1538  *	Close the multicast socket, and clear the vif tables etc
1539  */
1540 
1541 static void mroute_clean_tables(struct mr6_table *mrt)
1542 {
1543 	int i;
1544 	LIST_HEAD(list);
1545 	struct mfc6_cache *c, *next;
1546 
1547 	/*
1548 	 *	Shut down all active vif entries
1549 	 */
1550 	for (i = 0; i < mrt->maxvif; i++) {
1551 		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1552 			mif6_delete(mrt, i, &list);
1553 	}
1554 	unregister_netdevice_many(&list);
1555 
1556 	/*
1557 	 *	Wipe the cache
1558 	 */
1559 	for (i = 0; i < MFC6_LINES; i++) {
1560 		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1561 			if (c->mfc_flags & MFC_STATIC)
1562 				continue;
1563 			write_lock_bh(&mrt_lock);
1564 			list_del(&c->list);
1565 			write_unlock_bh(&mrt_lock);
1566 
1567 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1568 			ip6mr_cache_free(c);
1569 		}
1570 	}
1571 
1572 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1573 		spin_lock_bh(&mfc_unres_lock);
1574 		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1575 			list_del(&c->list);
1576 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1577 			ip6mr_destroy_unres(mrt, c);
1578 		}
1579 		spin_unlock_bh(&mfc_unres_lock);
1580 	}
1581 }
1582 
1583 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1584 {
1585 	int err = 0;
1586 	struct net *net = sock_net(sk);
1587 
1588 	rtnl_lock();
1589 	write_lock_bh(&mrt_lock);
1590 	if (likely(mrt->mroute6_sk == NULL)) {
1591 		mrt->mroute6_sk = sk;
1592 		net->ipv6.devconf_all->mc_forwarding++;
1593 		inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
1594 					     NETCONFA_IFINDEX_ALL,
1595 					     net->ipv6.devconf_all);
1596 	}
1597 	else
1598 		err = -EADDRINUSE;
1599 	write_unlock_bh(&mrt_lock);
1600 
1601 	rtnl_unlock();
1602 
1603 	return err;
1604 }
1605 
1606 int ip6mr_sk_done(struct sock *sk)
1607 {
1608 	int err = -EACCES;
1609 	struct net *net = sock_net(sk);
1610 	struct mr6_table *mrt;
1611 
1612 	rtnl_lock();
1613 	ip6mr_for_each_table(mrt, net) {
1614 		if (sk == mrt->mroute6_sk) {
1615 			write_lock_bh(&mrt_lock);
1616 			mrt->mroute6_sk = NULL;
1617 			net->ipv6.devconf_all->mc_forwarding--;
1618 			inet6_netconf_notify_devconf(net,
1619 						     NETCONFA_MC_FORWARDING,
1620 						     NETCONFA_IFINDEX_ALL,
1621 						     net->ipv6.devconf_all);
1622 			write_unlock_bh(&mrt_lock);
1623 
1624 			mroute_clean_tables(mrt);
1625 			err = 0;
1626 			break;
1627 		}
1628 	}
1629 	rtnl_unlock();
1630 
1631 	return err;
1632 }
1633 
1634 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1635 {
1636 	struct mr6_table *mrt;
1637 	struct flowi6 fl6 = {
1638 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
1639 		.flowi6_oif	= skb->dev->ifindex,
1640 		.flowi6_mark	= skb->mark,
1641 	};
1642 
1643 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1644 		return NULL;
1645 
1646 	return mrt->mroute6_sk;
1647 }
1648 
1649 /*
1650  *	Socket options and virtual interface manipulation. The whole
1651  *	virtual interface system is a complete heap, but unfortunately
1652  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1653  *	MOSPF/PIM router set up we can clean this up.
1654  */
1655 
1656 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1657 {
1658 	int ret, parent = 0;
1659 	struct mif6ctl vif;
1660 	struct mf6cctl mfc;
1661 	mifi_t mifi;
1662 	struct net *net = sock_net(sk);
1663 	struct mr6_table *mrt;
1664 
1665 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1666 	if (!mrt)
1667 		return -ENOENT;
1668 
1669 	if (optname != MRT6_INIT) {
1670 		if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
1671 			return -EACCES;
1672 	}
1673 
1674 	switch (optname) {
1675 	case MRT6_INIT:
1676 		if (sk->sk_type != SOCK_RAW ||
1677 		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1678 			return -EOPNOTSUPP;
1679 		if (optlen < sizeof(int))
1680 			return -EINVAL;
1681 
1682 		return ip6mr_sk_init(mrt, sk);
1683 
1684 	case MRT6_DONE:
1685 		return ip6mr_sk_done(sk);
1686 
1687 	case MRT6_ADD_MIF:
1688 		if (optlen < sizeof(vif))
1689 			return -EINVAL;
1690 		if (copy_from_user(&vif, optval, sizeof(vif)))
1691 			return -EFAULT;
1692 		if (vif.mif6c_mifi >= MAXMIFS)
1693 			return -ENFILE;
1694 		rtnl_lock();
1695 		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1696 		rtnl_unlock();
1697 		return ret;
1698 
1699 	case MRT6_DEL_MIF:
1700 		if (optlen < sizeof(mifi_t))
1701 			return -EINVAL;
1702 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1703 			return -EFAULT;
1704 		rtnl_lock();
1705 		ret = mif6_delete(mrt, mifi, NULL);
1706 		rtnl_unlock();
1707 		return ret;
1708 
1709 	/*
1710 	 *	Manipulate the forwarding caches. These live
1711 	 *	in a sort of kernel/user symbiosis.
1712 	 */
1713 	case MRT6_ADD_MFC:
1714 	case MRT6_DEL_MFC:
1715 		parent = -1;
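		/* fall through */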
1716 	case MRT6_ADD_MFC_PROXY:
1717 	case MRT6_DEL_MFC_PROXY:
1718 		if (optlen < sizeof(mfc))
1719 			return -EINVAL;
1720 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1721 			return -EFAULT;
1722 		if (parent == 0)
1723 			parent = mfc.mf6cc_parent;
1724 		rtnl_lock();
1725 		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1726 			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1727 		else
1728 			ret = ip6mr_mfc_add(net, mrt, &mfc,
1729 					    sk == mrt->mroute6_sk, parent);
1730 		rtnl_unlock();
1731 		return ret;
1732 
1733 	/*
1734 	 *	Control PIM assert (activating PIM also activates assert)
1735 	 */
1736 	case MRT6_ASSERT:
1737 	{
1738 		int v;
1739 
1740 		if (optlen != sizeof(v))
1741 			return -EINVAL;
1742 		if (get_user(v, (int __user *)optval))
1743 			return -EFAULT;
1744 		mrt->mroute_do_assert = v;
1745 		return 0;
1746 	}
1747 
1748 #ifdef CONFIG_IPV6_PIMSM_V2
1749 	case MRT6_PIM:
1750 	{
1751 		int v;
1752 
1753 		if (optlen != sizeof(v))
1754 			return -EINVAL;
1755 		if (get_user(v, (int __user *)optval))
1756 			return -EFAULT;
1757 		v = !!v;
1758 		rtnl_lock();
1759 		ret = 0;
1760 		if (v != mrt->mroute_do_pim) {
1761 			mrt->mroute_do_pim = v;
1762 			mrt->mroute_do_assert = v;
1763 		}
1764 		rtnl_unlock();
1765 		return ret;
1766 	}
1767 
1768 #endif
1769 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1770 	case MRT6_TABLE:
1771 	{
1772 		u32 v;
1773 
1774 		if (optlen != sizeof(u32))
1775 			return -EINVAL;
1776 		if (get_user(v, (u32 __user *)optval))
1777 			return -EFAULT;
1778 		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1779 		if (v != RT_TABLE_DEFAULT && v >= 100000000)
1780 			return -EINVAL;
1781 		if (sk == mrt->mroute6_sk)
1782 			return -EBUSY;
1783 
1784 		rtnl_lock();
1785 		ret = 0;
1786 		if (!ip6mr_new_table(net, v))
1787 			ret = -ENOMEM;
1788 		raw6_sk(sk)->ip6mr_table = v;
1789 		rtnl_unlock();
1790 		return ret;
1791 	}
1792 #endif
1793 	/*
1794 	 *	Spurious command, or MRT6_VERSION which you cannot
1795 	 *	set.
1796 	 */
1797 	default:
1798 		return -ENOPROTOOPT;
1799 	}
1800 }
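
/*
 * Editor's sketch (illustrative only): the minimal user-space sequence
 * the handler above expects.  Error handling is omitted, and the mif
 * index and ifindex are assumptions of the example.
 *
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *	#include <linux/mroute6.h>
 *	#include <string.h>
 *
 *	static int start_mrouting(unsigned int ifindex)
 *	{
 *		int one = 1;
 *		struct mif6ctl mif;
 *		int fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *
 *		// become the mroute socket; only SOCK_RAW/ICMPv6 is accepted
 *		setsockopt(fd, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *
 *		// register a physical interface as mif 0
 *		memset(&mif, 0, sizeof(mif));
 *		mif.mif6c_mifi = 0;
 *		mif.mif6c_pifi = ifindex;
 *		setsockopt(fd, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));
 *		return fd;
 *	}
 */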
1801 
1802 /*
1803  *	Getsockopt support for the multicast routing system.
1804  */
1805 
1806 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1807 			  int __user *optlen)
1808 {
1809 	int olr;
1810 	int val;
1811 	struct net *net = sock_net(sk);
1812 	struct mr6_table *mrt;
1813 
1814 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1815 	if (!mrt)
1816 		return -ENOENT;
1817 
1818 	switch (optname) {
1819 	case MRT6_VERSION:
1820 		val = 0x0305;
1821 		break;
1822 #ifdef CONFIG_IPV6_PIMSM_V2
1823 	case MRT6_PIM:
1824 		val = mrt->mroute_do_pim;
1825 		break;
1826 #endif
1827 	case MRT6_ASSERT:
1828 		val = mrt->mroute_do_assert;
1829 		break;
1830 	default:
1831 		return -ENOPROTOOPT;
1832 	}
1833 
1834 	if (get_user(olr, optlen))
1835 		return -EFAULT;
1836 
1837 	olr = min_t(int, olr, sizeof(int));
1838 	if (olr < 0)
1839 		return -EINVAL;
1840 
1841 	if (put_user(olr, optlen))
1842 		return -EFAULT;
1843 	if (copy_to_user(optval, &val, olr))
1844 		return -EFAULT;
1845 	return 0;
1846 }
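
/*
 * Illustrative only: querying the version exposed above (names are
 * assumptions of the example).
 *
 *	int ver;
 *	socklen_t len = sizeof(ver);
 *
 *	getsockopt(fd, IPPROTO_IPV6, MRT6_VERSION, &ver, &len);
 *	// ver is now 0x0305
 */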
1847 
1848 /*
1849  *	The IPv6 multicast ioctl support routines.
1850  */
1851 
1852 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1853 {
1854 	struct sioc_sg_req6 sr;
1855 	struct sioc_mif_req6 vr;
1856 	struct mif_device *vif;
1857 	struct mfc6_cache *c;
1858 	struct net *net = sock_net(sk);
1859 	struct mr6_table *mrt;
1860 
1861 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1862 	if (!mrt)
1863 		return -ENOENT;
1864 
1865 	switch (cmd) {
1866 	case SIOCGETMIFCNT_IN6:
1867 		if (copy_from_user(&vr, arg, sizeof(vr)))
1868 			return -EFAULT;
1869 		if (vr.mifi >= mrt->maxvif)
1870 			return -EINVAL;
1871 		read_lock(&mrt_lock);
1872 		vif = &mrt->vif6_table[vr.mifi];
1873 		if (MIF_EXISTS(mrt, vr.mifi)) {
1874 			vr.icount = vif->pkt_in;
1875 			vr.ocount = vif->pkt_out;
1876 			vr.ibytes = vif->bytes_in;
1877 			vr.obytes = vif->bytes_out;
1878 			read_unlock(&mrt_lock);
1879 
1880 			if (copy_to_user(arg, &vr, sizeof(vr)))
1881 				return -EFAULT;
1882 			return 0;
1883 		}
1884 		read_unlock(&mrt_lock);
1885 		return -EADDRNOTAVAIL;
1886 	case SIOCGETSGCNT_IN6:
1887 		if (copy_from_user(&sr, arg, sizeof(sr)))
1888 			return -EFAULT;
1889 
1890 		read_lock(&mrt_lock);
1891 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1892 		if (c) {
1893 			sr.pktcnt = c->mfc_un.res.pkt;
1894 			sr.bytecnt = c->mfc_un.res.bytes;
1895 			sr.wrong_if = c->mfc_un.res.wrong_if;
1896 			read_unlock(&mrt_lock);
1897 
1898 			if (copy_to_user(arg, &sr, sizeof(sr)))
1899 				return -EFAULT;
1900 			return 0;
1901 		}
1902 		read_unlock(&mrt_lock);
1903 		return -EADDRNOTAVAIL;
1904 	default:
1905 		return -ENOIOCTLCMD;
1906 	}
1907 }
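
/*
 * Editor's sketch (illustrative only): reading per-(S,G) counters
 * through the ioctl above; source, group and fd are assumptions of
 * the example.
 *
 *	struct sioc_sg_req6 sr;
 *
 *	memset(&sr, 0, sizeof(sr));
 *	sr.src.sin6_addr = source;
 *	sr.grp.sin6_addr = group;
 *	if (ioctl(fd, SIOCGETSGCNT_IN6, &sr) == 0)
 *		printf("pkts=%lu bytes=%lu wrong_if=%lu\n",
 *		       sr.pktcnt, sr.bytecnt, sr.wrong_if);
 */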
1908 
1909 #ifdef CONFIG_COMPAT
1910 struct compat_sioc_sg_req6 {
1911 	struct sockaddr_in6 src;
1912 	struct sockaddr_in6 grp;
1913 	compat_ulong_t pktcnt;
1914 	compat_ulong_t bytecnt;
1915 	compat_ulong_t wrong_if;
1916 };
1917 
1918 struct compat_sioc_mif_req6 {
1919 	mifi_t	mifi;
1920 	compat_ulong_t icount;
1921 	compat_ulong_t ocount;
1922 	compat_ulong_t ibytes;
1923 	compat_ulong_t obytes;
1924 };
1925 
1926 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1927 {
1928 	struct compat_sioc_sg_req6 sr;
1929 	struct compat_sioc_mif_req6 vr;
1930 	struct mif_device *vif;
1931 	struct mfc6_cache *c;
1932 	struct net *net = sock_net(sk);
1933 	struct mr6_table *mrt;
1934 
1935 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1936 	if (!mrt)
1937 		return -ENOENT;
1938 
1939 	switch (cmd) {
1940 	case SIOCGETMIFCNT_IN6:
1941 		if (copy_from_user(&vr, arg, sizeof(vr)))
1942 			return -EFAULT;
1943 		if (vr.mifi >= mrt->maxvif)
1944 			return -EINVAL;
1945 		read_lock(&mrt_lock);
1946 		vif = &mrt->vif6_table[vr.mifi];
1947 		if (MIF_EXISTS(mrt, vr.mifi)) {
1948 			vr.icount = vif->pkt_in;
1949 			vr.ocount = vif->pkt_out;
1950 			vr.ibytes = vif->bytes_in;
1951 			vr.obytes = vif->bytes_out;
1952 			read_unlock(&mrt_lock);
1953 
1954 			if (copy_to_user(arg, &vr, sizeof(vr)))
1955 				return -EFAULT;
1956 			return 0;
1957 		}
1958 		read_unlock(&mrt_lock);
1959 		return -EADDRNOTAVAIL;
1960 	case SIOCGETSGCNT_IN6:
1961 		if (copy_from_user(&sr, arg, sizeof(sr)))
1962 			return -EFAULT;
1963 
1964 		read_lock(&mrt_lock);
1965 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1966 		if (c) {
1967 			sr.pktcnt = c->mfc_un.res.pkt;
1968 			sr.bytecnt = c->mfc_un.res.bytes;
1969 			sr.wrong_if = c->mfc_un.res.wrong_if;
1970 			read_unlock(&mrt_lock);
1971 
1972 			if (copy_to_user(arg, &sr, sizeof(sr)))
1973 				return -EFAULT;
1974 			return 0;
1975 		}
1976 		read_unlock(&mrt_lock);
1977 		return -EADDRNOTAVAIL;
1978 	default:
1979 		return -ENOIOCTLCMD;
1980 	}
1981 }
1982 #endif
1983 
1984 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1985 {
1986 	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1987 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1988 	IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1989 			 IPSTATS_MIB_OUTOCTETS, skb->len);
1990 	return dst_output(skb);
1991 }
1992 
1993 /*
1994  *	Processing handlers for ip6mr_forward
1995  */
1996 
1997 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1998 			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1999 {
2000 	struct ipv6hdr *ipv6h;
2001 	struct mif_device *vif = &mrt->vif6_table[vifi];
2002 	struct net_device *dev;
2003 	struct dst_entry *dst;
2004 	struct flowi6 fl6;
2005 
2006 	if (!vif->dev)
2007 		goto out_free;
2008 
2009 #ifdef CONFIG_IPV6_PIMSM_V2
2010 	if (vif->flags & MIFF_REGISTER) {
2011 		vif->pkt_out++;
2012 		vif->bytes_out += skb->len;
2013 		vif->dev->stats.tx_bytes += skb->len;
2014 		vif->dev->stats.tx_packets++;
2015 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2016 		goto out_free;
2017 	}
2018 #endif
2019 
2020 	ipv6h = ipv6_hdr(skb);
2021 
2022 	fl6 = (struct flowi6) {
2023 		.flowi6_oif = vif->link,
2024 		.daddr = ipv6h->daddr,
2025 	};
2026 
2027 	dst = ip6_route_output(net, NULL, &fl6);
2028 	if (dst->error) {
2029 		dst_release(dst);
2030 		goto out_free;
2031 	}
2032 
2033 	skb_dst_drop(skb);
2034 	skb_dst_set(skb, dst);
2035 
2036 	/*
2037 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
2038 	 * not only before forwarding, but also after forwarding on all output
2039 	 * interfaces. Clearly, if the mrouter runs a multicasting
2040 	 * program, that program should receive packets regardless of which
2041 	 * interface it joined on.
2042 	 * If we did not do this, the program would have to join on all
2043 	 * interfaces. On the other hand, a multihomed host (or router, but
2044 	 * not an mrouter) cannot join on more than one interface - it would
2045 	 * result in receiving multiple copies of each packet.
2046 	 */
2047 	dev = vif->dev;
2048 	skb->dev = dev;
2049 	vif->pkt_out++;
2050 	vif->bytes_out += skb->len;
2051 
2052 	/* We are about to write */
2053 	/* XXX: extension headers? */
2054 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2055 		goto out_free;
2056 
2057 	ipv6h = ipv6_hdr(skb);
2058 	ipv6h->hop_limit--;
2059 
2060 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2061 
2062 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
2063 		       ip6mr_forward2_finish);
2064 
2065 out_free:
2066 	kfree_skb(skb);
2067 	return 0;
2068 }
2069 
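/* Return the mif index that @dev is bound to, or -1 if it is not a mif */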
2070 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2071 {
2072 	int ct;
2073 
2074 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2075 		if (mrt->vif6_table[ct].dev == dev)
2076 			break;
2077 	}
2078 	return ct;
2079 }
2080 
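/* Central forwarding decision: update the cache counters, check that the
 * packet arrived on the expected interface (possibly reporting a
 * MRT6MSG_WRONGMIF assert to the daemon), then replicate the packet to
 * every oif whose TTL threshold the packet's hop limit exceeds.
 */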
2081 static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
2082 			   struct sk_buff *skb, struct mfc6_cache *cache)
2083 {
2084 	int psend = -1;
2085 	int vif, ct;
2086 	int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2087 
2088 	vif = cache->mf6c_parent;
2089 	cache->mfc_un.res.pkt++;
2090 	cache->mfc_un.res.bytes += skb->len;
2091 
2092 	if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
2093 		struct mfc6_cache *cache_proxy;
2094 
2095 		/* For an (*,G) entry, we only check that the incoming
2096 		 * interface is part of the static tree.
2097 		 */
2098 		cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
2099 		if (cache_proxy &&
2100 		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
2101 			goto forward;
2102 	}
2103 
2104 	/*
2105 	 * Wrong interface: drop packet and (maybe) send PIM assert.
2106 	 */
2107 	if (mrt->vif6_table[vif].dev != skb->dev) {
2108 		cache->mfc_un.res.wrong_if++;
2109 
2110 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2111 		    /* PIM-SM uses asserts when switching from the RPT to the
2112 		       SPT, so we cannot check that the packet arrived on an
2113 		       oif. That is bad, but otherwise we would need to move a
2114 		       pretty large chunk of pimd into the kernel. Ough... --ANK
2115 		     */
2116 		    (mrt->mroute_do_pim ||
2117 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
2118 		    time_after(jiffies,
2119 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2120 			cache->mfc_un.res.last_assert = jiffies;
2121 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2122 		}
2123 		goto dont_forward;
2124 	}
2125 
2126 forward:
2127 	mrt->vif6_table[vif].pkt_in++;
2128 	mrt->vif6_table[vif].bytes_in += skb->len;
2129 
2130 	/*
2131 	 *	Forward the frame
2132 	 */
2133 	if (ipv6_addr_any(&cache->mf6c_origin) &&
2134 	    ipv6_addr_any(&cache->mf6c_mcastgrp)) {
2135 		if (true_vifi >= 0 &&
2136 		    true_vifi != cache->mf6c_parent &&
2137 		    ipv6_hdr(skb)->hop_limit >
2138 				cache->mfc_un.res.ttls[cache->mf6c_parent]) {
2139 			/* It's an (*,*) entry and the packet did not arrive
2140 			 * from the upstream interface: forward it to the
2141 			 * upstream only.
2142 			 */
2143 			psend = cache->mf6c_parent;
2144 			goto last_forward;
2145 		}
2146 		goto dont_forward;
2147 	}
2148 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2149 		/* For (*,G) entry, don't forward to the incoming interface */
2150 		if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
2151 		    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2152 			if (psend != -1) {
2153 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2154 				if (skb2)
2155 					ip6mr_forward2(net, mrt, skb2, cache, psend);
2156 			}
2157 			psend = ct;
2158 		}
2159 	}
2160 last_forward:
2161 	if (psend != -1) {
2162 		ip6mr_forward2(net, mrt, skb, cache, psend);
2163 		return;
2164 	}
2165 
2166 dont_forward:
2167 	kfree_skb(skb);
2168 }
2169 
2170 
2171 /*
2172  *	Multicast packets for forwarding arrive here
2173  */
2174 
2175 int ip6_mr_input(struct sk_buff *skb)
2176 {
2177 	struct mfc6_cache *cache;
2178 	struct net *net = dev_net(skb->dev);
2179 	struct mr6_table *mrt;
2180 	struct flowi6 fl6 = {
2181 		.flowi6_iif	= skb->dev->ifindex,
2182 		.flowi6_mark	= skb->mark,
2183 	};
2184 	int err;
2185 
2186 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2187 	if (err < 0) {
2188 		kfree_skb(skb);
2189 		return err;
2190 	}
2191 
2192 	read_lock(&mrt_lock);
2193 	cache = ip6mr_cache_find(mrt,
2194 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2195 	if (!cache) {
2196 		int vif = ip6mr_find_vif(mrt, skb->dev);
2197 
2198 		if (vif >= 0)
2199 			cache = ip6mr_cache_find_any(mrt,
2200 						     &ipv6_hdr(skb)->daddr,
2201 						     vif);
2202 	}
2203 
2204 	/*
2205 	 *	No usable cache entry
2206 	 */
2207 	if (!cache) {
2208 		int vif;
2209 
2210 		vif = ip6mr_find_vif(mrt, skb->dev);
2211 		if (vif >= 0) {
2212 			int err = ip6mr_cache_unresolved(mrt, vif, skb);
2213 			read_unlock(&mrt_lock);
2214 
2215 			return err;
2216 		}
2217 		read_unlock(&mrt_lock);
2218 		kfree_skb(skb);
2219 		return -ENODEV;
2220 	}
2221 
2222 	ip6_mr_forward(net, mrt, skb, cache);
2223 
2224 	read_unlock(&mrt_lock);
2225 
2226 	return 0;
2227 }
2228 
2229 
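/* Append the RTA_IIF, RTA_MULTIPATH and RTA_MFC_STATS attributes for
 * cache entry @c to @skb.  Returns 1 on success, -ENOENT for an
 * unresolved entry and -EMSGSIZE when the message runs out of space.
 */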
2230 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2231 			       struct mfc6_cache *c, struct rtmsg *rtm)
2232 {
2233 	int ct;
2234 	struct rtnexthop *nhp;
2235 	struct nlattr *mp_attr;
2236 	struct rta_mfc_stats mfcs;
2237 
2238 	/* If cache is unresolved, don't try to parse IIF and OIF */
2239 	if (c->mf6c_parent >= MAXMIFS)
2240 		return -ENOENT;
2241 
2242 	if (MIF_EXISTS(mrt, c->mf6c_parent) &&
2243 	    nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
2244 		return -EMSGSIZE;
2245 	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
2246 	if (!mp_attr)
2247 		return -EMSGSIZE;
2248 
2249 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2250 		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2251 			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
2252 			if (!nhp) {
2253 				nla_nest_cancel(skb, mp_attr);
2254 				return -EMSGSIZE;
2255 			}
2256 
2257 			nhp->rtnh_flags = 0;
2258 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2259 			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2260 			nhp->rtnh_len = sizeof(*nhp);
2261 		}
2262 	}
2263 
2264 	nla_nest_end(skb, mp_attr);
2265 
2266 	mfcs.mfcs_packets = c->mfc_un.res.pkt;
2267 	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2268 	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
2269 	if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
2270 		return -EMSGSIZE;
2271 
2272 	rtm->rtm_type = RTN_MULTICAST;
2273 	return 1;
2274 }
2275 
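/* rtnetlink route-get back end: fill @rtm with the multicast forwarding
 * state of the flow cached in @skb's dst.  If no cache entry exists yet,
 * a skeleton IPv6 header is built and queued as an unresolved entry
 * (with @nowait set we return -EAGAIN instead).
 */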
2276 int ip6mr_get_route(struct net *net,
2277 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2278 {
2279 	int err;
2280 	struct mr6_table *mrt;
2281 	struct mfc6_cache *cache;
2282 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2283 
2284 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2285 	if (!mrt)
2286 		return -ENOENT;
2287 
2288 	read_lock(&mrt_lock);
2289 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2290 	if (!cache && skb->dev) {
2291 		int vif = ip6mr_find_vif(mrt, skb->dev);
2292 
2293 		if (vif >= 0)
2294 			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2295 						     vif);
2296 	}
2297 
2298 	if (!cache) {
2299 		struct sk_buff *skb2;
2300 		struct ipv6hdr *iph;
2301 		struct net_device *dev;
2302 		int vif;
2303 
2304 		if (nowait) {
2305 			read_unlock(&mrt_lock);
2306 			return -EAGAIN;
2307 		}
2308 
2309 		dev = skb->dev;
2310 		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2311 			read_unlock(&mrt_lock);
2312 			return -ENODEV;
2313 		}
2314 
2315 		/* really correct? We queue only a bare IPv6 header for resolution. */
2316 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2317 		if (!skb2) {
2318 			read_unlock(&mrt_lock);
2319 			return -ENOMEM;
2320 		}
2321 
2322 		skb_reset_transport_header(skb2);
2323 
2324 		skb_put(skb2, sizeof(struct ipv6hdr));
2325 		skb_reset_network_header(skb2);
2326 
2327 		iph = ipv6_hdr(skb2);
2328 		iph->version = 0;
2329 		iph->priority = 0;
2330 		iph->flow_lbl[0] = 0;
2331 		iph->flow_lbl[1] = 0;
2332 		iph->flow_lbl[2] = 0;
2333 		iph->payload_len = 0;
2334 		iph->nexthdr = IPPROTO_NONE;
2335 		iph->hop_limit = 0;
2336 		iph->saddr = rt->rt6i_src.addr;
2337 		iph->daddr = rt->rt6i_dst.addr;
2338 
2339 		err = ip6mr_cache_unresolved(mrt, vif, skb2);
2340 		read_unlock(&mrt_lock);
2341 
2342 		return err;
2343 	}
2344 
2345 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2346 		cache->mfc_flags |= MFC_NOTIFY;
2347 
2348 	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2349 	read_unlock(&mrt_lock);
2350 	return err;
2351 }
2352 
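/* Build one RTM_NEWROUTE/RTM_DELROUTE message describing cache entry @c */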
2353 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2354 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2355 			     int flags)
2356 {
2357 	struct nlmsghdr *nlh;
2358 	struct rtmsg *rtm;
2359 	int err;
2360 
2361 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2362 	if (!nlh)
2363 		return -EMSGSIZE;
2364 
2365 	rtm = nlmsg_data(nlh);
2366 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2367 	rtm->rtm_dst_len  = 128;
2368 	rtm->rtm_src_len  = 128;
2369 	rtm->rtm_tos      = 0;
2370 	rtm->rtm_table    = mrt->id;
2371 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2372 		goto nla_put_failure;
2373 	rtm->rtm_type = RTN_MULTICAST;
2374 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2375 	if (c->mfc_flags & MFC_STATIC)
2376 		rtm->rtm_protocol = RTPROT_STATIC;
2377 	else
2378 		rtm->rtm_protocol = RTPROT_MROUTED;
2379 	rtm->rtm_flags    = 0;
2380 
2381 	if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) ||
2382 	    nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp))
2383 		goto nla_put_failure;
2384 	err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
2385 	/* do not break the dump if cache is unresolved */
2386 	if (err < 0 && err != -ENOENT)
2387 		goto nla_put_failure;
2388 
2389 	nlmsg_end(skb, nlh);
2390 	return 0;
2391 
2392 nla_put_failure:
2393 	nlmsg_cancel(skb, nlh);
2394 	return -EMSGSIZE;
2395 }
2396 
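/* Worst-case length of a single cache-entry message, used to size the
 * skb in mr6_netlink_event(); unresolved entries omit the iif, nexthop
 * and stats attributes.
 */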
2397 static int mr6_msgsize(bool unresolved, int maxvif)
2398 {
2399 	size_t len =
2400 		NLMSG_ALIGN(sizeof(struct rtmsg))
2401 		+ nla_total_size(4)	/* RTA_TABLE */
2402 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2403 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2404 		;
2405 
2406 	if (!unresolved)
2407 		len = len
2408 		      + nla_total_size(4)	/* RTA_IIF */
2409 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2410 		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2411 						/* RTA_MFC_STATS */
2412 		      + nla_total_size(sizeof(struct rta_mfc_stats))
2413 		;
2414 
2415 	return len;
2416 }
2417 
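/* Broadcast a cache-entry change (@cmd is RTM_NEWROUTE or RTM_DELROUTE)
 * to RTNLGRP_IPV6_MROUTE listeners.
 */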
2418 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
2419 			      int cmd)
2420 {
2421 	struct net *net = read_pnet(&mrt->net);
2422 	struct sk_buff *skb;
2423 	int err = -ENOBUFS;
2424 
2425 	skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
2426 			GFP_ATOMIC);
2427 	if (!skb)
2428 		goto errout;
2429 
2430 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2431 	if (err < 0)
2432 		goto errout;
2433 
2434 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2435 	return;
2436 
2437 errout:
2438 	kfree_skb(skb);
2439 	if (err < 0)
2440 		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2441 }
2442 
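/* Netlink dump callback: walk every table, hash line and cache entry,
 * resuming from the position saved in cb->args[] by the previous call.
 */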
2443 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2444 {
2445 	struct net *net = sock_net(skb->sk);
2446 	struct mr6_table *mrt;
2447 	struct mfc6_cache *mfc;
2448 	unsigned int t = 0, s_t;
2449 	unsigned int h = 0, s_h;
2450 	unsigned int e = 0, s_e;
2451 
2452 	s_t = cb->args[0];
2453 	s_h = cb->args[1];
2454 	s_e = cb->args[2];
2455 
2456 	read_lock(&mrt_lock);
2457 	ip6mr_for_each_table(mrt, net) {
2458 		if (t < s_t)
2459 			goto next_table;
2460 		if (t > s_t)
2461 			s_h = 0;
2462 		for (h = s_h; h < MFC6_LINES; h++) {
2463 			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2464 				if (e < s_e)
2465 					goto next_entry;
2466 				if (ip6mr_fill_mroute(mrt, skb,
2467 						      NETLINK_CB(cb->skb).portid,
2468 						      cb->nlh->nlmsg_seq,
2469 						      mfc, RTM_NEWROUTE,
2470 						      NLM_F_MULTI) < 0)
2471 					goto done;
2472 next_entry:
2473 				e++;
2474 			}
2475 			e = s_e = 0;
2476 		}
2477 		spin_lock_bh(&mfc_unres_lock);
2478 		list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
2479 			if (e < s_e)
2480 				goto next_entry2;
2481 			if (ip6mr_fill_mroute(mrt, skb,
2482 					      NETLINK_CB(cb->skb).portid,
2483 					      cb->nlh->nlmsg_seq,
2484 					      mfc, RTM_NEWROUTE,
2485 					      NLM_F_MULTI) < 0) {
2486 				spin_unlock_bh(&mfc_unres_lock);
2487 				goto done;
2488 			}
2489 next_entry2:
2490 			e++;
2491 		}
2492 		spin_unlock_bh(&mfc_unres_lock);
2493 		e = s_e = 0;
2494 		s_h = 0;
2495 next_table:
2496 		t++;
2497 	}
2498 done:
2499 	read_unlock(&mrt_lock);
2500 
2501 	cb->args[2] = e;
2502 	cb->args[1] = h;
2503 	cb->args[0] = t;
2504 
2505 	return skb->len;
2506 }
2507