xref: /openbmc/linux/net/ipv6/ip6mr.c (revision d0b73b48)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <asm/uaccess.h>
20 #include <linux/types.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
23 #include <linux/timer.h>
24 #include <linux/mm.h>
25 #include <linux/kernel.h>
26 #include <linux/fcntl.h>
27 #include <linux/stat.h>
28 #include <linux/socket.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/inetdevice.h>
32 #include <linux/proc_fs.h>
33 #include <linux/seq_file.h>
34 #include <linux/init.h>
35 #include <linux/slab.h>
36 #include <linux/compat.h>
37 #include <net/protocol.h>
38 #include <linux/skbuff.h>
39 #include <net/sock.h>
40 #include <net/raw.h>
41 #include <linux/notifier.h>
42 #include <linux/if_arp.h>
43 #include <net/checksum.h>
44 #include <net/netlink.h>
45 #include <net/fib_rules.h>
46 
47 #include <net/ipv6.h>
48 #include <net/ip6_route.h>
49 #include <linux/mroute6.h>
50 #include <linux/pim.h>
51 #include <net/addrconf.h>
52 #include <linux/netfilter_ipv6.h>
53 #include <linux/export.h>
54 #include <net/ip6_checksum.h>
55 #include <linux/netconf.h>
56 
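/* One mr6_table instance exists per multicast routing table: id
 * RT6_TABLE_DFLT for the default table, additional ids only with
 * CONFIG_IPV6_MROUTE_MULTIPLE_TABLES (selected via MRT6_TABLE below).
 */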
57 struct mr6_table {
58 	struct list_head	list;
59 #ifdef CONFIG_NET_NS
60 	struct net		*net;
61 #endif
62 	u32			id;
63 	struct sock		*mroute6_sk;
64 	struct timer_list	ipmr_expire_timer;
65 	struct list_head	mfc6_unres_queue;
66 	struct list_head	mfc6_cache_array[MFC6_LINES];
67 	struct mif_device	vif6_table[MAXMIFS];
68 	int			maxvif;
69 	atomic_t		cache_resolve_queue_len;
70 	bool			mroute_do_assert;
71 	bool			mroute_do_pim;
72 #ifdef CONFIG_IPV6_PIMSM_V2
73 	int			mroute_reg_vif_num;
74 #endif
75 };
76 
77 struct ip6mr_rule {
78 	struct fib_rule		common;
79 };
80 
81 struct ip6mr_result {
82 	struct mr6_table	*mrt;
83 };
84 
85 /* Big lock, protecting vif table, mrt cache and mroute socket state.
86    Note that changes are serialized via rtnl_lock.
87  */
88 
89 static DEFINE_RWLOCK(mrt_lock);
90 
91 /*
92  *	Multicast router control variables
93  */
94 
95 #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
96 
97 /* Special spinlock for queue of unresolved entries */
98 static DEFINE_SPINLOCK(mfc_unres_lock);
99 
100 /* We return to Alan's original scheme. The hash table of resolved
101    entries is changed only in process context and is protected by the
102    weak lock mrt_lock. The queue of unresolved entries is protected by
103    the strong spinlock mfc_unres_lock.
104 
105    This keeps the data path entirely free of exclusive locks.
106  */
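/* Editor's sketch (not part of the original file): the locking discipline
 * described above, shown on two hypothetical helpers. Resolved-cache
 * readers take mrt_lock shared; the unresolved queue is always accessed
 * under mfc_unres_lock with BHs disabled, because the expire timer also
 * takes it.
 */
#if 0	/* illustrative only, not compiled */
static struct mfc6_cache *sketch_find(struct mr6_table *mrt, int line)
{
	struct mfc6_cache *c, *found = NULL;

	read_lock(&mrt_lock);		/* weak lock: data path, shared */
	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		/* ... match (origin, mcastgrp), set found ... */
	}
	read_unlock(&mrt_lock);
	return found;
}

static void sketch_enqueue_unres(struct mr6_table *mrt, struct mfc6_cache *c)
{
	spin_lock_bh(&mfc_unres_lock);	/* strong lock: BH-safe */
	list_add(&c->list, &mrt->mfc6_unres_queue);
	spin_unlock_bh(&mfc_unres_lock);
}
#endif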
107 
108 static struct kmem_cache *mrt_cachep __read_mostly;
109 
110 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
111 static void ip6mr_free_table(struct mr6_table *mrt);
112 
113 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
114 			  struct sk_buff *skb, struct mfc6_cache *cache);
115 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
116 			      mifi_t mifi, int assert);
117 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
118 			       struct mfc6_cache *c, struct rtmsg *rtm);
119 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
120 			      int cmd);
121 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
122 			       struct netlink_callback *cb);
123 static void mroute_clean_tables(struct mr6_table *mrt);
124 static void ipmr_expire_process(unsigned long arg);
125 
126 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
127 #define ip6mr_for_each_table(mrt, net) \
128 	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
129 
130 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
131 {
132 	struct mr6_table *mrt;
133 
134 	ip6mr_for_each_table(mrt, net) {
135 		if (mrt->id == id)
136 			return mrt;
137 	}
138 	return NULL;
139 }
140 
141 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
142 			    struct mr6_table **mrt)
143 {
144 	struct ip6mr_result res;
145 	struct fib_lookup_arg arg = { .result = &res, };
146 	int err;
147 
148 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
149 			       flowi6_to_flowi(flp6), 0, &arg);
150 	if (err < 0)
151 		return err;
152 	*mrt = res.mrt;
153 	return 0;
154 }
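/* Editor's note: with CONFIG_IPV6_MROUTE_MULTIPLE_TABLES, every lookup on
 * the data path funnels through the fib-rules engine above.
 * ip6mr_rules_init() below installs one catch-all rule (pref 0x7fff) aimed
 * at RT6_TABLE_DFLT, so behaviour matches the single-table build until a
 * daemon adds rules of its own.
 */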
155 
156 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
157 			     int flags, struct fib_lookup_arg *arg)
158 {
159 	struct ip6mr_result *res = arg->result;
160 	struct mr6_table *mrt;
161 
162 	switch (rule->action) {
163 	case FR_ACT_TO_TBL:
164 		break;
165 	case FR_ACT_UNREACHABLE:
166 		return -ENETUNREACH;
167 	case FR_ACT_PROHIBIT:
168 		return -EACCES;
169 	case FR_ACT_BLACKHOLE:
170 	default:
171 		return -EINVAL;
172 	}
173 
174 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
175 	if (mrt == NULL)
176 		return -EAGAIN;
177 	res->mrt = mrt;
178 	return 0;
179 }
180 
181 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
182 {
183 	return 1;
184 }
185 
186 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
187 	FRA_GENERIC_POLICY,
188 };
189 
190 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
191 				struct fib_rule_hdr *frh, struct nlattr **tb)
192 {
193 	return 0;
194 }
195 
196 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
197 			      struct nlattr **tb)
198 {
199 	return 1;
200 }
201 
202 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
203 			   struct fib_rule_hdr *frh)
204 {
205 	frh->dst_len = 0;
206 	frh->src_len = 0;
207 	frh->tos     = 0;
208 	return 0;
209 }
210 
211 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
212 	.family		= RTNL_FAMILY_IP6MR,
213 	.rule_size	= sizeof(struct ip6mr_rule),
214 	.addr_size	= sizeof(struct in6_addr),
215 	.action		= ip6mr_rule_action,
216 	.match		= ip6mr_rule_match,
217 	.configure	= ip6mr_rule_configure,
218 	.compare	= ip6mr_rule_compare,
219 	.default_pref	= fib_default_rule_pref,
220 	.fill		= ip6mr_rule_fill,
221 	.nlgroup	= RTNLGRP_IPV6_RULE,
222 	.policy		= ip6mr_rule_policy,
223 	.owner		= THIS_MODULE,
224 };
225 
226 static int __net_init ip6mr_rules_init(struct net *net)
227 {
228 	struct fib_rules_ops *ops;
229 	struct mr6_table *mrt;
230 	int err;
231 
232 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
233 	if (IS_ERR(ops))
234 		return PTR_ERR(ops);
235 
236 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
237 
238 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
239 	if (mrt == NULL) {
240 		err = -ENOMEM;
241 		goto err1;
242 	}
243 
244 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
245 	if (err < 0)
246 		goto err2;
247 
248 	net->ipv6.mr6_rules_ops = ops;
249 	return 0;
250 
251 err2:
252 	ip6mr_free_table(mrt);
253 err1:
254 	fib_rules_unregister(ops);
255 	return err;
256 }
257 
258 static void __net_exit ip6mr_rules_exit(struct net *net)
259 {
260 	struct mr6_table *mrt, *next;
261 
262 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
263 		list_del(&mrt->list);
264 		ip6mr_free_table(mrt);
265 	}
266 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
267 }
268 #else
269 #define ip6mr_for_each_table(mrt, net) \
270 	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
271 
272 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
273 {
274 	return net->ipv6.mrt6;
275 }
276 
277 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
278 			    struct mr6_table **mrt)
279 {
280 	*mrt = net->ipv6.mrt6;
281 	return 0;
282 }
283 
284 static int __net_init ip6mr_rules_init(struct net *net)
285 {
286 	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
287 	return net->ipv6.mrt6 ? 0 : -ENOMEM;
288 }
289 
290 static void __net_exit ip6mr_rules_exit(struct net *net)
291 {
292 	ip6mr_free_table(net->ipv6.mrt6);
293 }
294 #endif
295 
296 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
297 {
298 	struct mr6_table *mrt;
299 	unsigned int i;
300 
301 	mrt = ip6mr_get_table(net, id);
302 	if (mrt != NULL)
303 		return mrt;
304 
305 	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
306 	if (mrt == NULL)
307 		return NULL;
308 	mrt->id = id;
309 	write_pnet(&mrt->net, net);
310 
311 	/* Forwarding cache */
312 	for (i = 0; i < MFC6_LINES; i++)
313 		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
314 
315 	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
316 
317 	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
318 		    (unsigned long)mrt);
319 
320 #ifdef CONFIG_IPV6_PIMSM_V2
321 	mrt->mroute_reg_vif_num = -1;
322 #endif
323 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
324 	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
325 #endif
326 	return mrt;
327 }
328 
329 static void ip6mr_free_table(struct mr6_table *mrt)
330 {
331 	del_timer(&mrt->ipmr_expire_timer);
332 	mroute_clean_tables(mrt);
333 	kfree(mrt);
334 }
335 
336 #ifdef CONFIG_PROC_FS
337 
338 struct ipmr_mfc_iter {
339 	struct seq_net_private p;
340 	struct mr6_table *mrt;
341 	struct list_head *cache;
342 	int ct;
343 };
344 
345 
346 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
347 					   struct ipmr_mfc_iter *it, loff_t pos)
348 {
349 	struct mr6_table *mrt = it->mrt;
350 	struct mfc6_cache *mfc;
351 
352 	read_lock(&mrt_lock);
353 	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
354 		it->cache = &mrt->mfc6_cache_array[it->ct];
355 		list_for_each_entry(mfc, it->cache, list)
356 			if (pos-- == 0)
357 				return mfc;
358 	}
359 	read_unlock(&mrt_lock);
360 
361 	spin_lock_bh(&mfc_unres_lock);
362 	it->cache = &mrt->mfc6_unres_queue;
363 	list_for_each_entry(mfc, it->cache, list)
364 		if (pos-- == 0)
365 			return mfc;
366 	spin_unlock_bh(&mfc_unres_lock);
367 
368 	it->cache = NULL;
369 	return NULL;
370 }
371 
372 /*
373  *	The /proc interfaces to multicast routing: /proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
374  */
375 
376 struct ipmr_vif_iter {
377 	struct seq_net_private p;
378 	struct mr6_table *mrt;
379 	int ct;
380 };
381 
382 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
383 					    struct ipmr_vif_iter *iter,
384 					    loff_t pos)
385 {
386 	struct mr6_table *mrt = iter->mrt;
387 
388 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
389 		if (!MIF_EXISTS(mrt, iter->ct))
390 			continue;
391 		if (pos-- == 0)
392 			return &mrt->vif6_table[iter->ct];
393 	}
394 	return NULL;
395 }
396 
397 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
398 	__acquires(mrt_lock)
399 {
400 	struct ipmr_vif_iter *iter = seq->private;
401 	struct net *net = seq_file_net(seq);
402 	struct mr6_table *mrt;
403 
404 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
405 	if (mrt == NULL)
406 		return ERR_PTR(-ENOENT);
407 
408 	iter->mrt = mrt;
409 
410 	read_lock(&mrt_lock);
411 	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
412 		: SEQ_START_TOKEN;
413 }
414 
415 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
416 {
417 	struct ipmr_vif_iter *iter = seq->private;
418 	struct net *net = seq_file_net(seq);
419 	struct mr6_table *mrt = iter->mrt;
420 
421 	++*pos;
422 	if (v == SEQ_START_TOKEN)
423 		return ip6mr_vif_seq_idx(net, iter, 0);
424 
425 	while (++iter->ct < mrt->maxvif) {
426 		if (!MIF_EXISTS(mrt, iter->ct))
427 			continue;
428 		return &mrt->vif6_table[iter->ct];
429 	}
430 	return NULL;
431 }
432 
433 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
434 	__releases(mrt_lock)
435 {
436 	read_unlock(&mrt_lock);
437 }
438 
439 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
440 {
441 	struct ipmr_vif_iter *iter = seq->private;
442 	struct mr6_table *mrt = iter->mrt;
443 
444 	if (v == SEQ_START_TOKEN) {
445 		seq_puts(seq,
446 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
447 	} else {
448 		const struct mif_device *vif = v;
449 		const char *name = vif->dev ? vif->dev->name : "none";
450 
451 		seq_printf(seq,
452 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
453 			   vif - mrt->vif6_table,
454 			   name, vif->bytes_in, vif->pkt_in,
455 			   vif->bytes_out, vif->pkt_out,
456 			   vif->flags);
457 	}
458 	return 0;
459 }
460 
461 static const struct seq_operations ip6mr_vif_seq_ops = {
462 	.start = ip6mr_vif_seq_start,
463 	.next  = ip6mr_vif_seq_next,
464 	.stop  = ip6mr_vif_seq_stop,
465 	.show  = ip6mr_vif_seq_show,
466 };
467 
468 static int ip6mr_vif_open(struct inode *inode, struct file *file)
469 {
470 	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
471 			    sizeof(struct ipmr_vif_iter));
472 }
473 
474 static const struct file_operations ip6mr_vif_fops = {
475 	.owner	 = THIS_MODULE,
476 	.open    = ip6mr_vif_open,
477 	.read    = seq_read,
478 	.llseek  = seq_lseek,
479 	.release = seq_release_net,
480 };
481 
482 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
483 {
484 	struct ipmr_mfc_iter *it = seq->private;
485 	struct net *net = seq_file_net(seq);
486 	struct mr6_table *mrt;
487 
488 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
489 	if (mrt == NULL)
490 		return ERR_PTR(-ENOENT);
491 
492 	it->mrt = mrt;
493 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
494 		: SEQ_START_TOKEN;
495 }
496 
497 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
498 {
499 	struct mfc6_cache *mfc = v;
500 	struct ipmr_mfc_iter *it = seq->private;
501 	struct net *net = seq_file_net(seq);
502 	struct mr6_table *mrt = it->mrt;
503 
504 	++*pos;
505 
506 	if (v == SEQ_START_TOKEN)
507 		return ipmr_mfc_seq_idx(net, seq->private, 0);
508 
509 	if (mfc->list.next != it->cache)
510 		return list_entry(mfc->list.next, struct mfc6_cache, list);
511 
512 	if (it->cache == &mrt->mfc6_unres_queue)
513 		goto end_of_list;
514 
515 	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
516 
517 	while (++it->ct < MFC6_LINES) {
518 		it->cache = &mrt->mfc6_cache_array[it->ct];
519 		if (list_empty(it->cache))
520 			continue;
521 		return list_first_entry(it->cache, struct mfc6_cache, list);
522 	}
523 
524 	/* exhausted cache_array, show unresolved */
525 	read_unlock(&mrt_lock);
526 	it->cache = &mrt->mfc6_unres_queue;
527 	it->ct = 0;
528 
529 	spin_lock_bh(&mfc_unres_lock);
530 	if (!list_empty(it->cache))
531 		return list_first_entry(it->cache, struct mfc6_cache, list);
532 
533  end_of_list:
534 	spin_unlock_bh(&mfc_unres_lock);
535 	it->cache = NULL;
536 
537 	return NULL;
538 }
539 
540 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
541 {
542 	struct ipmr_mfc_iter *it = seq->private;
543 	struct mr6_table *mrt = it->mrt;
544 
545 	if (it->cache == &mrt->mfc6_unres_queue)
546 		spin_unlock_bh(&mfc_unres_lock);
547 	else if (it->cache == mrt->mfc6_cache_array)
548 		read_unlock(&mrt_lock);
549 }
550 
551 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
552 {
553 	int n;
554 
555 	if (v == SEQ_START_TOKEN) {
556 		seq_puts(seq,
557 			 "Group                            "
558 			 "Origin                           "
559 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
560 	} else {
561 		const struct mfc6_cache *mfc = v;
562 		const struct ipmr_mfc_iter *it = seq->private;
563 		struct mr6_table *mrt = it->mrt;
564 
565 		seq_printf(seq, "%pI6 %pI6 %-3hd",
566 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
567 			   mfc->mf6c_parent);
568 
569 		if (it->cache != &mrt->mfc6_unres_queue) {
570 			seq_printf(seq, " %8lu %8lu %8lu",
571 				   mfc->mfc_un.res.pkt,
572 				   mfc->mfc_un.res.bytes,
573 				   mfc->mfc_un.res.wrong_if);
574 			for (n = mfc->mfc_un.res.minvif;
575 			     n < mfc->mfc_un.res.maxvif; n++) {
576 				if (MIF_EXISTS(mrt, n) &&
577 				    mfc->mfc_un.res.ttls[n] < 255)
578 					seq_printf(seq,
579 						   " %2d:%-3d",
580 						   n, mfc->mfc_un.res.ttls[n]);
581 			}
582 		} else {
583 			/* unresolved mfc_caches don't contain
584 			 * pkt, bytes and wrong_if values
585 			 */
586 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
587 		}
588 		seq_putc(seq, '\n');
589 	}
590 	return 0;
591 }
592 
593 static const struct seq_operations ipmr_mfc_seq_ops = {
594 	.start = ipmr_mfc_seq_start,
595 	.next  = ipmr_mfc_seq_next,
596 	.stop  = ipmr_mfc_seq_stop,
597 	.show  = ipmr_mfc_seq_show,
598 };
599 
600 static int ipmr_mfc_open(struct inode *inode, struct file *file)
601 {
602 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
603 			    sizeof(struct ipmr_mfc_iter));
604 }
605 
606 static const struct file_operations ip6mr_mfc_fops = {
607 	.owner	 = THIS_MODULE,
608 	.open    = ipmr_mfc_open,
609 	.read    = seq_read,
610 	.llseek  = seq_lseek,
611 	.release = seq_release_net,
612 };
613 #endif
614 
615 #ifdef CONFIG_IPV6_PIMSM_V2
616 
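/*
 * pim6_rcv() decapsulates PIM register messages: validate the register
 * header (type, flags, and a checksum over either the 8-byte header or
 * the whole packet), require a multicast inner destination, then
 * re-inject the inner packet on the per-table pim6reg device.
 */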
617 static int pim6_rcv(struct sk_buff *skb)
618 {
619 	struct pimreghdr *pim;
620 	struct ipv6hdr   *encap;
621 	struct net_device  *reg_dev = NULL;
622 	struct net *net = dev_net(skb->dev);
623 	struct mr6_table *mrt;
624 	struct flowi6 fl6 = {
625 		.flowi6_iif	= skb->dev->ifindex,
626 		.flowi6_mark	= skb->mark,
627 	};
628 	int reg_vif_num;
629 
630 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
631 		goto drop;
632 
633 	pim = (struct pimreghdr *)skb_transport_header(skb);
634 	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
635 	    (pim->flags & PIM_NULL_REGISTER) ||
636 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
637 			     sizeof(*pim), IPPROTO_PIM,
638 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
639 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
640 		goto drop;
641 
642 	/* check that the inner packet is destined to a multicast group */
643 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
644 				   sizeof(*pim));
645 
646 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
647 	    encap->payload_len == 0 ||
648 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
649 		goto drop;
650 
651 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
652 		goto drop;
653 	reg_vif_num = mrt->mroute_reg_vif_num;
654 
655 	read_lock(&mrt_lock);
656 	if (reg_vif_num >= 0)
657 		reg_dev = mrt->vif6_table[reg_vif_num].dev;
658 	if (reg_dev)
659 		dev_hold(reg_dev);
660 	read_unlock(&mrt_lock);
661 
662 	if (reg_dev == NULL)
663 		goto drop;
664 
665 	skb->mac_header = skb->network_header;
666 	skb_pull(skb, (u8 *)encap - skb->data);
667 	skb_reset_network_header(skb);
668 	skb->protocol = htons(ETH_P_IPV6);
669 	skb->ip_summed = CHECKSUM_NONE;
670 	skb->pkt_type = PACKET_HOST;
671 
672 	skb_tunnel_rx(skb, reg_dev);
673 
674 	netif_rx(skb);
675 
676 	dev_put(reg_dev);
677 	return 0;
678  drop:
679 	kfree_skb(skb);
680 	return 0;
681 }
682 
683 static const struct inet6_protocol pim6_protocol = {
684 	.handler	=	pim6_rcv,
685 };
686 
687 /* Service routines creating virtual interfaces: PIMREG */
688 
689 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
690 				      struct net_device *dev)
691 {
692 	struct net *net = dev_net(dev);
693 	struct mr6_table *mrt;
694 	struct flowi6 fl6 = {
695 		.flowi6_oif	= dev->ifindex,
696 		.flowi6_iif	= skb->skb_iif,
697 		.flowi6_mark	= skb->mark,
698 	};
699 	int err;
700 
701 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
702 	if (err < 0) {
703 		kfree_skb(skb);
704 		return err;
705 	}
706 
707 	read_lock(&mrt_lock);
708 	dev->stats.tx_bytes += skb->len;
709 	dev->stats.tx_packets++;
710 	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
711 	read_unlock(&mrt_lock);
712 	kfree_skb(skb);
713 	return NETDEV_TX_OK;
714 }
715 
716 static const struct net_device_ops reg_vif_netdev_ops = {
717 	.ndo_start_xmit	= reg_vif_xmit,
718 };
719 
720 static void reg_vif_setup(struct net_device *dev)
721 {
722 	dev->type		= ARPHRD_PIMREG;
723 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
724 	dev->flags		= IFF_NOARP;
725 	dev->netdev_ops		= &reg_vif_netdev_ops;
726 	dev->destructor		= free_netdev;
727 	dev->features		|= NETIF_F_NETNS_LOCAL;
728 }
729 
730 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
731 {
732 	struct net_device *dev;
733 	char name[IFNAMSIZ];
734 
735 	if (mrt->id == RT6_TABLE_DFLT)
736 		sprintf(name, "pim6reg");
737 	else
738 		sprintf(name, "pim6reg%u", mrt->id);
739 
740 	dev = alloc_netdev(0, name, reg_vif_setup);
741 	if (dev == NULL)
742 		return NULL;
743 
744 	dev_net_set(dev, net);
745 
746 	if (register_netdevice(dev)) {
747 		free_netdev(dev);
748 		return NULL;
749 	}
750 	dev->iflink = 0;
751 
752 	if (dev_open(dev))
753 		goto failure;
754 
755 	dev_hold(dev);
756 	return dev;
757 
758 failure:
759 	/* allow the registration to complete before unregistering. */
760 	rtnl_unlock();
761 	rtnl_lock();
762 
763 	unregister_netdevice(dev);
764 	return NULL;
765 }
766 #endif
767 
768 /*
769  *	Delete a VIF entry
770  */
771 
772 static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
773 {
774 	struct mif_device *v;
775 	struct net_device *dev;
776 	struct inet6_dev *in6_dev;
777 
778 	if (vifi < 0 || vifi >= mrt->maxvif)
779 		return -EADDRNOTAVAIL;
780 
781 	v = &mrt->vif6_table[vifi];
782 
783 	write_lock_bh(&mrt_lock);
784 	dev = v->dev;
785 	v->dev = NULL;
786 
787 	if (!dev) {
788 		write_unlock_bh(&mrt_lock);
789 		return -EADDRNOTAVAIL;
790 	}
791 
792 #ifdef CONFIG_IPV6_PIMSM_V2
793 	if (vifi == mrt->mroute_reg_vif_num)
794 		mrt->mroute_reg_vif_num = -1;
795 #endif
796 
797 	if (vifi + 1 == mrt->maxvif) {
798 		int tmp;
799 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
800 			if (MIF_EXISTS(mrt, tmp))
801 				break;
802 		}
803 		mrt->maxvif = tmp + 1;
804 	}
805 
806 	write_unlock_bh(&mrt_lock);
807 
808 	dev_set_allmulti(dev, -1);
809 
810 	in6_dev = __in6_dev_get(dev);
811 	if (in6_dev) {
812 		in6_dev->cnf.mc_forwarding--;
813 		inet6_netconf_notify_devconf(dev_net(dev),
814 					     NETCONFA_MC_FORWARDING,
815 					     dev->ifindex, &in6_dev->cnf);
816 	}
817 
818 	if (v->flags & MIFF_REGISTER)
819 		unregister_netdevice_queue(dev, head);
820 
821 	dev_put(dev);
822 	return 0;
823 }
824 
825 static inline void ip6mr_cache_free(struct mfc6_cache *c)
826 {
827 	kmem_cache_free(mrt_cachep, c);
828 }
829 
830 /* Destroy an unresolved cache entry, killing queued skbs
831    and reporting error to netlink readers.
832  */
833 
834 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
835 {
836 	struct net *net = read_pnet(&mrt->net);
837 	struct sk_buff *skb;
838 
839 	atomic_dec(&mrt->cache_resolve_queue_len);
840 
841 	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
842 		if (ipv6_hdr(skb)->version == 0) {
843 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
844 			nlh->nlmsg_type = NLMSG_ERROR;
845 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
846 			skb_trim(skb, nlh->nlmsg_len);
847 			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
848 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
849 		} else
850 			kfree_skb(skb);
851 	}
852 
853 	ip6mr_cache_free(c);
854 }
855 
856 
857 /* Timer process for all the unresolved queue. */
858 
859 static void ipmr_do_expire_process(struct mr6_table *mrt)
860 {
861 	unsigned long now = jiffies;
862 	unsigned long expires = 10 * HZ;
863 	struct mfc6_cache *c, *next;
864 
865 	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
866 		if (time_after(c->mfc_un.unres.expires, now)) {
867 			/* not yet... */
868 			unsigned long interval = c->mfc_un.unres.expires - now;
869 			if (interval < expires)
870 				expires = interval;
871 			continue;
872 		}
873 
874 		list_del(&c->list);
875 		mr6_netlink_event(mrt, c, RTM_DELROUTE);
876 		ip6mr_destroy_unres(mrt, c);
877 	}
878 
879 	if (!list_empty(&mrt->mfc6_unres_queue))
880 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
881 }
882 
883 static void ipmr_expire_process(unsigned long arg)
884 {
885 	struct mr6_table *mrt = (struct mr6_table *)arg;
886 
887 	if (!spin_trylock(&mfc_unres_lock)) {
888 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
889 		return;
890 	}
891 
892 	if (!list_empty(&mrt->mfc6_unres_queue))
893 		ipmr_do_expire_process(mrt);
894 
895 	spin_unlock(&mfc_unres_lock);
896 }
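/* Editor's note: the expire handler runs from the timer (softirq), so it
 * only trylocks mfc_unres_lock; on contention with a process-context
 * holder it re-arms itself one jiffy later instead of spinning.
 */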
897 
898 /* Fill the oifs list. Called with mrt_lock held for writing. */
899 
900 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
901 				    unsigned char *ttls)
902 {
903 	int vifi;
904 
905 	cache->mfc_un.res.minvif = MAXMIFS;
906 	cache->mfc_un.res.maxvif = 0;
907 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
908 
909 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
910 		if (MIF_EXISTS(mrt, vifi) &&
911 		    ttls[vifi] && ttls[vifi] < 255) {
912 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
913 			if (cache->mfc_un.res.minvif > vifi)
914 				cache->mfc_un.res.minvif = vifi;
915 			if (cache->mfc_un.res.maxvif <= vifi)
916 				cache->mfc_un.res.maxvif = vifi + 1;
917 		}
918 	}
919 }
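/* Worked example (editor's addition): suppose mifs 1 and 3 exist and the
 * daemon supplied ttls[1] = 1, ttls[3] = 2, everything else 0 or 255.
 * The loop above leaves res.ttls[1] = 1 and res.ttls[3] = 2 (all other
 * slots stay 255, i.e. "never forward") and sets res.minvif = 1,
 * res.maxvif = 4, so ip6_mr_forward() only scans mifs 1..3.
 */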
920 
921 static int mif6_add(struct net *net, struct mr6_table *mrt,
922 		    struct mif6ctl *vifc, int mrtsock)
923 {
924 	int vifi = vifc->mif6c_mifi;
925 	struct mif_device *v = &mrt->vif6_table[vifi];
926 	struct net_device *dev;
927 	struct inet6_dev *in6_dev;
928 	int err;
929 
930 	/* Is vif busy ? */
931 	if (MIF_EXISTS(mrt, vifi))
932 		return -EADDRINUSE;
933 
934 	switch (vifc->mif6c_flags) {
935 #ifdef CONFIG_IPV6_PIMSM_V2
936 	case MIFF_REGISTER:
937 		/*
938 		 * Special-purpose VIF used by PIM;
939 		 * all packets on it are sent to the daemon
940 		 */
941 		if (mrt->mroute_reg_vif_num >= 0)
942 			return -EADDRINUSE;
943 		dev = ip6mr_reg_vif(net, mrt);
944 		if (!dev)
945 			return -ENOBUFS;
946 		err = dev_set_allmulti(dev, 1);
947 		if (err) {
948 			unregister_netdevice(dev);
949 			dev_put(dev);
950 			return err;
951 		}
952 		break;
953 #endif
954 	case 0:
955 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
956 		if (!dev)
957 			return -EADDRNOTAVAIL;
958 		err = dev_set_allmulti(dev, 1);
959 		if (err) {
960 			dev_put(dev);
961 			return err;
962 		}
963 		break;
964 	default:
965 		return -EINVAL;
966 	}
967 
968 	in6_dev = __in6_dev_get(dev);
969 	if (in6_dev) {
970 		in6_dev->cnf.mc_forwarding++;
971 		inet6_netconf_notify_devconf(dev_net(dev),
972 					     NETCONFA_MC_FORWARDING,
973 					     dev->ifindex, &in6_dev->cnf);
974 	}
975 
976 	/*
977 	 *	Fill in the VIF structures
978 	 */
979 	v->rate_limit = vifc->vifc_rate_limit;
980 	v->flags = vifc->mif6c_flags;
981 	if (!mrtsock)
982 		v->flags |= VIFF_STATIC;
983 	v->threshold = vifc->vifc_threshold;
984 	v->bytes_in = 0;
985 	v->bytes_out = 0;
986 	v->pkt_in = 0;
987 	v->pkt_out = 0;
988 	v->link = dev->ifindex;
989 	if (v->flags & MIFF_REGISTER)
990 		v->link = dev->iflink;
991 
992 	/* And finish update writing critical data */
993 	write_lock_bh(&mrt_lock);
994 	v->dev = dev;
995 #ifdef CONFIG_IPV6_PIMSM_V2
996 	if (v->flags & MIFF_REGISTER)
997 		mrt->mroute_reg_vif_num = vifi;
998 #endif
999 	if (vifi + 1 > mrt->maxvif)
1000 		mrt->maxvif = vifi + 1;
1001 	write_unlock_bh(&mrt_lock);
1002 	return 0;
1003 }
1004 
1005 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
1006 					   const struct in6_addr *origin,
1007 					   const struct in6_addr *mcastgrp)
1008 {
1009 	int line = MFC6_HASH(mcastgrp, origin);
1010 	struct mfc6_cache *c;
1011 
1012 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1013 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1014 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1015 			return c;
1016 	}
1017 	return NULL;
1018 }
1019 
1020 /*
1021  *	Allocate a multicast cache entry
1022  */
1023 static struct mfc6_cache *ip6mr_cache_alloc(void)
1024 {
1025 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1026 	if (c == NULL)
1027 		return NULL;
1028 	c->mfc_un.res.minvif = MAXMIFS;
1029 	return c;
1030 }
1031 
1032 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1033 {
1034 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1035 	if (c == NULL)
1036 		return NULL;
1037 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
1038 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
1039 	return c;
1040 }
1041 
1042 /*
1043  *	A cache entry has moved from the unresolved queue into the resolved state
1044  */
1045 
1046 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1047 				struct mfc6_cache *uc, struct mfc6_cache *c)
1048 {
1049 	struct sk_buff *skb;
1050 
1051 	/*
1052 	 *	Play the pending entries through our router
1053 	 */
1054 
1055 	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1056 		if (ipv6_hdr(skb)->version == 0) {
1057 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1058 
1059 			if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
1060 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1061 			} else {
1062 				nlh->nlmsg_type = NLMSG_ERROR;
1063 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
1064 				skb_trim(skb, nlh->nlmsg_len);
1065 				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
1066 			}
1067 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1068 		} else
1069 			ip6_mr_forward(net, mrt, skb, c);
1070 	}
1071 }
1072 
1073 /*
1074  *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1075  *	expects the following bizarre scheme.
1076  *
1077  *	Called under mrt_lock.
1078  */
1079 
1080 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1081 			      mifi_t mifi, int assert)
1082 {
1083 	struct sk_buff *skb;
1084 	struct mrt6msg *msg;
1085 	int ret;
1086 
1087 #ifdef CONFIG_IPV6_PIMSM_V2
1088 	if (assert == MRT6MSG_WHOLEPKT)
1089 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1090 						+sizeof(*msg));
1091 	else
1092 #endif
1093 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1094 
1095 	if (!skb)
1096 		return -ENOBUFS;
1097 
1098 	/* Locally generated control messages
1099 	 * do not require checksums */
1100 
1101 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1102 
1103 #ifdef CONFIG_IPV6_PIMSM_V2
1104 	if (assert == MRT6MSG_WHOLEPKT) {
1105 		/* Ugly, but we have no choice with this interface.
1106 		   Duplicate old header, fix length etc.
1107 		   And all this only to mangle msg->im6_msgtype and
1108 		   to set msg->im6_mbz to "mbz" :-)
1109 		 */
1110 		skb_push(skb, -skb_network_offset(pkt));
1111 
1112 		skb_push(skb, sizeof(*msg));
1113 		skb_reset_transport_header(skb);
1114 		msg = (struct mrt6msg *)skb_transport_header(skb);
1115 		msg->im6_mbz = 0;
1116 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1117 		msg->im6_mif = mrt->mroute_reg_vif_num;
1118 		msg->im6_pad = 0;
1119 		msg->im6_src = ipv6_hdr(pkt)->saddr;
1120 		msg->im6_dst = ipv6_hdr(pkt)->daddr;
1121 
1122 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1123 	} else
1124 #endif
1125 	{
1126 	/*
1127 	 *	Copy the IP header
1128 	 */
1129 
1130 	skb_put(skb, sizeof(struct ipv6hdr));
1131 	skb_reset_network_header(skb);
1132 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1133 
1134 	/*
1135 	 *	Add our header
1136 	 */
1137 	skb_put(skb, sizeof(*msg));
1138 	skb_reset_transport_header(skb);
1139 	msg = (struct mrt6msg *)skb_transport_header(skb);
1140 
1141 	msg->im6_mbz = 0;
1142 	msg->im6_msgtype = assert;
1143 	msg->im6_mif = mifi;
1144 	msg->im6_pad = 0;
1145 	msg->im6_src = ipv6_hdr(pkt)->saddr;
1146 	msg->im6_dst = ipv6_hdr(pkt)->daddr;
1147 
1148 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1149 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1150 	}
1151 
1152 	if (mrt->mroute6_sk == NULL) {
1153 		kfree_skb(skb);
1154 		return -EINVAL;
1155 	}
1156 
1157 	/*
1158 	 *	Deliver to user space multicast routing algorithms
1159 	 */
1160 	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1161 	if (ret < 0) {
1162 		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1163 		kfree_skb(skb);
1164 	}
1165 
1166 	return ret;
1167 }
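/* Editor's note on the upcall layout built above: for MRT6MSG_NOCACHE and
 * MRT6MSG_WRONGMIF the queued skb is a copy of the offending packet's IPv6
 * header followed by a struct mrt6msg; for MRT6MSG_WHOLEPKT a struct
 * mrt6msg is pushed in front of the original packet. The daemon reads
 * these from its MRT6_INIT socket and typically answers a NOCACHE report
 * with MRT6_ADD_MFC (see ip6_mroute_setsockopt() below).
 */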
1168 
1169 /*
1170  *	Queue a packet for resolution; the unresolved entry is created and updated under mfc_unres_lock.
1171  */
1172 
1173 static int
1174 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1175 {
1176 	bool found = false;
1177 	int err;
1178 	struct mfc6_cache *c;
1179 
1180 	spin_lock_bh(&mfc_unres_lock);
1181 	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1182 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1183 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1184 			found = true;
1185 			break;
1186 		}
1187 	}
1188 
1189 	if (!found) {
1190 		/*
1191 		 *	Create a new entry if allowable
1192 		 */
1193 
1194 		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1195 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
1196 			spin_unlock_bh(&mfc_unres_lock);
1197 
1198 			kfree_skb(skb);
1199 			return -ENOBUFS;
1200 		}
1201 
1202 		/*
1203 		 *	Fill in the new cache entry
1204 		 */
1205 		c->mf6c_parent = -1;
1206 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1207 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1208 
1209 		/*
1210 		 *	Reflect first query at pim6sd
1211 		 */
1212 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1213 		if (err < 0) {
1214 			/* If the report failed throw the cache entry
1215 			   out - Brad Parker
1216 			 */
1217 			spin_unlock_bh(&mfc_unres_lock);
1218 
1219 			ip6mr_cache_free(c);
1220 			kfree_skb(skb);
1221 			return err;
1222 		}
1223 
1224 		atomic_inc(&mrt->cache_resolve_queue_len);
1225 		list_add(&c->list, &mrt->mfc6_unres_queue);
1226 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1227 
1228 		ipmr_do_expire_process(mrt);
1229 	}
1230 
1231 	/*
1232 	 *	See if we can append the packet
1233 	 */
1234 	if (c->mfc_un.unres.unresolved.qlen > 3) {
1235 		kfree_skb(skb);
1236 		err = -ENOBUFS;
1237 	} else {
1238 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1239 		err = 0;
1240 	}
1241 
1242 	spin_unlock_bh(&mfc_unres_lock);
1243 	return err;
1244 }
1245 
1246 /*
1247  *	MFC6 cache manipulation by user space
1248  */
1249 
1250 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
1251 {
1252 	int line;
1253 	struct mfc6_cache *c, *next;
1254 
1255 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1256 
1257 	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1258 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1259 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1260 			write_lock_bh(&mrt_lock);
1261 			list_del(&c->list);
1262 			write_unlock_bh(&mrt_lock);
1263 
1264 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1265 			ip6mr_cache_free(c);
1266 			return 0;
1267 		}
1268 	}
1269 	return -ENOENT;
1270 }
1271 
1272 static int ip6mr_device_event(struct notifier_block *this,
1273 			      unsigned long event, void *ptr)
1274 {
1275 	struct net_device *dev = ptr;
1276 	struct net *net = dev_net(dev);
1277 	struct mr6_table *mrt;
1278 	struct mif_device *v;
1279 	int ct;
1280 	LIST_HEAD(list);
1281 
1282 	if (event != NETDEV_UNREGISTER)
1283 		return NOTIFY_DONE;
1284 
1285 	ip6mr_for_each_table(mrt, net) {
1286 		v = &mrt->vif6_table[0];
1287 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1288 			if (v->dev == dev)
1289 				mif6_delete(mrt, ct, &list);
1290 		}
1291 	}
1292 	unregister_netdevice_many(&list);
1293 
1294 	return NOTIFY_DONE;
1295 }
1296 
1297 static struct notifier_block ip6_mr_notifier = {
1298 	.notifier_call = ip6mr_device_event
1299 };
1300 
1301 /*
1302  *	Setup for IP multicast routing
1303  */
1304 
1305 static int __net_init ip6mr_net_init(struct net *net)
1306 {
1307 	int err;
1308 
1309 	err = ip6mr_rules_init(net);
1310 	if (err < 0)
1311 		goto fail;
1312 
1313 #ifdef CONFIG_PROC_FS
1314 	err = -ENOMEM;
1315 	if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1316 		goto proc_vif_fail;
1317 	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1318 		goto proc_cache_fail;
1319 #endif
1320 
1321 	return 0;
1322 
1323 #ifdef CONFIG_PROC_FS
1324 proc_cache_fail:
1325 	proc_net_remove(net, "ip6_mr_vif");
1326 proc_vif_fail:
1327 	ip6mr_rules_exit(net);
1328 #endif
1329 fail:
1330 	return err;
1331 }
1332 
1333 static void __net_exit ip6mr_net_exit(struct net *net)
1334 {
1335 #ifdef CONFIG_PROC_FS
1336 	proc_net_remove(net, "ip6_mr_cache");
1337 	proc_net_remove(net, "ip6_mr_vif");
1338 #endif
1339 	ip6mr_rules_exit(net);
1340 }
1341 
1342 static struct pernet_operations ip6mr_net_ops = {
1343 	.init = ip6mr_net_init,
1344 	.exit = ip6mr_net_exit,
1345 };
1346 
1347 int __init ip6_mr_init(void)
1348 {
1349 	int err;
1350 
1351 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1352 				       sizeof(struct mfc6_cache),
1353 				       0, SLAB_HWCACHE_ALIGN,
1354 				       NULL);
1355 	if (!mrt_cachep)
1356 		return -ENOMEM;
1357 
1358 	err = register_pernet_subsys(&ip6mr_net_ops);
1359 	if (err)
1360 		goto reg_pernet_fail;
1361 
1362 	err = register_netdevice_notifier(&ip6_mr_notifier);
1363 	if (err)
1364 		goto reg_notif_fail;
1365 #ifdef CONFIG_IPV6_PIMSM_V2
1366 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1367 		pr_err("%s: can't add PIM protocol\n", __func__);
1368 		err = -EAGAIN;
1369 		goto add_proto_fail;
1370 	}
1371 #endif
1372 	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
1373 		      ip6mr_rtm_dumproute, NULL);
1374 	return 0;
1375 #ifdef CONFIG_IPV6_PIMSM_V2
1376 add_proto_fail:
1377 	unregister_netdevice_notifier(&ip6_mr_notifier);
1378 #endif
1379 reg_notif_fail:
1380 	unregister_pernet_subsys(&ip6mr_net_ops);
1381 reg_pernet_fail:
1382 	kmem_cache_destroy(mrt_cachep);
1383 	return err;
1384 }
1385 
1386 void ip6_mr_cleanup(void)
1387 {
1388 	unregister_netdevice_notifier(&ip6_mr_notifier);
1389 	unregister_pernet_subsys(&ip6mr_net_ops);
1390 	kmem_cache_destroy(mrt_cachep);
1391 }
1392 
1393 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1394 			 struct mf6cctl *mfc, int mrtsock)
1395 {
1396 	bool found = false;
1397 	int line;
1398 	struct mfc6_cache *uc, *c;
1399 	unsigned char ttls[MAXMIFS];
1400 	int i;
1401 
1402 	if (mfc->mf6cc_parent >= MAXMIFS)
1403 		return -ENFILE;
1404 
1405 	memset(ttls, 255, MAXMIFS);
1406 	for (i = 0; i < MAXMIFS; i++) {
1407 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1408 			ttls[i] = 1;
1409 
1410 	}
1411 
1412 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1413 
1414 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1415 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1416 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1417 			found = true;
1418 			break;
1419 		}
1420 	}
1421 
1422 	if (found) {
1423 		write_lock_bh(&mrt_lock);
1424 		c->mf6c_parent = mfc->mf6cc_parent;
1425 		ip6mr_update_thresholds(mrt, c, ttls);
1426 		if (!mrtsock)
1427 			c->mfc_flags |= MFC_STATIC;
1428 		write_unlock_bh(&mrt_lock);
1429 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1430 		return 0;
1431 	}
1432 
1433 	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1434 		return -EINVAL;
1435 
1436 	c = ip6mr_cache_alloc();
1437 	if (c == NULL)
1438 		return -ENOMEM;
1439 
1440 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1441 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1442 	c->mf6c_parent = mfc->mf6cc_parent;
1443 	ip6mr_update_thresholds(mrt, c, ttls);
1444 	if (!mrtsock)
1445 		c->mfc_flags |= MFC_STATIC;
1446 
1447 	write_lock_bh(&mrt_lock);
1448 	list_add(&c->list, &mrt->mfc6_cache_array[line]);
1449 	write_unlock_bh(&mrt_lock);
1450 
1451 	/*
1452 	 *	Check to see if we resolved a queued list. If so we
1453 	 *	need to send on the frames and tidy up.
1454 	 */
1455 	found = false;
1456 	spin_lock_bh(&mfc_unres_lock);
1457 	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1458 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1459 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1460 			list_del(&uc->list);
1461 			atomic_dec(&mrt->cache_resolve_queue_len);
1462 			found = true;
1463 			break;
1464 		}
1465 	}
1466 	if (list_empty(&mrt->mfc6_unres_queue))
1467 		del_timer(&mrt->ipmr_expire_timer);
1468 	spin_unlock_bh(&mfc_unres_lock);
1469 
1470 	if (found) {
1471 		ip6mr_cache_resolve(net, mrt, uc, c);
1472 		ip6mr_cache_free(uc);
1473 	}
1474 	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1475 	return 0;
1476 }
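/* Illustrative userspace sketch (editor's addition, not part of this file):
 * the mf6cctl a daemon would pass via MRT6_ADD_MFC to reach ip6mr_mfc_add()
 * above. IF_SET/IF_ISSET come from <linux/mroute6.h>; source_addr,
 * group_addr and mroute_sock are assumed to exist in the caller.
 */
#if 0	/* illustrative only, not compiled */
	struct mf6cctl mfc = { .mf6cc_parent = 0 };	/* incoming mif */

	mfc.mf6cc_origin.sin6_addr   = source_addr;
	mfc.mf6cc_mcastgrp.sin6_addr = group_addr;
	IF_SET(1, &mfc.mf6cc_ifset);			/* forward out mif 1 */
	setsockopt(mroute_sock, IPPROTO_IPV6, MRT6_ADD_MFC, &mfc, sizeof(mfc));
#endif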
1477 
1478 /*
1479  *	Close the multicast socket, and clear the vif tables etc
1480  */
1481 
1482 static void mroute_clean_tables(struct mr6_table *mrt)
1483 {
1484 	int i;
1485 	LIST_HEAD(list);
1486 	struct mfc6_cache *c, *next;
1487 
1488 	/*
1489 	 *	Shut down all active vif entries
1490 	 */
1491 	for (i = 0; i < mrt->maxvif; i++) {
1492 		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1493 			mif6_delete(mrt, i, &list);
1494 	}
1495 	unregister_netdevice_many(&list);
1496 
1497 	/*
1498 	 *	Wipe the cache
1499 	 */
1500 	for (i = 0; i < MFC6_LINES; i++) {
1501 		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1502 			if (c->mfc_flags & MFC_STATIC)
1503 				continue;
1504 			write_lock_bh(&mrt_lock);
1505 			list_del(&c->list);
1506 			write_unlock_bh(&mrt_lock);
1507 
1508 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1509 			ip6mr_cache_free(c);
1510 		}
1511 	}
1512 
1513 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1514 		spin_lock_bh(&mfc_unres_lock);
1515 		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1516 			list_del(&c->list);
1517 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1518 			ip6mr_destroy_unres(mrt, c);
1519 		}
1520 		spin_unlock_bh(&mfc_unres_lock);
1521 	}
1522 }
1523 
1524 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1525 {
1526 	int err = 0;
1527 	struct net *net = sock_net(sk);
1528 
1529 	rtnl_lock();
1530 	write_lock_bh(&mrt_lock);
1531 	if (likely(mrt->mroute6_sk == NULL)) {
1532 		mrt->mroute6_sk = sk;
1533 		net->ipv6.devconf_all->mc_forwarding++;
1534 		inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
1535 					     NETCONFA_IFINDEX_ALL,
1536 					     net->ipv6.devconf_all);
1537 	}
1538 	else
1539 		err = -EADDRINUSE;
1540 	write_unlock_bh(&mrt_lock);
1541 
1542 	rtnl_unlock();
1543 
1544 	return err;
1545 }
1546 
1547 int ip6mr_sk_done(struct sock *sk)
1548 {
1549 	int err = -EACCES;
1550 	struct net *net = sock_net(sk);
1551 	struct mr6_table *mrt;
1552 
1553 	rtnl_lock();
1554 	ip6mr_for_each_table(mrt, net) {
1555 		if (sk == mrt->mroute6_sk) {
1556 			write_lock_bh(&mrt_lock);
1557 			mrt->mroute6_sk = NULL;
1558 			net->ipv6.devconf_all->mc_forwarding--;
1559 			inet6_netconf_notify_devconf(net,
1560 						     NETCONFA_MC_FORWARDING,
1561 						     NETCONFA_IFINDEX_ALL,
1562 						     net->ipv6.devconf_all);
1563 			write_unlock_bh(&mrt_lock);
1564 
1565 			mroute_clean_tables(mrt);
1566 			err = 0;
1567 			break;
1568 		}
1569 	}
1570 	rtnl_unlock();
1571 
1572 	return err;
1573 }
1574 
1575 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1576 {
1577 	struct mr6_table *mrt;
1578 	struct flowi6 fl6 = {
1579 		.flowi6_iif	= skb->skb_iif,
1580 		.flowi6_oif	= skb->dev->ifindex,
1581 		.flowi6_mark	= skb->mark,
1582 	};
1583 
1584 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1585 		return NULL;
1586 
1587 	return mrt->mroute6_sk;
1588 }
1589 
1590 /*
1591  *	Socket options and virtual interface manipulation. The whole
1592  *	virtual interface system is a complete heap, but unfortunately
1593  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1594  *	MOSPF/PIM router set up we can clean this up.
1595  */
1596 
1597 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1598 {
1599 	int ret;
1600 	struct mif6ctl vif;
1601 	struct mf6cctl mfc;
1602 	mifi_t mifi;
1603 	struct net *net = sock_net(sk);
1604 	struct mr6_table *mrt;
1605 
1606 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1607 	if (mrt == NULL)
1608 		return -ENOENT;
1609 
1610 	if (optname != MRT6_INIT) {
1611 		if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
1612 			return -EACCES;
1613 	}
1614 
1615 	switch (optname) {
1616 	case MRT6_INIT:
1617 		if (sk->sk_type != SOCK_RAW ||
1618 		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1619 			return -EOPNOTSUPP;
1620 		if (optlen < sizeof(int))
1621 			return -EINVAL;
1622 
1623 		return ip6mr_sk_init(mrt, sk);
1624 
1625 	case MRT6_DONE:
1626 		return ip6mr_sk_done(sk);
1627 
1628 	case MRT6_ADD_MIF:
1629 		if (optlen < sizeof(vif))
1630 			return -EINVAL;
1631 		if (copy_from_user(&vif, optval, sizeof(vif)))
1632 			return -EFAULT;
1633 		if (vif.mif6c_mifi >= MAXMIFS)
1634 			return -ENFILE;
1635 		rtnl_lock();
1636 		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1637 		rtnl_unlock();
1638 		return ret;
1639 
1640 	case MRT6_DEL_MIF:
1641 		if (optlen < sizeof(mifi_t))
1642 			return -EINVAL;
1643 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1644 			return -EFAULT;
1645 		rtnl_lock();
1646 		ret = mif6_delete(mrt, mifi, NULL);
1647 		rtnl_unlock();
1648 		return ret;
1649 
1650 	/*
1651 	 *	Manipulate the forwarding caches. These live
1652 	 *	in a sort of kernel/user symbiosis.
1653 	 */
1654 	case MRT6_ADD_MFC:
1655 	case MRT6_DEL_MFC:
1656 		if (optlen < sizeof(mfc))
1657 			return -EINVAL;
1658 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1659 			return -EFAULT;
1660 		rtnl_lock();
1661 		if (optname == MRT6_DEL_MFC)
1662 			ret = ip6mr_mfc_delete(mrt, &mfc);
1663 		else
1664 			ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
1665 		rtnl_unlock();
1666 		return ret;
1667 
1668 	/*
1669 	 *	Control PIM assert (enabling PIM also enables asserts)
1670 	 */
1671 	case MRT6_ASSERT:
1672 	{
1673 		int v;
1674 
1675 		if (optlen != sizeof(v))
1676 			return -EINVAL;
1677 		if (get_user(v, (int __user *)optval))
1678 			return -EFAULT;
1679 		mrt->mroute_do_assert = v;
1680 		return 0;
1681 	}
1682 
1683 #ifdef CONFIG_IPV6_PIMSM_V2
1684 	case MRT6_PIM:
1685 	{
1686 		int v;
1687 
1688 		if (optlen != sizeof(v))
1689 			return -EINVAL;
1690 		if (get_user(v, (int __user *)optval))
1691 			return -EFAULT;
1692 		v = !!v;
1693 		rtnl_lock();
1694 		ret = 0;
1695 		if (v != mrt->mroute_do_pim) {
1696 			mrt->mroute_do_pim = v;
1697 			mrt->mroute_do_assert = v;
1698 		}
1699 		rtnl_unlock();
1700 		return ret;
1701 	}
1702 
1703 #endif
1704 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1705 	case MRT6_TABLE:
1706 	{
1707 		u32 v;
1708 
1709 		if (optlen != sizeof(u32))
1710 			return -EINVAL;
1711 		if (get_user(v, (u32 __user *)optval))
1712 			return -EFAULT;
1713 		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1714 		if (v != RT_TABLE_DEFAULT && v >= 100000000)
1715 			return -EINVAL;
1716 		if (sk == mrt->mroute6_sk)
1717 			return -EBUSY;
1718 
1719 		rtnl_lock();
1720 		ret = 0;
1721 		if (!ip6mr_new_table(net, v))
1722 			ret = -ENOMEM;
1723 		raw6_sk(sk)->ip6mr_table = v;
1724 		rtnl_unlock();
1725 		return ret;
1726 	}
1727 #endif
1728 	/*
1729 	 *	Spurious command, or MRT6_VERSION which you cannot
1730 	 *	set.
1731 	 */
1732 	default:
1733 		return -ENOPROTOOPT;
1734 	}
1735 }
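/* Illustrative userspace sketch (editor's addition, not part of this file):
 * the control sequence a pim6sd-style daemon performs against the options
 * handled above. The mif6ctl field names mirror their use in mif6_add();
 * error handling is omitted for brevity.
 */
#if 0	/* illustrative only, not compiled */
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/mroute6.h>

static int mroute6_setup(unsigned int phys_ifindex)
{
	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
	int one = 1;
	struct mif6ctl mif = {
		.mif6c_mifi	= 0,		/* our mif index */
		.mif6c_pifi	= phys_ifindex,	/* physical interface */
		.vifc_threshold	= 1,
	};

	/* become the multicast routing daemon for this table */
	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
	/* register one physical interface as mif 0 */
	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));
	/* ... read struct mrt6msg upcalls, answer with MRT6_ADD_MFC ... */
	return s;
}
#endif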
1736 
1737 /*
1738  *	Getsock opt support for the multicast routing system.
1739  */
1740 
1741 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1742 			  int __user *optlen)
1743 {
1744 	int olr;
1745 	int val;
1746 	struct net *net = sock_net(sk);
1747 	struct mr6_table *mrt;
1748 
1749 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1750 	if (mrt == NULL)
1751 		return -ENOENT;
1752 
1753 	switch (optname) {
1754 	case MRT6_VERSION:
1755 		val = 0x0305;
1756 		break;
1757 #ifdef CONFIG_IPV6_PIMSM_V2
1758 	case MRT6_PIM:
1759 		val = mrt->mroute_do_pim;
1760 		break;
1761 #endif
1762 	case MRT6_ASSERT:
1763 		val = mrt->mroute_do_assert;
1764 		break;
1765 	default:
1766 		return -ENOPROTOOPT;
1767 	}
1768 
1769 	if (get_user(olr, optlen))
1770 		return -EFAULT;
1771 
1772 	olr = min_t(int, olr, sizeof(int));
1773 	if (olr < 0)
1774 		return -EINVAL;
1775 
1776 	if (put_user(olr, optlen))
1777 		return -EFAULT;
1778 	if (copy_to_user(optval, &val, olr))
1779 		return -EFAULT;
1780 	return 0;
1781 }
1782 
1783 /*
1784  *	The IP multicast ioctl support routines.
1785  */
1786 
1787 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1788 {
1789 	struct sioc_sg_req6 sr;
1790 	struct sioc_mif_req6 vr;
1791 	struct mif_device *vif;
1792 	struct mfc6_cache *c;
1793 	struct net *net = sock_net(sk);
1794 	struct mr6_table *mrt;
1795 
1796 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1797 	if (mrt == NULL)
1798 		return -ENOENT;
1799 
1800 	switch (cmd) {
1801 	case SIOCGETMIFCNT_IN6:
1802 		if (copy_from_user(&vr, arg, sizeof(vr)))
1803 			return -EFAULT;
1804 		if (vr.mifi >= mrt->maxvif)
1805 			return -EINVAL;
1806 		read_lock(&mrt_lock);
1807 		vif = &mrt->vif6_table[vr.mifi];
1808 		if (MIF_EXISTS(mrt, vr.mifi)) {
1809 			vr.icount = vif->pkt_in;
1810 			vr.ocount = vif->pkt_out;
1811 			vr.ibytes = vif->bytes_in;
1812 			vr.obytes = vif->bytes_out;
1813 			read_unlock(&mrt_lock);
1814 
1815 			if (copy_to_user(arg, &vr, sizeof(vr)))
1816 				return -EFAULT;
1817 			return 0;
1818 		}
1819 		read_unlock(&mrt_lock);
1820 		return -EADDRNOTAVAIL;
1821 	case SIOCGETSGCNT_IN6:
1822 		if (copy_from_user(&sr, arg, sizeof(sr)))
1823 			return -EFAULT;
1824 
1825 		read_lock(&mrt_lock);
1826 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1827 		if (c) {
1828 			sr.pktcnt = c->mfc_un.res.pkt;
1829 			sr.bytecnt = c->mfc_un.res.bytes;
1830 			sr.wrong_if = c->mfc_un.res.wrong_if;
1831 			read_unlock(&mrt_lock);
1832 
1833 			if (copy_to_user(arg, &sr, sizeof(sr)))
1834 				return -EFAULT;
1835 			return 0;
1836 		}
1837 		read_unlock(&mrt_lock);
1838 		return -EADDRNOTAVAIL;
1839 	default:
1840 		return -ENOIOCTLCMD;
1841 	}
1842 }
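/* Illustrative sketch (editor's addition): querying the per-flow counters
 * served above. sioc_sg_req6 carries the (S,G) addresses in and the
 * pktcnt/bytecnt/wrong_if counters out; mroute_sock, source_addr and
 * group_addr are assumed to exist in the caller.
 */
#if 0	/* illustrative only, not compiled */
	struct sioc_sg_req6 sr = { 0 };

	sr.src.sin6_addr = source_addr;
	sr.grp.sin6_addr = group_addr;
	if (ioctl(mroute_sock, SIOCGETSGCNT_IN6, &sr) == 0)
		printf("pkts %lu bytes %lu wrong_if %lu\n",
		       sr.pktcnt, sr.bytecnt, sr.wrong_if);
#endif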
1843 
1844 #ifdef CONFIG_COMPAT
1845 struct compat_sioc_sg_req6 {
1846 	struct sockaddr_in6 src;
1847 	struct sockaddr_in6 grp;
1848 	compat_ulong_t pktcnt;
1849 	compat_ulong_t bytecnt;
1850 	compat_ulong_t wrong_if;
1851 };
1852 
1853 struct compat_sioc_mif_req6 {
1854 	mifi_t	mifi;
1855 	compat_ulong_t icount;
1856 	compat_ulong_t ocount;
1857 	compat_ulong_t ibytes;
1858 	compat_ulong_t obytes;
1859 };
1860 
1861 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1862 {
1863 	struct compat_sioc_sg_req6 sr;
1864 	struct compat_sioc_mif_req6 vr;
1865 	struct mif_device *vif;
1866 	struct mfc6_cache *c;
1867 	struct net *net = sock_net(sk);
1868 	struct mr6_table *mrt;
1869 
1870 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1871 	if (mrt == NULL)
1872 		return -ENOENT;
1873 
1874 	switch (cmd) {
1875 	case SIOCGETMIFCNT_IN6:
1876 		if (copy_from_user(&vr, arg, sizeof(vr)))
1877 			return -EFAULT;
1878 		if (vr.mifi >= mrt->maxvif)
1879 			return -EINVAL;
1880 		read_lock(&mrt_lock);
1881 		vif = &mrt->vif6_table[vr.mifi];
1882 		if (MIF_EXISTS(mrt, vr.mifi)) {
1883 			vr.icount = vif->pkt_in;
1884 			vr.ocount = vif->pkt_out;
1885 			vr.ibytes = vif->bytes_in;
1886 			vr.obytes = vif->bytes_out;
1887 			read_unlock(&mrt_lock);
1888 
1889 			if (copy_to_user(arg, &vr, sizeof(vr)))
1890 				return -EFAULT;
1891 			return 0;
1892 		}
1893 		read_unlock(&mrt_lock);
1894 		return -EADDRNOTAVAIL;
1895 	case SIOCGETSGCNT_IN6:
1896 		if (copy_from_user(&sr, arg, sizeof(sr)))
1897 			return -EFAULT;
1898 
1899 		read_lock(&mrt_lock);
1900 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1901 		if (c) {
1902 			sr.pktcnt = c->mfc_un.res.pkt;
1903 			sr.bytecnt = c->mfc_un.res.bytes;
1904 			sr.wrong_if = c->mfc_un.res.wrong_if;
1905 			read_unlock(&mrt_lock);
1906 
1907 			if (copy_to_user(arg, &sr, sizeof(sr)))
1908 				return -EFAULT;
1909 			return 0;
1910 		}
1911 		read_unlock(&mrt_lock);
1912 		return -EADDRNOTAVAIL;
1913 	default:
1914 		return -ENOIOCTLCMD;
1915 	}
1916 }
1917 #endif
1918 
1919 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1920 {
1921 	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1922 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1923 	IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1924 			 IPSTATS_MIB_OUTOCTETS, skb->len);
1925 	return dst_output(skb);
1926 }
1927 
1928 /*
1929  *	Processing handlers for ip6mr_forward
1930  */
1931 
1932 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1933 			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1934 {
1935 	struct ipv6hdr *ipv6h;
1936 	struct mif_device *vif = &mrt->vif6_table[vifi];
1937 	struct net_device *dev;
1938 	struct dst_entry *dst;
1939 	struct flowi6 fl6;
1940 
1941 	if (vif->dev == NULL)
1942 		goto out_free;
1943 
1944 #ifdef CONFIG_IPV6_PIMSM_V2
1945 	if (vif->flags & MIFF_REGISTER) {
1946 		vif->pkt_out++;
1947 		vif->bytes_out += skb->len;
1948 		vif->dev->stats.tx_bytes += skb->len;
1949 		vif->dev->stats.tx_packets++;
1950 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1951 		goto out_free;
1952 	}
1953 #endif
1954 
1955 	ipv6h = ipv6_hdr(skb);
1956 
1957 	fl6 = (struct flowi6) {
1958 		.flowi6_oif = vif->link,
1959 		.daddr = ipv6h->daddr,
1960 	};
1961 
1962 	dst = ip6_route_output(net, NULL, &fl6);
1963 	if (dst->error) {
1964 		dst_release(dst);
1965 		goto out_free;
1966 	}
1967 
1968 	skb_dst_drop(skb);
1969 	skb_dst_set(skb, dst);
1970 
1971 	/*
1972 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1973 	 * not only before forwarding, but also after forwarding on all output
1974 	 * interfaces. Clearly, if the mrouter runs a multicasting
1975 	 * program, it should receive packets regardless of which interface
1976 	 * the program is joined on.
1977 	 * If we did not do this, the program would have to join on all
1978 	 * interfaces. On the other hand, a multihomed host (or router, but
1979 	 * not an mrouter) cannot join on more than one interface - that would
1980 	 * result in it receiving multiple copies of the same packet.
1981 	 */
1982 	dev = vif->dev;
1983 	skb->dev = dev;
1984 	vif->pkt_out++;
1985 	vif->bytes_out += skb->len;
1986 
1987 	/* We are about to write */
1988 	/* XXX: extension headers? */
1989 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1990 		goto out_free;
1991 
1992 	ipv6h = ipv6_hdr(skb);
1993 	ipv6h->hop_limit--;
1994 
1995 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1996 
1997 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
1998 		       ip6mr_forward2_finish);
1999 
2000 out_free:
2001 	kfree_skb(skb);
2002 	return 0;
2003 }
2004 
2005 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2006 {
2007 	int ct;
2008 
2009 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2010 		if (mrt->vif6_table[ct].dev == dev)
2011 			break;
2012 	}
2013 	return ct;
2014 }
2015 
2016 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
2017 			  struct sk_buff *skb, struct mfc6_cache *cache)
2018 {
2019 	int psend = -1;
2020 	int vif, ct;
2021 
2022 	vif = cache->mf6c_parent;
2023 	cache->mfc_un.res.pkt++;
2024 	cache->mfc_un.res.bytes += skb->len;
2025 
2026 	/*
2027 	 * Wrong interface: drop packet and (maybe) send PIM assert.
2028 	 */
2029 	if (mrt->vif6_table[vif].dev != skb->dev) {
2030 		int true_vifi;
2031 
2032 		cache->mfc_un.res.wrong_if++;
2033 		true_vifi = ip6mr_find_vif(mrt, skb->dev);
2034 
2035 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2036 		    /* pimsm uses asserts, when switching from RPT to SPT,
2037 		       so that we cannot check that packet arrived on an oif.
2038 		       It is bad, but otherwise we would need to move pretty
2039 		       large chunk of pimd to kernel. Ough... --ANK
2040 		     */
2041 		    (mrt->mroute_do_pim ||
2042 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
2043 		    time_after(jiffies,
2044 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2045 			cache->mfc_un.res.last_assert = jiffies;
2046 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2047 		}
2048 		goto dont_forward;
2049 	}
2050 
2051 	mrt->vif6_table[vif].pkt_in++;
2052 	mrt->vif6_table[vif].bytes_in += skb->len;
2053 
2054 	/*
2055 	 *	Forward the frame: clone for every oif except the last; the original skb goes out on the last oif
2056 	 */
2057 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2058 		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2059 			if (psend != -1) {
2060 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2061 				if (skb2)
2062 					ip6mr_forward2(net, mrt, skb2, cache, psend);
2063 			}
2064 			psend = ct;
2065 		}
2066 	}
2067 	if (psend != -1) {
2068 		ip6mr_forward2(net, mrt, skb, cache, psend);
2069 		return 0;
2070 	}
2071 
2072 dont_forward:
2073 	kfree_skb(skb);
2074 	return 0;
2075 }
2076 
2077 
2078 /*
2079  *	Multicast packets for forwarding arrive here
2080  */
2081 
2082 int ip6_mr_input(struct sk_buff *skb)
2083 {
2084 	struct mfc6_cache *cache;
2085 	struct net *net = dev_net(skb->dev);
2086 	struct mr6_table *mrt;
2087 	struct flowi6 fl6 = {
2088 		.flowi6_iif	= skb->dev->ifindex,
2089 		.flowi6_mark	= skb->mark,
2090 	};
2091 	int err;
2092 
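	/* Select the multicast routing table for this packet; with
	 * CONFIG_IPV6_MROUTE_MULTIPLE_TABLES this consults the fib rules.
	 */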
2093 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2094 	if (err < 0) {
2095 		kfree_skb(skb);
2096 		return err;
2097 	}
2098 
2099 	read_lock(&mrt_lock);
2100 	cache = ip6mr_cache_find(mrt,
2101 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2102 
2103 	/*
2104 	 *	No usable cache entry
2105 	 */
2106 	if (cache == NULL) {
2107 		int vif;
2108 
2109 		vif = ip6mr_find_vif(mrt, skb->dev);
2110 		if (vif >= 0) {
2111 			int err = ip6mr_cache_unresolved(mrt, vif, skb);
2112 			read_unlock(&mrt_lock);
2113 
2114 			return err;
2115 		}
2116 		read_unlock(&mrt_lock);
2117 		kfree_skb(skb);
2118 		return -ENODEV;
2119 	}
2120 
2121 	ip6_mr_forward(net, mrt, skb, cache);
2122 
2123 	read_unlock(&mrt_lock);
2124 
2125 	return 0;
2126 }
2127 
2128 
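/*
 *	Fill the route attributes of an mroute dump entry: RTA_IIF for the
 *	parent mif, an RTA_MULTIPATH nest with one rtnexthop per output mif,
 *	and RTA_MFC_STATS with the entry's packet/byte/wrong-if counters.
 */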
2129 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2130 			       struct mfc6_cache *c, struct rtmsg *rtm)
2131 {
2132 	int ct;
2133 	struct rtnexthop *nhp;
2134 	struct nlattr *mp_attr;
2135 	struct rta_mfc_stats mfcs;
2136 
2137 	/* If cache is unresolved, don't try to parse IIF and OIF */
2138 	if (c->mf6c_parent >= MAXMIFS)
2139 		return -ENOENT;
2140 
2141 	if (MIF_EXISTS(mrt, c->mf6c_parent) &&
2142 	    nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
2143 		return -EMSGSIZE;
2144 	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
2145 	if (mp_attr == NULL)
2146 		return -EMSGSIZE;
2147 
2148 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2149 		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2150 			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
2151 			if (nhp == NULL) {
2152 				nla_nest_cancel(skb, mp_attr);
2153 				return -EMSGSIZE;
2154 			}
2155 
2156 			nhp->rtnh_flags = 0;
2157 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2158 			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2159 			nhp->rtnh_len = sizeof(*nhp);
2160 		}
2161 	}
2162 
2163 	nla_nest_end(skb, mp_attr);
2164 
2165 	mfcs.mfcs_packets = c->mfc_un.res.pkt;
2166 	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2167 	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
2168 	if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
2169 		return -EMSGSIZE;
2170 
2171 	rtm->rtm_type = RTN_MULTICAST;
2172 	return 1;
2173 }
2174 
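/*
 *	Answer an RTM_GETROUTE query for a multicast destination (reached via
 *	the IPv6 route dump path).  If no cache entry exists yet, queue a
 *	minimal fake header on the unresolved queue to kick off resolution.
 */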
2175 int ip6mr_get_route(struct net *net,
2176 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2177 {
2178 	int err;
2179 	struct mr6_table *mrt;
2180 	struct mfc6_cache *cache;
2181 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2182 
2183 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2184 	if (mrt == NULL)
2185 		return -ENOENT;
2186 
2187 	read_lock(&mrt_lock);
2188 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2189 
2190 	if (!cache) {
2191 		struct sk_buff *skb2;
2192 		struct ipv6hdr *iph;
2193 		struct net_device *dev;
2194 		int vif;
2195 
2196 		if (nowait) {
2197 			read_unlock(&mrt_lock);
2198 			return -EAGAIN;
2199 		}
2200 
2201 		dev = skb->dev;
2202 		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2203 			read_unlock(&mrt_lock);
2204 			return -ENODEV;
2205 		}
2206 
2207 		/* XXX: is allocating just the bare header really correct? */
2208 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2209 		if (!skb2) {
2210 			read_unlock(&mrt_lock);
2211 			return -ENOMEM;
2212 		}
2213 
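		/*
		 * Build a minimal IPv6 header carrying only the source and
		 * destination addresses; that is all ip6mr_cache_unresolved()
		 * needs to create the entry and report to the daemon.
		 */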
2214 		skb_reset_transport_header(skb2);
2215 
2216 		skb_put(skb2, sizeof(struct ipv6hdr));
2217 		skb_reset_network_header(skb2);
2218 
2219 		iph = ipv6_hdr(skb2);
2220 		iph->version = 0;
2221 		iph->priority = 0;
2222 		iph->flow_lbl[0] = 0;
2223 		iph->flow_lbl[1] = 0;
2224 		iph->flow_lbl[2] = 0;
2225 		iph->payload_len = 0;
2226 		iph->nexthdr = IPPROTO_NONE;
2227 		iph->hop_limit = 0;
2228 		iph->saddr = rt->rt6i_src.addr;
2229 		iph->daddr = rt->rt6i_dst.addr;
2230 
2231 		err = ip6mr_cache_unresolved(mrt, vif, skb2);
2232 		read_unlock(&mrt_lock);
2233 
2234 		return err;
2235 	}
2236 
2237 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2238 		cache->mfc_flags |= MFC_NOTIFY;
2239 
2240 	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2241 	read_unlock(&mrt_lock);
2242 	return err;
2243 }
2244 
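/*
 *	Build one complete RTM_NEWROUTE/RTM_DELROUTE netlink message for a
 *	cache entry; shared by the notification and dump paths.
 */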
2245 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2246 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd)
2247 {
2248 	struct nlmsghdr *nlh;
2249 	struct rtmsg *rtm;
2250 	int err;
2251 
2252 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI);
2253 	if (nlh == NULL)
2254 		return -EMSGSIZE;
2255 
2256 	rtm = nlmsg_data(nlh);
2257 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2258 	rtm->rtm_dst_len  = 128;
2259 	rtm->rtm_src_len  = 128;
2260 	rtm->rtm_tos      = 0;
2261 	rtm->rtm_table    = mrt->id;
2262 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2263 		goto nla_put_failure;
2264 	rtm->rtm_type     = RTN_MULTICAST;
2265 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2266 	if (c->mfc_flags & MFC_STATIC)
2267 		rtm->rtm_protocol = RTPROT_STATIC;
2268 	else
2269 		rtm->rtm_protocol = RTPROT_MROUTED;
2270 	rtm->rtm_flags    = 0;
2271 
2272 	if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) ||
2273 	    nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp))
2274 		goto nla_put_failure;
2275 	err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
2276 	/* do not break the dump if cache is unresolved */
2277 	if (err < 0 && err != -ENOENT)
2278 		goto nla_put_failure;
2279 
2280 	return nlmsg_end(skb, nlh);
2281 
2282 nla_put_failure:
2283 	nlmsg_cancel(skb, nlh);
2284 	return -EMSGSIZE;
2285 }
2286 
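/*
 *	Worst-case netlink message size for one cache entry; unresolved
 *	entries carry no nexthop or statistics attributes.
 */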
2287 static int mr6_msgsize(bool unresolved, int maxvif)
2288 {
2289 	size_t len =
2290 		NLMSG_ALIGN(sizeof(struct rtmsg))
2291 		+ nla_total_size(4)	/* RTA_TABLE */
2292 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2293 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2294 		;
2295 
2296 	if (!unresolved)
2297 		len = len
2298 		      + nla_total_size(4)	/* RTA_IIF */
2299 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2300 		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2301 						/* RTA_MFC_STATS */
2302 		      + nla_total_size(sizeof(struct rta_mfc_stats))
2303 		;
2304 
2305 	return len;
2306 }
2307 
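/*
 *	Notify RTNLGRP_IPV6_MROUTE listeners (e.g. a multicast routing
 *	daemon) that a cache entry was added or deleted.
 */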
2308 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
2309 			      int cmd)
2310 {
2311 	struct net *net = read_pnet(&mrt->net);
2312 	struct sk_buff *skb;
2313 	int err = -ENOBUFS;
2314 
2315 	skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
2316 			GFP_ATOMIC);
2317 	if (skb == NULL)
2318 		goto errout;
2319 
2320 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd);
2321 	if (err < 0)
2322 		goto errout;
2323 
2324 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2325 	return;
2326 
2327 errout:
2328 	kfree_skb(skb);
2329 	if (err < 0)
2330 		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2331 }
2332 
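/*
 *	Dump all multicast routes, resolved and unresolved, from every table.
 */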
2333 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2334 {
2335 	struct net *net = sock_net(skb->sk);
2336 	struct mr6_table *mrt;
2337 	struct mfc6_cache *mfc;
2338 	unsigned int t = 0, s_t;
2339 	unsigned int h = 0, s_h;
2340 	unsigned int e = 0, s_e;
2341 
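	/* cb->args[] preserves the dump position across recvmsg() calls:
	 * args[0] is the table index, args[1] the hash line and args[2]
	 * the entry index within that line.
	 */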
2342 	s_t = cb->args[0];
2343 	s_h = cb->args[1];
2344 	s_e = cb->args[2];
2345 
2346 	read_lock(&mrt_lock);
2347 	ip6mr_for_each_table(mrt, net) {
2348 		if (t < s_t)
2349 			goto next_table;
2350 		if (t > s_t)
2351 			s_h = 0;
2352 		for (h = s_h; h < MFC6_LINES; h++) {
2353 			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2354 				if (e < s_e)
2355 					goto next_entry;
2356 				if (ip6mr_fill_mroute(mrt, skb,
2357 						      NETLINK_CB(cb->skb).portid,
2358 						      cb->nlh->nlmsg_seq,
2359 						      mfc, RTM_NEWROUTE) < 0)
2360 					goto done;
2361 next_entry:
2362 				e++;
2363 			}
2364 			e = s_e = 0;
2365 		}
2366 		spin_lock_bh(&mfc_unres_lock);
2367 		list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
2368 			if (e < s_e)
2369 				goto next_entry2;
2370 			if (ip6mr_fill_mroute(mrt, skb,
2371 					      NETLINK_CB(cb->skb).portid,
2372 					      cb->nlh->nlmsg_seq,
2373 					      mfc, RTM_NEWROUTE) < 0) {
2374 				spin_unlock_bh(&mfc_unres_lock);
2375 				goto done;
2376 			}
2377 next_entry2:
2378 			e++;
2379 		}
2380 		spin_unlock_bh(&mfc_unres_lock);
2381 		e = s_e = 0;
2382 		s_h = 0;
2383 next_table:
2384 		t++;
2385 	}
2386 done:
2387 	read_unlock(&mrt_lock);
2388 
2389 	cb->args[2] = e;
2390 	cb->args[1] = h;
2391 	cb->args[0] = t;
2392 
2393 	return skb->len;
2394 }
2395