xref: /openbmc/linux/net/ipv6/ip6mr.c (revision 2eb0f624b709e78ec8e2f4c3412947703db99301)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <linux/uaccess.h>
20 #include <linux/types.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
23 #include <linux/mm.h>
24 #include <linux/kernel.h>
25 #include <linux/fcntl.h>
26 #include <linux/stat.h>
27 #include <linux/socket.h>
28 #include <linux/inet.h>
29 #include <linux/netdevice.h>
30 #include <linux/inetdevice.h>
31 #include <linux/proc_fs.h>
32 #include <linux/seq_file.h>
33 #include <linux/init.h>
34 #include <linux/compat.h>
35 #include <net/protocol.h>
36 #include <linux/skbuff.h>
37 #include <net/raw.h>
38 #include <linux/notifier.h>
39 #include <linux/if_arp.h>
40 #include <net/checksum.h>
41 #include <net/netlink.h>
42 #include <net/fib_rules.h>
43 
44 #include <net/ipv6.h>
45 #include <net/ip6_route.h>
46 #include <linux/mroute6.h>
47 #include <linux/pim.h>
48 #include <net/addrconf.h>
49 #include <linux/netfilter_ipv6.h>
50 #include <linux/export.h>
51 #include <net/ip6_checksum.h>
52 #include <linux/netconf.h>
53 
54 struct ip6mr_rule {
55 	struct fib_rule		common;
56 };
57 
/* Rule-lookup result: ip6mr_rule_action() stores the selected table here. */
struct ip6mr_result {
	struct mr_table *mrt;
};
61 
/*
 * Big lock protecting the vif table, the mrt cache and the mroute socket
 * state. Note that changes are additionally serialized via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);
67 
/* Multicast router control variables */

/* Dedicated spinlock guarding the queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);
72 
/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and is protected
   by the weak lock mrt_lock. The queue of unresolved entries is
   protected by the strong spinlock mfc_unres_lock.

   As a result, the data path takes no exclusive locks at all.
 */
80 
81 static struct kmem_cache *mrt_cachep __read_mostly;
82 
83 static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
84 static void ip6mr_free_table(struct mr_table *mrt);
85 
86 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
87 			   struct sk_buff *skb, struct mfc6_cache *cache);
88 static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
89 			      mifi_t mifi, int assert);
90 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
91 			      int cmd);
92 static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
93 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
94 			       struct netlink_callback *cb);
95 static void mroute_clean_tables(struct mr_table *mrt, bool all);
96 static void ipmr_expire_process(struct timer_list *t);
97 
98 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/* Walk every table linked on the namespace's mr6_tables list. */
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &(net)->ipv6.mr6_tables, list)
101 
102 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
103 					    struct mr_table *mrt)
104 {
105 	struct mr_table *ret;
106 
107 	if (!mrt)
108 		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
109 				     struct mr_table, list);
110 	else
111 		ret = list_entry_rcu(mrt->list.next,
112 				     struct mr_table, list);
113 
114 	if (&ret->list == &net->ipv6.mr6_tables)
115 		return NULL;
116 	return ret;
117 }
118 
119 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
120 {
121 	struct mr_table *mrt;
122 
123 	ip6mr_for_each_table(mrt, net) {
124 		if (mrt->id == id)
125 			return mrt;
126 	}
127 	return NULL;
128 }
129 
130 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
131 			    struct mr_table **mrt)
132 {
133 	int err;
134 	struct ip6mr_result res;
135 	struct fib_lookup_arg arg = {
136 		.result = &res,
137 		.flags = FIB_LOOKUP_NOREF,
138 	};
139 
140 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
141 			       flowi6_to_flowi(flp6), 0, &arg);
142 	if (err < 0)
143 		return err;
144 	*mrt = res.mrt;
145 	return 0;
146 }
147 
148 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
149 			     int flags, struct fib_lookup_arg *arg)
150 {
151 	struct ip6mr_result *res = arg->result;
152 	struct mr_table *mrt;
153 
154 	switch (rule->action) {
155 	case FR_ACT_TO_TBL:
156 		break;
157 	case FR_ACT_UNREACHABLE:
158 		return -ENETUNREACH;
159 	case FR_ACT_PROHIBIT:
160 		return -EACCES;
161 	case FR_ACT_BLACKHOLE:
162 	default:
163 		return -EINVAL;
164 	}
165 
166 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
167 	if (!mrt)
168 		return -EAGAIN;
169 	res->mrt = mrt;
170 	return 0;
171 }
172 
/* fib_rules ->match callback: reports a match (1) for every flow. */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	const int matched = 1;

	return matched;
}
177 
178 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
179 	FRA_GENERIC_POLICY,
180 };
181 
/* fib_rules ->configure callback: nothing to configure, always returns 0. */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	const int ok = 0;

	return ok;
}
187 
/* fib_rules ->compare callback: always returns 1, whatever the arguments. */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	const int same = 1;

	return same;
}
193 
194 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
195 			   struct fib_rule_hdr *frh)
196 {
197 	frh->dst_len = 0;
198 	frh->src_len = 0;
199 	frh->tos     = 0;
200 	return 0;
201 }
202 
203 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
204 	.family		= RTNL_FAMILY_IP6MR,
205 	.rule_size	= sizeof(struct ip6mr_rule),
206 	.addr_size	= sizeof(struct in6_addr),
207 	.action		= ip6mr_rule_action,
208 	.match		= ip6mr_rule_match,
209 	.configure	= ip6mr_rule_configure,
210 	.compare	= ip6mr_rule_compare,
211 	.fill		= ip6mr_rule_fill,
212 	.nlgroup	= RTNLGRP_IPV6_RULE,
213 	.policy		= ip6mr_rule_policy,
214 	.owner		= THIS_MODULE,
215 };
216 
217 static int __net_init ip6mr_rules_init(struct net *net)
218 {
219 	struct fib_rules_ops *ops;
220 	struct mr_table *mrt;
221 	int err;
222 
223 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
224 	if (IS_ERR(ops))
225 		return PTR_ERR(ops);
226 
227 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
228 
229 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
230 	if (!mrt) {
231 		err = -ENOMEM;
232 		goto err1;
233 	}
234 
235 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
236 	if (err < 0)
237 		goto err2;
238 
239 	net->ipv6.mr6_rules_ops = ops;
240 	return 0;
241 
242 err2:
243 	ip6mr_free_table(mrt);
244 err1:
245 	fib_rules_unregister(ops);
246 	return err;
247 }
248 
249 static void __net_exit ip6mr_rules_exit(struct net *net)
250 {
251 	struct mr_table *mrt, *next;
252 
253 	rtnl_lock();
254 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
255 		list_del(&mrt->list);
256 		ip6mr_free_table(mrt);
257 	}
258 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
259 	rtnl_unlock();
260 }
261 
262 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
263 {
264 	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
265 }
266 
267 static unsigned int ip6mr_rules_seq_read(struct net *net)
268 {
269 	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
270 }
271 
272 bool ip6mr_rule_default(const struct fib_rule *rule)
273 {
274 	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
275 	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
276 }
277 EXPORT_SYMBOL(ip6mr_rule_default);
278 #else
/* Single-table build: the body runs at most once, for net->ipv6.mrt6. */
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = (net)->ipv6.mrt6; mrt; mrt = NULL)
281 
282 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
283 					    struct mr_table *mrt)
284 {
285 	if (!mrt)
286 		return net->ipv6.mrt6;
287 	return NULL;
288 }
289 
290 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
291 {
292 	return net->ipv6.mrt6;
293 }
294 
295 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
296 			    struct mr_table **mrt)
297 {
298 	*mrt = net->ipv6.mrt6;
299 	return 0;
300 }
301 
302 static int __net_init ip6mr_rules_init(struct net *net)
303 {
304 	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
305 	return net->ipv6.mrt6 ? 0 : -ENOMEM;
306 }
307 
308 static void __net_exit ip6mr_rules_exit(struct net *net)
309 {
310 	rtnl_lock();
311 	ip6mr_free_table(net->ipv6.mrt6);
312 	net->ipv6.mrt6 = NULL;
313 	rtnl_unlock();
314 }
315 
/* Single-table build: there are no rules to dump, so report success. */
static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
	const int nothing_to_dump = 0;

	return nothing_to_dump;
}
320 
/* Single-table build: always returns 0. */
static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	const unsigned int seq = 0;

	return seq;
}
325 #endif
326 
327 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
328 			  const void *ptr)
329 {
330 	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
331 	struct mfc6_cache *c = (struct mfc6_cache *)ptr;
332 
333 	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
334 	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
335 }
336 
337 static const struct rhashtable_params ip6mr_rht_params = {
338 	.head_offset = offsetof(struct mr_mfc, mnode),
339 	.key_offset = offsetof(struct mfc6_cache, cmparg),
340 	.key_len = sizeof(struct mfc6_cache_cmp_arg),
341 	.nelem_hint = 3,
342 	.locks_mul = 1,
343 	.obj_cmpfn = ip6mr_hash_cmp,
344 	.automatic_shrinking = true,
345 };
346 
/*
 * Callback passed to mr_table_alloc(): with multiple tables enabled the
 * new table is appended to the namespace's table list; otherwise it does
 * nothing.
 */
static void ip6mr_new_table_set(struct mr_table *mrt,
				struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	struct list_head *tables = &net->ipv6.mr6_tables;

	list_add_tail_rcu(&mrt->list, tables);
#endif
}
354 
355 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
356 	.mf6c_origin = IN6ADDR_ANY_INIT,
357 	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
358 };
359 
360 static struct mr_table_ops ip6mr_mr_table_ops = {
361 	.rht_params = &ip6mr_rht_params,
362 	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
363 };
364 
365 static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
366 {
367 	struct mr_table *mrt;
368 
369 	mrt = ip6mr_get_table(net, id);
370 	if (mrt)
371 		return mrt;
372 
373 	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
374 			      ipmr_expire_process, ip6mr_new_table_set);
375 }
376 
377 static void ip6mr_free_table(struct mr_table *mrt)
378 {
379 	del_timer_sync(&mrt->ipmr_expire_timer);
380 	mroute_clean_tables(mrt, true);
381 	rhltable_destroy(&mrt->mfc_hash);
382 	kfree(mrt);
383 }
384 
385 #ifdef CONFIG_PROC_FS
386 /* The /proc interfaces to multicast routing
387  * /proc/ip6_mr_cache /proc/ip6_mr_vif
388  */
389 
390 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
391 	__acquires(mrt_lock)
392 {
393 	struct mr_vif_iter *iter = seq->private;
394 	struct net *net = seq_file_net(seq);
395 	struct mr_table *mrt;
396 
397 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
398 	if (!mrt)
399 		return ERR_PTR(-ENOENT);
400 
401 	iter->mrt = mrt;
402 
403 	read_lock(&mrt_lock);
404 	return mr_vif_seq_start(seq, pos);
405 }
406 
407 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
408 	__releases(mrt_lock)
409 {
410 	read_unlock(&mrt_lock);
411 }
412 
413 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
414 {
415 	struct mr_vif_iter *iter = seq->private;
416 	struct mr_table *mrt = iter->mrt;
417 
418 	if (v == SEQ_START_TOKEN) {
419 		seq_puts(seq,
420 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
421 	} else {
422 		const struct vif_device *vif = v;
423 		const char *name = vif->dev ? vif->dev->name : "none";
424 
425 		seq_printf(seq,
426 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
427 			   vif - mrt->vif_table,
428 			   name, vif->bytes_in, vif->pkt_in,
429 			   vif->bytes_out, vif->pkt_out,
430 			   vif->flags);
431 	}
432 	return 0;
433 }
434 
435 static const struct seq_operations ip6mr_vif_seq_ops = {
436 	.start = ip6mr_vif_seq_start,
437 	.next  = mr_vif_seq_next,
438 	.stop  = ip6mr_vif_seq_stop,
439 	.show  = ip6mr_vif_seq_show,
440 };
441 
442 static int ip6mr_vif_open(struct inode *inode, struct file *file)
443 {
444 	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
445 			    sizeof(struct mr_vif_iter));
446 }
447 
448 static const struct file_operations ip6mr_vif_fops = {
449 	.open    = ip6mr_vif_open,
450 	.read    = seq_read,
451 	.llseek  = seq_lseek,
452 	.release = seq_release_net,
453 };
454 
455 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
456 {
457 	struct net *net = seq_file_net(seq);
458 	struct mr_table *mrt;
459 
460 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
461 	if (!mrt)
462 		return ERR_PTR(-ENOENT);
463 
464 	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
465 }
466 
467 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
468 {
469 	int n;
470 
471 	if (v == SEQ_START_TOKEN) {
472 		seq_puts(seq,
473 			 "Group                            "
474 			 "Origin                           "
475 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
476 	} else {
477 		const struct mfc6_cache *mfc = v;
478 		const struct mr_mfc_iter *it = seq->private;
479 		struct mr_table *mrt = it->mrt;
480 
481 		seq_printf(seq, "%pI6 %pI6 %-3hd",
482 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
483 			   mfc->_c.mfc_parent);
484 
485 		if (it->cache != &mrt->mfc_unres_queue) {
486 			seq_printf(seq, " %8lu %8lu %8lu",
487 				   mfc->_c.mfc_un.res.pkt,
488 				   mfc->_c.mfc_un.res.bytes,
489 				   mfc->_c.mfc_un.res.wrong_if);
490 			for (n = mfc->_c.mfc_un.res.minvif;
491 			     n < mfc->_c.mfc_un.res.maxvif; n++) {
492 				if (VIF_EXISTS(mrt, n) &&
493 				    mfc->_c.mfc_un.res.ttls[n] < 255)
494 					seq_printf(seq,
495 						   " %2d:%-3d", n,
496 						   mfc->_c.mfc_un.res.ttls[n]);
497 			}
498 		} else {
499 			/* unresolved mfc_caches don't contain
500 			 * pkt, bytes and wrong_if values
501 			 */
502 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
503 		}
504 		seq_putc(seq, '\n');
505 	}
506 	return 0;
507 }
508 
509 static const struct seq_operations ipmr_mfc_seq_ops = {
510 	.start = ipmr_mfc_seq_start,
511 	.next  = mr_mfc_seq_next,
512 	.stop  = mr_mfc_seq_stop,
513 	.show  = ipmr_mfc_seq_show,
514 };
515 
516 static int ipmr_mfc_open(struct inode *inode, struct file *file)
517 {
518 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
519 			    sizeof(struct mr_mfc_iter));
520 }
521 
522 static const struct file_operations ip6mr_mfc_fops = {
523 	.open    = ipmr_mfc_open,
524 	.read    = seq_read,
525 	.llseek  = seq_lseek,
526 	.release = seq_release_net,
527 };
528 #endif
529 
530 #ifdef CONFIG_IPV6_PIMSM_V2
531 
532 static int pim6_rcv(struct sk_buff *skb)
533 {
534 	struct pimreghdr *pim;
535 	struct ipv6hdr   *encap;
536 	struct net_device  *reg_dev = NULL;
537 	struct net *net = dev_net(skb->dev);
538 	struct mr_table *mrt;
539 	struct flowi6 fl6 = {
540 		.flowi6_iif	= skb->dev->ifindex,
541 		.flowi6_mark	= skb->mark,
542 	};
543 	int reg_vif_num;
544 
545 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
546 		goto drop;
547 
548 	pim = (struct pimreghdr *)skb_transport_header(skb);
549 	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
550 	    (pim->flags & PIM_NULL_REGISTER) ||
551 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
552 			     sizeof(*pim), IPPROTO_PIM,
553 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
554 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
555 		goto drop;
556 
557 	/* check if the inner packet is destined to mcast group */
558 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
559 				   sizeof(*pim));
560 
561 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
562 	    encap->payload_len == 0 ||
563 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
564 		goto drop;
565 
566 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
567 		goto drop;
568 	reg_vif_num = mrt->mroute_reg_vif_num;
569 
570 	read_lock(&mrt_lock);
571 	if (reg_vif_num >= 0)
572 		reg_dev = mrt->vif_table[reg_vif_num].dev;
573 	if (reg_dev)
574 		dev_hold(reg_dev);
575 	read_unlock(&mrt_lock);
576 
577 	if (!reg_dev)
578 		goto drop;
579 
580 	skb->mac_header = skb->network_header;
581 	skb_pull(skb, (u8 *)encap - skb->data);
582 	skb_reset_network_header(skb);
583 	skb->protocol = htons(ETH_P_IPV6);
584 	skb->ip_summed = CHECKSUM_NONE;
585 
586 	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
587 
588 	netif_rx(skb);
589 
590 	dev_put(reg_dev);
591 	return 0;
592  drop:
593 	kfree_skb(skb);
594 	return 0;
595 }
596 
597 static const struct inet6_protocol pim6_protocol = {
598 	.handler	=	pim6_rcv,
599 };
600 
601 /* Service routines creating virtual interfaces: PIMREG */
602 
603 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
604 				      struct net_device *dev)
605 {
606 	struct net *net = dev_net(dev);
607 	struct mr_table *mrt;
608 	struct flowi6 fl6 = {
609 		.flowi6_oif	= dev->ifindex,
610 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
611 		.flowi6_mark	= skb->mark,
612 	};
613 	int err;
614 
615 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
616 	if (err < 0) {
617 		kfree_skb(skb);
618 		return err;
619 	}
620 
621 	read_lock(&mrt_lock);
622 	dev->stats.tx_bytes += skb->len;
623 	dev->stats.tx_packets++;
624 	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
625 	read_unlock(&mrt_lock);
626 	kfree_skb(skb);
627 	return NETDEV_TX_OK;
628 }
629 
/* ->ndo_get_iflink of the register vif device: always reports 0. */
static int reg_vif_get_iflink(const struct net_device *dev)
{
	const int iflink = 0;

	return iflink;
}
634 
635 static const struct net_device_ops reg_vif_netdev_ops = {
636 	.ndo_start_xmit	= reg_vif_xmit,
637 	.ndo_get_iflink = reg_vif_get_iflink,
638 };
639 
640 static void reg_vif_setup(struct net_device *dev)
641 {
642 	dev->type		= ARPHRD_PIMREG;
643 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
644 	dev->flags		= IFF_NOARP;
645 	dev->netdev_ops		= &reg_vif_netdev_ops;
646 	dev->needs_free_netdev	= true;
647 	dev->features		|= NETIF_F_NETNS_LOCAL;
648 }
649 
650 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
651 {
652 	struct net_device *dev;
653 	char name[IFNAMSIZ];
654 
655 	if (mrt->id == RT6_TABLE_DFLT)
656 		sprintf(name, "pim6reg");
657 	else
658 		sprintf(name, "pim6reg%u", mrt->id);
659 
660 	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
661 	if (!dev)
662 		return NULL;
663 
664 	dev_net_set(dev, net);
665 
666 	if (register_netdevice(dev)) {
667 		free_netdev(dev);
668 		return NULL;
669 	}
670 
671 	if (dev_open(dev))
672 		goto failure;
673 
674 	dev_hold(dev);
675 	return dev;
676 
677 failure:
678 	unregister_netdevice(dev);
679 	return NULL;
680 }
681 #endif
682 
683 static int call_ip6mr_vif_entry_notifiers(struct net *net,
684 					  enum fib_event_type event_type,
685 					  struct vif_device *vif,
686 					  mifi_t vif_index, u32 tb_id)
687 {
688 	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
689 				     vif, vif_index, tb_id,
690 				     &net->ipv6.ipmr_seq);
691 }
692 
693 static int call_ip6mr_mfc_entry_notifiers(struct net *net,
694 					  enum fib_event_type event_type,
695 					  struct mfc6_cache *mfc, u32 tb_id)
696 {
697 	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
698 				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
699 }
700 
701 /* Delete a VIF entry */
702 static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
703 		       struct list_head *head)
704 {
705 	struct vif_device *v;
706 	struct net_device *dev;
707 	struct inet6_dev *in6_dev;
708 
709 	if (vifi < 0 || vifi >= mrt->maxvif)
710 		return -EADDRNOTAVAIL;
711 
712 	v = &mrt->vif_table[vifi];
713 
714 	if (VIF_EXISTS(mrt, vifi))
715 		call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
716 					       FIB_EVENT_VIF_DEL, v, vifi,
717 					       mrt->id);
718 
719 	write_lock_bh(&mrt_lock);
720 	dev = v->dev;
721 	v->dev = NULL;
722 
723 	if (!dev) {
724 		write_unlock_bh(&mrt_lock);
725 		return -EADDRNOTAVAIL;
726 	}
727 
728 #ifdef CONFIG_IPV6_PIMSM_V2
729 	if (vifi == mrt->mroute_reg_vif_num)
730 		mrt->mroute_reg_vif_num = -1;
731 #endif
732 
733 	if (vifi + 1 == mrt->maxvif) {
734 		int tmp;
735 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
736 			if (VIF_EXISTS(mrt, tmp))
737 				break;
738 		}
739 		mrt->maxvif = tmp + 1;
740 	}
741 
742 	write_unlock_bh(&mrt_lock);
743 
744 	dev_set_allmulti(dev, -1);
745 
746 	in6_dev = __in6_dev_get(dev);
747 	if (in6_dev) {
748 		in6_dev->cnf.mc_forwarding--;
749 		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
750 					     NETCONFA_MC_FORWARDING,
751 					     dev->ifindex, &in6_dev->cnf);
752 	}
753 
754 	if ((v->flags & MIFF_REGISTER) && !notify)
755 		unregister_netdevice_queue(dev, head);
756 
757 	dev_put(dev);
758 	return 0;
759 }
760 
761 static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
762 {
763 	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
764 
765 	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
766 }
767 
768 static inline void ip6mr_cache_free(struct mfc6_cache *c)
769 {
770 	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
771 }
772 
773 /* Destroy an unresolved cache entry, killing queued skbs
774    and reporting error to netlink readers.
775  */
776 
777 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
778 {
779 	struct net *net = read_pnet(&mrt->net);
780 	struct sk_buff *skb;
781 
782 	atomic_dec(&mrt->cache_resolve_queue_len);
783 
784 	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
785 		if (ipv6_hdr(skb)->version == 0) {
786 			struct nlmsghdr *nlh = skb_pull(skb,
787 							sizeof(struct ipv6hdr));
788 			nlh->nlmsg_type = NLMSG_ERROR;
789 			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
790 			skb_trim(skb, nlh->nlmsg_len);
791 			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
792 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
793 		} else
794 			kfree_skb(skb);
795 	}
796 
797 	ip6mr_cache_free(c);
798 }
799 
800 
801 /* Timer process for all the unresolved queue. */
802 
803 static void ipmr_do_expire_process(struct mr_table *mrt)
804 {
805 	unsigned long now = jiffies;
806 	unsigned long expires = 10 * HZ;
807 	struct mr_mfc *c, *next;
808 
809 	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
810 		if (time_after(c->mfc_un.unres.expires, now)) {
811 			/* not yet... */
812 			unsigned long interval = c->mfc_un.unres.expires - now;
813 			if (interval < expires)
814 				expires = interval;
815 			continue;
816 		}
817 
818 		list_del(&c->list);
819 		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
820 		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
821 	}
822 
823 	if (!list_empty(&mrt->mfc_unres_queue))
824 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
825 }
826 
827 static void ipmr_expire_process(struct timer_list *t)
828 {
829 	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
830 
831 	if (!spin_trylock(&mfc_unres_lock)) {
832 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
833 		return;
834 	}
835 
836 	if (!list_empty(&mrt->mfc_unres_queue))
837 		ipmr_do_expire_process(mrt);
838 
839 	spin_unlock(&mfc_unres_lock);
840 }
841 
842 /* Fill oifs list. It is called under write locked mrt_lock. */
843 
844 static void ip6mr_update_thresholds(struct mr_table *mrt,
845 				    struct mr_mfc *cache,
846 				    unsigned char *ttls)
847 {
848 	int vifi;
849 
850 	cache->mfc_un.res.minvif = MAXMIFS;
851 	cache->mfc_un.res.maxvif = 0;
852 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
853 
854 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
855 		if (VIF_EXISTS(mrt, vifi) &&
856 		    ttls[vifi] && ttls[vifi] < 255) {
857 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
858 			if (cache->mfc_un.res.minvif > vifi)
859 				cache->mfc_un.res.minvif = vifi;
860 			if (cache->mfc_un.res.maxvif <= vifi)
861 				cache->mfc_un.res.maxvif = vifi + 1;
862 		}
863 	}
864 	cache->mfc_un.res.lastuse = jiffies;
865 }
866 
867 static int mif6_add(struct net *net, struct mr_table *mrt,
868 		    struct mif6ctl *vifc, int mrtsock)
869 {
870 	int vifi = vifc->mif6c_mifi;
871 	struct vif_device *v = &mrt->vif_table[vifi];
872 	struct net_device *dev;
873 	struct inet6_dev *in6_dev;
874 	int err;
875 
876 	/* Is vif busy ? */
877 	if (VIF_EXISTS(mrt, vifi))
878 		return -EADDRINUSE;
879 
880 	switch (vifc->mif6c_flags) {
881 #ifdef CONFIG_IPV6_PIMSM_V2
882 	case MIFF_REGISTER:
883 		/*
884 		 * Special Purpose VIF in PIM
885 		 * All the packets will be sent to the daemon
886 		 */
887 		if (mrt->mroute_reg_vif_num >= 0)
888 			return -EADDRINUSE;
889 		dev = ip6mr_reg_vif(net, mrt);
890 		if (!dev)
891 			return -ENOBUFS;
892 		err = dev_set_allmulti(dev, 1);
893 		if (err) {
894 			unregister_netdevice(dev);
895 			dev_put(dev);
896 			return err;
897 		}
898 		break;
899 #endif
900 	case 0:
901 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
902 		if (!dev)
903 			return -EADDRNOTAVAIL;
904 		err = dev_set_allmulti(dev, 1);
905 		if (err) {
906 			dev_put(dev);
907 			return err;
908 		}
909 		break;
910 	default:
911 		return -EINVAL;
912 	}
913 
914 	in6_dev = __in6_dev_get(dev);
915 	if (in6_dev) {
916 		in6_dev->cnf.mc_forwarding++;
917 		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
918 					     NETCONFA_MC_FORWARDING,
919 					     dev->ifindex, &in6_dev->cnf);
920 	}
921 
922 	/* Fill in the VIF structures */
923 	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
924 			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
925 			MIFF_REGISTER);
926 
927 	/* And finish update writing critical data */
928 	write_lock_bh(&mrt_lock);
929 	v->dev = dev;
930 #ifdef CONFIG_IPV6_PIMSM_V2
931 	if (v->flags & MIFF_REGISTER)
932 		mrt->mroute_reg_vif_num = vifi;
933 #endif
934 	if (vifi + 1 > mrt->maxvif)
935 		mrt->maxvif = vifi + 1;
936 	write_unlock_bh(&mrt_lock);
937 	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
938 				       v, vifi, mrt->id);
939 	return 0;
940 }
941 
942 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
943 					   const struct in6_addr *origin,
944 					   const struct in6_addr *mcastgrp)
945 {
946 	struct mfc6_cache_cmp_arg arg = {
947 		.mf6c_origin = *origin,
948 		.mf6c_mcastgrp = *mcastgrp,
949 	};
950 
951 	return mr_mfc_find(mrt, &arg);
952 }
953 
954 /* Look for a (*,G) entry */
955 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
956 					       struct in6_addr *mcastgrp,
957 					       mifi_t mifi)
958 {
959 	struct mfc6_cache_cmp_arg arg = {
960 		.mf6c_origin = in6addr_any,
961 		.mf6c_mcastgrp = *mcastgrp,
962 	};
963 
964 	if (ipv6_addr_any(mcastgrp))
965 		return mr_mfc_find_any_parent(mrt, mifi);
966 	return mr_mfc_find_any(mrt, mifi, &arg);
967 }
968 
969 /* Look for a (S,G,iif) entry if parent != -1 */
970 static struct mfc6_cache *
971 ip6mr_cache_find_parent(struct mr_table *mrt,
972 			const struct in6_addr *origin,
973 			const struct in6_addr *mcastgrp,
974 			int parent)
975 {
976 	struct mfc6_cache_cmp_arg arg = {
977 		.mf6c_origin = *origin,
978 		.mf6c_mcastgrp = *mcastgrp,
979 	};
980 
981 	return mr_mfc_find_parent(mrt, &arg, parent);
982 }
983 
984 /* Allocate a multicast cache entry */
985 static struct mfc6_cache *ip6mr_cache_alloc(void)
986 {
987 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
988 	if (!c)
989 		return NULL;
990 	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
991 	c->_c.mfc_un.res.minvif = MAXMIFS;
992 	c->_c.free = ip6mr_cache_free_rcu;
993 	refcount_set(&c->_c.mfc_un.res.refcount, 1);
994 	return c;
995 }
996 
997 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
998 {
999 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1000 	if (!c)
1001 		return NULL;
1002 	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
1003 	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
1004 	return c;
1005 }
1006 
1007 /*
1008  *	A cache entry has gone into a resolved state from queued
1009  */
1010 
1011 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
1012 				struct mfc6_cache *uc, struct mfc6_cache *c)
1013 {
1014 	struct sk_buff *skb;
1015 
1016 	/*
1017 	 *	Play the pending entries through our router
1018 	 */
1019 
1020 	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1021 		if (ipv6_hdr(skb)->version == 0) {
1022 			struct nlmsghdr *nlh = skb_pull(skb,
1023 							sizeof(struct ipv6hdr));
1024 
1025 			if (mr_fill_mroute(mrt, skb, &c->_c,
1026 					   nlmsg_data(nlh)) > 0) {
1027 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1028 			} else {
1029 				nlh->nlmsg_type = NLMSG_ERROR;
1030 				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1031 				skb_trim(skb, nlh->nlmsg_len);
1032 				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1033 			}
1034 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1035 		} else
1036 			ip6_mr_forward(net, mrt, skb, c);
1037 	}
1038 }
1039 
1040 /*
1041  *	Bounce a cache query up to pim6sd and netlink.
1042  *
1043  *	Called under mrt_lock.
1044  */
1045 
1046 static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
1047 			      mifi_t mifi, int assert)
1048 {
1049 	struct sock *mroute6_sk;
1050 	struct sk_buff *skb;
1051 	struct mrt6msg *msg;
1052 	int ret;
1053 
1054 #ifdef CONFIG_IPV6_PIMSM_V2
1055 	if (assert == MRT6MSG_WHOLEPKT)
1056 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1057 						+sizeof(*msg));
1058 	else
1059 #endif
1060 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1061 
1062 	if (!skb)
1063 		return -ENOBUFS;
1064 
1065 	/* I suppose that internal messages
1066 	 * do not require checksums */
1067 
1068 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1069 
1070 #ifdef CONFIG_IPV6_PIMSM_V2
1071 	if (assert == MRT6MSG_WHOLEPKT) {
1072 		/* Ugly, but we have no choice with this interface.
1073 		   Duplicate old header, fix length etc.
1074 		   And all this only to mangle msg->im6_msgtype and
1075 		   to set msg->im6_mbz to "mbz" :-)
1076 		 */
1077 		skb_push(skb, -skb_network_offset(pkt));
1078 
1079 		skb_push(skb, sizeof(*msg));
1080 		skb_reset_transport_header(skb);
1081 		msg = (struct mrt6msg *)skb_transport_header(skb);
1082 		msg->im6_mbz = 0;
1083 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1084 		msg->im6_mif = mrt->mroute_reg_vif_num;
1085 		msg->im6_pad = 0;
1086 		msg->im6_src = ipv6_hdr(pkt)->saddr;
1087 		msg->im6_dst = ipv6_hdr(pkt)->daddr;
1088 
1089 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1090 	} else
1091 #endif
1092 	{
1093 	/*
1094 	 *	Copy the IP header
1095 	 */
1096 
1097 	skb_put(skb, sizeof(struct ipv6hdr));
1098 	skb_reset_network_header(skb);
1099 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1100 
1101 	/*
1102 	 *	Add our header
1103 	 */
1104 	skb_put(skb, sizeof(*msg));
1105 	skb_reset_transport_header(skb);
1106 	msg = (struct mrt6msg *)skb_transport_header(skb);
1107 
1108 	msg->im6_mbz = 0;
1109 	msg->im6_msgtype = assert;
1110 	msg->im6_mif = mifi;
1111 	msg->im6_pad = 0;
1112 	msg->im6_src = ipv6_hdr(pkt)->saddr;
1113 	msg->im6_dst = ipv6_hdr(pkt)->daddr;
1114 
1115 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1116 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1117 	}
1118 
1119 	rcu_read_lock();
1120 	mroute6_sk = rcu_dereference(mrt->mroute_sk);
1121 	if (!mroute6_sk) {
1122 		rcu_read_unlock();
1123 		kfree_skb(skb);
1124 		return -EINVAL;
1125 	}
1126 
1127 	mrt6msg_netlink_event(mrt, skb);
1128 
1129 	/* Deliver to user space multicast routing algorithms */
1130 	ret = sock_queue_rcv_skb(mroute6_sk, skb);
1131 	rcu_read_unlock();
1132 	if (ret < 0) {
1133 		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1134 		kfree_skb(skb);
1135 	}
1136 
1137 	return ret;
1138 }
1139 
/* Queue a packet for resolution. It gets locked cache entry!
 *
 * Searches mrt->mfc_unres_queue, under mfc_unres_lock, for an entry whose
 * origin and group match the IPv6 header of @skb. When none exists a new
 * unresolved entry is allocated, the packet is reported through
 * ip6mr_cache_report() with MRT6MSG_NOCACHE and the entry is put on the
 * queue. The packet is then appended to the entry's pending skb queue
 * unless that queue already holds more than 3 packets.
 *
 * @skb is consumed on every path: it is either queued or freed here.
 *
 * Returns 0 when the packet was queued, -ENOBUFS when a limit was hit or
 * the entry could not be allocated, or the negative error returned by
 * ip6mr_cache_report().
 */
static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
				  struct sk_buff *skb)
{
	struct mfc6_cache *c;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);
	/* Look for an already pending entry for this (origin, group) pair */
	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		/* Refuse once 10 entries are already awaiting resolution */
		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		/* The entry is now pending: account for it and publish it */
		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		ipmr_do_expire_process(mrt);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
1209 
1210 /*
1211  *	MFC6 cache manipulation by user space
1212  */
1213 
1214 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1215 			    int parent)
1216 {
1217 	struct mfc6_cache *c;
1218 
1219 	/* The entries are added/deleted only under RTNL */
1220 	rcu_read_lock();
1221 	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1222 				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1223 	rcu_read_unlock();
1224 	if (!c)
1225 		return -ENOENT;
1226 	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1227 	list_del_rcu(&c->_c.list);
1228 
1229 	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1230 				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
1231 	mr6_netlink_event(mrt, c, RTM_DELROUTE);
1232 	mr_cache_put(&c->_c);
1233 	return 0;
1234 }
1235 
1236 static int ip6mr_device_event(struct notifier_block *this,
1237 			      unsigned long event, void *ptr)
1238 {
1239 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1240 	struct net *net = dev_net(dev);
1241 	struct mr_table *mrt;
1242 	struct vif_device *v;
1243 	int ct;
1244 
1245 	if (event != NETDEV_UNREGISTER)
1246 		return NOTIFY_DONE;
1247 
1248 	ip6mr_for_each_table(mrt, net) {
1249 		v = &mrt->vif_table[0];
1250 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1251 			if (v->dev == dev)
1252 				mif6_delete(mrt, ct, 1, NULL);
1253 		}
1254 	}
1255 
1256 	return NOTIFY_DONE;
1257 }
1258 
1259 static unsigned int ip6mr_seq_read(struct net *net)
1260 {
1261 	ASSERT_RTNL();
1262 
1263 	return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
1264 }
1265 
1266 static int ip6mr_dump(struct net *net, struct notifier_block *nb)
1267 {
1268 	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1269 		       ip6mr_mr_table_iter, &mrt_lock);
1270 }
1271 
/* Netdevice notifier block; registered in ip6_mr_init() and unregistered
 * in ip6_mr_cleanup().
 */
static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
1275 
/* Template passed to fib_notifier_ops_register() for each namespace by
 * ip6mr_notifier_init().
 */
static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.fib_seq_read	= ip6mr_seq_read,
	.fib_dump	= ip6mr_dump,
	.owner		= THIS_MODULE,
};
1282 
1283 static int __net_init ip6mr_notifier_init(struct net *net)
1284 {
1285 	struct fib_notifier_ops *ops;
1286 
1287 	net->ipv6.ipmr_seq = 0;
1288 
1289 	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1290 	if (IS_ERR(ops))
1291 		return PTR_ERR(ops);
1292 
1293 	net->ipv6.ip6mr_notifier_ops = ops;
1294 
1295 	return 0;
1296 }
1297 
1298 static void __net_exit ip6mr_notifier_exit(struct net *net)
1299 {
1300 	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1301 	net->ipv6.ip6mr_notifier_ops = NULL;
1302 }
1303 
/* Setup for IP multicast routing
 *
 * Per-namespace initialisation: registers the FIB notifier ops, sets up
 * the ip6mr rules and, with CONFIG_PROC_FS, creates the "ip6_mr_vif" and
 * "ip6_mr_cache" proc entries.
 *
 * Returns 0 on success or a negative errno. On failure the steps that
 * already succeeded are undone in reverse order.
 */
static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_notifier_init(net);
	if (err)
		return err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto ip6mr_rules_fail;

#ifdef CONFIG_PROC_FS
	/* proc_create() reports failure only as NULL; use -ENOMEM for both */
	err = -ENOMEM;
	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif

	return 0;

	/* Error unwinding, in reverse order of the setup above */
#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
ip6mr_rules_fail:
	ip6mr_notifier_exit(net);
	return err;
}
1337 
/* Per-namespace teardown: undo ip6mr_net_init() in reverse order (proc
 * entries, then rules, then the FIB notifier ops).
 */
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
	ip6mr_notifier_exit(net);
}
1347 
/* Per-namespace init/exit hooks; registered in ip6_mr_init(). */
static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};
1352 
1353 int __init ip6_mr_init(void)
1354 {
1355 	int err;
1356 
1357 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1358 				       sizeof(struct mfc6_cache),
1359 				       0, SLAB_HWCACHE_ALIGN,
1360 				       NULL);
1361 	if (!mrt_cachep)
1362 		return -ENOMEM;
1363 
1364 	err = register_pernet_subsys(&ip6mr_net_ops);
1365 	if (err)
1366 		goto reg_pernet_fail;
1367 
1368 	err = register_netdevice_notifier(&ip6_mr_notifier);
1369 	if (err)
1370 		goto reg_notif_fail;
1371 #ifdef CONFIG_IPV6_PIMSM_V2
1372 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1373 		pr_err("%s: can't add PIM protocol\n", __func__);
1374 		err = -EAGAIN;
1375 		goto add_proto_fail;
1376 	}
1377 #endif
1378 	err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
1379 				   NULL, ip6mr_rtm_dumproute, 0);
1380 	if (err == 0)
1381 		return 0;
1382 
1383 #ifdef CONFIG_IPV6_PIMSM_V2
1384 	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1385 add_proto_fail:
1386 	unregister_netdevice_notifier(&ip6_mr_notifier);
1387 #endif
1388 reg_notif_fail:
1389 	unregister_pernet_subsys(&ip6mr_net_ops);
1390 reg_pernet_fail:
1391 	kmem_cache_destroy(mrt_cachep);
1392 	return err;
1393 }
1394 
/* Tear down everything ip6_mr_init() set up, in reverse order of
 * registration, and finally destroy the MFC slab cache.
 */
void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}
1405 
/* Add or update a resolved MFC entry described by @mfc in table @mrt.
 *
 * If an entry for the (origin, group, @parent) key already exists, its
 * parent and thresholds are replaced in place. Otherwise a new entry is
 * allocated and inserted; should an unresolved entry for the same
 * (origin, group) be pending, it is taken off the unresolved queue and
 * handed to ip6mr_cache_resolve() together with the new entry.
 *
 * @mrtsock: non-zero when the request comes from the table's mroute
 *	     socket; otherwise the entry is flagged MFC_STATIC.
 *
 * Returns 0 on success, -ENFILE for an out-of-range parent, -EINVAL for a
 * group that is neither the any-address nor multicast, -ENOMEM on
 * allocation failure, or the error from the hash table insertion.
 */
static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	unsigned char ttls[MAXMIFS];
	struct mfc6_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int i, err;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	/* Interfaces present in mf6cc_ifset get threshold 1, all others 255 */
	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (c) {
		/* Existing entry: update it in place under the write lock */
		write_lock_bh(&mrt_lock);
		c->_c.mfc_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, &c->_c, ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
					       c, mrt->id);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	/* A new entry needs a group that is either unspecified or multicast */
	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->_c.mfc_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, &c->_c, ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ip6mr_rht_params);
	if (err) {
		pr_err("ip6mr: rhtable insert error %d\n", err);
		ip6mr_cache_free(c);
		return err;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);

	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc6_cache *)_uc;
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	/* Nothing left to expire: stop the expiry timer */
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	/* uc is only meaningful when the loop above broke out with a match */
	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
				       c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}
1494 
1495 /*
1496  *	Close the multicast socket, and clear the vif tables etc
1497  */
1498 
1499 static void mroute_clean_tables(struct mr_table *mrt, bool all)
1500 {
1501 	struct mr_mfc *c, *tmp;
1502 	LIST_HEAD(list);
1503 	int i;
1504 
1505 	/* Shut down all active vif entries */
1506 	for (i = 0; i < mrt->maxvif; i++) {
1507 		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
1508 			continue;
1509 		mif6_delete(mrt, i, 0, &list);
1510 	}
1511 	unregister_netdevice_many(&list);
1512 
1513 	/* Wipe the cache */
1514 	list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1515 		if (!all && (c->mfc_flags & MFC_STATIC))
1516 			continue;
1517 		rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1518 		list_del_rcu(&c->list);
1519 		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
1520 		mr_cache_put(c);
1521 	}
1522 
1523 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1524 		spin_lock_bh(&mfc_unres_lock);
1525 		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1526 			list_del(&c->list);
1527 			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1528 						       FIB_EVENT_ENTRY_DEL,
1529 						       (struct mfc6_cache *)c,
1530 						       mrt->id);
1531 			mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1532 					  RTM_DELROUTE);
1533 			ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1534 		}
1535 		spin_unlock_bh(&mfc_unres_lock);
1536 	}
1537 }
1538 
1539 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1540 {
1541 	int err = 0;
1542 	struct net *net = sock_net(sk);
1543 
1544 	rtnl_lock();
1545 	write_lock_bh(&mrt_lock);
1546 	if (rtnl_dereference(mrt->mroute_sk)) {
1547 		err = -EADDRINUSE;
1548 	} else {
1549 		rcu_assign_pointer(mrt->mroute_sk, sk);
1550 		sock_set_flag(sk, SOCK_RCU_FREE);
1551 		net->ipv6.devconf_all->mc_forwarding++;
1552 	}
1553 	write_unlock_bh(&mrt_lock);
1554 
1555 	if (!err)
1556 		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1557 					     NETCONFA_MC_FORWARDING,
1558 					     NETCONFA_IFINDEX_ALL,
1559 					     net->ipv6.devconf_all);
1560 	rtnl_unlock();
1561 
1562 	return err;
1563 }
1564 
1565 int ip6mr_sk_done(struct sock *sk)
1566 {
1567 	int err = -EACCES;
1568 	struct net *net = sock_net(sk);
1569 	struct mr_table *mrt;
1570 
1571 	if (sk->sk_type != SOCK_RAW ||
1572 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1573 		return err;
1574 
1575 	rtnl_lock();
1576 	ip6mr_for_each_table(mrt, net) {
1577 		if (sk == rtnl_dereference(mrt->mroute_sk)) {
1578 			write_lock_bh(&mrt_lock);
1579 			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1580 			/* Note that mroute_sk had SOCK_RCU_FREE set,
1581 			 * so the RCU grace period before sk freeing
1582 			 * is guaranteed by sk_destruct()
1583 			 */
1584 			net->ipv6.devconf_all->mc_forwarding--;
1585 			write_unlock_bh(&mrt_lock);
1586 			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1587 						     NETCONFA_MC_FORWARDING,
1588 						     NETCONFA_IFINDEX_ALL,
1589 						     net->ipv6.devconf_all);
1590 
1591 			mroute_clean_tables(mrt, false);
1592 			err = 0;
1593 			break;
1594 		}
1595 	}
1596 	rtnl_unlock();
1597 
1598 	return err;
1599 }
1600 
1601 bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1602 {
1603 	struct mr_table *mrt;
1604 	struct flowi6 fl6 = {
1605 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
1606 		.flowi6_oif	= skb->dev->ifindex,
1607 		.flowi6_mark	= skb->mark,
1608 	};
1609 
1610 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1611 		return NULL;
1612 
1613 	return rcu_access_pointer(mrt->mroute_sk);
1614 }
1615 EXPORT_SYMBOL(mroute6_is_socket);
1616 
1617 /*
1618  *	Socket options and virtual interface manipulation. The whole
1619  *	virtual interface system is a complete heap, but unfortunately
1620  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1621  *	MOSPF/PIM router set up we can clean this up.
1622  */
1623 
1624 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1625 {
1626 	int ret, parent = 0;
1627 	struct mif6ctl vif;
1628 	struct mf6cctl mfc;
1629 	mifi_t mifi;
1630 	struct net *net = sock_net(sk);
1631 	struct mr_table *mrt;
1632 
1633 	if (sk->sk_type != SOCK_RAW ||
1634 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1635 		return -EOPNOTSUPP;
1636 
1637 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1638 	if (!mrt)
1639 		return -ENOENT;
1640 
1641 	if (optname != MRT6_INIT) {
1642 		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1643 		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
1644 			return -EACCES;
1645 	}
1646 
1647 	switch (optname) {
1648 	case MRT6_INIT:
1649 		if (optlen < sizeof(int))
1650 			return -EINVAL;
1651 
1652 		return ip6mr_sk_init(mrt, sk);
1653 
1654 	case MRT6_DONE:
1655 		return ip6mr_sk_done(sk);
1656 
1657 	case MRT6_ADD_MIF:
1658 		if (optlen < sizeof(vif))
1659 			return -EINVAL;
1660 		if (copy_from_user(&vif, optval, sizeof(vif)))
1661 			return -EFAULT;
1662 		if (vif.mif6c_mifi >= MAXMIFS)
1663 			return -ENFILE;
1664 		rtnl_lock();
1665 		ret = mif6_add(net, mrt, &vif,
1666 			       sk == rtnl_dereference(mrt->mroute_sk));
1667 		rtnl_unlock();
1668 		return ret;
1669 
1670 	case MRT6_DEL_MIF:
1671 		if (optlen < sizeof(mifi_t))
1672 			return -EINVAL;
1673 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1674 			return -EFAULT;
1675 		rtnl_lock();
1676 		ret = mif6_delete(mrt, mifi, 0, NULL);
1677 		rtnl_unlock();
1678 		return ret;
1679 
1680 	/*
1681 	 *	Manipulate the forwarding caches. These live
1682 	 *	in a sort of kernel/user symbiosis.
1683 	 */
1684 	case MRT6_ADD_MFC:
1685 	case MRT6_DEL_MFC:
1686 		parent = -1;
1687 		/* fall through */
1688 	case MRT6_ADD_MFC_PROXY:
1689 	case MRT6_DEL_MFC_PROXY:
1690 		if (optlen < sizeof(mfc))
1691 			return -EINVAL;
1692 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1693 			return -EFAULT;
1694 		if (parent == 0)
1695 			parent = mfc.mf6cc_parent;
1696 		rtnl_lock();
1697 		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1698 			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1699 		else
1700 			ret = ip6mr_mfc_add(net, mrt, &mfc,
1701 					    sk ==
1702 					    rtnl_dereference(mrt->mroute_sk),
1703 					    parent);
1704 		rtnl_unlock();
1705 		return ret;
1706 
1707 	/*
1708 	 *	Control PIM assert (to activate pim will activate assert)
1709 	 */
1710 	case MRT6_ASSERT:
1711 	{
1712 		int v;
1713 
1714 		if (optlen != sizeof(v))
1715 			return -EINVAL;
1716 		if (get_user(v, (int __user *)optval))
1717 			return -EFAULT;
1718 		mrt->mroute_do_assert = v;
1719 		return 0;
1720 	}
1721 
1722 #ifdef CONFIG_IPV6_PIMSM_V2
1723 	case MRT6_PIM:
1724 	{
1725 		int v;
1726 
1727 		if (optlen != sizeof(v))
1728 			return -EINVAL;
1729 		if (get_user(v, (int __user *)optval))
1730 			return -EFAULT;
1731 		v = !!v;
1732 		rtnl_lock();
1733 		ret = 0;
1734 		if (v != mrt->mroute_do_pim) {
1735 			mrt->mroute_do_pim = v;
1736 			mrt->mroute_do_assert = v;
1737 		}
1738 		rtnl_unlock();
1739 		return ret;
1740 	}
1741 
1742 #endif
1743 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1744 	case MRT6_TABLE:
1745 	{
1746 		u32 v;
1747 
1748 		if (optlen != sizeof(u32))
1749 			return -EINVAL;
1750 		if (get_user(v, (u32 __user *)optval))
1751 			return -EFAULT;
1752 		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1753 		if (v != RT_TABLE_DEFAULT && v >= 100000000)
1754 			return -EINVAL;
1755 		if (sk == rcu_access_pointer(mrt->mroute_sk))
1756 			return -EBUSY;
1757 
1758 		rtnl_lock();
1759 		ret = 0;
1760 		if (!ip6mr_new_table(net, v))
1761 			ret = -ENOMEM;
1762 		raw6_sk(sk)->ip6mr_table = v;
1763 		rtnl_unlock();
1764 		return ret;
1765 	}
1766 #endif
1767 	/*
1768 	 *	Spurious command, or MRT6_VERSION which you cannot
1769 	 *	set.
1770 	 */
1771 	default:
1772 		return -ENOPROTOOPT;
1773 	}
1774 }
1775 
1776 /*
1777  *	Getsock opt support for the multicast routing system.
1778  */
1779 
1780 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1781 			  int __user *optlen)
1782 {
1783 	int olr;
1784 	int val;
1785 	struct net *net = sock_net(sk);
1786 	struct mr_table *mrt;
1787 
1788 	if (sk->sk_type != SOCK_RAW ||
1789 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1790 		return -EOPNOTSUPP;
1791 
1792 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1793 	if (!mrt)
1794 		return -ENOENT;
1795 
1796 	switch (optname) {
1797 	case MRT6_VERSION:
1798 		val = 0x0305;
1799 		break;
1800 #ifdef CONFIG_IPV6_PIMSM_V2
1801 	case MRT6_PIM:
1802 		val = mrt->mroute_do_pim;
1803 		break;
1804 #endif
1805 	case MRT6_ASSERT:
1806 		val = mrt->mroute_do_assert;
1807 		break;
1808 	default:
1809 		return -ENOPROTOOPT;
1810 	}
1811 
1812 	if (get_user(olr, optlen))
1813 		return -EFAULT;
1814 
1815 	olr = min_t(int, olr, sizeof(int));
1816 	if (olr < 0)
1817 		return -EINVAL;
1818 
1819 	if (put_user(olr, optlen))
1820 		return -EFAULT;
1821 	if (copy_to_user(optval, &val, olr))
1822 		return -EFAULT;
1823 	return 0;
1824 }
1825 
1826 /*
1827  *	The IP multicast ioctl support routines.
1828  */
1829 
1830 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1831 {
1832 	struct sioc_sg_req6 sr;
1833 	struct sioc_mif_req6 vr;
1834 	struct vif_device *vif;
1835 	struct mfc6_cache *c;
1836 	struct net *net = sock_net(sk);
1837 	struct mr_table *mrt;
1838 
1839 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1840 	if (!mrt)
1841 		return -ENOENT;
1842 
1843 	switch (cmd) {
1844 	case SIOCGETMIFCNT_IN6:
1845 		if (copy_from_user(&vr, arg, sizeof(vr)))
1846 			return -EFAULT;
1847 		if (vr.mifi >= mrt->maxvif)
1848 			return -EINVAL;
1849 		read_lock(&mrt_lock);
1850 		vif = &mrt->vif_table[vr.mifi];
1851 		if (VIF_EXISTS(mrt, vr.mifi)) {
1852 			vr.icount = vif->pkt_in;
1853 			vr.ocount = vif->pkt_out;
1854 			vr.ibytes = vif->bytes_in;
1855 			vr.obytes = vif->bytes_out;
1856 			read_unlock(&mrt_lock);
1857 
1858 			if (copy_to_user(arg, &vr, sizeof(vr)))
1859 				return -EFAULT;
1860 			return 0;
1861 		}
1862 		read_unlock(&mrt_lock);
1863 		return -EADDRNOTAVAIL;
1864 	case SIOCGETSGCNT_IN6:
1865 		if (copy_from_user(&sr, arg, sizeof(sr)))
1866 			return -EFAULT;
1867 
1868 		rcu_read_lock();
1869 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1870 		if (c) {
1871 			sr.pktcnt = c->_c.mfc_un.res.pkt;
1872 			sr.bytecnt = c->_c.mfc_un.res.bytes;
1873 			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1874 			rcu_read_unlock();
1875 
1876 			if (copy_to_user(arg, &sr, sizeof(sr)))
1877 				return -EFAULT;
1878 			return 0;
1879 		}
1880 		rcu_read_unlock();
1881 		return -EADDRNOTAVAIL;
1882 	default:
1883 		return -ENOIOCTLCMD;
1884 	}
1885 }
1886 
1887 #ifdef CONFIG_COMPAT
/* Request layout used by ip6mr_compat_ioctl() for SIOCGETSGCNT_IN6; the
 * counters are compat_ulong_t.
 */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};
1895 
/* Request layout used by ip6mr_compat_ioctl() for SIOCGETMIFCNT_IN6; the
 * counters are compat_ulong_t.
 */
struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};
1903 
1904 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1905 {
1906 	struct compat_sioc_sg_req6 sr;
1907 	struct compat_sioc_mif_req6 vr;
1908 	struct vif_device *vif;
1909 	struct mfc6_cache *c;
1910 	struct net *net = sock_net(sk);
1911 	struct mr_table *mrt;
1912 
1913 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1914 	if (!mrt)
1915 		return -ENOENT;
1916 
1917 	switch (cmd) {
1918 	case SIOCGETMIFCNT_IN6:
1919 		if (copy_from_user(&vr, arg, sizeof(vr)))
1920 			return -EFAULT;
1921 		if (vr.mifi >= mrt->maxvif)
1922 			return -EINVAL;
1923 		read_lock(&mrt_lock);
1924 		vif = &mrt->vif_table[vr.mifi];
1925 		if (VIF_EXISTS(mrt, vr.mifi)) {
1926 			vr.icount = vif->pkt_in;
1927 			vr.ocount = vif->pkt_out;
1928 			vr.ibytes = vif->bytes_in;
1929 			vr.obytes = vif->bytes_out;
1930 			read_unlock(&mrt_lock);
1931 
1932 			if (copy_to_user(arg, &vr, sizeof(vr)))
1933 				return -EFAULT;
1934 			return 0;
1935 		}
1936 		read_unlock(&mrt_lock);
1937 		return -EADDRNOTAVAIL;
1938 	case SIOCGETSGCNT_IN6:
1939 		if (copy_from_user(&sr, arg, sizeof(sr)))
1940 			return -EFAULT;
1941 
1942 		rcu_read_lock();
1943 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1944 		if (c) {
1945 			sr.pktcnt = c->_c.mfc_un.res.pkt;
1946 			sr.bytecnt = c->_c.mfc_un.res.bytes;
1947 			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1948 			rcu_read_unlock();
1949 
1950 			if (copy_to_user(arg, &sr, sizeof(sr)))
1951 				return -EFAULT;
1952 			return 0;
1953 		}
1954 		rcu_read_unlock();
1955 		return -EADDRNOTAVAIL;
1956 	default:
1957 		return -ENOIOCTLCMD;
1958 	}
1959 }
1960 #endif
1961 
1962 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
1963 {
1964 	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1965 			IPSTATS_MIB_OUTFORWDATAGRAMS);
1966 	__IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
1967 			IPSTATS_MIB_OUTOCTETS, skb->len);
1968 	return dst_output(net, sk, skb);
1969 }
1970 
1971 /*
1972  *	Processing handlers for ip6mr_forward
1973  */
1974 
/* Send one copy of @skb out of multicast interface @vifi of @mrt.
 *
 * For a register interface (MIFF_REGISTER, only with
 * CONFIG_IPV6_PIMSM_V2) the packet is reported to user space as
 * MRT6MSG_WHOLEPKT instead of being transmitted. Otherwise a route
 * through the interface's link is looked up, the hop limit is
 * decremented and the packet is passed through the NF_INET_FORWARD hook
 * to ip6mr_forward2_finish().
 *
 * @skb is consumed: it is either handed to the netfilter hook or freed.
 * Returns the NF_HOOK() result, or 0 when the packet was dropped here.
 */
static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	/* Interface slot without a device: nothing to send on */
	if (!vif->dev)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	/* Replace whatever route the packet carried with the new one */
	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	/* Re-read the header pointer after skb_cow() */
	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
2048 
2049 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2050 {
2051 	int ct;
2052 
2053 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2054 		if (mrt->vif_table[ct].dev == dev)
2055 			break;
2056 	}
2057 	return ct;
2058 }
2059 
/* Forward @skb according to the resolved cache entry @c.
 *
 * Updates the entry's packet, byte and last-use statistics, checks that
 * the packet arrived on the expected interface (optionally reporting
 * MRT6MSG_WRONGMIF to user space), and then sends one copy on every
 * output interface whose threshold is below the packet's hop limit.
 * All copies but the last are clones; the last transmission uses @skb
 * itself.
 *
 * @skb is consumed: it is either passed to ip6mr_forward2() or freed.
 */
static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *c)
{
	int psend = -1;
	int vif, ct;
	int true_vifi = ip6mr_find_vif(mrt, skb->dev);

	vif = c->_c.mfc_parent;
	c->_c.mfc_un.res.pkt++;
	c->_c.mfc_un.res.bytes += skb->len;
	c->_c.mfc_un.res.lastuse = jiffies;

	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		rcu_read_lock();
		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
			rcu_read_unlock();
			goto forward;
		}
		rcu_read_unlock();
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif_table[vif].dev != skb->dev) {
		c->_c.mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       c->_c.mfc_un.res.last_assert +
			       MFC_ASSERT_THRESH)) {
			c->_c.mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

forward:
	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&c->mf6c_origin) &&
	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != c->_c.mfc_parent &&
		    ipv6_hdr(skb)->hop_limit >
				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = c->_c.mfc_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	/* psend trails one step behind so the final interface gets the
	 * original skb instead of a clone.
	 */
	for (ct = c->_c.mfc_un.res.maxvif - 1;
	     ct >= c->_c.mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2,
						       c, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, c, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}
2156 
2157 
2158 /*
2159  *	Multicast packets for forwarding arrive here
2160  */
2161 
2162 int ip6_mr_input(struct sk_buff *skb)
2163 {
2164 	struct mfc6_cache *cache;
2165 	struct net *net = dev_net(skb->dev);
2166 	struct mr_table *mrt;
2167 	struct flowi6 fl6 = {
2168 		.flowi6_iif	= skb->dev->ifindex,
2169 		.flowi6_mark	= skb->mark,
2170 	};
2171 	int err;
2172 
2173 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2174 	if (err < 0) {
2175 		kfree_skb(skb);
2176 		return err;
2177 	}
2178 
2179 	read_lock(&mrt_lock);
2180 	cache = ip6mr_cache_find(mrt,
2181 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2182 	if (!cache) {
2183 		int vif = ip6mr_find_vif(mrt, skb->dev);
2184 
2185 		if (vif >= 0)
2186 			cache = ip6mr_cache_find_any(mrt,
2187 						     &ipv6_hdr(skb)->daddr,
2188 						     vif);
2189 	}
2190 
2191 	/*
2192 	 *	No usable cache entry
2193 	 */
2194 	if (!cache) {
2195 		int vif;
2196 
2197 		vif = ip6mr_find_vif(mrt, skb->dev);
2198 		if (vif >= 0) {
2199 			int err = ip6mr_cache_unresolved(mrt, vif, skb);
2200 			read_unlock(&mrt_lock);
2201 
2202 			return err;
2203 		}
2204 		read_unlock(&mrt_lock);
2205 		kfree_skb(skb);
2206 		return -ENODEV;
2207 	}
2208 
2209 	ip6_mr_forward(net, mrt, skb, cache);
2210 
2211 	read_unlock(&mrt_lock);
2212 
2213 	return 0;
2214 }
2215 
/* Fill multicast routing information for the route attached to @skb into
 * a netlink reply.
 *
 * The cache of the default table is searched for the source/destination
 * of the skb's route, then for a wildcard entry on the skb's device. On
 * a hit the entry is written out with mr_fill_mroute(). On a miss a
 * minimal IPv6 header carrying just the two addresses is built in a new
 * skb, tagged with @portid and queued as an unresolved entry.
 *
 * Returns the result of mr_fill_mroute() or ip6mr_cache_unresolved(),
 * -ENOENT when the default table does not exist, -ENODEV when the skb
 * has no device or the device is not a multicast interface, or -ENOMEM
 * when the temporary skb cannot be allocated.
 */
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
		    u32 portid)
{
	int err;
	struct mr_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		dev = skb->dev;
		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		NETLINK_CB(skb2).portid = portid;
		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		/* Header with only the two addresses filled in; version is 0 */
		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		/* skb2 is consumed by ip6mr_cache_unresolved() */
		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
	read_unlock(&mrt_lock);
	return err;
}
2285 
2286 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2287 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2288 			     int flags)
2289 {
2290 	struct nlmsghdr *nlh;
2291 	struct rtmsg *rtm;
2292 	int err;
2293 
2294 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2295 	if (!nlh)
2296 		return -EMSGSIZE;
2297 
2298 	rtm = nlmsg_data(nlh);
2299 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2300 	rtm->rtm_dst_len  = 128;
2301 	rtm->rtm_src_len  = 128;
2302 	rtm->rtm_tos      = 0;
2303 	rtm->rtm_table    = mrt->id;
2304 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2305 		goto nla_put_failure;
2306 	rtm->rtm_type = RTN_MULTICAST;
2307 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2308 	if (c->_c.mfc_flags & MFC_STATIC)
2309 		rtm->rtm_protocol = RTPROT_STATIC;
2310 	else
2311 		rtm->rtm_protocol = RTPROT_MROUTED;
2312 	rtm->rtm_flags    = 0;
2313 
2314 	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2315 	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2316 		goto nla_put_failure;
2317 	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2318 	/* do not break the dump if cache is unresolved */
2319 	if (err < 0 && err != -ENOENT)
2320 		goto nla_put_failure;
2321 
2322 	nlmsg_end(skb, nlh);
2323 	return 0;
2324 
2325 nla_put_failure:
2326 	nlmsg_cancel(skb, nlh);
2327 	return -EMSGSIZE;
2328 }
2329 
2330 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2331 			      u32 portid, u32 seq, struct mr_mfc *c,
2332 			      int cmd, int flags)
2333 {
2334 	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2335 				 cmd, flags);
2336 }
2337 
2338 static int mr6_msgsize(bool unresolved, int maxvif)
2339 {
2340 	size_t len =
2341 		NLMSG_ALIGN(sizeof(struct rtmsg))
2342 		+ nla_total_size(4)	/* RTA_TABLE */
2343 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2344 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2345 		;
2346 
2347 	if (!unresolved)
2348 		len = len
2349 		      + nla_total_size(4)	/* RTA_IIF */
2350 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2351 		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2352 						/* RTA_MFC_STATS */
2353 		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2354 		;
2355 
2356 	return len;
2357 }
2358 
2359 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2360 			      int cmd)
2361 {
2362 	struct net *net = read_pnet(&mrt->net);
2363 	struct sk_buff *skb;
2364 	int err = -ENOBUFS;
2365 
2366 	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2367 			GFP_ATOMIC);
2368 	if (!skb)
2369 		goto errout;
2370 
2371 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2372 	if (err < 0)
2373 		goto errout;
2374 
2375 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2376 	return;
2377 
2378 errout:
2379 	kfree_skb(skb);
2380 	if (err < 0)
2381 		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2382 }
2383 
2384 static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2385 {
2386 	size_t len =
2387 		NLMSG_ALIGN(sizeof(struct rtgenmsg))
2388 		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
2389 		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
2390 					/* IP6MRA_CREPORT_SRC_ADDR */
2391 		+ nla_total_size(sizeof(struct in6_addr))
2392 					/* IP6MRA_CREPORT_DST_ADDR */
2393 		+ nla_total_size(sizeof(struct in6_addr))
2394 					/* IP6MRA_CREPORT_PKT */
2395 		+ nla_total_size(payloadlen)
2396 		;
2397 
2398 	return len;
2399 }
2400 
2401 static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
2402 {
2403 	struct net *net = read_pnet(&mrt->net);
2404 	struct nlmsghdr *nlh;
2405 	struct rtgenmsg *rtgenm;
2406 	struct mrt6msg *msg;
2407 	struct sk_buff *skb;
2408 	struct nlattr *nla;
2409 	int payloadlen;
2410 
2411 	payloadlen = pkt->len - sizeof(struct mrt6msg);
2412 	msg = (struct mrt6msg *)skb_transport_header(pkt);
2413 
2414 	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2415 	if (!skb)
2416 		goto errout;
2417 
2418 	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2419 			sizeof(struct rtgenmsg), 0);
2420 	if (!nlh)
2421 		goto errout;
2422 	rtgenm = nlmsg_data(nlh);
2423 	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2424 	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2425 	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2426 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2427 			     &msg->im6_src) ||
2428 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2429 			     &msg->im6_dst))
2430 		goto nla_put_failure;
2431 
2432 	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2433 	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2434 				  nla_data(nla), payloadlen))
2435 		goto nla_put_failure;
2436 
2437 	nlmsg_end(skb, nlh);
2438 
2439 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2440 	return;
2441 
2442 nla_put_failure:
2443 	nlmsg_cancel(skb, nlh);
2444 errout:
2445 	kfree_skb(skb);
2446 	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2447 }
2448 
2449 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2450 {
2451 	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2452 				_ip6mr_fill_mroute, &mfc_unres_lock);
2453 }
2454