xref: /openbmc/linux/net/ipv6/ip6mr.c (revision d2ba09c1)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <linux/uaccess.h>
20 #include <linux/types.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
23 #include <linux/mm.h>
24 #include <linux/kernel.h>
25 #include <linux/fcntl.h>
26 #include <linux/stat.h>
27 #include <linux/socket.h>
28 #include <linux/inet.h>
29 #include <linux/netdevice.h>
30 #include <linux/inetdevice.h>
31 #include <linux/proc_fs.h>
32 #include <linux/seq_file.h>
33 #include <linux/init.h>
34 #include <linux/compat.h>
35 #include <net/protocol.h>
36 #include <linux/skbuff.h>
37 #include <net/raw.h>
38 #include <linux/notifier.h>
39 #include <linux/if_arp.h>
40 #include <net/checksum.h>
41 #include <net/netlink.h>
42 #include <net/fib_rules.h>
43 
44 #include <net/ipv6.h>
45 #include <net/ip6_route.h>
46 #include <linux/mroute6.h>
47 #include <linux/pim.h>
48 #include <net/addrconf.h>
49 #include <linux/netfilter_ipv6.h>
50 #include <linux/export.h>
51 #include <net/ip6_checksum.h>
52 #include <linux/netconf.h>
53 
/* Per-rule data for RTNL_FAMILY_IP6MR fib rules; no private state beyond
 * the embedded generic fib_rule header.
 */
struct ip6mr_rule {
	struct fib_rule		common;
};
57 
/* Lookup result cookie passed through fib_rules_lookup(): the matched
 * multicast routing table.
 */
struct ip6mr_result {
	struct mr_table	*mrt;
};
61 
62 /* Big lock, protecting vif table, mrt cache and mroute socket state.
63    Note that the changes are semaphored via rtnl_lock.
64  */
65 
66 static DEFINE_RWLOCK(mrt_lock);
67 
68 /* Multicast router control variables */
69 
70 /* Special spinlock for queue of unresolved entries */
71 static DEFINE_SPINLOCK(mfc_unres_lock);
72 
73 /* We return to original Alan's scheme. Hash table of resolved
74    entries is changed only in process context and protected
75    with weak lock mrt_lock. Queue of unresolved entries is protected
76    with strong spinlock mfc_unres_lock.
77 
78    In this case data path is free of exclusive locks at all.
79  */
80 
81 static struct kmem_cache *mrt_cachep __read_mostly;
82 
83 static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
84 static void ip6mr_free_table(struct mr_table *mrt);
85 
86 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
87 			   struct sk_buff *skb, struct mfc6_cache *cache);
88 static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
89 			      mifi_t mifi, int assert);
90 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
91 			      int cmd);
92 static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
93 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
94 			       struct netlink_callback *cb);
95 static void mroute_clean_tables(struct mr_table *mrt, bool all);
96 static void ipmr_expire_process(struct timer_list *t);
97 
98 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
99 #define ip6mr_for_each_table(mrt, net) \
100 	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
101 
102 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
103 					    struct mr_table *mrt)
104 {
105 	struct mr_table *ret;
106 
107 	if (!mrt)
108 		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
109 				     struct mr_table, list);
110 	else
111 		ret = list_entry_rcu(mrt->list.next,
112 				     struct mr_table, list);
113 
114 	if (&ret->list == &net->ipv6.mr6_tables)
115 		return NULL;
116 	return ret;
117 }
118 
/* Find the multicast routing table with identifier @id in @net, or NULL
 * if it does not exist.  Walks the RCU-protected mr6_tables list.
 */
static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}
129 
/* Resolve flow @flp6 to a multicast routing table via the IP6MR fib
 * rules.  On success stores the table in *@mrt and returns 0; otherwise
 * returns the negative error from fib_rules_lookup() (e.g. the rule
 * action errors produced by ip6mr_rule_action()).
 */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}
147 
/* fib_rules .action callback: map a matched rule to an mr_table.
 *
 * Returns 0 with res->mrt set for FR_ACT_TO_TBL, -ENETUNREACH for
 * unreachable, -EACCES for prohibit, -EINVAL for blackhole/unknown
 * actions, and -EAGAIN when the referenced table does not exist (lets
 * rule lookup continue).
 */
static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ip6mr_get_table(rule->fr_net, rule->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}
172 
/* fib_rules .match callback: ip6mr rules have no extra selectors, so
 * every flow matches (selection is by the rule's generic fields only).
 */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}
177 
/* Netlink attribute policy: only the generic fib-rule attributes. */
static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};
181 
/* fib_rules .configure callback: no family-private attributes to parse,
 * so configuration always succeeds.
 */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb,
				struct netlink_ext_ack *extack)
{
	return 0;
}
188 
/* fib_rules .compare callback: no private data, so any rule whose
 * generic fields matched is considered equal.
 */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}
194 
/* fib_rules .fill callback for dumps: ip6mr rules carry no dst/src/tos
 * selectors, so zero those header fields.
 */
static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}
203 
/* fib_rules ops template for RTNL_FAMILY_IP6MR; cloned per netns by
 * fib_rules_register() in ip6mr_rules_init().
 */
static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};
217 
/* Per-netns init (multi-table build): register the IP6MR rules ops,
 * create the default table and install a lowest-priority rule pointing
 * at it.
 *
 * Error unwind: on rule-add failure free the table (err2), then always
 * unregister the ops (err1).  Returns 0 or a negative errno.
 */
static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (!mrt) {
		err = -ENOMEM;
		goto err1;
	}

	/* 0x7fff = lowest priority default rule */
	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	ip6mr_free_table(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}
249 
/* Per-netns teardown (multi-table build): free every table and
 * unregister the rules ops, all under RTNL.
 */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
	rtnl_unlock();
}
262 
/* Replay IP6MR fib rules to a (fib) notifier during registration. */
static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
}
267 
/* Sequence counter of IP6MR rule changes, used to detect missed events. */
static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
}
272 
/* True if @rule is the catch-all rule pointing at the default table
 * with no l3mdev binding (i.e. the rule installed by ip6mr_rules_init()).
 */
bool ip6mr_rule_default(const struct fib_rule *rule)
{
	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
}
EXPORT_SYMBOL(ip6mr_rule_default);
279 #else
280 #define ip6mr_for_each_table(mrt, net) \
281 	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
282 
/* Single-table build: the iteration visits exactly the one table. */
static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	if (!mrt)
		return net->ipv6.mrt6;
	return NULL;
}
290 
/* Single-table build: @id is ignored, always the one table. */
static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}
295 
/* Single-table build: no rules, every flow resolves to the one table. */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}
302 
/* Single-table build: just create the default table. */
static int __net_init ip6mr_rules_init(struct net *net)
{
	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
	return net->ipv6.mrt6 ? 0 : -ENOMEM;
}
308 
/* Single-table build: free the one table under RTNL. */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}
316 
/* Single-table build: no rules to dump. */
static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
	return 0;
}
321 
/* Single-table build: rule set never changes. */
static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	return 0;
}
326 #endif
327 
328 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
329 			  const void *ptr)
330 {
331 	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
332 	struct mfc6_cache *c = (struct mfc6_cache *)ptr;
333 
334 	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
335 	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
336 }
337 
/* rhashtable layout for resolved MFC entries: keyed by the
 * (origin, mcastgrp) pair embedded in struct mfc6_cache.
 */
static const struct rhashtable_params ip6mr_rht_params = {
	.head_offset = offsetof(struct mr_mfc, mnode),
	.key_offset = offsetof(struct mfc6_cache, cmparg),
	.key_len = sizeof(struct mfc6_cache_cmp_arg),
	.nelem_hint = 3,
	.locks_mul = 1,
	.obj_cmpfn = ip6mr_hash_cmp,
	.automatic_shrinking = true,
};
347 
/* Hook called by mr_table_alloc(): link a freshly created table into
 * the per-netns list (no-op on single-table builds).
 */
static void ip6mr_new_table_set(struct mr_table *mrt,
				struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
}
355 
/* All-zero (::/:: wildcard) compare key used by the generic mr_table
 * code for (*,*) lookups.
 */
static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
	.mf6c_origin = IN6ADDR_ANY_INIT,
	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};
360 
/* Family-specific table ops handed to the shared mr_table code. */
static struct mr_table_ops ip6mr_mr_table_ops = {
	.rht_params = &ip6mr_rht_params,
	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
};
365 
366 static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
367 {
368 	struct mr_table *mrt;
369 
370 	mrt = ip6mr_get_table(net, id);
371 	if (mrt)
372 		return mrt;
373 
374 	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
375 			      ipmr_expire_process, ip6mr_new_table_set);
376 }
377 
/* Tear down @mrt: stop the unresolved-queue expiry timer, flush all
 * vifs and cache entries, destroy the MFC hash and free the table.
 * NOTE(review): callers here invoke this under rtnl_lock — presumably
 * required by mroute_clean_tables(); confirm before adding callers.
 */
static void ip6mr_free_table(struct mr_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, true);
	rhltable_destroy(&mrt->mfc_hash);
	kfree(mrt);
}
385 
386 #ifdef CONFIG_PROC_FS
387 /* The /proc interfaces to multicast routing
388  * /proc/ip6_mr_cache /proc/ip6_mr_vif
389  */
390 
/* seq_file .start for /proc/net/ip6_mr_vif: pick the default table and
 * take mrt_lock for the duration of the walk (released in .stop).
 */
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct mr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return mr_vif_seq_start(seq, pos);
}
407 
/* seq_file .stop: drop the lock taken in ip6mr_vif_seq_start(). */
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}
413 
/* seq_file .show: one header line, then one line per vif with its
 * index, device name (or "none") and traffic counters.
 */
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct mr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		/* vif index is derived from the entry's table position */
		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}
435 
/* seq ops for /proc/net/ip6_mr_vif (next is the generic mr helper). */
static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};
442 
/* open() for the vif proc file: netns-aware seq_file with iterator state. */
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct mr_vif_iter));
}
448 
/* file ops for /proc/net/ip6_mr_vif. */
static const struct file_operations ip6mr_vif_fops = {
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
455 
/* seq_file .start for /proc/net/ip6_mr_cache: default table only;
 * locking for the resolved/unresolved lists is handled by the generic
 * mr_mfc_seq_start() helper (given mfc_unres_lock for the latter).
 */
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}
467 
/* seq_file .show for the MFC cache: header line, then one line per
 * entry with group, origin, input interface and — for resolved entries
 * only — counters plus the per-oif TTL thresholds.
 */
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct mr_mfc_iter *it = seq->private;
		struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->_c.mfc_parent);

		/* iterator cursor distinguishes resolved vs unresolved */
		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->_c.mfc_un.res.pkt,
				   mfc->_c.mfc_un.res.bytes,
				   mfc->_c.mfc_un.res.wrong_if);
			/* oifs with ttl < 255 are forwarding targets */
			for (n = mfc->_c.mfc_un.res.minvif;
			     n < mfc->_c.mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->_c.mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d", n,
						   mfc->_c.mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}
509 
/* seq ops for /proc/net/ip6_mr_cache (next/stop are generic helpers). */
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = mr_mfc_seq_next,
	.stop  = mr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
516 
/* open() for the MFC proc file: netns-aware seq_file with iterator state. */
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct mr_mfc_iter));
}
522 
/* file ops for /proc/net/ip6_mr_cache. */
static const struct file_operations ip6mr_mfc_fops = {
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
529 #endif
530 
531 #ifdef CONFIG_IPV6_PIMSM_V2
532 
/* Receive handler for IPPROTO_PIM: decapsulate a PIM REGISTER message
 * and re-inject the inner IPv6 multicast packet as if it arrived on the
 * table's register vif.  Always consumes @skb and returns 0.
 */
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	/* Must be a v2 REGISTER (not NULL-register) with a valid
	 * checksum over either the header alone or the whole packet.
	 */
	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	/* hold the register vif's device across the re-injection */
	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (!reg_dev)
		goto drop;

	/* strip outer headers so the inner packet starts the skb */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
597 
/* inet6 protocol entry for IPPROTO_PIM. */
static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};
601 
602 /* Service routines creating virtual interfaces: PIMREG */
603 
/* ndo_start_xmit for the PIMREG device: packets "transmitted" on the
 * register vif are reported to the userspace daemon as MRT6MSG_WHOLEPKT
 * and then freed — nothing goes on the wire.
 *
 * NOTE(review): on table-lookup failure this returns a negative errno
 * rather than a netdev_tx_t code; matches the code as-is — confirm
 * against upstream before changing.
 */
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}
630 
/* PIMREG has no underlying link device. */
static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}
635 
/* netdev ops for the PIMREG pseudo device. */
static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};
640 
/* alloc_netdev() setup callback for the PIMREG device: no ARP, netns
 * local, MTU leaves room for the outer IPv6 + PIM register headers.
 */
static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->needs_free_netdev	= true;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
650 
651 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
652 {
653 	struct net_device *dev;
654 	char name[IFNAMSIZ];
655 
656 	if (mrt->id == RT6_TABLE_DFLT)
657 		sprintf(name, "pim6reg");
658 	else
659 		sprintf(name, "pim6reg%u", mrt->id);
660 
661 	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
662 	if (!dev)
663 		return NULL;
664 
665 	dev_net_set(dev, net);
666 
667 	if (register_netdevice(dev)) {
668 		free_netdev(dev);
669 		return NULL;
670 	}
671 
672 	if (dev_open(dev))
673 		goto failure;
674 
675 	dev_hold(dev);
676 	return dev;
677 
678 failure:
679 	unregister_netdevice(dev);
680 	return NULL;
681 }
682 #endif
683 
/* Notify fib listeners of a vif add/delete, bumping the ipmr seqcount. */
static int call_ip6mr_vif_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct vif_device *vif,
					  mifi_t vif_index, u32 tb_id)
{
	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     vif, vif_index, tb_id,
				     &net->ipv6.ipmr_seq);
}
693 
/* Notify fib listeners of an MFC entry change, bumping the seqcount. */
static int call_ip6mr_mfc_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct mfc6_cache *mfc, u32 tb_id)
{
	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
}
701 
702 /* Delete a VIF entry */
/* Delete a VIF entry */
/* Remove vif @vifi from @mrt.
 *
 * Clears the slot under write-locked mrt_lock, shrinks mrt->maxvif if
 * the last slot was freed, drops allmulti on the device, updates the
 * mc_forwarding netconf state, and — for a register vif not being torn
 * down via a device notifier (@notify == 0) — queues the device for
 * unregistration on @head.  Returns 0 or -EADDRNOTAVAIL when the index
 * is out of range or the slot is empty.
 */
static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
		       struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	if (VIF_EXISTS(mrt, vifi))
		call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
					       FIB_EVENT_VIF_DEL, v, vifi,
					       mrt->id);

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	/* freed the highest slot: scan down for the new maxvif */
	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding--;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if ((v->flags & MIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}
761 
/* RCU callback: return an MFC cache entry to the slab cache. */
static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
{
	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);

	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
}
768 
/* Free an MFC entry after an RCU grace period (readers may hold it). */
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}
773 
774 /* Destroy an unresolved cache entry, killing queued skbs
775    and reporting error to netlink readers.
776  */
777 
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
		/* version 0 marks a queued netlink request, not a real
		 * IPv6 packet: answer it with an ETIMEDOUT error reply
		 */
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}
800 
801 
802 /* Timer process for all the unresolved queue. */
803 
/* Expire timed-out unresolved entries and re-arm the timer for the
 * earliest remaining expiry (at most 10s out).  Caller holds
 * mfc_unres_lock.
 */
static void ipmr_do_expire_process(struct mr_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mr_mfc *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}
827 
/* Timer handler for the unresolved queue.  Uses trylock because it
 * runs in softirq context: if the lock is contended, retry one jiffy
 * later instead of spinning.
 */
static void ipmr_expire_process(struct timer_list *t)
{
	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}
842 
843 /* Fill oifs list. It is called under write locked mrt_lock. */
844 
/* Fill oifs list. It is called under write locked mrt_lock. */

/* Rebuild @cache's per-vif TTL thresholds from @ttls and recompute the
 * [minvif, maxvif) forwarding range.  A ttl of 0 or 255 means "do not
 * forward to this vif".
 */
static void ip6mr_update_thresholds(struct mr_table *mrt,
				    struct mr_mfc *cache,
				    unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
	cache->mfc_un.res.lastuse = jiffies;
}
867 
/* Add a vif to @mrt as described by the MRT6_ADD_MIF request @vifc.
 *
 * Flags select the device: MIFF_REGISTER creates the PIMREG pseudo
 * device (at most one per table); 0 binds an existing device by
 * ifindex.  Enables allmulti, bumps mc_forwarding netconf state, then
 * publishes the slot under write-locked mrt_lock and fires the VIF_ADD
 * notifier.  @mrtsock marks the entry non-static (removed when the
 * mroute socket closes).  Returns 0 or a negative errno.
 */
static int mif6_add(struct net *net, struct mr_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding++;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
			MIFF_REGISTER);

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
				       v, vifi, mrt->id);
	return 0;
}
942 
/* Exact (S,G) lookup in @mrt's resolved cache. */
static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find(mrt, &arg);
}
954 
955 /* Look for a (*,G) entry */
/* Look for a (*,G) entry */
/* Wildcard-source lookup: (*,G) for a specific group, or the (*,*)
 * parent entry when @mcastgrp is the unspecified address.
 */
static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = in6addr_any,
		.mf6c_mcastgrp = *mcastgrp,
	};

	if (ipv6_addr_any(mcastgrp))
		return mr_mfc_find_any_parent(mrt, mifi);
	return mr_mfc_find_any(mrt, mifi, &arg);
}
969 
970 /* Look for a (S,G,iif) entry if parent != -1 */
/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc6_cache *
ip6mr_cache_find_parent(struct mr_table *mrt,
			const struct in6_addr *origin,
			const struct in6_addr *mcastgrp,
			int parent)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find_parent(mrt, &arg, parent);
}
984 
985 /* Allocate a multicast cache entry */
/* Allocate a multicast cache entry */
/* GFP_KERNEL allocation for a resolved entry: assert timer primed to
 * fire immediately, no oifs yet, refcount 1.  Returns NULL on OOM.
 */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (!c)
		return NULL;
	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
	c->_c.mfc_un.res.minvif = MAXMIFS;
	c->_c.free = ip6mr_cache_free_rcu;
	refcount_set(&c->_c.mfc_un.res.refcount, 1);
	return c;
}
997 
/* GFP_ATOMIC allocation for an unresolved entry (created from the
 * forwarding path): empty pending-skb queue, 10s resolution deadline.
 */
static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (!c)
		return NULL;
	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}
1007 
1008 /*
1009  *	A cache entry has gone into a resolved state from queued
1010  */
1011 
/*
 *	A cache entry has gone into a resolved state from queued
 */

/* Drain the unresolved entry @uc's pending skbs through the now
 * resolved entry @c: real packets are forwarded; queued netlink route
 * requests (marked by version == 0) are answered with the route or an
 * EMSGSIZE error.
 */
static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));

			if (mr_fill_mroute(mrt, skb, &c->_c,
					   nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			ip6_mr_forward(net, mrt, skb, c);
	}
}
1040 
1041 /*
1042  *	Bounce a cache query up to pim6sd and netlink.
1043  *
1044  *	Called under mrt_lock.
1045  */
1046 
/*
 *	Bounce a cache query up to pim6sd and netlink.
 *
 *	Called under mrt_lock.
 *
 *	Builds an mrt6msg of type @assert (e.g. MRT6MSG_NOCACHE,
 *	MRT6MSG_WRONGMIF, or MRT6MSG_WHOLEPKT for PIM register vifs)
 *	from @pkt and queues it on the table's mroute control socket,
 *	also emitting a netlink notification.  Returns 0 on success or
 *	a negative errno (-ENOBUFS on alloc failure, -EINVAL when no
 *	mroute socket is attached, or the socket-queue error).
 */

static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sock *mroute6_sk;
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	/* WHOLEPKT: keep the entire packet, only grow headroom for the
	 * mrt6msg header that replaces the outer IPv6 header.
	 */
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	/* RCU protects the mroute socket pointer */
	rcu_read_lock();
	mroute6_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute6_sk) {
		rcu_read_unlock();
		kfree_skb(skb);
		return -EINVAL;
	}

	mrt6msg_netlink_event(mrt, skb);

	/* Deliver to user space multicast routing algorithms */
	ret = sock_queue_rcv_skb(mroute6_sk, skb);
	rcu_read_unlock();
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}
1140 
1141 /* Queue a packet for resolution. It gets locked cache entry! */
/* Queue a packet on an unresolved-route cache entry, creating the entry
 * if none exists yet, and reflect the first packet up to the user-space
 * daemon as a MRT6MSG_NOCACHE report so it can install a route.
 * Consumes @skb on failure. Returns 0 or a negative errno.
 */
static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
				  struct sk_buff *skb)
{
	struct mfc6_cache *c;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);
	/* Look for an existing unresolved entry for this (origin, group) */
	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		/* At most 10 unresolved entries may be pending per table */
		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		/* (Re)arm the expiry timer for the unresolved queue */
		ipmr_do_expire_process(mrt);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		/* No more than 4 packets wait per unresolved entry */
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
1210 
1211 /*
1212  *	MFC6 cache manipulation by user space
1213  */
1214 
/* Delete the resolved MFC entry matching @mfc (and @parent) from @mrt.
 * Entries are added/deleted only under RTNL; the short RCU section just
 * satisfies the lookup helper. Notifies FIB listeners and rtnetlink,
 * then drops the cache reference. Returns 0 or -ENOENT.
 */
static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	struct mfc6_cache *c;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	/* Unlink from both the hash table and the RCU list before
	 * telling anyone; readers may still see the entry until the
	 * grace period ends.
	 */
	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
	list_del_rcu(&c->_c.list);

	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_DELROUTE);
	mr_cache_put(&c->_c);
	return 0;
}
1236 
1237 static int ip6mr_device_event(struct notifier_block *this,
1238 			      unsigned long event, void *ptr)
1239 {
1240 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1241 	struct net *net = dev_net(dev);
1242 	struct mr_table *mrt;
1243 	struct vif_device *v;
1244 	int ct;
1245 
1246 	if (event != NETDEV_UNREGISTER)
1247 		return NOTIFY_DONE;
1248 
1249 	ip6mr_for_each_table(mrt, net) {
1250 		v = &mrt->vif_table[0];
1251 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1252 			if (v->dev == dev)
1253 				mif6_delete(mrt, ct, 1, NULL);
1254 		}
1255 	}
1256 
1257 	return NOTIFY_DONE;
1258 }
1259 
/* Return the FIB notifier sequence number for IP6MR: the per-netns
 * table counter plus the policy-rule counter. RTNL serializes updates,
 * hence the assertion.
 */
static unsigned int ip6mr_seq_read(struct net *net)
{
	ASSERT_RTNL();

	return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
}
1266 
/* Replay the current IP6MR state (rules and table entries) to a newly
 * registered FIB notifier via the generic mr_dump() helper.
 */
static int ip6mr_dump(struct net *net, struct notifier_block *nb)
{
	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
		       ip6mr_mr_table_iter, &mrt_lock);
}
1272 
/* Netdevice notifier: cleans up VIFs when their device goes away */
static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
1276 
/* Template for the per-netns FIB notifier ops registered in
 * ip6mr_notifier_init(); provides the seq-read and dump callbacks
 * used to bring new listeners up to date.
 */
static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.fib_seq_read	= ip6mr_seq_read,
	.fib_dump	= ip6mr_dump,
	.owner		= THIS_MODULE,
};
1283 
/* Per-netns setup: reset the notifier sequence counter and register
 * the FIB notifier ops for this namespace. Returns 0 or a negative
 * errno from the registration.
 */
static int __net_init ip6mr_notifier_init(struct net *net)
{
	struct fib_notifier_ops *ops;

	net->ipv6.ipmr_seq = 0;

	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	net->ipv6.ip6mr_notifier_ops = ops;

	return 0;
}
1298 
/* Per-netns teardown counterpart of ip6mr_notifier_init() */
static void __net_exit ip6mr_notifier_exit(struct net *net)
{
	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
	net->ipv6.ip6mr_notifier_ops = NULL;
}
1304 
1305 /* Setup for IP multicast routing */
/* Per-netns init: FIB notifier, routing tables/rules and the two
 * /proc entries. Unwinds in reverse order via the goto chain on any
 * failure. Returns 0 or a negative errno.
 */
static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_notifier_init(net);
	if (err)
		return err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto ip6mr_rules_fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
ip6mr_rules_fail:
	ip6mr_notifier_exit(net);
	return err;
}
1338 
/* Per-netns exit: tear down in the reverse order of ip6mr_net_init() */
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
	ip6mr_notifier_exit(net);
}
1348 
/* Per-network-namespace lifecycle hooks for IPv6 multicast routing */
static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};
1353 
/* Boot/module init: create the MFC slab cache, register pernet ops,
 * the netdevice notifier, the optional PIM protocol handler and the
 * RTM_GETROUTE dump handler. Unwinds everything in reverse order on
 * failure. Returns 0 or a negative errno.
 */
int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
				   NULL, ip6mr_rtm_dumproute, 0);
	if (err == 0)
		return 0;

#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}
1395 
/* Module exit: undo ip6_mr_init() registrations in reverse order */
void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}
1406 
/* Add (or replace) an MFC entry from a user-space MF6CCTL request.
 * @mrtsock: true when the request came from the mroute control socket
 *           itself (entries from elsewhere are marked MFC_STATIC so
 *           socket teardown leaves them alone).
 * @parent:  parent VIF to match, or -1 to ignore (non-proxy requests).
 * If a matching entry already exists it is updated in place; otherwise
 * a new one is inserted and any queued unresolved packets for the same
 * (origin, group) are released through it. Returns 0 or negative errno.
 */
static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	unsigned char ttls[MAXMIFS];
	struct mfc6_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int i, err;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	/* Forward on the MIFs selected in mf6cc_ifset (ttl 1), block the
	 * rest (ttl 255 means "never forward").
	 */
	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (c) {
		/* Existing entry: update thresholds/parent in place */
		write_lock_bh(&mrt_lock);
		c->_c.mfc_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, &c->_c, ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
					       c, mrt->id);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	/* New entries must target a multicast group, except the (*,*)
	 * wildcard (any-source, any-group) entry.
	 */
	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->_c.mfc_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, &c->_c, ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ip6mr_rht_params);
	if (err) {
		pr_err("ip6mr: rhtable insert error %d\n", err);
		ip6mr_cache_free(c);
		return err;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);

	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc6_cache *)_uc;
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	/* No unresolved entries left: the expiry timer has nothing to do */
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		/* Flush the packets that were waiting on this route */
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
				       c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}
1495 
1496 /*
1497  *	Close the multicast socket, and clear the vif tables etc
1498  */
1499 
/* Flush @mrt: delete VIFs, resolved cache entries and the unresolved
 * queue. With @all false, VIFF_STATIC VIFs and MFC_STATIC entries
 * (those not owned by the mroute socket) are preserved.
 */
static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
	struct mr_mfc *c, *tmp;
	LIST_HEAD(list);
	int i;

	/* Shut down all active vif entries */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
			continue;
		mif6_delete(mrt, i, 0, &list);
	}
	/* Batch-unregister the devices collected by mif6_delete() */
	unregister_netdevice_many(&list);

	/* Wipe the cache */
	list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
		if (!all && (c->mfc_flags & MFC_STATIC))
			continue;
		rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
		list_del_rcu(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		mr_cache_put(c);
	}

	/* Drop unresolved entries and their queued packets */
	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
						       FIB_EVENT_ENTRY_DEL,
						       (struct mfc6_cache *)c,
						       mrt->id);
			mr6_netlink_event(mrt, (struct mfc6_cache *)c,
					  RTM_DELROUTE);
			ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
1539 
/* Install @sk as the multicast routing control socket for @mrt
 * (MRT6_INIT). Only one such socket may exist per table; a second
 * attempt gets -EADDRINUSE. Enables mc_forwarding and announces the
 * change via netconf.
 */
static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (rtnl_dereference(mrt->mroute_sk)) {
		err = -EADDRINUSE;
	} else {
		rcu_assign_pointer(mrt->mroute_sk, sk);
		/* Defer sk freeing to an RCU grace period (see ip6mr_sk_done) */
		sock_set_flag(sk, SOCK_RCU_FREE);
		net->ipv6.devconf_all->mc_forwarding++;
	}
	write_unlock_bh(&mrt_lock);

	/* Notify outside mrt_lock; RTNL still serializes us */
	if (!err)
		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	rtnl_unlock();

	return err;
}
1565 
/* Tear down the association of @sk as an mroute control socket
 * (MRT6_DONE / socket close): clear the table's socket pointer,
 * decrement mc_forwarding, announce via netconf and flush the table's
 * non-static state. Returns 0 if @sk owned a table, -EACCES otherwise.
 */
int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	/* Only the ICMPv6 raw socket can be an mroute socket at all */
	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return err;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			write_lock_bh(&mrt_lock);
			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
			/* Note that mroute_sk had SOCK_RCU_FREE set,
			 * so the RCU grace period before sk freeing
			 * is guaranteed by sk_destruct()
			 */
			net->ipv6.devconf_all->mc_forwarding--;
			write_unlock_bh(&mrt_lock);
			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);

			/* Keep static VIFs/entries (all == false) */
			mroute_clean_tables(mrt, false);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}
1601 
1602 bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1603 {
1604 	struct mr_table *mrt;
1605 	struct flowi6 fl6 = {
1606 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
1607 		.flowi6_oif	= skb->dev->ifindex,
1608 		.flowi6_mark	= skb->mark,
1609 	};
1610 
1611 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1612 		return NULL;
1613 
1614 	return rcu_access_pointer(mrt->mroute_sk);
1615 }
1616 EXPORT_SYMBOL(mroute6_is_socket);
1617 
1618 /*
1619  *	Socket options and virtual interface manipulation. The whole
1620  *	virtual interface system is a complete heap, but unfortunately
1621  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1622  *	MOSPF/PIM router set up we can clean this up.
1623  */
1624 
1625 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1626 {
1627 	int ret, parent = 0;
1628 	struct mif6ctl vif;
1629 	struct mf6cctl mfc;
1630 	mifi_t mifi;
1631 	struct net *net = sock_net(sk);
1632 	struct mr_table *mrt;
1633 
1634 	if (sk->sk_type != SOCK_RAW ||
1635 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1636 		return -EOPNOTSUPP;
1637 
1638 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1639 	if (!mrt)
1640 		return -ENOENT;
1641 
1642 	if (optname != MRT6_INIT) {
1643 		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1644 		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
1645 			return -EACCES;
1646 	}
1647 
1648 	switch (optname) {
1649 	case MRT6_INIT:
1650 		if (optlen < sizeof(int))
1651 			return -EINVAL;
1652 
1653 		return ip6mr_sk_init(mrt, sk);
1654 
1655 	case MRT6_DONE:
1656 		return ip6mr_sk_done(sk);
1657 
1658 	case MRT6_ADD_MIF:
1659 		if (optlen < sizeof(vif))
1660 			return -EINVAL;
1661 		if (copy_from_user(&vif, optval, sizeof(vif)))
1662 			return -EFAULT;
1663 		if (vif.mif6c_mifi >= MAXMIFS)
1664 			return -ENFILE;
1665 		rtnl_lock();
1666 		ret = mif6_add(net, mrt, &vif,
1667 			       sk == rtnl_dereference(mrt->mroute_sk));
1668 		rtnl_unlock();
1669 		return ret;
1670 
1671 	case MRT6_DEL_MIF:
1672 		if (optlen < sizeof(mifi_t))
1673 			return -EINVAL;
1674 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1675 			return -EFAULT;
1676 		rtnl_lock();
1677 		ret = mif6_delete(mrt, mifi, 0, NULL);
1678 		rtnl_unlock();
1679 		return ret;
1680 
1681 	/*
1682 	 *	Manipulate the forwarding caches. These live
1683 	 *	in a sort of kernel/user symbiosis.
1684 	 */
1685 	case MRT6_ADD_MFC:
1686 	case MRT6_DEL_MFC:
1687 		parent = -1;
1688 		/* fall through */
1689 	case MRT6_ADD_MFC_PROXY:
1690 	case MRT6_DEL_MFC_PROXY:
1691 		if (optlen < sizeof(mfc))
1692 			return -EINVAL;
1693 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1694 			return -EFAULT;
1695 		if (parent == 0)
1696 			parent = mfc.mf6cc_parent;
1697 		rtnl_lock();
1698 		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1699 			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1700 		else
1701 			ret = ip6mr_mfc_add(net, mrt, &mfc,
1702 					    sk ==
1703 					    rtnl_dereference(mrt->mroute_sk),
1704 					    parent);
1705 		rtnl_unlock();
1706 		return ret;
1707 
1708 	/*
1709 	 *	Control PIM assert (to activate pim will activate assert)
1710 	 */
1711 	case MRT6_ASSERT:
1712 	{
1713 		int v;
1714 
1715 		if (optlen != sizeof(v))
1716 			return -EINVAL;
1717 		if (get_user(v, (int __user *)optval))
1718 			return -EFAULT;
1719 		mrt->mroute_do_assert = v;
1720 		return 0;
1721 	}
1722 
1723 #ifdef CONFIG_IPV6_PIMSM_V2
1724 	case MRT6_PIM:
1725 	{
1726 		int v;
1727 
1728 		if (optlen != sizeof(v))
1729 			return -EINVAL;
1730 		if (get_user(v, (int __user *)optval))
1731 			return -EFAULT;
1732 		v = !!v;
1733 		rtnl_lock();
1734 		ret = 0;
1735 		if (v != mrt->mroute_do_pim) {
1736 			mrt->mroute_do_pim = v;
1737 			mrt->mroute_do_assert = v;
1738 		}
1739 		rtnl_unlock();
1740 		return ret;
1741 	}
1742 
1743 #endif
1744 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1745 	case MRT6_TABLE:
1746 	{
1747 		u32 v;
1748 
1749 		if (optlen != sizeof(u32))
1750 			return -EINVAL;
1751 		if (get_user(v, (u32 __user *)optval))
1752 			return -EFAULT;
1753 		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1754 		if (v != RT_TABLE_DEFAULT && v >= 100000000)
1755 			return -EINVAL;
1756 		if (sk == rcu_access_pointer(mrt->mroute_sk))
1757 			return -EBUSY;
1758 
1759 		rtnl_lock();
1760 		ret = 0;
1761 		if (!ip6mr_new_table(net, v))
1762 			ret = -ENOMEM;
1763 		raw6_sk(sk)->ip6mr_table = v;
1764 		rtnl_unlock();
1765 		return ret;
1766 	}
1767 #endif
1768 	/*
1769 	 *	Spurious command, or MRT6_VERSION which you cannot
1770 	 *	set.
1771 	 */
1772 	default:
1773 		return -ENOPROTOOPT;
1774 	}
1775 }
1776 
1777 /*
1778  *	Getsock opt support for the multicast routing system.
1779  */
1780 
/* Getsockopt for the multicast routing API. Only MRT6_VERSION,
 * MRT6_PIM (if configured) and MRT6_ASSERT are readable; each yields a
 * single int, truncated to the caller-supplied buffer length.
 */
int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	/* Clamp to sizeof(int); a negative user length survives min_t
	 * and is rejected below.
	 */
	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}
1826 
1827 /*
1828  *	The IP multicast ioctl support routines.
1829  */
1830 
/* ioctl handler exposing per-VIF (SIOCGETMIFCNT_IN6) and per-route
 * (SIOCGETSGCNT_IN6) packet/byte counters to user space.
 */
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		/* Snapshot the counters under mrt_lock, copy out after
		 * dropping it (copy_to_user may sleep).
		 */
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		/* Cache lookup is RCU-protected; same snapshot-then-copy
		 * pattern as above.
		 */
		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
1887 
1888 #ifdef CONFIG_COMPAT
/* 32-bit-compat layout of struct sioc_sg_req6 (SIOCGETSGCNT_IN6):
 * the counters are compat_ulong_t instead of unsigned long.
 */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};
1896 
/* 32-bit-compat layout of struct sioc_mif_req6 (SIOCGETMIFCNT_IN6) */
struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};
1904 
/* 32-bit-compat twin of ip6mr_ioctl(): identical logic, but user-space
 * structures use the compat_* layouts above.
 */
int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		/* Snapshot under mrt_lock, copy out after dropping it */
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
1961 #endif
1962 
/* Netfilter okfn for forwarded multicast packets: bump the forwarding
 * SNMP counters and hand the skb to dst_output() for transmission.
 */
static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTOCTETS, skb->len);
	return dst_output(net, sk, skb);
}
1971 
1972 /*
1973  *	Processing handlers for ip6mr_forward
1974  */
1975 
/* Emit one copy of @skb out of VIF @vifi: a register VIF bounces the
 * whole packet to the daemon as MRT6MSG_WHOLEPKT; a normal VIF gets a
 * fresh route, a decremented hop limit and an NF_INET_FORWARD hook
 * traversal. Always consumes @skb. Called from ip6_mr_forward(), i.e.
 * with mrt_lock held for read (see ip6_mr_input()).
 */
static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (!vif->dev)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		/* Register VIF: account the packet, then punt it to the
		 * user-space daemon instead of transmitting it.
		 */
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	/* Re-read the header: skb_cow() may have reallocated the data */
	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
2049 
2050 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2051 {
2052 	int ct;
2053 
2054 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2055 		if (mrt->vif_table[ct].dev == dev)
2056 			break;
2057 	}
2058 	return ct;
2059 }
2060 
/* Core forwarding decision for a packet matched to cache entry @c:
 * update the entry's statistics, police the incoming interface (with a
 * PIM assert report on a wrong-interface packet), then replicate the
 * skb to every output VIF whose TTL threshold the packet clears.
 * Always consumes @skb.
 */
static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *c)
{
	int psend = -1;
	int vif, ct;
	int true_vifi = ip6mr_find_vif(mrt, skb->dev);

	vif = c->_c.mfc_parent;
	c->_c.mfc_un.res.pkt++;
	c->_c.mfc_un.res.bytes += skb->len;
	c->_c.mfc_un.res.lastuse = jiffies;

	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		rcu_read_lock();
		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
			rcu_read_unlock();
			goto forward;
		}
		rcu_read_unlock();
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif_table[vif].dev != skb->dev) {
		c->_c.mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
		    /* Rate-limit asserts to one per MFC_ASSERT_THRESH */
		    time_after(jiffies,
			       c->_c.mfc_un.res.last_assert +
			       MFC_ASSERT_THRESH)) {
			c->_c.mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

forward:
	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&c->mf6c_origin) &&
	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != c->_c.mfc_parent &&
		    ipv6_hdr(skb)->hop_limit >
				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = c->_c.mfc_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	/* Clone for every VIF but the last; the original skb is used for
	 * the final transmission (psend trails one VIF behind).
	 */
	for (ct = c->_c.mfc_un.res.maxvif - 1;
	     ct >= c->_c.mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2,
						       c, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, c, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}
2157 
2158 
2159 /*
2160  *	Multicast packets for forwarding arrive here
2161  */
2162 
/* Entry point for multicast packets to be forwarded: pick the routing
 * table by policy lookup, find the matching (S,G) — or wildcard (*,G)
 * — cache entry and forward. Packets with no entry are queued as
 * unresolved and reported to the daemon. Consumes @skb.
 */
int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
	if (!cache) {
		/* No exact (S,G) match: try an any-source (*,G) entry */
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt,
						     &ipv6_hdr(skb)->daddr,
						     vif);
	}

	/*
	 *	No usable cache entry
	 */
	if (!cache) {
		int vif;

		vif = ip6mr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			/* Park the packet until the daemon installs a route */
			int err = ip6mr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(net, mrt, skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}
2216 
/* Fill rtnetlink route attributes for a multicast route described by
 * the rt6_info attached to @skb (RTM_GETROUTE support). If no cache
 * entry exists yet, a synthetic header-only skb is queued as
 * unresolved so the daemon can resolve it; the netlink reply is then
 * delivered asynchronously to @portid.
 */
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
		    u32 portid)
{
	int err;
	struct mr_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		/* Fall back to an any-source (*,G) entry */
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		dev = skb->dev;
		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		NETLINK_CB(skb2).portid = portid;
		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		/* Build a minimal IPv6 header carrying only the (src, dst)
		 * pair; everything else is zeroed/IPPROTO_NONE.
		 */
		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
	read_unlock(&mrt_lock);
	return err;
}
2286 
2287 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2288 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2289 			     int flags)
2290 {
2291 	struct nlmsghdr *nlh;
2292 	struct rtmsg *rtm;
2293 	int err;
2294 
2295 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2296 	if (!nlh)
2297 		return -EMSGSIZE;
2298 
2299 	rtm = nlmsg_data(nlh);
2300 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2301 	rtm->rtm_dst_len  = 128;
2302 	rtm->rtm_src_len  = 128;
2303 	rtm->rtm_tos      = 0;
2304 	rtm->rtm_table    = mrt->id;
2305 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2306 		goto nla_put_failure;
2307 	rtm->rtm_type = RTN_MULTICAST;
2308 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2309 	if (c->_c.mfc_flags & MFC_STATIC)
2310 		rtm->rtm_protocol = RTPROT_STATIC;
2311 	else
2312 		rtm->rtm_protocol = RTPROT_MROUTED;
2313 	rtm->rtm_flags    = 0;
2314 
2315 	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2316 	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2317 		goto nla_put_failure;
2318 	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2319 	/* do not break the dump if cache is unresolved */
2320 	if (err < 0 && err != -ENOENT)
2321 		goto nla_put_failure;
2322 
2323 	nlmsg_end(skb, nlh);
2324 	return 0;
2325 
2326 nla_put_failure:
2327 	nlmsg_cancel(skb, nlh);
2328 	return -EMSGSIZE;
2329 }
2330 
2331 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2332 			      u32 portid, u32 seq, struct mr_mfc *c,
2333 			      int cmd, int flags)
2334 {
2335 	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2336 				 cmd, flags);
2337 }
2338 
2339 static int mr6_msgsize(bool unresolved, int maxvif)
2340 {
2341 	size_t len =
2342 		NLMSG_ALIGN(sizeof(struct rtmsg))
2343 		+ nla_total_size(4)	/* RTA_TABLE */
2344 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2345 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2346 		;
2347 
2348 	if (!unresolved)
2349 		len = len
2350 		      + nla_total_size(4)	/* RTA_IIF */
2351 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2352 		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2353 						/* RTA_MFC_STATS */
2354 		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2355 		;
2356 
2357 	return len;
2358 }
2359 
2360 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2361 			      int cmd)
2362 {
2363 	struct net *net = read_pnet(&mrt->net);
2364 	struct sk_buff *skb;
2365 	int err = -ENOBUFS;
2366 
2367 	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2368 			GFP_ATOMIC);
2369 	if (!skb)
2370 		goto errout;
2371 
2372 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2373 	if (err < 0)
2374 		goto errout;
2375 
2376 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2377 	return;
2378 
2379 errout:
2380 	kfree_skb(skb);
2381 	if (err < 0)
2382 		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2383 }
2384 
2385 static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2386 {
2387 	size_t len =
2388 		NLMSG_ALIGN(sizeof(struct rtgenmsg))
2389 		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
2390 		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
2391 					/* IP6MRA_CREPORT_SRC_ADDR */
2392 		+ nla_total_size(sizeof(struct in6_addr))
2393 					/* IP6MRA_CREPORT_DST_ADDR */
2394 		+ nla_total_size(sizeof(struct in6_addr))
2395 					/* IP6MRA_CREPORT_PKT */
2396 		+ nla_total_size(payloadlen)
2397 		;
2398 
2399 	return len;
2400 }
2401 
/* Broadcast an RTM_NEWCACHEREPORT netlink notification for the cache
 * report packet @pkt (a struct mrt6msg at its transport header followed
 * by the original packet bytes) to RTNLGRP_IPV6_MROUTE_R listeners.
 * On any failure the error is recorded on the group with -ENOBUFS. */
static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
{
	struct net *net = read_pnet(&mrt->net);
	struct nlmsghdr *nlh;
	struct rtgenmsg *rtgenm;
	struct mrt6msg *msg;
	struct sk_buff *skb;
	struct nlattr *nla;
	int payloadlen;

	/* Everything after the mrt6msg header is the captured packet. */
	payloadlen = pkt->len - sizeof(struct mrt6msg);
	msg = (struct mrt6msg *)skb_transport_header(pkt);

	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
	if (!skb)
		goto errout;

	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
			sizeof(struct rtgenmsg), 0);
	if (!nlh)
		goto errout;
	rtgenm = nlmsg_data(nlh);
	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
			     &msg->im6_src) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
			     &msg->im6_dst))
		goto nla_put_failure;

	/* Copy the raw packet into IP6MRA_CREPORT_PKT in one shot. */
	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
				  nla_data(nla), payloadlen))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
	return;

nla_put_failure:
	/* Roll back the partial message before freeing the skb. */
	nlmsg_cancel(skb, nlh);
errout:
	kfree_skb(skb);
	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
}
2449 
/* RTM_GETROUTE dump handler for RTNL_FAMILY_IP6MR: delegates to the
 * generic mr_rtm_dumproute() walker with the IPv6 table iterator and
 * fill callback; mfc_unres_lock is handed over (presumably to guard
 * the unresolved-entry list during the walk — defined elsewhere in
 * this file). */
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
				_ip6mr_fill_mroute, &mfc_unres_lock);
}
2455