xref: /openbmc/linux/net/ipv6/ip6mr.c (revision aee94ee8)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <linux/uaccess.h>
20 #include <linux/types.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
23 #include <linux/mm.h>
24 #include <linux/kernel.h>
25 #include <linux/fcntl.h>
26 #include <linux/stat.h>
27 #include <linux/socket.h>
28 #include <linux/inet.h>
29 #include <linux/netdevice.h>
30 #include <linux/inetdevice.h>
31 #include <linux/proc_fs.h>
32 #include <linux/seq_file.h>
33 #include <linux/init.h>
34 #include <linux/compat.h>
35 #include <linux/rhashtable.h>
36 #include <net/protocol.h>
37 #include <linux/skbuff.h>
38 #include <net/raw.h>
39 #include <linux/notifier.h>
40 #include <linux/if_arp.h>
41 #include <net/checksum.h>
42 #include <net/netlink.h>
43 #include <net/fib_rules.h>
44 
45 #include <net/ipv6.h>
46 #include <net/ip6_route.h>
47 #include <linux/mroute6.h>
48 #include <linux/pim.h>
49 #include <net/addrconf.h>
50 #include <linux/netfilter_ipv6.h>
51 #include <linux/export.h>
52 #include <net/ip6_checksum.h>
53 #include <linux/netconf.h>
54 
55 struct ip6mr_rule {
56 	struct fib_rule		common;
57 };
58 
59 struct ip6mr_result {
60 	struct mr_table	*mrt;
61 };
62 
63 /* Big lock, protecting the vif table, mrt cache and mroute socket state.
64    Note that changes are serialized via rtnl_lock.
65  */
66 
67 static DEFINE_RWLOCK(mrt_lock);
68 
69 /* Multicast router control variables */
70 
71 /* Special spinlock for queue of unresolved entries */
72 static DEFINE_SPINLOCK(mfc_unres_lock);
73 
74 /* We return to Alan's original scheme. The hash table of resolved
75    entries is changed only in process context and is protected by the
76    weak (reader/writer) lock mrt_lock. The queue of unresolved entries
77    is protected by the strong spinlock mfc_unres_lock.
78 
79    This way the data path is free of exclusive locks entirely.
80  */
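
/* Concretely, the locking used below (a summary of the scheme above):
 *
 *	mrt_lock (rwlock)  - vif_table[] and mroute_reg_vif_num; taken with
 *			     read_lock() on the receive path and with
 *			     write_lock_bh() for updates done under RTNL
 *			     (mif6_add()/mif6_delete()).
 *	mfc_unres_lock     - the mfc_unres_queue of unresolved entries.
 *	RCU                - lookups in the resolved cache (mfc_hash) on the
 *			     data path; entries are freed via call_rcu().
 */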
81 
82 static struct kmem_cache *mrt_cachep __read_mostly;
83 
84 static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
85 static void ip6mr_free_table(struct mr_table *mrt);
86 
87 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
88 			   struct sk_buff *skb, struct mfc6_cache *cache);
89 static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
90 			      mifi_t mifi, int assert);
91 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
92 			      int cmd);
93 static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
94 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
95 			       struct netlink_callback *cb);
96 static void mroute_clean_tables(struct mr_table *mrt, bool all);
97 static void ipmr_expire_process(struct timer_list *t);
98 
99 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
100 #define ip6mr_for_each_table(mrt, net) \
101 	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
102 
103 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
104 					    struct mr_table *mrt)
105 {
106 	struct mr_table *ret;
107 
108 	if (!mrt)
109 		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
110 				     struct mr_table, list);
111 	else
112 		ret = list_entry_rcu(mrt->list.next,
113 				     struct mr_table, list);
114 
115 	if (&ret->list == &net->ipv6.mr6_tables)
116 		return NULL;
117 	return ret;
118 }
119 
120 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
121 {
122 	struct mr_table *mrt;
123 
124 	ip6mr_for_each_table(mrt, net) {
125 		if (mrt->id == id)
126 			return mrt;
127 	}
128 	return NULL;
129 }
130 
131 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
132 			    struct mr_table **mrt)
133 {
134 	int err;
135 	struct ip6mr_result res;
136 	struct fib_lookup_arg arg = {
137 		.result = &res,
138 		.flags = FIB_LOOKUP_NOREF,
139 	};
140 
141 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
142 			       flowi6_to_flowi(flp6), 0, &arg);
143 	if (err < 0)
144 		return err;
145 	*mrt = res.mrt;
146 	return 0;
147 }
148 
149 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
150 			     int flags, struct fib_lookup_arg *arg)
151 {
152 	struct ip6mr_result *res = arg->result;
153 	struct mr_table *mrt;
154 
155 	switch (rule->action) {
156 	case FR_ACT_TO_TBL:
157 		break;
158 	case FR_ACT_UNREACHABLE:
159 		return -ENETUNREACH;
160 	case FR_ACT_PROHIBIT:
161 		return -EACCES;
162 	case FR_ACT_BLACKHOLE:
163 	default:
164 		return -EINVAL;
165 	}
166 
167 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
168 	if (!mrt)
169 		return -EAGAIN;
170 	res->mrt = mrt;
171 	return 0;
172 }
173 
174 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
175 {
176 	return 1;
177 }
178 
179 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
180 	FRA_GENERIC_POLICY,
181 };
182 
183 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
184 				struct fib_rule_hdr *frh, struct nlattr **tb,
185 				struct netlink_ext_ack *extack)
186 {
187 	return 0;
188 }
189 
190 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
191 			      struct nlattr **tb)
192 {
193 	return 1;
194 }
195 
196 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
197 			   struct fib_rule_hdr *frh)
198 {
199 	frh->dst_len = 0;
200 	frh->src_len = 0;
201 	frh->tos     = 0;
202 	return 0;
203 }
204 
205 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
206 	.family		= RTNL_FAMILY_IP6MR,
207 	.rule_size	= sizeof(struct ip6mr_rule),
208 	.addr_size	= sizeof(struct in6_addr),
209 	.action		= ip6mr_rule_action,
210 	.match		= ip6mr_rule_match,
211 	.configure	= ip6mr_rule_configure,
212 	.compare	= ip6mr_rule_compare,
213 	.fill		= ip6mr_rule_fill,
214 	.nlgroup	= RTNLGRP_IPV6_RULE,
215 	.policy		= ip6mr_rule_policy,
216 	.owner		= THIS_MODULE,
217 };
218 
219 static int __net_init ip6mr_rules_init(struct net *net)
220 {
221 	struct fib_rules_ops *ops;
222 	struct mr_table *mrt;
223 	int err;
224 
225 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
226 	if (IS_ERR(ops))
227 		return PTR_ERR(ops);
228 
229 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
230 
231 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
232 	if (IS_ERR(mrt)) {
233 		err = PTR_ERR(mrt);
234 		goto err1;
235 	}
236 
237 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
238 	if (err < 0)
239 		goto err2;
240 
241 	net->ipv6.mr6_rules_ops = ops;
242 	return 0;
243 
244 err2:
245 	ip6mr_free_table(mrt);
246 err1:
247 	fib_rules_unregister(ops);
248 	return err;
249 }
250 
251 static void __net_exit ip6mr_rules_exit(struct net *net)
252 {
253 	struct mr_table *mrt, *next;
254 
255 	rtnl_lock();
256 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
257 		list_del(&mrt->list);
258 		ip6mr_free_table(mrt);
259 	}
260 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
261 	rtnl_unlock();
262 }
263 
264 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
265 {
266 	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
267 }
268 
269 static unsigned int ip6mr_rules_seq_read(struct net *net)
270 {
271 	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
272 }
273 
274 bool ip6mr_rule_default(const struct fib_rule *rule)
275 {
276 	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
277 	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
278 }
279 EXPORT_SYMBOL(ip6mr_rule_default);
280 #else
281 #define ip6mr_for_each_table(mrt, net) \
282 	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
283 
284 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
285 					    struct mr_table *mrt)
286 {
287 	if (!mrt)
288 		return net->ipv6.mrt6;
289 	return NULL;
290 }
291 
292 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
293 {
294 	return net->ipv6.mrt6;
295 }
296 
297 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
298 			    struct mr_table **mrt)
299 {
300 	*mrt = net->ipv6.mrt6;
301 	return 0;
302 }
303 
304 static int __net_init ip6mr_rules_init(struct net *net)
305 {
306 	struct mr_table *mrt;
307 
308 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
309 	if (IS_ERR(mrt))
310 		return PTR_ERR(mrt);
311 	net->ipv6.mrt6 = mrt;
312 	return 0;
313 }
314 
315 static void __net_exit ip6mr_rules_exit(struct net *net)
316 {
317 	rtnl_lock();
318 	ip6mr_free_table(net->ipv6.mrt6);
319 	net->ipv6.mrt6 = NULL;
320 	rtnl_unlock();
321 }
322 
323 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
324 {
325 	return 0;
326 }
327 
328 static unsigned int ip6mr_rules_seq_read(struct net *net)
329 {
330 	return 0;
331 }
332 #endif
333 
334 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
335 			  const void *ptr)
336 {
337 	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
338 	struct mfc6_cache *c = (struct mfc6_cache *)ptr;
339 
340 	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
341 	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
342 }
343 
344 static const struct rhashtable_params ip6mr_rht_params = {
345 	.head_offset = offsetof(struct mr_mfc, mnode),
346 	.key_offset = offsetof(struct mfc6_cache, cmparg),
347 	.key_len = sizeof(struct mfc6_cache_cmp_arg),
348 	.nelem_hint = 3,
349 	.locks_mul = 1,
350 	.obj_cmpfn = ip6mr_hash_cmp,
351 	.automatic_shrinking = true,
352 };
353 
354 static void ip6mr_new_table_set(struct mr_table *mrt,
355 				struct net *net)
356 {
357 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
358 	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
359 #endif
360 }
361 
362 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
363 	.mf6c_origin = IN6ADDR_ANY_INIT,
364 	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
365 };
366 
367 static struct mr_table_ops ip6mr_mr_table_ops = {
368 	.rht_params = &ip6mr_rht_params,
369 	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
370 };
371 
372 static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
373 {
374 	struct mr_table *mrt;
375 
376 	mrt = ip6mr_get_table(net, id);
377 	if (mrt)
378 		return mrt;
379 
380 	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
381 			      ipmr_expire_process, ip6mr_new_table_set);
382 }
383 
384 static void ip6mr_free_table(struct mr_table *mrt)
385 {
386 	del_timer_sync(&mrt->ipmr_expire_timer);
387 	mroute_clean_tables(mrt, true);
388 	rhltable_destroy(&mrt->mfc_hash);
389 	kfree(mrt);
390 }
391 
392 #ifdef CONFIG_PROC_FS
393 /* The /proc interfaces to multicast routing:
394  * /proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
395  */
396 
397 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
398 	__acquires(mrt_lock)
399 {
400 	struct mr_vif_iter *iter = seq->private;
401 	struct net *net = seq_file_net(seq);
402 	struct mr_table *mrt;
403 
404 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
405 	if (!mrt)
406 		return ERR_PTR(-ENOENT);
407 
408 	iter->mrt = mrt;
409 
410 	read_lock(&mrt_lock);
411 	return mr_vif_seq_start(seq, pos);
412 }
413 
414 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
415 	__releases(mrt_lock)
416 {
417 	read_unlock(&mrt_lock);
418 }
419 
420 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
421 {
422 	struct mr_vif_iter *iter = seq->private;
423 	struct mr_table *mrt = iter->mrt;
424 
425 	if (v == SEQ_START_TOKEN) {
426 		seq_puts(seq,
427 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
428 	} else {
429 		const struct vif_device *vif = v;
430 		const char *name = vif->dev ? vif->dev->name : "none";
431 
432 		seq_printf(seq,
433 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
434 			   vif - mrt->vif_table,
435 			   name, vif->bytes_in, vif->pkt_in,
436 			   vif->bytes_out, vif->pkt_out,
437 			   vif->flags);
438 	}
439 	return 0;
440 }
441 
442 static const struct seq_operations ip6mr_vif_seq_ops = {
443 	.start = ip6mr_vif_seq_start,
444 	.next  = mr_vif_seq_next,
445 	.stop  = ip6mr_vif_seq_stop,
446 	.show  = ip6mr_vif_seq_show,
447 };
448 
449 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
450 {
451 	struct net *net = seq_file_net(seq);
452 	struct mr_table *mrt;
453 
454 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
455 	if (!mrt)
456 		return ERR_PTR(-ENOENT);
457 
458 	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
459 }
460 
461 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
462 {
463 	int n;
464 
465 	if (v == SEQ_START_TOKEN) {
466 		seq_puts(seq,
467 			 "Group                            "
468 			 "Origin                           "
469 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
470 	} else {
471 		const struct mfc6_cache *mfc = v;
472 		const struct mr_mfc_iter *it = seq->private;
473 		struct mr_table *mrt = it->mrt;
474 
475 		seq_printf(seq, "%pI6 %pI6 %-3hd",
476 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
477 			   mfc->_c.mfc_parent);
478 
479 		if (it->cache != &mrt->mfc_unres_queue) {
480 			seq_printf(seq, " %8lu %8lu %8lu",
481 				   mfc->_c.mfc_un.res.pkt,
482 				   mfc->_c.mfc_un.res.bytes,
483 				   mfc->_c.mfc_un.res.wrong_if);
484 			for (n = mfc->_c.mfc_un.res.minvif;
485 			     n < mfc->_c.mfc_un.res.maxvif; n++) {
486 				if (VIF_EXISTS(mrt, n) &&
487 				    mfc->_c.mfc_un.res.ttls[n] < 255)
488 					seq_printf(seq,
489 						   " %2d:%-3d", n,
490 						   mfc->_c.mfc_un.res.ttls[n]);
491 			}
492 		} else {
493 			/* unresolved mfc_caches don't contain
494 			 * pkt, bytes and wrong_if values
495 			 */
496 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
497 		}
498 		seq_putc(seq, '\n');
499 	}
500 	return 0;
501 }
502 
503 static const struct seq_operations ipmr_mfc_seq_ops = {
504 	.start = ipmr_mfc_seq_start,
505 	.next  = mr_mfc_seq_next,
506 	.stop  = mr_mfc_seq_stop,
507 	.show  = ipmr_mfc_seq_show,
508 };
509 #endif
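
/* Illustrative output of the two files registered above (the interface
 * name and all counters are invented; the columns follow the seq_printf()
 * formats in ip6mr_vif_seq_show() and ipmr_mfc_seq_show()):
 *
 *	# cat /proc/net/ip6_mr_vif
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0          123456     789    654321     987 00000
 *
 *	# cat /proc/net/ip6_mr_cache
 *	Group                            Origin                           Iif      Pkts  Bytes     Wrong  Oifs
 *	ff0e::1                          2001:db8::1                      1          10     1200        0  2:1
 */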
510 
511 #ifdef CONFIG_IPV6_PIMSM_V2
512 
513 static int pim6_rcv(struct sk_buff *skb)
514 {
515 	struct pimreghdr *pim;
516 	struct ipv6hdr   *encap;
517 	struct net_device  *reg_dev = NULL;
518 	struct net *net = dev_net(skb->dev);
519 	struct mr_table *mrt;
520 	struct flowi6 fl6 = {
521 		.flowi6_iif	= skb->dev->ifindex,
522 		.flowi6_mark	= skb->mark,
523 	};
524 	int reg_vif_num;
525 
526 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
527 		goto drop;
528 
529 	pim = (struct pimreghdr *)skb_transport_header(skb);
530 	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
531 	    (pim->flags & PIM_NULL_REGISTER) ||
532 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
533 			     sizeof(*pim), IPPROTO_PIM,
534 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
535 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
536 		goto drop;
537 
538 	/* check if the inner packet is destined to a multicast group */
539 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
540 				   sizeof(*pim));
541 
542 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
543 	    encap->payload_len == 0 ||
544 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
545 		goto drop;
546 
547 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
548 		goto drop;
549 	reg_vif_num = mrt->mroute_reg_vif_num;
550 
551 	read_lock(&mrt_lock);
552 	if (reg_vif_num >= 0)
553 		reg_dev = mrt->vif_table[reg_vif_num].dev;
554 	if (reg_dev)
555 		dev_hold(reg_dev);
556 	read_unlock(&mrt_lock);
557 
558 	if (!reg_dev)
559 		goto drop;
560 
561 	skb->mac_header = skb->network_header;
562 	skb_pull(skb, (u8 *)encap - skb->data);
563 	skb_reset_network_header(skb);
564 	skb->protocol = htons(ETH_P_IPV6);
565 	skb->ip_summed = CHECKSUM_NONE;
566 
567 	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
568 
569 	netif_rx(skb);
570 
571 	dev_put(reg_dev);
572 	return 0;
573  drop:
574 	kfree_skb(skb);
575 	return 0;
576 }
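
/* For reference, the PIM Register packet accepted above is laid out like
 * this (a sketch after RFC 4601; only the parts checked here are shown):
 *
 *	+----------------------+ <- skb_transport_header(skb)
 *	| struct pimreghdr     |  type must be (PIM_VERSION << 4) |
 *	|                      |  PIM_TYPE_REGISTER; Null-Registers dropped
 *	+----------------------+
 *	| inner struct ipv6hdr |  daddr must be multicast; this inner packet
 *	| + payload            |  is what skb_pull()/netif_rx() re-inject
 *	+----------------------+  through the pim6reg device
 */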
577 
578 static const struct inet6_protocol pim6_protocol = {
579 	.handler	=	pim6_rcv,
580 };
581 
582 /* Service routines creating virtual interfaces: PIMREG */
583 
584 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
585 				      struct net_device *dev)
586 {
587 	struct net *net = dev_net(dev);
588 	struct mr_table *mrt;
589 	struct flowi6 fl6 = {
590 		.flowi6_oif	= dev->ifindex,
591 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
592 		.flowi6_mark	= skb->mark,
593 	};
594 	int err;
595 
596 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
597 	if (err < 0) {
598 		kfree_skb(skb);
599 		return err;
600 	}
601 
602 	read_lock(&mrt_lock);
603 	dev->stats.tx_bytes += skb->len;
604 	dev->stats.tx_packets++;
605 	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
606 	read_unlock(&mrt_lock);
607 	kfree_skb(skb);
608 	return NETDEV_TX_OK;
609 }
610 
611 static int reg_vif_get_iflink(const struct net_device *dev)
612 {
613 	return 0;
614 }
615 
616 static const struct net_device_ops reg_vif_netdev_ops = {
617 	.ndo_start_xmit	= reg_vif_xmit,
618 	.ndo_get_iflink = reg_vif_get_iflink,
619 };
620 
621 static void reg_vif_setup(struct net_device *dev)
622 {
623 	dev->type		= ARPHRD_PIMREG;
624 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
625 	dev->flags		= IFF_NOARP;
626 	dev->netdev_ops		= &reg_vif_netdev_ops;
627 	dev->needs_free_netdev	= true;
628 	dev->features		|= NETIF_F_NETNS_LOCAL;
629 }
630 
631 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
632 {
633 	struct net_device *dev;
634 	char name[IFNAMSIZ];
635 
636 	if (mrt->id == RT6_TABLE_DFLT)
637 		sprintf(name, "pim6reg");
638 	else
639 		sprintf(name, "pim6reg%u", mrt->id);
640 
641 	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
642 	if (!dev)
643 		return NULL;
644 
645 	dev_net_set(dev, net);
646 
647 	if (register_netdevice(dev)) {
648 		free_netdev(dev);
649 		return NULL;
650 	}
651 
652 	if (dev_open(dev))
653 		goto failure;
654 
655 	dev_hold(dev);
656 	return dev;
657 
658 failure:
659 	unregister_netdevice(dev);
660 	return NULL;
661 }
662 #endif
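
/* The device created above appears as "pim6reg" ("pim6reg<id>" for
 * non-default tables) once a MIFF_REGISTER mif is added via mif6_add()
 * below; decapsulated Register payloads are re-injected through it.
 */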
663 
664 static int call_ip6mr_vif_entry_notifiers(struct net *net,
665 					  enum fib_event_type event_type,
666 					  struct vif_device *vif,
667 					  mifi_t vif_index, u32 tb_id)
668 {
669 	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
670 				     vif, vif_index, tb_id,
671 				     &net->ipv6.ipmr_seq);
672 }
673 
674 static int call_ip6mr_mfc_entry_notifiers(struct net *net,
675 					  enum fib_event_type event_type,
676 					  struct mfc6_cache *mfc, u32 tb_id)
677 {
678 	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
679 				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
680 }
681 
682 /* Delete a VIF entry */
683 static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
684 		       struct list_head *head)
685 {
686 	struct vif_device *v;
687 	struct net_device *dev;
688 	struct inet6_dev *in6_dev;
689 
690 	if (vifi < 0 || vifi >= mrt->maxvif)
691 		return -EADDRNOTAVAIL;
692 
693 	v = &mrt->vif_table[vifi];
694 
695 	if (VIF_EXISTS(mrt, vifi))
696 		call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
697 					       FIB_EVENT_VIF_DEL, v, vifi,
698 					       mrt->id);
699 
700 	write_lock_bh(&mrt_lock);
701 	dev = v->dev;
702 	v->dev = NULL;
703 
704 	if (!dev) {
705 		write_unlock_bh(&mrt_lock);
706 		return -EADDRNOTAVAIL;
707 	}
708 
709 #ifdef CONFIG_IPV6_PIMSM_V2
710 	if (vifi == mrt->mroute_reg_vif_num)
711 		mrt->mroute_reg_vif_num = -1;
712 #endif
713 
714 	if (vifi + 1 == mrt->maxvif) {
715 		int tmp;
716 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
717 			if (VIF_EXISTS(mrt, tmp))
718 				break;
719 		}
720 		mrt->maxvif = tmp + 1;
721 	}
722 
723 	write_unlock_bh(&mrt_lock);
724 
725 	dev_set_allmulti(dev, -1);
726 
727 	in6_dev = __in6_dev_get(dev);
728 	if (in6_dev) {
729 		in6_dev->cnf.mc_forwarding--;
730 		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
731 					     NETCONFA_MC_FORWARDING,
732 					     dev->ifindex, &in6_dev->cnf);
733 	}
734 
735 	if ((v->flags & MIFF_REGISTER) && !notify)
736 		unregister_netdevice_queue(dev, head);
737 
738 	dev_put(dev);
739 	return 0;
740 }
741 
742 static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
743 {
744 	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
745 
746 	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
747 }
748 
749 static inline void ip6mr_cache_free(struct mfc6_cache *c)
750 {
751 	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
752 }
753 
754 /* Destroy an unresolved cache entry, killing queued skbs
755    and reporting an error to netlink readers.
756  */
757 
758 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
759 {
760 	struct net *net = read_pnet(&mrt->net);
761 	struct sk_buff *skb;
762 
763 	atomic_dec(&mrt->cache_resolve_queue_len);
764 
765 	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
766 		if (ipv6_hdr(skb)->version == 0) {
767 			struct nlmsghdr *nlh = skb_pull(skb,
768 							sizeof(struct ipv6hdr));
769 			nlh->nlmsg_type = NLMSG_ERROR;
770 			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
771 			skb_trim(skb, nlh->nlmsg_len);
772 			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
773 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
774 		} else
775 			kfree_skb(skb);
776 	}
777 
778 	ip6mr_cache_free(c);
779 }
780 
781 
782 /* Timer handler for the unresolved queue. */
783 
784 static void ipmr_do_expire_process(struct mr_table *mrt)
785 {
786 	unsigned long now = jiffies;
787 	unsigned long expires = 10 * HZ;
788 	struct mr_mfc *c, *next;
789 
790 	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
791 		if (time_after(c->mfc_un.unres.expires, now)) {
792 			/* not yet... */
793 			unsigned long interval = c->mfc_un.unres.expires - now;
794 			if (interval < expires)
795 				expires = interval;
796 			continue;
797 		}
798 
799 		list_del(&c->list);
800 		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
801 		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
802 	}
803 
804 	if (!list_empty(&mrt->mfc_unres_queue))
805 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
806 }
807 
808 static void ipmr_expire_process(struct timer_list *t)
809 {
810 	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
811 
812 	if (!spin_trylock(&mfc_unres_lock)) {
813 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
814 		return;
815 	}
816 
817 	if (!list_empty(&mrt->mfc_unres_queue))
818 		ipmr_do_expire_process(mrt);
819 
820 	spin_unlock(&mfc_unres_lock);
821 }
822 
823 /* Fill the oifs list. Called with mrt_lock held for writing. */
824 
825 static void ip6mr_update_thresholds(struct mr_table *mrt,
826 				    struct mr_mfc *cache,
827 				    unsigned char *ttls)
828 {
829 	int vifi;
830 
831 	cache->mfc_un.res.minvif = MAXMIFS;
832 	cache->mfc_un.res.maxvif = 0;
833 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
834 
835 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
836 		if (VIF_EXISTS(mrt, vifi) &&
837 		    ttls[vifi] && ttls[vifi] < 255) {
838 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
839 			if (cache->mfc_un.res.minvif > vifi)
840 				cache->mfc_un.res.minvif = vifi;
841 			if (cache->mfc_un.res.maxvif <= vifi)
842 				cache->mfc_un.res.maxvif = vifi + 1;
843 		}
844 	}
845 	cache->mfc_un.res.lastuse = jiffies;
846 }
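
/* Worked example: with ttls = { 255, 1, 255, 1 } and maxvif = 4, mifs 1
 * and 3 become output interfaces with threshold 1, minvif ends up 1 and
 * maxvif ends up 4, so the loop in ip6_mr_forward() scans mifs 3..1 only.
 */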
847 
848 static int mif6_add(struct net *net, struct mr_table *mrt,
849 		    struct mif6ctl *vifc, int mrtsock)
850 {
851 	int vifi = vifc->mif6c_mifi;
852 	struct vif_device *v = &mrt->vif_table[vifi];
853 	struct net_device *dev;
854 	struct inet6_dev *in6_dev;
855 	int err;
856 
857 	/* Is vif busy? */
858 	if (VIF_EXISTS(mrt, vifi))
859 		return -EADDRINUSE;
860 
861 	switch (vifc->mif6c_flags) {
862 #ifdef CONFIG_IPV6_PIMSM_V2
863 	case MIFF_REGISTER:
864 		/*
865 		 * Special-purpose VIF in PIM:
866 		 * all packets will be sent to the daemon.
867 		 */
868 		if (mrt->mroute_reg_vif_num >= 0)
869 			return -EADDRINUSE;
870 		dev = ip6mr_reg_vif(net, mrt);
871 		if (!dev)
872 			return -ENOBUFS;
873 		err = dev_set_allmulti(dev, 1);
874 		if (err) {
875 			unregister_netdevice(dev);
876 			dev_put(dev);
877 			return err;
878 		}
879 		break;
880 #endif
881 	case 0:
882 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
883 		if (!dev)
884 			return -EADDRNOTAVAIL;
885 		err = dev_set_allmulti(dev, 1);
886 		if (err) {
887 			dev_put(dev);
888 			return err;
889 		}
890 		break;
891 	default:
892 		return -EINVAL;
893 	}
894 
895 	in6_dev = __in6_dev_get(dev);
896 	if (in6_dev) {
897 		in6_dev->cnf.mc_forwarding++;
898 		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
899 					     NETCONFA_MC_FORWARDING,
900 					     dev->ifindex, &in6_dev->cnf);
901 	}
902 
903 	/* Fill in the VIF structures */
904 	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
905 			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
906 			MIFF_REGISTER);
907 
908 	/* And finish the update by writing the critical data */
909 	write_lock_bh(&mrt_lock);
910 	v->dev = dev;
911 #ifdef CONFIG_IPV6_PIMSM_V2
912 	if (v->flags & MIFF_REGISTER)
913 		mrt->mroute_reg_vif_num = vifi;
914 #endif
915 	if (vifi + 1 > mrt->maxvif)
916 		mrt->maxvif = vifi + 1;
917 	write_unlock_bh(&mrt_lock);
918 	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
919 				       v, vifi, mrt->id);
920 	return 0;
921 }
922 
923 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
924 					   const struct in6_addr *origin,
925 					   const struct in6_addr *mcastgrp)
926 {
927 	struct mfc6_cache_cmp_arg arg = {
928 		.mf6c_origin = *origin,
929 		.mf6c_mcastgrp = *mcastgrp,
930 	};
931 
932 	return mr_mfc_find(mrt, &arg);
933 }
934 
935 /* Look for a (*,G) entry */
936 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
937 					       struct in6_addr *mcastgrp,
938 					       mifi_t mifi)
939 {
940 	struct mfc6_cache_cmp_arg arg = {
941 		.mf6c_origin = in6addr_any,
942 		.mf6c_mcastgrp = *mcastgrp,
943 	};
944 
945 	if (ipv6_addr_any(mcastgrp))
946 		return mr_mfc_find_any_parent(mrt, mifi);
947 	return mr_mfc_find_any(mrt, mifi, &arg);
948 }
949 
950 /* Look for a (S,G,iif) entry if parent != -1 */
951 static struct mfc6_cache *
952 ip6mr_cache_find_parent(struct mr_table *mrt,
953 			const struct in6_addr *origin,
954 			const struct in6_addr *mcastgrp,
955 			int parent)
956 {
957 	struct mfc6_cache_cmp_arg arg = {
958 		.mf6c_origin = *origin,
959 		.mf6c_mcastgrp = *mcastgrp,
960 	};
961 
962 	return mr_mfc_find_parent(mrt, &arg, parent);
963 }
964 
965 /* Allocate a multicast cache entry */
966 static struct mfc6_cache *ip6mr_cache_alloc(void)
967 {
968 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
969 	if (!c)
970 		return NULL;
971 	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
972 	c->_c.mfc_un.res.minvif = MAXMIFS;
973 	c->_c.free = ip6mr_cache_free_rcu;
974 	refcount_set(&c->_c.mfc_un.res.refcount, 1);
975 	return c;
976 }
977 
978 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
979 {
980 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
981 	if (!c)
982 		return NULL;
983 	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
984 	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
985 	return c;
986 }
987 
988 /*
989  *	A cache entry has moved from the unresolved queue to a resolved state
990  */
991 
992 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
993 				struct mfc6_cache *uc, struct mfc6_cache *c)
994 {
995 	struct sk_buff *skb;
996 
997 	/*
998 	 *	Play the pending entries through our router
999 	 */
1000 
1001 	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1002 		if (ipv6_hdr(skb)->version == 0) {
1003 			struct nlmsghdr *nlh = skb_pull(skb,
1004 							sizeof(struct ipv6hdr));
1005 
1006 			if (mr_fill_mroute(mrt, skb, &c->_c,
1007 					   nlmsg_data(nlh)) > 0) {
1008 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1009 			} else {
1010 				nlh->nlmsg_type = NLMSG_ERROR;
1011 				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1012 				skb_trim(skb, nlh->nlmsg_len);
1013 				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1014 			}
1015 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1016 		} else
1017 			ip6_mr_forward(net, mrt, skb, c);
1018 	}
1019 }
1020 
1021 /*
1022  *	Bounce a cache query up to pim6sd and netlink.
1023  *
1024  *	Called under mrt_lock.
1025  */
1026 
1027 static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
1028 			      mifi_t mifi, int assert)
1029 {
1030 	struct sock *mroute6_sk;
1031 	struct sk_buff *skb;
1032 	struct mrt6msg *msg;
1033 	int ret;
1034 
1035 #ifdef CONFIG_IPV6_PIMSM_V2
1036 	if (assert == MRT6MSG_WHOLEPKT)
1037 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1038 						+sizeof(*msg));
1039 	else
1040 #endif
1041 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1042 
1043 	if (!skb)
1044 		return -ENOBUFS;
1045 
1046 	/* I suppose that internal messages
1047 	 * do not require checksums */
1048 
1049 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1050 
1051 #ifdef CONFIG_IPV6_PIMSM_V2
1052 	if (assert == MRT6MSG_WHOLEPKT) {
1053 		/* Ugly, but we have no choice with this interface.
1054 		   Duplicate old header, fix length etc.
1055 		   And all this only to mangle msg->im6_msgtype and
1056 		   to set msg->im6_mbz to "mbz" :-)
1057 		 */
1058 		skb_push(skb, -skb_network_offset(pkt));
1059 
1060 		skb_push(skb, sizeof(*msg));
1061 		skb_reset_transport_header(skb);
1062 		msg = (struct mrt6msg *)skb_transport_header(skb);
1063 		msg->im6_mbz = 0;
1064 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1065 		msg->im6_mif = mrt->mroute_reg_vif_num;
1066 		msg->im6_pad = 0;
1067 		msg->im6_src = ipv6_hdr(pkt)->saddr;
1068 		msg->im6_dst = ipv6_hdr(pkt)->daddr;
1069 
1070 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1071 	} else
1072 #endif
1073 	{
1074 	/*
1075 	 *	Copy the IP header
1076 	 */
1077 
1078 	skb_put(skb, sizeof(struct ipv6hdr));
1079 	skb_reset_network_header(skb);
1080 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1081 
1082 	/*
1083 	 *	Add our header
1084 	 */
1085 	skb_put(skb, sizeof(*msg));
1086 	skb_reset_transport_header(skb);
1087 	msg = (struct mrt6msg *)skb_transport_header(skb);
1088 
1089 	msg->im6_mbz = 0;
1090 	msg->im6_msgtype = assert;
1091 	msg->im6_mif = mifi;
1092 	msg->im6_pad = 0;
1093 	msg->im6_src = ipv6_hdr(pkt)->saddr;
1094 	msg->im6_dst = ipv6_hdr(pkt)->daddr;
1095 
1096 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1097 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1098 	}
1099 
1100 	rcu_read_lock();
1101 	mroute6_sk = rcu_dereference(mrt->mroute_sk);
1102 	if (!mroute6_sk) {
1103 		rcu_read_unlock();
1104 		kfree_skb(skb);
1105 		return -EINVAL;
1106 	}
1107 
1108 	mrt6msg_netlink_event(mrt, skb);
1109 
1110 	/* Deliver to user space multicast routing algorithms */
1111 	ret = sock_queue_rcv_skb(mroute6_sk, skb);
1112 	rcu_read_unlock();
1113 	if (ret < 0) {
1114 		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1115 		kfree_skb(skb);
1116 	}
1117 
1118 	return ret;
1119 }
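
/* Userspace side of this upcall (an illustrative sketch, not part of the
 * kernel build): for MRT6MSG_NOCACHE/MRT6MSG_WRONGMIF the queued buffer is
 * the copied IPv6 header followed by the struct mrt6msg built above, so a
 * daemon holding the mroute socket could consume it roughly like this.
 * mroute_sock and install_route() are hypothetical; error handling is
 * omitted.
 *
 *	unsigned char buf[8192];
 *	ssize_t n = read(mroute_sock, buf, sizeof(buf));
 *
 *	if (n >= (ssize_t)(sizeof(struct ip6_hdr) + sizeof(struct mrt6msg))) {
 *		struct mrt6msg *m =
 *			(struct mrt6msg *)(buf + sizeof(struct ip6_hdr));
 *
 *		if (m->im6_mbz == 0 && m->im6_msgtype == MRT6MSG_NOCACHE)
 *			install_route(&m->im6_src, &m->im6_dst, m->im6_mif);
 *	}
 */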
1120 
1121 /* Queue a packet for resolution on an unresolved cache entry (under mfc_unres_lock) */
1122 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
1123 				  struct sk_buff *skb)
1124 {
1125 	struct mfc6_cache *c;
1126 	bool found = false;
1127 	int err;
1128 
1129 	spin_lock_bh(&mfc_unres_lock);
1130 	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1131 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1132 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1133 			found = true;
1134 			break;
1135 		}
1136 	}
1137 
1138 	if (!found) {
1139 		/*
1140 		 *	Create a new entry if allowable
1141 		 */
1142 
1143 		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1144 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
1145 			spin_unlock_bh(&mfc_unres_lock);
1146 
1147 			kfree_skb(skb);
1148 			return -ENOBUFS;
1149 		}
1150 
1151 		/* Fill in the new cache entry */
1152 		c->_c.mfc_parent = -1;
1153 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1154 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1155 
1156 		/*
1157 		 *	Report the first packet to pim6sd
1158 		 */
1159 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1160 		if (err < 0) {
1161 			/* If the report failed, throw the cache entry
1162 			   out - Brad Parker
1163 			 */
1164 			spin_unlock_bh(&mfc_unres_lock);
1165 
1166 			ip6mr_cache_free(c);
1167 			kfree_skb(skb);
1168 			return err;
1169 		}
1170 
1171 		atomic_inc(&mrt->cache_resolve_queue_len);
1172 		list_add(&c->_c.list, &mrt->mfc_unres_queue);
1173 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1174 
1175 		ipmr_do_expire_process(mrt);
1176 	}
1177 
1178 	/* See if we can append the packet */
1179 	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1180 		kfree_skb(skb);
1181 		err = -ENOBUFS;
1182 	} else {
1183 		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1184 		err = 0;
1185 	}
1186 
1187 	spin_unlock_bh(&mfc_unres_lock);
1188 	return err;
1189 }
1190 
1191 /*
1192  *	MFC6 cache manipulation by user space
1193  */
1194 
1195 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1196 			    int parent)
1197 {
1198 	struct mfc6_cache *c;
1199 
1200 	/* The entries are added/deleted only under RTNL */
1201 	rcu_read_lock();
1202 	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1203 				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1204 	rcu_read_unlock();
1205 	if (!c)
1206 		return -ENOENT;
1207 	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1208 	list_del_rcu(&c->_c.list);
1209 
1210 	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1211 				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
1212 	mr6_netlink_event(mrt, c, RTM_DELROUTE);
1213 	mr_cache_put(&c->_c);
1214 	return 0;
1215 }
1216 
1217 static int ip6mr_device_event(struct notifier_block *this,
1218 			      unsigned long event, void *ptr)
1219 {
1220 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1221 	struct net *net = dev_net(dev);
1222 	struct mr_table *mrt;
1223 	struct vif_device *v;
1224 	int ct;
1225 
1226 	if (event != NETDEV_UNREGISTER)
1227 		return NOTIFY_DONE;
1228 
1229 	ip6mr_for_each_table(mrt, net) {
1230 		v = &mrt->vif_table[0];
1231 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1232 			if (v->dev == dev)
1233 				mif6_delete(mrt, ct, 1, NULL);
1234 		}
1235 	}
1236 
1237 	return NOTIFY_DONE;
1238 }
1239 
1240 static unsigned int ip6mr_seq_read(struct net *net)
1241 {
1242 	ASSERT_RTNL();
1243 
1244 	return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
1245 }
1246 
1247 static int ip6mr_dump(struct net *net, struct notifier_block *nb)
1248 {
1249 	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1250 		       ip6mr_mr_table_iter, &mrt_lock);
1251 }
1252 
1253 static struct notifier_block ip6_mr_notifier = {
1254 	.notifier_call = ip6mr_device_event
1255 };
1256 
1257 static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
1258 	.family		= RTNL_FAMILY_IP6MR,
1259 	.fib_seq_read	= ip6mr_seq_read,
1260 	.fib_dump	= ip6mr_dump,
1261 	.owner		= THIS_MODULE,
1262 };
1263 
1264 static int __net_init ip6mr_notifier_init(struct net *net)
1265 {
1266 	struct fib_notifier_ops *ops;
1267 
1268 	net->ipv6.ipmr_seq = 0;
1269 
1270 	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1271 	if (IS_ERR(ops))
1272 		return PTR_ERR(ops);
1273 
1274 	net->ipv6.ip6mr_notifier_ops = ops;
1275 
1276 	return 0;
1277 }
1278 
1279 static void __net_exit ip6mr_notifier_exit(struct net *net)
1280 {
1281 	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1282 	net->ipv6.ip6mr_notifier_ops = NULL;
1283 }
1284 
1285 /* Setup for IP multicast routing */
1286 static int __net_init ip6mr_net_init(struct net *net)
1287 {
1288 	int err;
1289 
1290 	err = ip6mr_notifier_init(net);
1291 	if (err)
1292 		return err;
1293 
1294 	err = ip6mr_rules_init(net);
1295 	if (err < 0)
1296 		goto ip6mr_rules_fail;
1297 
1298 #ifdef CONFIG_PROC_FS
1299 	err = -ENOMEM;
1300 	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
1301 			sizeof(struct mr_vif_iter)))
1302 		goto proc_vif_fail;
1303 	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
1304 			sizeof(struct mr_mfc_iter)))
1305 		goto proc_cache_fail;
1306 #endif
1307 
1308 	return 0;
1309 
1310 #ifdef CONFIG_PROC_FS
1311 proc_cache_fail:
1312 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1313 proc_vif_fail:
1314 	ip6mr_rules_exit(net);
1315 #endif
1316 ip6mr_rules_fail:
1317 	ip6mr_notifier_exit(net);
1318 	return err;
1319 }
1320 
1321 static void __net_exit ip6mr_net_exit(struct net *net)
1322 {
1323 #ifdef CONFIG_PROC_FS
1324 	remove_proc_entry("ip6_mr_cache", net->proc_net);
1325 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1326 #endif
1327 	ip6mr_rules_exit(net);
1328 	ip6mr_notifier_exit(net);
1329 }
1330 
1331 static struct pernet_operations ip6mr_net_ops = {
1332 	.init = ip6mr_net_init,
1333 	.exit = ip6mr_net_exit,
1334 };
1335 
1336 int __init ip6_mr_init(void)
1337 {
1338 	int err;
1339 
1340 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1341 				       sizeof(struct mfc6_cache),
1342 				       0, SLAB_HWCACHE_ALIGN,
1343 				       NULL);
1344 	if (!mrt_cachep)
1345 		return -ENOMEM;
1346 
1347 	err = register_pernet_subsys(&ip6mr_net_ops);
1348 	if (err)
1349 		goto reg_pernet_fail;
1350 
1351 	err = register_netdevice_notifier(&ip6_mr_notifier);
1352 	if (err)
1353 		goto reg_notif_fail;
1354 #ifdef CONFIG_IPV6_PIMSM_V2
1355 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1356 		pr_err("%s: can't add PIM protocol\n", __func__);
1357 		err = -EAGAIN;
1358 		goto add_proto_fail;
1359 	}
1360 #endif
1361 	err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
1362 				   NULL, ip6mr_rtm_dumproute, 0);
1363 	if (err == 0)
1364 		return 0;
1365 
1366 #ifdef CONFIG_IPV6_PIMSM_V2
1367 	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1368 add_proto_fail:
1369 	unregister_netdevice_notifier(&ip6_mr_notifier);
1370 #endif
1371 reg_notif_fail:
1372 	unregister_pernet_subsys(&ip6mr_net_ops);
1373 reg_pernet_fail:
1374 	kmem_cache_destroy(mrt_cachep);
1375 	return err;
1376 }
1377 
1378 void ip6_mr_cleanup(void)
1379 {
1380 	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
1381 #ifdef CONFIG_IPV6_PIMSM_V2
1382 	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1383 #endif
1384 	unregister_netdevice_notifier(&ip6_mr_notifier);
1385 	unregister_pernet_subsys(&ip6mr_net_ops);
1386 	kmem_cache_destroy(mrt_cachep);
1387 }
1388 
1389 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
1390 			 struct mf6cctl *mfc, int mrtsock, int parent)
1391 {
1392 	unsigned char ttls[MAXMIFS];
1393 	struct mfc6_cache *uc, *c;
1394 	struct mr_mfc *_uc;
1395 	bool found;
1396 	int i, err;
1397 
1398 	if (mfc->mf6cc_parent >= MAXMIFS)
1399 		return -ENFILE;
1400 
1401 	memset(ttls, 255, MAXMIFS);
1402 	for (i = 0; i < MAXMIFS; i++) {
1403 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1404 			ttls[i] = 1;
1405 	}
1406 
1407 	/* The entries are added/deleted only under RTNL */
1408 	rcu_read_lock();
1409 	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1410 				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1411 	rcu_read_unlock();
1412 	if (c) {
1413 		write_lock_bh(&mrt_lock);
1414 		c->_c.mfc_parent = mfc->mf6cc_parent;
1415 		ip6mr_update_thresholds(mrt, &c->_c, ttls);
1416 		if (!mrtsock)
1417 			c->_c.mfc_flags |= MFC_STATIC;
1418 		write_unlock_bh(&mrt_lock);
1419 		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1420 					       c, mrt->id);
1421 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1422 		return 0;
1423 	}
1424 
1425 	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1426 	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1427 		return -EINVAL;
1428 
1429 	c = ip6mr_cache_alloc();
1430 	if (!c)
1431 		return -ENOMEM;
1432 
1433 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1434 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1435 	c->_c.mfc_parent = mfc->mf6cc_parent;
1436 	ip6mr_update_thresholds(mrt, &c->_c, ttls);
1437 	if (!mrtsock)
1438 		c->_c.mfc_flags |= MFC_STATIC;
1439 
1440 	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
1441 				  ip6mr_rht_params);
1442 	if (err) {
1443 		pr_err("ip6mr: rhtable insert error %d\n", err);
1444 		ip6mr_cache_free(c);
1445 		return err;
1446 	}
1447 	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
1448 
1449 	/* Check to see if we resolved a queued entry. If so we
1450 	 * need to send the pending frames on and tidy up.
1451 	 */
1452 	found = false;
1453 	spin_lock_bh(&mfc_unres_lock);
1454 	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
1455 		uc = (struct mfc6_cache *)_uc;
1456 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1457 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1458 			list_del(&_uc->list);
1459 			atomic_dec(&mrt->cache_resolve_queue_len);
1460 			found = true;
1461 			break;
1462 		}
1463 	}
1464 	if (list_empty(&mrt->mfc_unres_queue))
1465 		del_timer(&mrt->ipmr_expire_timer);
1466 	spin_unlock_bh(&mfc_unres_lock);
1467 
1468 	if (found) {
1469 		ip6mr_cache_resolve(net, mrt, uc, c);
1470 		ip6mr_cache_free(uc);
1471 	}
1472 	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
1473 				       c, mrt->id);
1474 	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1475 	return 0;
1476 }
1477 
1478 /*
1479  *	Close the multicast socket, and clear the vif tables etc.
1480  */
1481 
1482 static void mroute_clean_tables(struct mr_table *mrt, bool all)
1483 {
1484 	struct mr_mfc *c, *tmp;
1485 	LIST_HEAD(list);
1486 	int i;
1487 
1488 	/* Shut down all active vif entries */
1489 	for (i = 0; i < mrt->maxvif; i++) {
1490 		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
1491 			continue;
1492 		mif6_delete(mrt, i, 0, &list);
1493 	}
1494 	unregister_netdevice_many(&list);
1495 
1496 	/* Wipe the cache */
1497 	list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1498 		if (!all && (c->mfc_flags & MFC_STATIC))
1499 			continue;
1500 		rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1501 		list_del_rcu(&c->list);
1502 		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
1503 		mr_cache_put(c);
1504 	}
1505 
1506 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1507 		spin_lock_bh(&mfc_unres_lock);
1508 		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1509 			list_del(&c->list);
1510 			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1511 						       FIB_EVENT_ENTRY_DEL,
1512 						       (struct mfc6_cache *)c,
1513 						       mrt->id);
1514 			mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1515 					  RTM_DELROUTE);
1516 			ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1517 		}
1518 		spin_unlock_bh(&mfc_unres_lock);
1519 	}
1520 }
1521 
1522 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1523 {
1524 	int err = 0;
1525 	struct net *net = sock_net(sk);
1526 
1527 	rtnl_lock();
1528 	write_lock_bh(&mrt_lock);
1529 	if (rtnl_dereference(mrt->mroute_sk)) {
1530 		err = -EADDRINUSE;
1531 	} else {
1532 		rcu_assign_pointer(mrt->mroute_sk, sk);
1533 		sock_set_flag(sk, SOCK_RCU_FREE);
1534 		net->ipv6.devconf_all->mc_forwarding++;
1535 	}
1536 	write_unlock_bh(&mrt_lock);
1537 
1538 	if (!err)
1539 		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1540 					     NETCONFA_MC_FORWARDING,
1541 					     NETCONFA_IFINDEX_ALL,
1542 					     net->ipv6.devconf_all);
1543 	rtnl_unlock();
1544 
1545 	return err;
1546 }
1547 
1548 int ip6mr_sk_done(struct sock *sk)
1549 {
1550 	int err = -EACCES;
1551 	struct net *net = sock_net(sk);
1552 	struct mr_table *mrt;
1553 
1554 	if (sk->sk_type != SOCK_RAW ||
1555 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1556 		return err;
1557 
1558 	rtnl_lock();
1559 	ip6mr_for_each_table(mrt, net) {
1560 		if (sk == rtnl_dereference(mrt->mroute_sk)) {
1561 			write_lock_bh(&mrt_lock);
1562 			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1563 			/* Note that mroute_sk had SOCK_RCU_FREE set,
1564 			 * so the RCU grace period before sk freeing
1565 			 * is guaranteed by sk_destruct()
1566 			 */
1567 			net->ipv6.devconf_all->mc_forwarding--;
1568 			write_unlock_bh(&mrt_lock);
1569 			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1570 						     NETCONFA_MC_FORWARDING,
1571 						     NETCONFA_IFINDEX_ALL,
1572 						     net->ipv6.devconf_all);
1573 
1574 			mroute_clean_tables(mrt, false);
1575 			err = 0;
1576 			break;
1577 		}
1578 	}
1579 	rtnl_unlock();
1580 
1581 	return err;
1582 }
1583 
1584 bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1585 {
1586 	struct mr_table *mrt;
1587 	struct flowi6 fl6 = {
1588 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
1589 		.flowi6_oif	= skb->dev->ifindex,
1590 		.flowi6_mark	= skb->mark,
1591 	};
1592 
1593 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1594 		return false;
1595 
1596 	return rcu_access_pointer(mrt->mroute_sk);
1597 }
1598 EXPORT_SYMBOL(mroute6_is_socket);
1599 
1600 /*
1601  *	Socket options and virtual interface manipulation. The whole
1602  *	virtual interface system is a complete heap, but unfortunately
1603  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1604  *	MOSPF/PIM router set up we can clean this up.
1605  */
1606 
1607 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1608 {
1609 	int ret, parent = 0;
1610 	struct mif6ctl vif;
1611 	struct mf6cctl mfc;
1612 	mifi_t mifi;
1613 	struct net *net = sock_net(sk);
1614 	struct mr_table *mrt;
1615 
1616 	if (sk->sk_type != SOCK_RAW ||
1617 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1618 		return -EOPNOTSUPP;
1619 
1620 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1621 	if (!mrt)
1622 		return -ENOENT;
1623 
1624 	if (optname != MRT6_INIT) {
1625 		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1626 		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
1627 			return -EACCES;
1628 	}
1629 
1630 	switch (optname) {
1631 	case MRT6_INIT:
1632 		if (optlen < sizeof(int))
1633 			return -EINVAL;
1634 
1635 		return ip6mr_sk_init(mrt, sk);
1636 
1637 	case MRT6_DONE:
1638 		return ip6mr_sk_done(sk);
1639 
1640 	case MRT6_ADD_MIF:
1641 		if (optlen < sizeof(vif))
1642 			return -EINVAL;
1643 		if (copy_from_user(&vif, optval, sizeof(vif)))
1644 			return -EFAULT;
1645 		if (vif.mif6c_mifi >= MAXMIFS)
1646 			return -ENFILE;
1647 		rtnl_lock();
1648 		ret = mif6_add(net, mrt, &vif,
1649 			       sk == rtnl_dereference(mrt->mroute_sk));
1650 		rtnl_unlock();
1651 		return ret;
1652 
1653 	case MRT6_DEL_MIF:
1654 		if (optlen < sizeof(mifi_t))
1655 			return -EINVAL;
1656 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1657 			return -EFAULT;
1658 		rtnl_lock();
1659 		ret = mif6_delete(mrt, mifi, 0, NULL);
1660 		rtnl_unlock();
1661 		return ret;
1662 
1663 	/*
1664 	 *	Manipulate the forwarding caches. These live
1665 	 *	in a sort of kernel/user symbiosis.
1666 	 */
1667 	case MRT6_ADD_MFC:
1668 	case MRT6_DEL_MFC:
1669 		parent = -1;
1670 		/* fall through */
1671 	case MRT6_ADD_MFC_PROXY:
1672 	case MRT6_DEL_MFC_PROXY:
1673 		if (optlen < sizeof(mfc))
1674 			return -EINVAL;
1675 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1676 			return -EFAULT;
1677 		if (parent == 0)
1678 			parent = mfc.mf6cc_parent;
1679 		rtnl_lock();
1680 		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1681 			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1682 		else
1683 			ret = ip6mr_mfc_add(net, mrt, &mfc,
1684 					    sk ==
1685 					    rtnl_dereference(mrt->mroute_sk),
1686 					    parent);
1687 		rtnl_unlock();
1688 		return ret;
1689 
1690 	/*
1691 	 *	Control PIM assert (activating PIM also activates asserts)
1692 	 */
1693 	case MRT6_ASSERT:
1694 	{
1695 		int v;
1696 
1697 		if (optlen != sizeof(v))
1698 			return -EINVAL;
1699 		if (get_user(v, (int __user *)optval))
1700 			return -EFAULT;
1701 		mrt->mroute_do_assert = v;
1702 		return 0;
1703 	}
1704 
1705 #ifdef CONFIG_IPV6_PIMSM_V2
1706 	case MRT6_PIM:
1707 	{
1708 		int v;
1709 
1710 		if (optlen != sizeof(v))
1711 			return -EINVAL;
1712 		if (get_user(v, (int __user *)optval))
1713 			return -EFAULT;
1714 		v = !!v;
1715 		rtnl_lock();
1716 		ret = 0;
1717 		if (v != mrt->mroute_do_pim) {
1718 			mrt->mroute_do_pim = v;
1719 			mrt->mroute_do_assert = v;
1720 		}
1721 		rtnl_unlock();
1722 		return ret;
1723 	}
1724 
1725 #endif
1726 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1727 	case MRT6_TABLE:
1728 	{
1729 		u32 v;
1730 
1731 		if (optlen != sizeof(u32))
1732 			return -EINVAL;
1733 		if (get_user(v, (u32 __user *)optval))
1734 			return -EFAULT;
1735 		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1736 		if (v != RT_TABLE_DEFAULT && v >= 100000000)
1737 			return -EINVAL;
1738 		if (sk == rcu_access_pointer(mrt->mroute_sk))
1739 			return -EBUSY;
1740 
1741 		rtnl_lock();
1742 		ret = 0;
1743 		mrt = ip6mr_new_table(net, v);
1744 		if (IS_ERR(mrt))
1745 			ret = PTR_ERR(mrt);
1746 		else
1747 			raw6_sk(sk)->ip6mr_table = v;
1748 		rtnl_unlock();
1749 		return ret;
1750 	}
1751 #endif
1752 	/*
1753 	 *	Spurious command, or MRT6_VERSION which you cannot
1754 	 *	set.
1755 	 */
1756 	default:
1757 		return -ENOPROTOOPT;
1758 	}
1759 }
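
/* An illustrative userspace sketch (not part of the kernel build) of the
 * option sequence a routing daemon might drive through this function. The
 * interface name, addresses and mif indices are invented, a second mif
 * (index 1) is assumed to be added the same way, and error handling is
 * omitted. Note that MRT6_TABLE, if used, must be set before MRT6_INIT,
 * since a socket that already owns its table gets -EBUSY above.
 *
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *	#include <arpa/inet.h>
 *	#include <net/if.h>
 *	#include <linux/mroute6.h>
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int on = 1;
 *	struct mif6ctl mif = {
 *		.mif6c_mifi = 0,
 *		.mif6c_pifi = if_nametoindex("eth0"),
 *	};
 *	struct mf6cctl mfc = { .mf6cc_parent = 0 };
 *
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &on, sizeof(on));
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));
 *
 *	mfc.mf6cc_origin.sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "2001:db8::1", &mfc.mf6cc_origin.sin6_addr);
 *	mfc.mf6cc_mcastgrp.sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "ff0e::1", &mfc.mf6cc_mcastgrp.sin6_addr);
 *	IF_SET(1, &mfc.mf6cc_ifset);
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MFC, &mfc, sizeof(mfc));
 */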
1760 
1761 /*
1762  *	Getsockopt support for the multicast routing system.
1763  */
1764 
1765 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1766 			  int __user *optlen)
1767 {
1768 	int olr;
1769 	int val;
1770 	struct net *net = sock_net(sk);
1771 	struct mr_table *mrt;
1772 
1773 	if (sk->sk_type != SOCK_RAW ||
1774 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1775 		return -EOPNOTSUPP;
1776 
1777 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1778 	if (!mrt)
1779 		return -ENOENT;
1780 
1781 	switch (optname) {
1782 	case MRT6_VERSION:
1783 		val = 0x0305;
1784 		break;
1785 #ifdef CONFIG_IPV6_PIMSM_V2
1786 	case MRT6_PIM:
1787 		val = mrt->mroute_do_pim;
1788 		break;
1789 #endif
1790 	case MRT6_ASSERT:
1791 		val = mrt->mroute_do_assert;
1792 		break;
1793 	default:
1794 		return -ENOPROTOOPT;
1795 	}
1796 
1797 	if (get_user(olr, optlen))
1798 		return -EFAULT;
1799 
1800 	olr = min_t(int, olr, sizeof(int));
1801 	if (olr < 0)
1802 		return -EINVAL;
1803 
1804 	if (put_user(olr, optlen))
1805 		return -EFAULT;
1806 	if (copy_to_user(optval, &val, olr))
1807 		return -EFAULT;
1808 	return 0;
1809 }
1810 
1811 /*
1812  *	The IP multicast ioctl support routines.
1813  */
1814 
1815 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1816 {
1817 	struct sioc_sg_req6 sr;
1818 	struct sioc_mif_req6 vr;
1819 	struct vif_device *vif;
1820 	struct mfc6_cache *c;
1821 	struct net *net = sock_net(sk);
1822 	struct mr_table *mrt;
1823 
1824 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1825 	if (!mrt)
1826 		return -ENOENT;
1827 
1828 	switch (cmd) {
1829 	case SIOCGETMIFCNT_IN6:
1830 		if (copy_from_user(&vr, arg, sizeof(vr)))
1831 			return -EFAULT;
1832 		if (vr.mifi >= mrt->maxvif)
1833 			return -EINVAL;
1834 		read_lock(&mrt_lock);
1835 		vif = &mrt->vif_table[vr.mifi];
1836 		if (VIF_EXISTS(mrt, vr.mifi)) {
1837 			vr.icount = vif->pkt_in;
1838 			vr.ocount = vif->pkt_out;
1839 			vr.ibytes = vif->bytes_in;
1840 			vr.obytes = vif->bytes_out;
1841 			read_unlock(&mrt_lock);
1842 
1843 			if (copy_to_user(arg, &vr, sizeof(vr)))
1844 				return -EFAULT;
1845 			return 0;
1846 		}
1847 		read_unlock(&mrt_lock);
1848 		return -EADDRNOTAVAIL;
1849 	case SIOCGETSGCNT_IN6:
1850 		if (copy_from_user(&sr, arg, sizeof(sr)))
1851 			return -EFAULT;
1852 
1853 		rcu_read_lock();
1854 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1855 		if (c) {
1856 			sr.pktcnt = c->_c.mfc_un.res.pkt;
1857 			sr.bytecnt = c->_c.mfc_un.res.bytes;
1858 			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1859 			rcu_read_unlock();
1860 
1861 			if (copy_to_user(arg, &sr, sizeof(sr)))
1862 				return -EFAULT;
1863 			return 0;
1864 		}
1865 		rcu_read_unlock();
1866 		return -EADDRNOTAVAIL;
1867 	default:
1868 		return -ENOIOCTLCMD;
1869 	}
1870 }
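
/* Illustrative sketch (not part of the kernel build) of querying these
 * counters from userspace over the same raw mroute socket s as above;
 * the addresses are invented and error handling is omitted.
 *
 *	struct sioc_sg_req6 sr;
 *
 *	memset(&sr, 0, sizeof(sr));
 *	inet_pton(AF_INET6, "2001:db8::1", &sr.src.sin6_addr);
 *	inet_pton(AF_INET6, "ff0e::1", &sr.grp.sin6_addr);
 *	if (ioctl(s, SIOCGETSGCNT_IN6, &sr) == 0)
 *		printf("pkts=%lu bytes=%lu wrong_if=%lu\n",
 *		       sr.pktcnt, sr.bytecnt, sr.wrong_if);
 */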
1871 
1872 #ifdef CONFIG_COMPAT
1873 struct compat_sioc_sg_req6 {
1874 	struct sockaddr_in6 src;
1875 	struct sockaddr_in6 grp;
1876 	compat_ulong_t pktcnt;
1877 	compat_ulong_t bytecnt;
1878 	compat_ulong_t wrong_if;
1879 };
1880 
1881 struct compat_sioc_mif_req6 {
1882 	mifi_t	mifi;
1883 	compat_ulong_t icount;
1884 	compat_ulong_t ocount;
1885 	compat_ulong_t ibytes;
1886 	compat_ulong_t obytes;
1887 };
1888 
1889 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1890 {
1891 	struct compat_sioc_sg_req6 sr;
1892 	struct compat_sioc_mif_req6 vr;
1893 	struct vif_device *vif;
1894 	struct mfc6_cache *c;
1895 	struct net *net = sock_net(sk);
1896 	struct mr_table *mrt;
1897 
1898 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1899 	if (!mrt)
1900 		return -ENOENT;
1901 
1902 	switch (cmd) {
1903 	case SIOCGETMIFCNT_IN6:
1904 		if (copy_from_user(&vr, arg, sizeof(vr)))
1905 			return -EFAULT;
1906 		if (vr.mifi >= mrt->maxvif)
1907 			return -EINVAL;
1908 		read_lock(&mrt_lock);
1909 		vif = &mrt->vif_table[vr.mifi];
1910 		if (VIF_EXISTS(mrt, vr.mifi)) {
1911 			vr.icount = vif->pkt_in;
1912 			vr.ocount = vif->pkt_out;
1913 			vr.ibytes = vif->bytes_in;
1914 			vr.obytes = vif->bytes_out;
1915 			read_unlock(&mrt_lock);
1916 
1917 			if (copy_to_user(arg, &vr, sizeof(vr)))
1918 				return -EFAULT;
1919 			return 0;
1920 		}
1921 		read_unlock(&mrt_lock);
1922 		return -EADDRNOTAVAIL;
1923 	case SIOCGETSGCNT_IN6:
1924 		if (copy_from_user(&sr, arg, sizeof(sr)))
1925 			return -EFAULT;
1926 
1927 		rcu_read_lock();
1928 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1929 		if (c) {
1930 			sr.pktcnt = c->_c.mfc_un.res.pkt;
1931 			sr.bytecnt = c->_c.mfc_un.res.bytes;
1932 			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1933 			rcu_read_unlock();
1934 
1935 			if (copy_to_user(arg, &sr, sizeof(sr)))
1936 				return -EFAULT;
1937 			return 0;
1938 		}
1939 		rcu_read_unlock();
1940 		return -EADDRNOTAVAIL;
1941 	default:
1942 		return -ENOIOCTLCMD;
1943 	}
1944 }
1945 #endif
1946 
1947 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
1948 {
1949 	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1950 			IPSTATS_MIB_OUTFORWDATAGRAMS);
1951 	__IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
1952 			IPSTATS_MIB_OUTOCTETS, skb->len);
1953 	return dst_output(net, sk, skb);
1954 }
1955 
1956 /*
1957  *	Processing handlers for ip6mr_forward
1958  */
1959 
1960 static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
1961 			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1962 {
1963 	struct ipv6hdr *ipv6h;
1964 	struct vif_device *vif = &mrt->vif_table[vifi];
1965 	struct net_device *dev;
1966 	struct dst_entry *dst;
1967 	struct flowi6 fl6;
1968 
1969 	if (!vif->dev)
1970 		goto out_free;
1971 
1972 #ifdef CONFIG_IPV6_PIMSM_V2
1973 	if (vif->flags & MIFF_REGISTER) {
1974 		vif->pkt_out++;
1975 		vif->bytes_out += skb->len;
1976 		vif->dev->stats.tx_bytes += skb->len;
1977 		vif->dev->stats.tx_packets++;
1978 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1979 		goto out_free;
1980 	}
1981 #endif
1982 
1983 	ipv6h = ipv6_hdr(skb);
1984 
1985 	fl6 = (struct flowi6) {
1986 		.flowi6_oif = vif->link,
1987 		.daddr = ipv6h->daddr,
1988 	};
1989 
1990 	dst = ip6_route_output(net, NULL, &fl6);
1991 	if (dst->error) {
1992 		dst_release(dst);
1993 		goto out_free;
1994 	}
1995 
1996 	skb_dst_drop(skb);
1997 	skb_dst_set(skb, dst);
1998 
1999 	/*
2000 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
2001 	 * locally not only before forwarding, but also after forwarding on
2002 	 * all output interfaces. Clearly, if the mrouter runs a multicast
2003 	 * program, that program should receive packets regardless of which
2004 	 * interface it has joined on.
2005 	 * If we did not do this, the program would have to join on all
2006 	 * interfaces. On the other hand, a multihomed host (or router, but
2007 	 * not an mrouter) cannot join on more than one interface - it would
2008 	 * receive multiple copies of each packet.
2009 	 */
2010 	dev = vif->dev;
2011 	skb->dev = dev;
2012 	vif->pkt_out++;
2013 	vif->bytes_out += skb->len;
2014 
2015 	/* We are about to modify the header: get a private, writable copy. */
2016 	/* XXX: extension headers? */
2017 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2018 		goto out_free;
2019 
2020 	ipv6h = ipv6_hdr(skb);
2021 	ipv6h->hop_limit--;
2022 
2023 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2024 
2025 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2026 		       net, NULL, skb, skb->dev, dev,
2027 		       ip6mr_forward2_finish);
2028 
2029 out_free:
2030 	kfree_skb(skb);
2031 	return 0;
2032 }
2033 
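/* Map a net_device back to its MIF index in @mrt; returns -1 when the
 * device is not a configured MIF. The scan runs downwards so the loop
 * counter itself yields -1 on a miss, with no separate "not found" test.
 */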
2034 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2035 {
2036 	int ct;
2037 
2038 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2039 		if (mrt->vif_table[ct].dev == dev)
2040 			break;
2041 	}
2042 	return ct;
2043 }
2044 
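/* Core forwarding decision for one cache entry: update the entry's
 * counters, verify the packet arrived on the expected upstream
 * interface (possibly raising a PIM assert via MRT6MSG_WRONGMIF when it
 * did not), then replicate the packet to every downstream MIF whose TTL
 * threshold the packet's hop limit exceeds.
 */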
2045 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2046 			   struct sk_buff *skb, struct mfc6_cache *c)
2047 {
2048 	int psend = -1;
2049 	int vif, ct;
2050 	int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2051 
2052 	vif = c->_c.mfc_parent;
2053 	c->_c.mfc_un.res.pkt++;
2054 	c->_c.mfc_un.res.bytes += skb->len;
2055 	c->_c.mfc_un.res.lastuse = jiffies;
2056 
2057 	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2058 		struct mfc6_cache *cache_proxy;
2059 
2060 		/* For an (*,G) entry, we only check that the incoming
2061 		 * interface is part of the static tree.
2062 		 */
2063 		rcu_read_lock();
2064 		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2065 		if (cache_proxy &&
2066 		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
2067 			rcu_read_unlock();
2068 			goto forward;
2069 		}
2070 		rcu_read_unlock();
2071 	}
2072 
2073 	/*
2074 	 * Wrong interface: drop packet and (maybe) send PIM assert.
2075 	 */
2076 	if (mrt->vif_table[vif].dev != skb->dev) {
2077 		c->_c.mfc_un.res.wrong_if++;
2078 
2079 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2080 		    /* PIM-SM uses asserts when switching from the RPT to the SPT,
2081 		       so we cannot check that the packet arrived on an oif.
2082 		       That is unfortunate, but otherwise we would need to move a
2083 		       pretty large chunk of pimd into the kernel. Ough... --ANK
2084 		     */
2085 		    (mrt->mroute_do_pim ||
2086 		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2087 		    time_after(jiffies,
2088 			       c->_c.mfc_un.res.last_assert +
2089 			       MFC_ASSERT_THRESH)) {
2090 			c->_c.mfc_un.res.last_assert = jiffies;
2091 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2092 		}
2093 		goto dont_forward;
2094 	}
2095 
2096 forward:
2097 	mrt->vif_table[vif].pkt_in++;
2098 	mrt->vif_table[vif].bytes_in += skb->len;
2099 
2100 	/*
2101 	 *	Forward the frame
2102 	 */
2103 	if (ipv6_addr_any(&c->mf6c_origin) &&
2104 	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
2105 		if (true_vifi >= 0 &&
2106 		    true_vifi != c->_c.mfc_parent &&
2107 		    ipv6_hdr(skb)->hop_limit >
2108 				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2109 			/* It's an (*,*) entry and the packet is not coming from
2110 			 * the upstream: forward the packet to the upstream
2111 			 * only.
2112 			 */
2113 			psend = c->_c.mfc_parent;
2114 			goto last_forward;
2115 		}
2116 		goto dont_forward;
2117 	}
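	/* Replicate to every eligible MIF: transmit a clone on each match
	 * found so far and keep the original skb for the last one (psend),
	 * saving one copy on the final transmission.
	 */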
2118 	for (ct = c->_c.mfc_un.res.maxvif - 1;
2119 	     ct >= c->_c.mfc_un.res.minvif; ct--) {
2120 		/* For (*,G) entry, don't forward to the incoming interface */
2121 		/* For a (*,G) entry, don't forward back to the incoming interface */
2122 		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2123 			if (psend != -1) {
2124 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2125 				if (skb2)
2126 					ip6mr_forward2(net, mrt, skb2,
2127 						       c, psend);
2128 			}
2129 			psend = ct;
2130 		}
2131 	}
2132 last_forward:
2133 	if (psend != -1) {
2134 		ip6mr_forward2(net, mrt, skb, c, psend);
2135 		return;
2136 	}
2137 
2138 dont_forward:
2139 	kfree_skb(skb);
2140 }
2141 
2142 
2143 /*
2144  *	Multicast packets for forwarding arrive here
2145  */
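/* Entry point from the IPv6 receive path: pick the multicast routing
 * table via the policy lookup, search the MFC for an (S,G) entry, fall
 * back to the (*,G)/(*,*) wildcards, and queue the packet as an
 * unresolved entry when nothing matches yet.
 */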
2146 
2147 int ip6_mr_input(struct sk_buff *skb)
2148 {
2149 	struct mfc6_cache *cache;
2150 	struct net *net = dev_net(skb->dev);
2151 	struct mr_table *mrt;
2152 	struct flowi6 fl6 = {
2153 		.flowi6_iif	= skb->dev->ifindex,
2154 		.flowi6_mark	= skb->mark,
2155 	};
2156 	int err;
2157 
2158 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2159 	if (err < 0) {
2160 		kfree_skb(skb);
2161 		return err;
2162 	}
2163 
2164 	read_lock(&mrt_lock);
2165 	cache = ip6mr_cache_find(mrt,
2166 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2167 	if (!cache) {
2168 		int vif = ip6mr_find_vif(mrt, skb->dev);
2169 
2170 		if (vif >= 0)
2171 			cache = ip6mr_cache_find_any(mrt,
2172 						     &ipv6_hdr(skb)->daddr,
2173 						     vif);
2174 	}
2175 
2176 	/*
2177 	 *	No usable cache entry
2178 	 */
2179 	if (!cache) {
2180 		int vif;
2181 
2182 		vif = ip6mr_find_vif(mrt, skb->dev);
2183 		if (vif >= 0) {
2184 			int err = ip6mr_cache_unresolved(mrt, vif, skb);
2185 			read_unlock(&mrt_lock);
2186 
2187 			return err;
2188 		}
2189 		read_unlock(&mrt_lock);
2190 		kfree_skb(skb);
2191 		return -ENODEV;
2192 	}
2193 
2194 	ip6_mr_forward(net, mrt, skb, cache);
2195 
2196 	read_unlock(&mrt_lock);
2197 
2198 	return 0;
2199 }
2200 
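/* Fill multicast route data into an RTM_GETROUTE reply. On a cache miss
 * we cannot answer synchronously: a minimal skb carrying only a skeleton
 * IPv6 header with the (src, dst) pair is queued as an unresolved entry
 * for the daemon to resolve.
 */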
2201 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2202 		    u32 portid)
2203 {
2204 	int err;
2205 	struct mr_table *mrt;
2206 	struct mfc6_cache *cache;
2207 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2208 
2209 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2210 	if (!mrt)
2211 		return -ENOENT;
2212 
2213 	read_lock(&mrt_lock);
2214 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2215 	if (!cache && skb->dev) {
2216 		int vif = ip6mr_find_vif(mrt, skb->dev);
2217 
2218 		if (vif >= 0)
2219 			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2220 						     vif);
2221 	}
2222 
2223 	if (!cache) {
2224 		struct sk_buff *skb2;
2225 		struct ipv6hdr *iph;
2226 		struct net_device *dev;
2227 		int vif;
2228 
2229 		dev = skb->dev;
2230 		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2231 			read_unlock(&mrt_lock);
2232 			return -ENODEV;
2233 		}
2234 
2235 		/* really correct? */
2236 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2237 		if (!skb2) {
2238 			read_unlock(&mrt_lock);
2239 			return -ENOMEM;
2240 		}
2241 
2242 		NETLINK_CB(skb2).portid = portid;
2243 		skb_reset_transport_header(skb2);
2244 
2245 		skb_put(skb2, sizeof(struct ipv6hdr));
2246 		skb_reset_network_header(skb2);
2247 
2248 		iph = ipv6_hdr(skb2);
2249 		iph->version = 0;
2250 		iph->priority = 0;
2251 		iph->flow_lbl[0] = 0;
2252 		iph->flow_lbl[1] = 0;
2253 		iph->flow_lbl[2] = 0;
2254 		iph->payload_len = 0;
2255 		iph->nexthdr = IPPROTO_NONE;
2256 		iph->hop_limit = 0;
2257 		iph->saddr = rt->rt6i_src.addr;
2258 		iph->daddr = rt->rt6i_dst.addr;
2259 
2260 		err = ip6mr_cache_unresolved(mrt, vif, skb2);
2261 		read_unlock(&mrt_lock);
2262 
2263 		return err;
2264 	}
2265 
2266 	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2267 	read_unlock(&mrt_lock);
2268 	return err;
2269 }
2270 
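/* Encode one MFC entry as an rtnetlink message: an rtmsg header of
 * family RTNL_FAMILY_IP6MR with 128-bit src/dst prefixes and the table
 * id, the origin/group addresses as RTA_SRC/RTA_DST, and the common
 * iif/oif/stats attributes filled in by mr_fill_mroute().
 */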
2271 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2272 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2273 			     int flags)
2274 {
2275 	struct nlmsghdr *nlh;
2276 	struct rtmsg *rtm;
2277 	int err;
2278 
2279 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2280 	if (!nlh)
2281 		return -EMSGSIZE;
2282 
2283 	rtm = nlmsg_data(nlh);
2284 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2285 	rtm->rtm_dst_len  = 128;
2286 	rtm->rtm_src_len  = 128;
2287 	rtm->rtm_tos      = 0;
2288 	rtm->rtm_table    = mrt->id;
2289 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2290 		goto nla_put_failure;
2291 	rtm->rtm_type = RTN_MULTICAST;
2292 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2293 	if (c->_c.mfc_flags & MFC_STATIC)
2294 		rtm->rtm_protocol = RTPROT_STATIC;
2295 	else
2296 		rtm->rtm_protocol = RTPROT_MROUTED;
2297 	rtm->rtm_flags    = 0;
2298 
2299 	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2300 	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2301 		goto nla_put_failure;
2302 	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2303 	/* do not break the dump if cache is unresolved */
2304 	if (err < 0 && err != -ENOENT)
2305 		goto nla_put_failure;
2306 
2307 	nlmsg_end(skb, nlh);
2308 	return 0;
2309 
2310 nla_put_failure:
2311 	nlmsg_cancel(skb, nlh);
2312 	return -EMSGSIZE;
2313 }
2314 
2315 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2316 			      u32 portid, u32 seq, struct mr_mfc *c,
2317 			      int cmd, int flags)
2318 {
2319 	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2320 				 cmd, flags);
2321 }
2322 
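/* Worst-case netlink message size for one MFC entry; used to size the
 * skb allocated in mr6_netlink_event(). Resolved entries additionally
 * carry the input interface, one rtnexthop per possible MIF and the
 * 64-bit statistics block.
 */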
2323 static int mr6_msgsize(bool unresolved, int maxvif)
2324 {
2325 	size_t len =
2326 		NLMSG_ALIGN(sizeof(struct rtmsg))
2327 		+ nla_total_size(4)	/* RTA_TABLE */
2328 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2329 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2330 		;
2331 
2332 	if (!unresolved)
2333 		len = len
2334 		      + nla_total_size(4)	/* RTA_IIF */
2335 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2336 		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2337 						/* RTA_MFC_STATS */
2338 		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2339 		;
2340 
2341 	return len;
2342 }
2343 
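/* Broadcast an MFC add/delete (@cmd) to RTNLGRP_IPV6_MROUTE listeners;
 * on allocation or fill failure the error is propagated to the group's
 * sockets via rtnl_set_sk_err() so userspace can detect the lost event.
 */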
2344 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2345 			      int cmd)
2346 {
2347 	struct net *net = read_pnet(&mrt->net);
2348 	struct sk_buff *skb;
2349 	int err = -ENOBUFS;
2350 
2351 	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2352 			GFP_ATOMIC);
2353 	if (!skb)
2354 		goto errout;
2355 
2356 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2357 	if (err < 0)
2358 		goto errout;
2359 
2360 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2361 	return;
2362 
2363 errout:
2364 	kfree_skb(skb);
2365 	if (err < 0)
2366 		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2367 }
2368 
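/* Size of an RTM_NEWCACHEREPORT message carrying @payloadlen bytes of
 * the reported packet.
 */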
2369 static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2370 {
2371 	size_t len =
2372 		NLMSG_ALIGN(sizeof(struct rtgenmsg))
2373 		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
2374 		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
2375 					/* IP6MRA_CREPORT_SRC_ADDR */
2376 		+ nla_total_size(sizeof(struct in6_addr))
2377 					/* IP6MRA_CREPORT_DST_ADDR */
2378 		+ nla_total_size(sizeof(struct in6_addr))
2379 					/* IP6MRA_CREPORT_PKT */
2380 		+ nla_total_size(payloadlen)
2381 		;
2382 
2383 	return len;
2384 }
2385 
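/* Mirror a cache report (the mrt6msg normally read by the daemon from
 * the mroute socket) to RTNLGRP_IPV6_MROUTE_R listeners, attaching the
 * reported packet itself as IP6MRA_CREPORT_PKT.
 */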
2386 static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
2387 {
2388 	struct net *net = read_pnet(&mrt->net);
2389 	struct nlmsghdr *nlh;
2390 	struct rtgenmsg *rtgenm;
2391 	struct mrt6msg *msg;
2392 	struct sk_buff *skb;
2393 	struct nlattr *nla;
2394 	int payloadlen;
2395 
2396 	payloadlen = pkt->len - sizeof(struct mrt6msg);
2397 	msg = (struct mrt6msg *)skb_transport_header(pkt);
2398 
2399 	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2400 	if (!skb)
2401 		goto errout;
2402 
2403 	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2404 			sizeof(struct rtgenmsg), 0);
2405 	if (!nlh)
2406 		goto errout;
2407 	rtgenm = nlmsg_data(nlh);
2408 	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2409 	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2410 	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2411 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2412 			     &msg->im6_src) ||
2413 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2414 			     &msg->im6_dst))
2415 		goto nla_put_failure;
2416 
2417 	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2418 	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2419 				  nla_data(nla), payloadlen))
2420 		goto nla_put_failure;
2421 
2422 	nlmsg_end(skb, nlh);
2423 
2424 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2425 	return;
2426 
2427 nla_put_failure:
2428 	nlmsg_cancel(skb, nlh);
2429 errout:
2430 	kfree_skb(skb);
2431 	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2432 }
2433 
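/* RTM_GETROUTE dump callback: walk every table and every cache entry
 * through the family-independent mr_rtm_dumproute() helper, encoding
 * each entry with _ip6mr_fill_mroute().
 */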
2434 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2435 {
2436 	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2437 				_ip6mr_fill_mroute, &mfc_unres_lock);
2438 }
2439