xref: /openbmc/linux/net/ipv6/ip6mr.c (revision 6396bb221514d2876fd6dc0aa2a1f240d99b37bb)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <linux/uaccess.h>
20 #include <linux/types.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
23 #include <linux/mm.h>
24 #include <linux/kernel.h>
25 #include <linux/fcntl.h>
26 #include <linux/stat.h>
27 #include <linux/socket.h>
28 #include <linux/inet.h>
29 #include <linux/netdevice.h>
30 #include <linux/inetdevice.h>
31 #include <linux/proc_fs.h>
32 #include <linux/seq_file.h>
33 #include <linux/init.h>
34 #include <linux/compat.h>
35 #include <net/protocol.h>
36 #include <linux/skbuff.h>
37 #include <net/raw.h>
38 #include <linux/notifier.h>
39 #include <linux/if_arp.h>
40 #include <net/checksum.h>
41 #include <net/netlink.h>
42 #include <net/fib_rules.h>
43 
44 #include <net/ipv6.h>
45 #include <net/ip6_route.h>
46 #include <linux/mroute6.h>
47 #include <linux/pim.h>
48 #include <net/addrconf.h>
49 #include <linux/netfilter_ipv6.h>
50 #include <linux/export.h>
51 #include <net/ip6_checksum.h>
52 #include <linux/netconf.h>
53 
54 struct ip6mr_rule {
55 	struct fib_rule		common;
56 };
57 
/* Result slot of a policy-rule lookup: the table picked by the rule action. */
struct ip6mr_result {
	struct mr_table *mrt;
};
61 
/* Big read-write lock protecting the vif table, the mrt cache and the
 * mroute socket state.  Changes are additionally serialized by rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);
67 
68 /* Multicast router control variables */
69 
/* Dedicated spinlock for the queue of unresolved cache entries */
static DEFINE_SPINLOCK(mfc_unres_lock);
72 
/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and is protected
   by the weak lock mrt_lock. The queue of unresolved entries is protected
   by the strong spinlock mfc_unres_lock.

   As a result, the data path takes no exclusive locks at all.
 */
80 
81 static struct kmem_cache *mrt_cachep __read_mostly;
82 
83 static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
84 static void ip6mr_free_table(struct mr_table *mrt);
85 
86 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
87 			   struct sk_buff *skb, struct mfc6_cache *cache);
88 static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
89 			      mifi_t mifi, int assert);
90 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
91 			      int cmd);
92 static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
93 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
94 			       struct netlink_callback *cb);
95 static void mroute_clean_tables(struct mr_table *mrt, bool all);
96 static void ipmr_expire_process(struct timer_list *t);
97 
98 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/* Iterate @mrt over every multicast routing table linked on the per-netns
 * mr6_tables list, using the RCU list iterator.
 */
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &(net)->ipv6.mr6_tables, list)
101 
102 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
103 					    struct mr_table *mrt)
104 {
105 	struct mr_table *ret;
106 
107 	if (!mrt)
108 		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
109 				     struct mr_table, list);
110 	else
111 		ret = list_entry_rcu(mrt->list.next,
112 				     struct mr_table, list);
113 
114 	if (&ret->list == &net->ipv6.mr6_tables)
115 		return NULL;
116 	return ret;
117 }
118 
119 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
120 {
121 	struct mr_table *mrt;
122 
123 	ip6mr_for_each_table(mrt, net) {
124 		if (mrt->id == id)
125 			return mrt;
126 	}
127 	return NULL;
128 }
129 
130 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
131 			    struct mr_table **mrt)
132 {
133 	int err;
134 	struct ip6mr_result res;
135 	struct fib_lookup_arg arg = {
136 		.result = &res,
137 		.flags = FIB_LOOKUP_NOREF,
138 	};
139 
140 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
141 			       flowi6_to_flowi(flp6), 0, &arg);
142 	if (err < 0)
143 		return err;
144 	*mrt = res.mrt;
145 	return 0;
146 }
147 
148 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
149 			     int flags, struct fib_lookup_arg *arg)
150 {
151 	struct ip6mr_result *res = arg->result;
152 	struct mr_table *mrt;
153 
154 	switch (rule->action) {
155 	case FR_ACT_TO_TBL:
156 		break;
157 	case FR_ACT_UNREACHABLE:
158 		return -ENETUNREACH;
159 	case FR_ACT_PROHIBIT:
160 		return -EACCES;
161 	case FR_ACT_BLACKHOLE:
162 	default:
163 		return -EINVAL;
164 	}
165 
166 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
167 	if (!mrt)
168 		return -EAGAIN;
169 	res->mrt = mrt;
170 	return 0;
171 }
172 
/* fib_rules .match callback: every flow matches, so always returns 1. */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	const int matched = 1;

	return matched;
}
177 
178 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
179 	FRA_GENERIC_POLICY,
180 };
181 
/* fib_rules .configure callback: there is nothing family-specific to set
 * up, so this always succeeds with 0.
 */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb,
				struct netlink_ext_ack *extack)
{
	const int ok = 0;

	return ok;
}
188 
/* fib_rules .compare callback: unconditionally returns 1. */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	const int same = 1;

	return same;
}
194 
195 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
196 			   struct fib_rule_hdr *frh)
197 {
198 	frh->dst_len = 0;
199 	frh->src_len = 0;
200 	frh->tos     = 0;
201 	return 0;
202 }
203 
204 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
205 	.family		= RTNL_FAMILY_IP6MR,
206 	.rule_size	= sizeof(struct ip6mr_rule),
207 	.addr_size	= sizeof(struct in6_addr),
208 	.action		= ip6mr_rule_action,
209 	.match		= ip6mr_rule_match,
210 	.configure	= ip6mr_rule_configure,
211 	.compare	= ip6mr_rule_compare,
212 	.fill		= ip6mr_rule_fill,
213 	.nlgroup	= RTNLGRP_IPV6_RULE,
214 	.policy		= ip6mr_rule_policy,
215 	.owner		= THIS_MODULE,
216 };
217 
218 static int __net_init ip6mr_rules_init(struct net *net)
219 {
220 	struct fib_rules_ops *ops;
221 	struct mr_table *mrt;
222 	int err;
223 
224 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
225 	if (IS_ERR(ops))
226 		return PTR_ERR(ops);
227 
228 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
229 
230 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
231 	if (IS_ERR(mrt)) {
232 		err = PTR_ERR(mrt);
233 		goto err1;
234 	}
235 
236 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
237 	if (err < 0)
238 		goto err2;
239 
240 	net->ipv6.mr6_rules_ops = ops;
241 	return 0;
242 
243 err2:
244 	ip6mr_free_table(mrt);
245 err1:
246 	fib_rules_unregister(ops);
247 	return err;
248 }
249 
250 static void __net_exit ip6mr_rules_exit(struct net *net)
251 {
252 	struct mr_table *mrt, *next;
253 
254 	rtnl_lock();
255 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
256 		list_del(&mrt->list);
257 		ip6mr_free_table(mrt);
258 	}
259 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
260 	rtnl_unlock();
261 }
262 
263 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
264 {
265 	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
266 }
267 
268 static unsigned int ip6mr_rules_seq_read(struct net *net)
269 {
270 	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
271 }
272 
273 bool ip6mr_rule_default(const struct fib_rule *rule)
274 {
275 	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
276 	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
277 }
278 EXPORT_SYMBOL(ip6mr_rule_default);
279 #else
/* Single-table build: the loop body runs at most once, for the
 * namespace's only table, and not at all when that pointer is NULL.
 */
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = (net)->ipv6.mrt6; mrt; mrt = NULL)
282 
283 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
284 					    struct mr_table *mrt)
285 {
286 	if (!mrt)
287 		return net->ipv6.mrt6;
288 	return NULL;
289 }
290 
291 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
292 {
293 	return net->ipv6.mrt6;
294 }
295 
296 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
297 			    struct mr_table **mrt)
298 {
299 	*mrt = net->ipv6.mrt6;
300 	return 0;
301 }
302 
303 static int __net_init ip6mr_rules_init(struct net *net)
304 {
305 	struct mr_table *mrt;
306 
307 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
308 	if (IS_ERR(mrt))
309 		return PTR_ERR(mrt);
310 	net->ipv6.mrt6 = mrt;
311 	return 0;
312 }
313 
314 static void __net_exit ip6mr_rules_exit(struct net *net)
315 {
316 	rtnl_lock();
317 	ip6mr_free_table(net->ipv6.mrt6);
318 	net->ipv6.mrt6 = NULL;
319 	rtnl_unlock();
320 }
321 
/* Single-table build: there are no rules to dump; always returns 0. */
static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
	const int nothing_to_dump = 0;

	return nothing_to_dump;
}
326 
/* Single-table build: no rule sequence counter exists; always returns 0. */
static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	const unsigned int seq = 0;

	return seq;
}
331 #endif
332 
333 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
334 			  const void *ptr)
335 {
336 	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
337 	struct mfc6_cache *c = (struct mfc6_cache *)ptr;
338 
339 	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
340 	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
341 }
342 
343 static const struct rhashtable_params ip6mr_rht_params = {
344 	.head_offset = offsetof(struct mr_mfc, mnode),
345 	.key_offset = offsetof(struct mfc6_cache, cmparg),
346 	.key_len = sizeof(struct mfc6_cache_cmp_arg),
347 	.nelem_hint = 3,
348 	.locks_mul = 1,
349 	.obj_cmpfn = ip6mr_hash_cmp,
350 	.automatic_shrinking = true,
351 };
352 
/* Publish a freshly allocated table.  With multiple-tables support it is
 * appended to the namespace's table list; otherwise this does nothing.
 */
static void ip6mr_new_table_set(struct mr_table *mrt,
				struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	struct list_head *tables = &net->ipv6.mr6_tables;

	list_add_tail_rcu(&mrt->list, tables);
#endif
}
360 
361 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
362 	.mf6c_origin = IN6ADDR_ANY_INIT,
363 	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
364 };
365 
366 static struct mr_table_ops ip6mr_mr_table_ops = {
367 	.rht_params = &ip6mr_rht_params,
368 	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
369 };
370 
371 static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
372 {
373 	struct mr_table *mrt;
374 
375 	mrt = ip6mr_get_table(net, id);
376 	if (mrt)
377 		return mrt;
378 
379 	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
380 			      ipmr_expire_process, ip6mr_new_table_set);
381 }
382 
383 static void ip6mr_free_table(struct mr_table *mrt)
384 {
385 	del_timer_sync(&mrt->ipmr_expire_timer);
386 	mroute_clean_tables(mrt, true);
387 	rhltable_destroy(&mrt->mfc_hash);
388 	kfree(mrt);
389 }
390 
391 #ifdef CONFIG_PROC_FS
392 /* The /proc interfaces to multicast routing
393  * /proc/ip6_mr_cache /proc/ip6_mr_vif
394  */
395 
396 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
397 	__acquires(mrt_lock)
398 {
399 	struct mr_vif_iter *iter = seq->private;
400 	struct net *net = seq_file_net(seq);
401 	struct mr_table *mrt;
402 
403 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
404 	if (!mrt)
405 		return ERR_PTR(-ENOENT);
406 
407 	iter->mrt = mrt;
408 
409 	read_lock(&mrt_lock);
410 	return mr_vif_seq_start(seq, pos);
411 }
412 
413 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
414 	__releases(mrt_lock)
415 {
416 	read_unlock(&mrt_lock);
417 }
418 
419 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
420 {
421 	struct mr_vif_iter *iter = seq->private;
422 	struct mr_table *mrt = iter->mrt;
423 
424 	if (v == SEQ_START_TOKEN) {
425 		seq_puts(seq,
426 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
427 	} else {
428 		const struct vif_device *vif = v;
429 		const char *name = vif->dev ? vif->dev->name : "none";
430 
431 		seq_printf(seq,
432 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
433 			   vif - mrt->vif_table,
434 			   name, vif->bytes_in, vif->pkt_in,
435 			   vif->bytes_out, vif->pkt_out,
436 			   vif->flags);
437 	}
438 	return 0;
439 }
440 
441 static const struct seq_operations ip6mr_vif_seq_ops = {
442 	.start = ip6mr_vif_seq_start,
443 	.next  = mr_vif_seq_next,
444 	.stop  = ip6mr_vif_seq_stop,
445 	.show  = ip6mr_vif_seq_show,
446 };
447 
448 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
449 {
450 	struct net *net = seq_file_net(seq);
451 	struct mr_table *mrt;
452 
453 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
454 	if (!mrt)
455 		return ERR_PTR(-ENOENT);
456 
457 	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
458 }
459 
460 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
461 {
462 	int n;
463 
464 	if (v == SEQ_START_TOKEN) {
465 		seq_puts(seq,
466 			 "Group                            "
467 			 "Origin                           "
468 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
469 	} else {
470 		const struct mfc6_cache *mfc = v;
471 		const struct mr_mfc_iter *it = seq->private;
472 		struct mr_table *mrt = it->mrt;
473 
474 		seq_printf(seq, "%pI6 %pI6 %-3hd",
475 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
476 			   mfc->_c.mfc_parent);
477 
478 		if (it->cache != &mrt->mfc_unres_queue) {
479 			seq_printf(seq, " %8lu %8lu %8lu",
480 				   mfc->_c.mfc_un.res.pkt,
481 				   mfc->_c.mfc_un.res.bytes,
482 				   mfc->_c.mfc_un.res.wrong_if);
483 			for (n = mfc->_c.mfc_un.res.minvif;
484 			     n < mfc->_c.mfc_un.res.maxvif; n++) {
485 				if (VIF_EXISTS(mrt, n) &&
486 				    mfc->_c.mfc_un.res.ttls[n] < 255)
487 					seq_printf(seq,
488 						   " %2d:%-3d", n,
489 						   mfc->_c.mfc_un.res.ttls[n]);
490 			}
491 		} else {
492 			/* unresolved mfc_caches don't contain
493 			 * pkt, bytes and wrong_if values
494 			 */
495 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
496 		}
497 		seq_putc(seq, '\n');
498 	}
499 	return 0;
500 }
501 
502 static const struct seq_operations ipmr_mfc_seq_ops = {
503 	.start = ipmr_mfc_seq_start,
504 	.next  = mr_mfc_seq_next,
505 	.stop  = mr_mfc_seq_stop,
506 	.show  = ipmr_mfc_seq_show,
507 };
508 #endif
509 
510 #ifdef CONFIG_IPV6_PIMSM_V2
511 
512 static int pim6_rcv(struct sk_buff *skb)
513 {
514 	struct pimreghdr *pim;
515 	struct ipv6hdr   *encap;
516 	struct net_device  *reg_dev = NULL;
517 	struct net *net = dev_net(skb->dev);
518 	struct mr_table *mrt;
519 	struct flowi6 fl6 = {
520 		.flowi6_iif	= skb->dev->ifindex,
521 		.flowi6_mark	= skb->mark,
522 	};
523 	int reg_vif_num;
524 
525 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
526 		goto drop;
527 
528 	pim = (struct pimreghdr *)skb_transport_header(skb);
529 	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
530 	    (pim->flags & PIM_NULL_REGISTER) ||
531 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
532 			     sizeof(*pim), IPPROTO_PIM,
533 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
534 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
535 		goto drop;
536 
537 	/* check if the inner packet is destined to mcast group */
538 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
539 				   sizeof(*pim));
540 
541 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
542 	    encap->payload_len == 0 ||
543 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
544 		goto drop;
545 
546 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
547 		goto drop;
548 	reg_vif_num = mrt->mroute_reg_vif_num;
549 
550 	read_lock(&mrt_lock);
551 	if (reg_vif_num >= 0)
552 		reg_dev = mrt->vif_table[reg_vif_num].dev;
553 	if (reg_dev)
554 		dev_hold(reg_dev);
555 	read_unlock(&mrt_lock);
556 
557 	if (!reg_dev)
558 		goto drop;
559 
560 	skb->mac_header = skb->network_header;
561 	skb_pull(skb, (u8 *)encap - skb->data);
562 	skb_reset_network_header(skb);
563 	skb->protocol = htons(ETH_P_IPV6);
564 	skb->ip_summed = CHECKSUM_NONE;
565 
566 	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
567 
568 	netif_rx(skb);
569 
570 	dev_put(reg_dev);
571 	return 0;
572  drop:
573 	kfree_skb(skb);
574 	return 0;
575 }
576 
577 static const struct inet6_protocol pim6_protocol = {
578 	.handler	=	pim6_rcv,
579 };
580 
581 /* Service routines creating virtual interfaces: PIMREG */
582 
583 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
584 				      struct net_device *dev)
585 {
586 	struct net *net = dev_net(dev);
587 	struct mr_table *mrt;
588 	struct flowi6 fl6 = {
589 		.flowi6_oif	= dev->ifindex,
590 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
591 		.flowi6_mark	= skb->mark,
592 	};
593 	int err;
594 
595 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
596 	if (err < 0) {
597 		kfree_skb(skb);
598 		return err;
599 	}
600 
601 	read_lock(&mrt_lock);
602 	dev->stats.tx_bytes += skb->len;
603 	dev->stats.tx_packets++;
604 	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
605 	read_unlock(&mrt_lock);
606 	kfree_skb(skb);
607 	return NETDEV_TX_OK;
608 }
609 
/* .ndo_get_iflink for the register vif: always reports 0. */
static int reg_vif_get_iflink(const struct net_device *dev)
{
	const int iflink = 0;

	return iflink;
}
614 
615 static const struct net_device_ops reg_vif_netdev_ops = {
616 	.ndo_start_xmit	= reg_vif_xmit,
617 	.ndo_get_iflink = reg_vif_get_iflink,
618 };
619 
620 static void reg_vif_setup(struct net_device *dev)
621 {
622 	dev->type		= ARPHRD_PIMREG;
623 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
624 	dev->flags		= IFF_NOARP;
625 	dev->netdev_ops		= &reg_vif_netdev_ops;
626 	dev->needs_free_netdev	= true;
627 	dev->features		|= NETIF_F_NETNS_LOCAL;
628 }
629 
630 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
631 {
632 	struct net_device *dev;
633 	char name[IFNAMSIZ];
634 
635 	if (mrt->id == RT6_TABLE_DFLT)
636 		sprintf(name, "pim6reg");
637 	else
638 		sprintf(name, "pim6reg%u", mrt->id);
639 
640 	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
641 	if (!dev)
642 		return NULL;
643 
644 	dev_net_set(dev, net);
645 
646 	if (register_netdevice(dev)) {
647 		free_netdev(dev);
648 		return NULL;
649 	}
650 
651 	if (dev_open(dev))
652 		goto failure;
653 
654 	dev_hold(dev);
655 	return dev;
656 
657 failure:
658 	unregister_netdevice(dev);
659 	return NULL;
660 }
661 #endif
662 
663 static int call_ip6mr_vif_entry_notifiers(struct net *net,
664 					  enum fib_event_type event_type,
665 					  struct vif_device *vif,
666 					  mifi_t vif_index, u32 tb_id)
667 {
668 	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
669 				     vif, vif_index, tb_id,
670 				     &net->ipv6.ipmr_seq);
671 }
672 
673 static int call_ip6mr_mfc_entry_notifiers(struct net *net,
674 					  enum fib_event_type event_type,
675 					  struct mfc6_cache *mfc, u32 tb_id)
676 {
677 	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
678 				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
679 }
680 
681 /* Delete a VIF entry */
682 static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
683 		       struct list_head *head)
684 {
685 	struct vif_device *v;
686 	struct net_device *dev;
687 	struct inet6_dev *in6_dev;
688 
689 	if (vifi < 0 || vifi >= mrt->maxvif)
690 		return -EADDRNOTAVAIL;
691 
692 	v = &mrt->vif_table[vifi];
693 
694 	if (VIF_EXISTS(mrt, vifi))
695 		call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
696 					       FIB_EVENT_VIF_DEL, v, vifi,
697 					       mrt->id);
698 
699 	write_lock_bh(&mrt_lock);
700 	dev = v->dev;
701 	v->dev = NULL;
702 
703 	if (!dev) {
704 		write_unlock_bh(&mrt_lock);
705 		return -EADDRNOTAVAIL;
706 	}
707 
708 #ifdef CONFIG_IPV6_PIMSM_V2
709 	if (vifi == mrt->mroute_reg_vif_num)
710 		mrt->mroute_reg_vif_num = -1;
711 #endif
712 
713 	if (vifi + 1 == mrt->maxvif) {
714 		int tmp;
715 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
716 			if (VIF_EXISTS(mrt, tmp))
717 				break;
718 		}
719 		mrt->maxvif = tmp + 1;
720 	}
721 
722 	write_unlock_bh(&mrt_lock);
723 
724 	dev_set_allmulti(dev, -1);
725 
726 	in6_dev = __in6_dev_get(dev);
727 	if (in6_dev) {
728 		in6_dev->cnf.mc_forwarding--;
729 		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
730 					     NETCONFA_MC_FORWARDING,
731 					     dev->ifindex, &in6_dev->cnf);
732 	}
733 
734 	if ((v->flags & MIFF_REGISTER) && !notify)
735 		unregister_netdevice_queue(dev, head);
736 
737 	dev_put(dev);
738 	return 0;
739 }
740 
741 static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
742 {
743 	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
744 
745 	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
746 }
747 
748 static inline void ip6mr_cache_free(struct mfc6_cache *c)
749 {
750 	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
751 }
752 
753 /* Destroy an unresolved cache entry, killing queued skbs
754    and reporting error to netlink readers.
755  */
756 
757 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
758 {
759 	struct net *net = read_pnet(&mrt->net);
760 	struct sk_buff *skb;
761 
762 	atomic_dec(&mrt->cache_resolve_queue_len);
763 
764 	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
765 		if (ipv6_hdr(skb)->version == 0) {
766 			struct nlmsghdr *nlh = skb_pull(skb,
767 							sizeof(struct ipv6hdr));
768 			nlh->nlmsg_type = NLMSG_ERROR;
769 			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
770 			skb_trim(skb, nlh->nlmsg_len);
771 			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
772 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
773 		} else
774 			kfree_skb(skb);
775 	}
776 
777 	ip6mr_cache_free(c);
778 }
779 
780 
781 /* Timer process for all the unresolved queue. */
782 
783 static void ipmr_do_expire_process(struct mr_table *mrt)
784 {
785 	unsigned long now = jiffies;
786 	unsigned long expires = 10 * HZ;
787 	struct mr_mfc *c, *next;
788 
789 	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
790 		if (time_after(c->mfc_un.unres.expires, now)) {
791 			/* not yet... */
792 			unsigned long interval = c->mfc_un.unres.expires - now;
793 			if (interval < expires)
794 				expires = interval;
795 			continue;
796 		}
797 
798 		list_del(&c->list);
799 		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
800 		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
801 	}
802 
803 	if (!list_empty(&mrt->mfc_unres_queue))
804 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
805 }
806 
807 static void ipmr_expire_process(struct timer_list *t)
808 {
809 	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
810 
811 	if (!spin_trylock(&mfc_unres_lock)) {
812 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
813 		return;
814 	}
815 
816 	if (!list_empty(&mrt->mfc_unres_queue))
817 		ipmr_do_expire_process(mrt);
818 
819 	spin_unlock(&mfc_unres_lock);
820 }
821 
822 /* Fill oifs list. It is called under write locked mrt_lock. */
823 
824 static void ip6mr_update_thresholds(struct mr_table *mrt,
825 				    struct mr_mfc *cache,
826 				    unsigned char *ttls)
827 {
828 	int vifi;
829 
830 	cache->mfc_un.res.minvif = MAXMIFS;
831 	cache->mfc_un.res.maxvif = 0;
832 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
833 
834 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
835 		if (VIF_EXISTS(mrt, vifi) &&
836 		    ttls[vifi] && ttls[vifi] < 255) {
837 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
838 			if (cache->mfc_un.res.minvif > vifi)
839 				cache->mfc_un.res.minvif = vifi;
840 			if (cache->mfc_un.res.maxvif <= vifi)
841 				cache->mfc_un.res.maxvif = vifi + 1;
842 		}
843 	}
844 	cache->mfc_un.res.lastuse = jiffies;
845 }
846 
847 static int mif6_add(struct net *net, struct mr_table *mrt,
848 		    struct mif6ctl *vifc, int mrtsock)
849 {
850 	int vifi = vifc->mif6c_mifi;
851 	struct vif_device *v = &mrt->vif_table[vifi];
852 	struct net_device *dev;
853 	struct inet6_dev *in6_dev;
854 	int err;
855 
856 	/* Is vif busy ? */
857 	if (VIF_EXISTS(mrt, vifi))
858 		return -EADDRINUSE;
859 
860 	switch (vifc->mif6c_flags) {
861 #ifdef CONFIG_IPV6_PIMSM_V2
862 	case MIFF_REGISTER:
863 		/*
864 		 * Special Purpose VIF in PIM
865 		 * All the packets will be sent to the daemon
866 		 */
867 		if (mrt->mroute_reg_vif_num >= 0)
868 			return -EADDRINUSE;
869 		dev = ip6mr_reg_vif(net, mrt);
870 		if (!dev)
871 			return -ENOBUFS;
872 		err = dev_set_allmulti(dev, 1);
873 		if (err) {
874 			unregister_netdevice(dev);
875 			dev_put(dev);
876 			return err;
877 		}
878 		break;
879 #endif
880 	case 0:
881 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
882 		if (!dev)
883 			return -EADDRNOTAVAIL;
884 		err = dev_set_allmulti(dev, 1);
885 		if (err) {
886 			dev_put(dev);
887 			return err;
888 		}
889 		break;
890 	default:
891 		return -EINVAL;
892 	}
893 
894 	in6_dev = __in6_dev_get(dev);
895 	if (in6_dev) {
896 		in6_dev->cnf.mc_forwarding++;
897 		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
898 					     NETCONFA_MC_FORWARDING,
899 					     dev->ifindex, &in6_dev->cnf);
900 	}
901 
902 	/* Fill in the VIF structures */
903 	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
904 			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
905 			MIFF_REGISTER);
906 
907 	/* And finish update writing critical data */
908 	write_lock_bh(&mrt_lock);
909 	v->dev = dev;
910 #ifdef CONFIG_IPV6_PIMSM_V2
911 	if (v->flags & MIFF_REGISTER)
912 		mrt->mroute_reg_vif_num = vifi;
913 #endif
914 	if (vifi + 1 > mrt->maxvif)
915 		mrt->maxvif = vifi + 1;
916 	write_unlock_bh(&mrt_lock);
917 	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
918 				       v, vifi, mrt->id);
919 	return 0;
920 }
921 
922 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
923 					   const struct in6_addr *origin,
924 					   const struct in6_addr *mcastgrp)
925 {
926 	struct mfc6_cache_cmp_arg arg = {
927 		.mf6c_origin = *origin,
928 		.mf6c_mcastgrp = *mcastgrp,
929 	};
930 
931 	return mr_mfc_find(mrt, &arg);
932 }
933 
934 /* Look for a (*,G) entry */
935 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
936 					       struct in6_addr *mcastgrp,
937 					       mifi_t mifi)
938 {
939 	struct mfc6_cache_cmp_arg arg = {
940 		.mf6c_origin = in6addr_any,
941 		.mf6c_mcastgrp = *mcastgrp,
942 	};
943 
944 	if (ipv6_addr_any(mcastgrp))
945 		return mr_mfc_find_any_parent(mrt, mifi);
946 	return mr_mfc_find_any(mrt, mifi, &arg);
947 }
948 
949 /* Look for a (S,G,iif) entry if parent != -1 */
950 static struct mfc6_cache *
951 ip6mr_cache_find_parent(struct mr_table *mrt,
952 			const struct in6_addr *origin,
953 			const struct in6_addr *mcastgrp,
954 			int parent)
955 {
956 	struct mfc6_cache_cmp_arg arg = {
957 		.mf6c_origin = *origin,
958 		.mf6c_mcastgrp = *mcastgrp,
959 	};
960 
961 	return mr_mfc_find_parent(mrt, &arg, parent);
962 }
963 
964 /* Allocate a multicast cache entry */
965 static struct mfc6_cache *ip6mr_cache_alloc(void)
966 {
967 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
968 	if (!c)
969 		return NULL;
970 	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
971 	c->_c.mfc_un.res.minvif = MAXMIFS;
972 	c->_c.free = ip6mr_cache_free_rcu;
973 	refcount_set(&c->_c.mfc_un.res.refcount, 1);
974 	return c;
975 }
976 
977 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
978 {
979 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
980 	if (!c)
981 		return NULL;
982 	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
983 	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
984 	return c;
985 }
986 
987 /*
988  *	A cache entry has gone into a resolved state from queued
989  */
990 
991 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
992 				struct mfc6_cache *uc, struct mfc6_cache *c)
993 {
994 	struct sk_buff *skb;
995 
996 	/*
997 	 *	Play the pending entries through our router
998 	 */
999 
1000 	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1001 		if (ipv6_hdr(skb)->version == 0) {
1002 			struct nlmsghdr *nlh = skb_pull(skb,
1003 							sizeof(struct ipv6hdr));
1004 
1005 			if (mr_fill_mroute(mrt, skb, &c->_c,
1006 					   nlmsg_data(nlh)) > 0) {
1007 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1008 			} else {
1009 				nlh->nlmsg_type = NLMSG_ERROR;
1010 				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1011 				skb_trim(skb, nlh->nlmsg_len);
1012 				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1013 			}
1014 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1015 		} else
1016 			ip6_mr_forward(net, mrt, skb, c);
1017 	}
1018 }
1019 
1020 /*
1021  *	Bounce a cache query up to pim6sd and netlink.
1022  *
1023  *	Called under mrt_lock.
1024  */
1025 
1026 static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
1027 			      mifi_t mifi, int assert)
1028 {
1029 	struct sock *mroute6_sk;
1030 	struct sk_buff *skb;
1031 	struct mrt6msg *msg;
1032 	int ret;
1033 
1034 #ifdef CONFIG_IPV6_PIMSM_V2
1035 	if (assert == MRT6MSG_WHOLEPKT)
1036 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1037 						+sizeof(*msg));
1038 	else
1039 #endif
1040 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1041 
1042 	if (!skb)
1043 		return -ENOBUFS;
1044 
1045 	/* I suppose that internal messages
1046 	 * do not require checksums */
1047 
1048 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1049 
1050 #ifdef CONFIG_IPV6_PIMSM_V2
1051 	if (assert == MRT6MSG_WHOLEPKT) {
1052 		/* Ugly, but we have no choice with this interface.
1053 		   Duplicate old header, fix length etc.
1054 		   And all this only to mangle msg->im6_msgtype and
1055 		   to set msg->im6_mbz to "mbz" :-)
1056 		 */
1057 		skb_push(skb, -skb_network_offset(pkt));
1058 
1059 		skb_push(skb, sizeof(*msg));
1060 		skb_reset_transport_header(skb);
1061 		msg = (struct mrt6msg *)skb_transport_header(skb);
1062 		msg->im6_mbz = 0;
1063 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1064 		msg->im6_mif = mrt->mroute_reg_vif_num;
1065 		msg->im6_pad = 0;
1066 		msg->im6_src = ipv6_hdr(pkt)->saddr;
1067 		msg->im6_dst = ipv6_hdr(pkt)->daddr;
1068 
1069 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1070 	} else
1071 #endif
1072 	{
1073 	/*
1074 	 *	Copy the IP header
1075 	 */
1076 
1077 	skb_put(skb, sizeof(struct ipv6hdr));
1078 	skb_reset_network_header(skb);
1079 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1080 
1081 	/*
1082 	 *	Add our header
1083 	 */
1084 	skb_put(skb, sizeof(*msg));
1085 	skb_reset_transport_header(skb);
1086 	msg = (struct mrt6msg *)skb_transport_header(skb);
1087 
1088 	msg->im6_mbz = 0;
1089 	msg->im6_msgtype = assert;
1090 	msg->im6_mif = mifi;
1091 	msg->im6_pad = 0;
1092 	msg->im6_src = ipv6_hdr(pkt)->saddr;
1093 	msg->im6_dst = ipv6_hdr(pkt)->daddr;
1094 
1095 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1096 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1097 	}
1098 
1099 	rcu_read_lock();
1100 	mroute6_sk = rcu_dereference(mrt->mroute_sk);
1101 	if (!mroute6_sk) {
1102 		rcu_read_unlock();
1103 		kfree_skb(skb);
1104 		return -EINVAL;
1105 	}
1106 
1107 	mrt6msg_netlink_event(mrt, skb);
1108 
1109 	/* Deliver to user space multicast routing algorithms */
1110 	ret = sock_queue_rcv_skb(mroute6_sk, skb);
1111 	rcu_read_unlock();
1112 	if (ret < 0) {
1113 		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1114 		kfree_skb(skb);
1115 	}
1116 
1117 	return ret;
1118 }
1119 
1120 /* Queue a packet for resolution. It gets locked cache entry! */
1121 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
1122 				  struct sk_buff *skb)
1123 {
1124 	struct mfc6_cache *c;
1125 	bool found = false;
1126 	int err;
1127 
1128 	spin_lock_bh(&mfc_unres_lock);
1129 	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1130 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1131 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1132 			found = true;
1133 			break;
1134 		}
1135 	}
1136 
1137 	if (!found) {
1138 		/*
1139 		 *	Create a new entry if allowable
1140 		 */
1141 
1142 		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1143 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
1144 			spin_unlock_bh(&mfc_unres_lock);
1145 
1146 			kfree_skb(skb);
1147 			return -ENOBUFS;
1148 		}
1149 
1150 		/* Fill in the new cache entry */
1151 		c->_c.mfc_parent = -1;
1152 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1153 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1154 
1155 		/*
1156 		 *	Reflect first query at pim6sd
1157 		 */
1158 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1159 		if (err < 0) {
1160 			/* If the report failed throw the cache entry
1161 			   out - Brad Parker
1162 			 */
1163 			spin_unlock_bh(&mfc_unres_lock);
1164 
1165 			ip6mr_cache_free(c);
1166 			kfree_skb(skb);
1167 			return err;
1168 		}
1169 
1170 		atomic_inc(&mrt->cache_resolve_queue_len);
1171 		list_add(&c->_c.list, &mrt->mfc_unres_queue);
1172 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1173 
1174 		ipmr_do_expire_process(mrt);
1175 	}
1176 
1177 	/* See if we can append the packet */
1178 	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1179 		kfree_skb(skb);
1180 		err = -ENOBUFS;
1181 	} else {
1182 		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1183 		err = 0;
1184 	}
1185 
1186 	spin_unlock_bh(&mfc_unres_lock);
1187 	return err;
1188 }
1189 
1190 /*
1191  *	MFC6 cache manipulation by user space
1192  */
1193 
1194 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1195 			    int parent)
1196 {
1197 	struct mfc6_cache *c;
1198 
1199 	/* The entries are added/deleted only under RTNL */
1200 	rcu_read_lock();
1201 	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1202 				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1203 	rcu_read_unlock();
1204 	if (!c)
1205 		return -ENOENT;
1206 	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1207 	list_del_rcu(&c->_c.list);
1208 
1209 	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1210 				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
1211 	mr6_netlink_event(mrt, c, RTM_DELROUTE);
1212 	mr_cache_put(&c->_c);
1213 	return 0;
1214 }
1215 
1216 static int ip6mr_device_event(struct notifier_block *this,
1217 			      unsigned long event, void *ptr)
1218 {
1219 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1220 	struct net *net = dev_net(dev);
1221 	struct mr_table *mrt;
1222 	struct vif_device *v;
1223 	int ct;
1224 
1225 	if (event != NETDEV_UNREGISTER)
1226 		return NOTIFY_DONE;
1227 
1228 	ip6mr_for_each_table(mrt, net) {
1229 		v = &mrt->vif_table[0];
1230 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1231 			if (v->dev == dev)
1232 				mif6_delete(mrt, ct, 1, NULL);
1233 		}
1234 	}
1235 
1236 	return NOTIFY_DONE;
1237 }
1238 
1239 static unsigned int ip6mr_seq_read(struct net *net)
1240 {
1241 	ASSERT_RTNL();
1242 
1243 	return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
1244 }
1245 
1246 static int ip6mr_dump(struct net *net, struct notifier_block *nb)
1247 {
1248 	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1249 		       ip6mr_mr_table_iter, &mrt_lock);
1250 }
1251 
1252 static struct notifier_block ip6_mr_notifier = {
1253 	.notifier_call = ip6mr_device_event
1254 };
1255 
1256 static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
1257 	.family		= RTNL_FAMILY_IP6MR,
1258 	.fib_seq_read	= ip6mr_seq_read,
1259 	.fib_dump	= ip6mr_dump,
1260 	.owner		= THIS_MODULE,
1261 };
1262 
1263 static int __net_init ip6mr_notifier_init(struct net *net)
1264 {
1265 	struct fib_notifier_ops *ops;
1266 
1267 	net->ipv6.ipmr_seq = 0;
1268 
1269 	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1270 	if (IS_ERR(ops))
1271 		return PTR_ERR(ops);
1272 
1273 	net->ipv6.ip6mr_notifier_ops = ops;
1274 
1275 	return 0;
1276 }
1277 
1278 static void __net_exit ip6mr_notifier_exit(struct net *net)
1279 {
1280 	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1281 	net->ipv6.ip6mr_notifier_ops = NULL;
1282 }
1283 
1284 /* Setup for IP multicast routing */
1285 static int __net_init ip6mr_net_init(struct net *net)
1286 {
1287 	int err;
1288 
1289 	err = ip6mr_notifier_init(net);
1290 	if (err)
1291 		return err;
1292 
1293 	err = ip6mr_rules_init(net);
1294 	if (err < 0)
1295 		goto ip6mr_rules_fail;
1296 
1297 #ifdef CONFIG_PROC_FS
1298 	err = -ENOMEM;
1299 	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
1300 			sizeof(struct mr_vif_iter)))
1301 		goto proc_vif_fail;
1302 	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
1303 			sizeof(struct mr_mfc_iter)))
1304 		goto proc_cache_fail;
1305 #endif
1306 
1307 	return 0;
1308 
1309 #ifdef CONFIG_PROC_FS
1310 proc_cache_fail:
1311 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1312 proc_vif_fail:
1313 	ip6mr_rules_exit(net);
1314 #endif
1315 ip6mr_rules_fail:
1316 	ip6mr_notifier_exit(net);
1317 	return err;
1318 }
1319 
1320 static void __net_exit ip6mr_net_exit(struct net *net)
1321 {
1322 #ifdef CONFIG_PROC_FS
1323 	remove_proc_entry("ip6_mr_cache", net->proc_net);
1324 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1325 #endif
1326 	ip6mr_rules_exit(net);
1327 	ip6mr_notifier_exit(net);
1328 }
1329 
1330 static struct pernet_operations ip6mr_net_ops = {
1331 	.init = ip6mr_net_init,
1332 	.exit = ip6mr_net_exit,
1333 };
1334 
1335 int __init ip6_mr_init(void)
1336 {
1337 	int err;
1338 
1339 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1340 				       sizeof(struct mfc6_cache),
1341 				       0, SLAB_HWCACHE_ALIGN,
1342 				       NULL);
1343 	if (!mrt_cachep)
1344 		return -ENOMEM;
1345 
1346 	err = register_pernet_subsys(&ip6mr_net_ops);
1347 	if (err)
1348 		goto reg_pernet_fail;
1349 
1350 	err = register_netdevice_notifier(&ip6_mr_notifier);
1351 	if (err)
1352 		goto reg_notif_fail;
1353 #ifdef CONFIG_IPV6_PIMSM_V2
1354 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1355 		pr_err("%s: can't add PIM protocol\n", __func__);
1356 		err = -EAGAIN;
1357 		goto add_proto_fail;
1358 	}
1359 #endif
1360 	err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
1361 				   NULL, ip6mr_rtm_dumproute, 0);
1362 	if (err == 0)
1363 		return 0;
1364 
1365 #ifdef CONFIG_IPV6_PIMSM_V2
1366 	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1367 add_proto_fail:
1368 	unregister_netdevice_notifier(&ip6_mr_notifier);
1369 #endif
1370 reg_notif_fail:
1371 	unregister_pernet_subsys(&ip6mr_net_ops);
1372 reg_pernet_fail:
1373 	kmem_cache_destroy(mrt_cachep);
1374 	return err;
1375 }
1376 
1377 void ip6_mr_cleanup(void)
1378 {
1379 	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
1380 #ifdef CONFIG_IPV6_PIMSM_V2
1381 	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1382 #endif
1383 	unregister_netdevice_notifier(&ip6_mr_notifier);
1384 	unregister_pernet_subsys(&ip6mr_net_ops);
1385 	kmem_cache_destroy(mrt_cachep);
1386 }
1387 
1388 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
1389 			 struct mf6cctl *mfc, int mrtsock, int parent)
1390 {
1391 	unsigned char ttls[MAXMIFS];
1392 	struct mfc6_cache *uc, *c;
1393 	struct mr_mfc *_uc;
1394 	bool found;
1395 	int i, err;
1396 
1397 	if (mfc->mf6cc_parent >= MAXMIFS)
1398 		return -ENFILE;
1399 
1400 	memset(ttls, 255, MAXMIFS);
1401 	for (i = 0; i < MAXMIFS; i++) {
1402 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1403 			ttls[i] = 1;
1404 	}
1405 
1406 	/* The entries are added/deleted only under RTNL */
1407 	rcu_read_lock();
1408 	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1409 				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1410 	rcu_read_unlock();
1411 	if (c) {
1412 		write_lock_bh(&mrt_lock);
1413 		c->_c.mfc_parent = mfc->mf6cc_parent;
1414 		ip6mr_update_thresholds(mrt, &c->_c, ttls);
1415 		if (!mrtsock)
1416 			c->_c.mfc_flags |= MFC_STATIC;
1417 		write_unlock_bh(&mrt_lock);
1418 		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1419 					       c, mrt->id);
1420 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1421 		return 0;
1422 	}
1423 
1424 	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1425 	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1426 		return -EINVAL;
1427 
1428 	c = ip6mr_cache_alloc();
1429 	if (!c)
1430 		return -ENOMEM;
1431 
1432 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1433 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1434 	c->_c.mfc_parent = mfc->mf6cc_parent;
1435 	ip6mr_update_thresholds(mrt, &c->_c, ttls);
1436 	if (!mrtsock)
1437 		c->_c.mfc_flags |= MFC_STATIC;
1438 
1439 	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
1440 				  ip6mr_rht_params);
1441 	if (err) {
1442 		pr_err("ip6mr: rhtable insert error %d\n", err);
1443 		ip6mr_cache_free(c);
1444 		return err;
1445 	}
1446 	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
1447 
1448 	/* Check to see if we resolved a queued list. If so we
1449 	 * need to send on the frames and tidy up.
1450 	 */
1451 	found = false;
1452 	spin_lock_bh(&mfc_unres_lock);
1453 	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
1454 		uc = (struct mfc6_cache *)_uc;
1455 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1456 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1457 			list_del(&_uc->list);
1458 			atomic_dec(&mrt->cache_resolve_queue_len);
1459 			found = true;
1460 			break;
1461 		}
1462 	}
1463 	if (list_empty(&mrt->mfc_unres_queue))
1464 		del_timer(&mrt->ipmr_expire_timer);
1465 	spin_unlock_bh(&mfc_unres_lock);
1466 
1467 	if (found) {
1468 		ip6mr_cache_resolve(net, mrt, uc, c);
1469 		ip6mr_cache_free(uc);
1470 	}
1471 	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
1472 				       c, mrt->id);
1473 	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1474 	return 0;
1475 }
1476 
1477 /*
1478  *	Close the multicast socket, and clear the vif tables etc
1479  */
1480 
1481 static void mroute_clean_tables(struct mr_table *mrt, bool all)
1482 {
1483 	struct mr_mfc *c, *tmp;
1484 	LIST_HEAD(list);
1485 	int i;
1486 
1487 	/* Shut down all active vif entries */
1488 	for (i = 0; i < mrt->maxvif; i++) {
1489 		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
1490 			continue;
1491 		mif6_delete(mrt, i, 0, &list);
1492 	}
1493 	unregister_netdevice_many(&list);
1494 
1495 	/* Wipe the cache */
1496 	list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1497 		if (!all && (c->mfc_flags & MFC_STATIC))
1498 			continue;
1499 		rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1500 		list_del_rcu(&c->list);
1501 		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
1502 		mr_cache_put(c);
1503 	}
1504 
1505 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1506 		spin_lock_bh(&mfc_unres_lock);
1507 		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1508 			list_del(&c->list);
1509 			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1510 						       FIB_EVENT_ENTRY_DEL,
1511 						       (struct mfc6_cache *)c,
1512 						       mrt->id);
1513 			mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1514 					  RTM_DELROUTE);
1515 			ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1516 		}
1517 		spin_unlock_bh(&mfc_unres_lock);
1518 	}
1519 }
1520 
1521 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1522 {
1523 	int err = 0;
1524 	struct net *net = sock_net(sk);
1525 
1526 	rtnl_lock();
1527 	write_lock_bh(&mrt_lock);
1528 	if (rtnl_dereference(mrt->mroute_sk)) {
1529 		err = -EADDRINUSE;
1530 	} else {
1531 		rcu_assign_pointer(mrt->mroute_sk, sk);
1532 		sock_set_flag(sk, SOCK_RCU_FREE);
1533 		net->ipv6.devconf_all->mc_forwarding++;
1534 	}
1535 	write_unlock_bh(&mrt_lock);
1536 
1537 	if (!err)
1538 		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1539 					     NETCONFA_MC_FORWARDING,
1540 					     NETCONFA_IFINDEX_ALL,
1541 					     net->ipv6.devconf_all);
1542 	rtnl_unlock();
1543 
1544 	return err;
1545 }
1546 
1547 int ip6mr_sk_done(struct sock *sk)
1548 {
1549 	int err = -EACCES;
1550 	struct net *net = sock_net(sk);
1551 	struct mr_table *mrt;
1552 
1553 	if (sk->sk_type != SOCK_RAW ||
1554 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1555 		return err;
1556 
1557 	rtnl_lock();
1558 	ip6mr_for_each_table(mrt, net) {
1559 		if (sk == rtnl_dereference(mrt->mroute_sk)) {
1560 			write_lock_bh(&mrt_lock);
1561 			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1562 			/* Note that mroute_sk had SOCK_RCU_FREE set,
1563 			 * so the RCU grace period before sk freeing
1564 			 * is guaranteed by sk_destruct()
1565 			 */
1566 			net->ipv6.devconf_all->mc_forwarding--;
1567 			write_unlock_bh(&mrt_lock);
1568 			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1569 						     NETCONFA_MC_FORWARDING,
1570 						     NETCONFA_IFINDEX_ALL,
1571 						     net->ipv6.devconf_all);
1572 
1573 			mroute_clean_tables(mrt, false);
1574 			err = 0;
1575 			break;
1576 		}
1577 	}
1578 	rtnl_unlock();
1579 
1580 	return err;
1581 }
1582 
1583 bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1584 {
1585 	struct mr_table *mrt;
1586 	struct flowi6 fl6 = {
1587 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
1588 		.flowi6_oif	= skb->dev->ifindex,
1589 		.flowi6_mark	= skb->mark,
1590 	};
1591 
1592 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1593 		return NULL;
1594 
1595 	return rcu_access_pointer(mrt->mroute_sk);
1596 }
1597 EXPORT_SYMBOL(mroute6_is_socket);
1598 
1599 /*
1600  *	Socket options and virtual interface manipulation. The whole
1601  *	virtual interface system is a complete heap, but unfortunately
1602  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1603  *	MOSPF/PIM router set up we can clean this up.
1604  */
1605 
1606 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1607 {
1608 	int ret, parent = 0;
1609 	struct mif6ctl vif;
1610 	struct mf6cctl mfc;
1611 	mifi_t mifi;
1612 	struct net *net = sock_net(sk);
1613 	struct mr_table *mrt;
1614 
1615 	if (sk->sk_type != SOCK_RAW ||
1616 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1617 		return -EOPNOTSUPP;
1618 
1619 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1620 	if (!mrt)
1621 		return -ENOENT;
1622 
1623 	if (optname != MRT6_INIT) {
1624 		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1625 		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
1626 			return -EACCES;
1627 	}
1628 
1629 	switch (optname) {
1630 	case MRT6_INIT:
1631 		if (optlen < sizeof(int))
1632 			return -EINVAL;
1633 
1634 		return ip6mr_sk_init(mrt, sk);
1635 
1636 	case MRT6_DONE:
1637 		return ip6mr_sk_done(sk);
1638 
1639 	case MRT6_ADD_MIF:
1640 		if (optlen < sizeof(vif))
1641 			return -EINVAL;
1642 		if (copy_from_user(&vif, optval, sizeof(vif)))
1643 			return -EFAULT;
1644 		if (vif.mif6c_mifi >= MAXMIFS)
1645 			return -ENFILE;
1646 		rtnl_lock();
1647 		ret = mif6_add(net, mrt, &vif,
1648 			       sk == rtnl_dereference(mrt->mroute_sk));
1649 		rtnl_unlock();
1650 		return ret;
1651 
1652 	case MRT6_DEL_MIF:
1653 		if (optlen < sizeof(mifi_t))
1654 			return -EINVAL;
1655 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1656 			return -EFAULT;
1657 		rtnl_lock();
1658 		ret = mif6_delete(mrt, mifi, 0, NULL);
1659 		rtnl_unlock();
1660 		return ret;
1661 
1662 	/*
1663 	 *	Manipulate the forwarding caches. These live
1664 	 *	in a sort of kernel/user symbiosis.
1665 	 */
1666 	case MRT6_ADD_MFC:
1667 	case MRT6_DEL_MFC:
1668 		parent = -1;
1669 		/* fall through */
1670 	case MRT6_ADD_MFC_PROXY:
1671 	case MRT6_DEL_MFC_PROXY:
1672 		if (optlen < sizeof(mfc))
1673 			return -EINVAL;
1674 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1675 			return -EFAULT;
1676 		if (parent == 0)
1677 			parent = mfc.mf6cc_parent;
1678 		rtnl_lock();
1679 		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1680 			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1681 		else
1682 			ret = ip6mr_mfc_add(net, mrt, &mfc,
1683 					    sk ==
1684 					    rtnl_dereference(mrt->mroute_sk),
1685 					    parent);
1686 		rtnl_unlock();
1687 		return ret;
1688 
1689 	/*
1690 	 *	Control PIM assert (to activate pim will activate assert)
1691 	 */
1692 	case MRT6_ASSERT:
1693 	{
1694 		int v;
1695 
1696 		if (optlen != sizeof(v))
1697 			return -EINVAL;
1698 		if (get_user(v, (int __user *)optval))
1699 			return -EFAULT;
1700 		mrt->mroute_do_assert = v;
1701 		return 0;
1702 	}
1703 
1704 #ifdef CONFIG_IPV6_PIMSM_V2
1705 	case MRT6_PIM:
1706 	{
1707 		int v;
1708 
1709 		if (optlen != sizeof(v))
1710 			return -EINVAL;
1711 		if (get_user(v, (int __user *)optval))
1712 			return -EFAULT;
1713 		v = !!v;
1714 		rtnl_lock();
1715 		ret = 0;
1716 		if (v != mrt->mroute_do_pim) {
1717 			mrt->mroute_do_pim = v;
1718 			mrt->mroute_do_assert = v;
1719 		}
1720 		rtnl_unlock();
1721 		return ret;
1722 	}
1723 
1724 #endif
1725 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1726 	case MRT6_TABLE:
1727 	{
1728 		u32 v;
1729 
1730 		if (optlen != sizeof(u32))
1731 			return -EINVAL;
1732 		if (get_user(v, (u32 __user *)optval))
1733 			return -EFAULT;
1734 		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1735 		if (v != RT_TABLE_DEFAULT && v >= 100000000)
1736 			return -EINVAL;
1737 		if (sk == rcu_access_pointer(mrt->mroute_sk))
1738 			return -EBUSY;
1739 
1740 		rtnl_lock();
1741 		ret = 0;
1742 		mrt = ip6mr_new_table(net, v);
1743 		if (IS_ERR(mrt))
1744 			ret = PTR_ERR(mrt);
1745 		else
1746 			raw6_sk(sk)->ip6mr_table = v;
1747 		rtnl_unlock();
1748 		return ret;
1749 	}
1750 #endif
1751 	/*
1752 	 *	Spurious command, or MRT6_VERSION which you cannot
1753 	 *	set.
1754 	 */
1755 	default:
1756 		return -ENOPROTOOPT;
1757 	}
1758 }
1759 
1760 /*
1761  *	Getsock opt support for the multicast routing system.
1762  */
1763 
1764 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1765 			  int __user *optlen)
1766 {
1767 	int olr;
1768 	int val;
1769 	struct net *net = sock_net(sk);
1770 	struct mr_table *mrt;
1771 
1772 	if (sk->sk_type != SOCK_RAW ||
1773 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1774 		return -EOPNOTSUPP;
1775 
1776 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1777 	if (!mrt)
1778 		return -ENOENT;
1779 
1780 	switch (optname) {
1781 	case MRT6_VERSION:
1782 		val = 0x0305;
1783 		break;
1784 #ifdef CONFIG_IPV6_PIMSM_V2
1785 	case MRT6_PIM:
1786 		val = mrt->mroute_do_pim;
1787 		break;
1788 #endif
1789 	case MRT6_ASSERT:
1790 		val = mrt->mroute_do_assert;
1791 		break;
1792 	default:
1793 		return -ENOPROTOOPT;
1794 	}
1795 
1796 	if (get_user(olr, optlen))
1797 		return -EFAULT;
1798 
1799 	olr = min_t(int, olr, sizeof(int));
1800 	if (olr < 0)
1801 		return -EINVAL;
1802 
1803 	if (put_user(olr, optlen))
1804 		return -EFAULT;
1805 	if (copy_to_user(optval, &val, olr))
1806 		return -EFAULT;
1807 	return 0;
1808 }
1809 
1810 /*
1811  *	The IP multicast ioctl support routines.
1812  */
1813 
1814 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1815 {
1816 	struct sioc_sg_req6 sr;
1817 	struct sioc_mif_req6 vr;
1818 	struct vif_device *vif;
1819 	struct mfc6_cache *c;
1820 	struct net *net = sock_net(sk);
1821 	struct mr_table *mrt;
1822 
1823 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1824 	if (!mrt)
1825 		return -ENOENT;
1826 
1827 	switch (cmd) {
1828 	case SIOCGETMIFCNT_IN6:
1829 		if (copy_from_user(&vr, arg, sizeof(vr)))
1830 			return -EFAULT;
1831 		if (vr.mifi >= mrt->maxvif)
1832 			return -EINVAL;
1833 		read_lock(&mrt_lock);
1834 		vif = &mrt->vif_table[vr.mifi];
1835 		if (VIF_EXISTS(mrt, vr.mifi)) {
1836 			vr.icount = vif->pkt_in;
1837 			vr.ocount = vif->pkt_out;
1838 			vr.ibytes = vif->bytes_in;
1839 			vr.obytes = vif->bytes_out;
1840 			read_unlock(&mrt_lock);
1841 
1842 			if (copy_to_user(arg, &vr, sizeof(vr)))
1843 				return -EFAULT;
1844 			return 0;
1845 		}
1846 		read_unlock(&mrt_lock);
1847 		return -EADDRNOTAVAIL;
1848 	case SIOCGETSGCNT_IN6:
1849 		if (copy_from_user(&sr, arg, sizeof(sr)))
1850 			return -EFAULT;
1851 
1852 		rcu_read_lock();
1853 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1854 		if (c) {
1855 			sr.pktcnt = c->_c.mfc_un.res.pkt;
1856 			sr.bytecnt = c->_c.mfc_un.res.bytes;
1857 			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1858 			rcu_read_unlock();
1859 
1860 			if (copy_to_user(arg, &sr, sizeof(sr)))
1861 				return -EFAULT;
1862 			return 0;
1863 		}
1864 		rcu_read_unlock();
1865 		return -EADDRNOTAVAIL;
1866 	default:
1867 		return -ENOIOCTLCMD;
1868 	}
1869 }
1870 
1871 #ifdef CONFIG_COMPAT
1872 struct compat_sioc_sg_req6 {
1873 	struct sockaddr_in6 src;
1874 	struct sockaddr_in6 grp;
1875 	compat_ulong_t pktcnt;
1876 	compat_ulong_t bytecnt;
1877 	compat_ulong_t wrong_if;
1878 };
1879 
1880 struct compat_sioc_mif_req6 {
1881 	mifi_t	mifi;
1882 	compat_ulong_t icount;
1883 	compat_ulong_t ocount;
1884 	compat_ulong_t ibytes;
1885 	compat_ulong_t obytes;
1886 };
1887 
1888 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1889 {
1890 	struct compat_sioc_sg_req6 sr;
1891 	struct compat_sioc_mif_req6 vr;
1892 	struct vif_device *vif;
1893 	struct mfc6_cache *c;
1894 	struct net *net = sock_net(sk);
1895 	struct mr_table *mrt;
1896 
1897 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1898 	if (!mrt)
1899 		return -ENOENT;
1900 
1901 	switch (cmd) {
1902 	case SIOCGETMIFCNT_IN6:
1903 		if (copy_from_user(&vr, arg, sizeof(vr)))
1904 			return -EFAULT;
1905 		if (vr.mifi >= mrt->maxvif)
1906 			return -EINVAL;
1907 		read_lock(&mrt_lock);
1908 		vif = &mrt->vif_table[vr.mifi];
1909 		if (VIF_EXISTS(mrt, vr.mifi)) {
1910 			vr.icount = vif->pkt_in;
1911 			vr.ocount = vif->pkt_out;
1912 			vr.ibytes = vif->bytes_in;
1913 			vr.obytes = vif->bytes_out;
1914 			read_unlock(&mrt_lock);
1915 
1916 			if (copy_to_user(arg, &vr, sizeof(vr)))
1917 				return -EFAULT;
1918 			return 0;
1919 		}
1920 		read_unlock(&mrt_lock);
1921 		return -EADDRNOTAVAIL;
1922 	case SIOCGETSGCNT_IN6:
1923 		if (copy_from_user(&sr, arg, sizeof(sr)))
1924 			return -EFAULT;
1925 
1926 		rcu_read_lock();
1927 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1928 		if (c) {
1929 			sr.pktcnt = c->_c.mfc_un.res.pkt;
1930 			sr.bytecnt = c->_c.mfc_un.res.bytes;
1931 			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1932 			rcu_read_unlock();
1933 
1934 			if (copy_to_user(arg, &sr, sizeof(sr)))
1935 				return -EFAULT;
1936 			return 0;
1937 		}
1938 		rcu_read_unlock();
1939 		return -EADDRNOTAVAIL;
1940 	default:
1941 		return -ENOIOCTLCMD;
1942 	}
1943 }
1944 #endif
1945 
1946 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
1947 {
1948 	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1949 			IPSTATS_MIB_OUTFORWDATAGRAMS);
1950 	__IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
1951 			IPSTATS_MIB_OUTOCTETS, skb->len);
1952 	return dst_output(net, sk, skb);
1953 }
1954 
1955 /*
1956  *	Processing handlers for ip6mr_forward
1957  */
1958 
1959 static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
1960 			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1961 {
1962 	struct ipv6hdr *ipv6h;
1963 	struct vif_device *vif = &mrt->vif_table[vifi];
1964 	struct net_device *dev;
1965 	struct dst_entry *dst;
1966 	struct flowi6 fl6;
1967 
1968 	if (!vif->dev)
1969 		goto out_free;
1970 
1971 #ifdef CONFIG_IPV6_PIMSM_V2
1972 	if (vif->flags & MIFF_REGISTER) {
1973 		vif->pkt_out++;
1974 		vif->bytes_out += skb->len;
1975 		vif->dev->stats.tx_bytes += skb->len;
1976 		vif->dev->stats.tx_packets++;
1977 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1978 		goto out_free;
1979 	}
1980 #endif
1981 
1982 	ipv6h = ipv6_hdr(skb);
1983 
1984 	fl6 = (struct flowi6) {
1985 		.flowi6_oif = vif->link,
1986 		.daddr = ipv6h->daddr,
1987 	};
1988 
1989 	dst = ip6_route_output(net, NULL, &fl6);
1990 	if (dst->error) {
1991 		dst_release(dst);
1992 		goto out_free;
1993 	}
1994 
1995 	skb_dst_drop(skb);
1996 	skb_dst_set(skb, dst);
1997 
1998 	/*
1999 	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2000 	 * not only before forwarding, but after forwarding on all output
2001 	 * interfaces. It is clear, if mrouter runs a multicasting
2002 	 * program, it should receive packets not depending to what interface
2003 	 * program is joined.
2004 	 * If we will not make it, the program will have to join on all
2005 	 * interfaces. On the other hand, multihoming host (or router, but
2006 	 * not mrouter) cannot join to more than one interface - it will
2007 	 * result in receiving multiple packets.
2008 	 */
2009 	dev = vif->dev;
2010 	skb->dev = dev;
2011 	vif->pkt_out++;
2012 	vif->bytes_out += skb->len;
2013 
2014 	/* We are about to write */
2015 	/* XXX: extension headers? */
2016 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2017 		goto out_free;
2018 
2019 	ipv6h = ipv6_hdr(skb);
2020 	ipv6h->hop_limit--;
2021 
2022 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2023 
2024 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2025 		       net, NULL, skb, skb->dev, dev,
2026 		       ip6mr_forward2_finish);
2027 
2028 out_free:
2029 	kfree_skb(skb);
2030 	return 0;
2031 }
2032 
2033 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2034 {
2035 	int ct;
2036 
2037 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2038 		if (mrt->vif_table[ct].dev == dev)
2039 			break;
2040 	}
2041 	return ct;
2042 }
2043 
2044 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2045 			   struct sk_buff *skb, struct mfc6_cache *c)
2046 {
2047 	int psend = -1;
2048 	int vif, ct;
2049 	int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2050 
2051 	vif = c->_c.mfc_parent;
2052 	c->_c.mfc_un.res.pkt++;
2053 	c->_c.mfc_un.res.bytes += skb->len;
2054 	c->_c.mfc_un.res.lastuse = jiffies;
2055 
2056 	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2057 		struct mfc6_cache *cache_proxy;
2058 
2059 		/* For an (*,G) entry, we only check that the incoming
2060 		 * interface is part of the static tree.
2061 		 */
2062 		rcu_read_lock();
2063 		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2064 		if (cache_proxy &&
2065 		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
2066 			rcu_read_unlock();
2067 			goto forward;
2068 		}
2069 		rcu_read_unlock();
2070 	}
2071 
2072 	/*
2073 	 * Wrong interface: drop packet and (maybe) send PIM assert.
2074 	 */
2075 	if (mrt->vif_table[vif].dev != skb->dev) {
2076 		c->_c.mfc_un.res.wrong_if++;
2077 
2078 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2079 		    /* pimsm uses asserts, when switching from RPT to SPT,
2080 		       so that we cannot check that packet arrived on an oif.
2081 		       It is bad, but otherwise we would need to move pretty
2082 		       large chunk of pimd to kernel. Ough... --ANK
2083 		     */
2084 		    (mrt->mroute_do_pim ||
2085 		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2086 		    time_after(jiffies,
2087 			       c->_c.mfc_un.res.last_assert +
2088 			       MFC_ASSERT_THRESH)) {
2089 			c->_c.mfc_un.res.last_assert = jiffies;
2090 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2091 		}
2092 		goto dont_forward;
2093 	}
2094 
2095 forward:
2096 	mrt->vif_table[vif].pkt_in++;
2097 	mrt->vif_table[vif].bytes_in += skb->len;
2098 
2099 	/*
2100 	 *	Forward the frame
2101 	 */
2102 	if (ipv6_addr_any(&c->mf6c_origin) &&
2103 	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
2104 		if (true_vifi >= 0 &&
2105 		    true_vifi != c->_c.mfc_parent &&
2106 		    ipv6_hdr(skb)->hop_limit >
2107 				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2108 			/* It's an (*,*) entry and the packet is not coming from
2109 			 * the upstream: forward the packet to the upstream
2110 			 * only.
2111 			 */
2112 			psend = c->_c.mfc_parent;
2113 			goto last_forward;
2114 		}
2115 		goto dont_forward;
2116 	}
2117 	for (ct = c->_c.mfc_un.res.maxvif - 1;
2118 	     ct >= c->_c.mfc_un.res.minvif; ct--) {
2119 		/* For (*,G) entry, don't forward to the incoming interface */
2120 		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
2121 		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2122 			if (psend != -1) {
2123 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2124 				if (skb2)
2125 					ip6mr_forward2(net, mrt, skb2,
2126 						       c, psend);
2127 			}
2128 			psend = ct;
2129 		}
2130 	}
2131 last_forward:
2132 	if (psend != -1) {
2133 		ip6mr_forward2(net, mrt, skb, c, psend);
2134 		return;
2135 	}
2136 
2137 dont_forward:
2138 	kfree_skb(skb);
2139 }
2140 
2141 
2142 /*
2143  *	Multicast packets for forwarding arrive here
2144  */
2145 
2146 int ip6_mr_input(struct sk_buff *skb)
2147 {
2148 	struct mfc6_cache *cache;
2149 	struct net *net = dev_net(skb->dev);
2150 	struct mr_table *mrt;
2151 	struct flowi6 fl6 = {
2152 		.flowi6_iif	= skb->dev->ifindex,
2153 		.flowi6_mark	= skb->mark,
2154 	};
2155 	int err;
2156 
2157 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2158 	if (err < 0) {
2159 		kfree_skb(skb);
2160 		return err;
2161 	}
2162 
2163 	read_lock(&mrt_lock);
2164 	cache = ip6mr_cache_find(mrt,
2165 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2166 	if (!cache) {
2167 		int vif = ip6mr_find_vif(mrt, skb->dev);
2168 
2169 		if (vif >= 0)
2170 			cache = ip6mr_cache_find_any(mrt,
2171 						     &ipv6_hdr(skb)->daddr,
2172 						     vif);
2173 	}
2174 
2175 	/*
2176 	 *	No usable cache entry
2177 	 */
2178 	if (!cache) {
2179 		int vif;
2180 
2181 		vif = ip6mr_find_vif(mrt, skb->dev);
2182 		if (vif >= 0) {
2183 			int err = ip6mr_cache_unresolved(mrt, vif, skb);
2184 			read_unlock(&mrt_lock);
2185 
2186 			return err;
2187 		}
2188 		read_unlock(&mrt_lock);
2189 		kfree_skb(skb);
2190 		return -ENODEV;
2191 	}
2192 
2193 	ip6_mr_forward(net, mrt, skb, cache);
2194 
2195 	read_unlock(&mrt_lock);
2196 
2197 	return 0;
2198 }
2199 
2200 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2201 		    u32 portid)
2202 {
2203 	int err;
2204 	struct mr_table *mrt;
2205 	struct mfc6_cache *cache;
2206 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2207 
2208 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2209 	if (!mrt)
2210 		return -ENOENT;
2211 
2212 	read_lock(&mrt_lock);
2213 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2214 	if (!cache && skb->dev) {
2215 		int vif = ip6mr_find_vif(mrt, skb->dev);
2216 
2217 		if (vif >= 0)
2218 			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2219 						     vif);
2220 	}
2221 
2222 	if (!cache) {
2223 		struct sk_buff *skb2;
2224 		struct ipv6hdr *iph;
2225 		struct net_device *dev;
2226 		int vif;
2227 
2228 		dev = skb->dev;
2229 		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2230 			read_unlock(&mrt_lock);
2231 			return -ENODEV;
2232 		}
2233 
2234 		/* really correct? */
2235 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2236 		if (!skb2) {
2237 			read_unlock(&mrt_lock);
2238 			return -ENOMEM;
2239 		}
2240 
2241 		NETLINK_CB(skb2).portid = portid;
2242 		skb_reset_transport_header(skb2);
2243 
2244 		skb_put(skb2, sizeof(struct ipv6hdr));
2245 		skb_reset_network_header(skb2);
2246 
2247 		iph = ipv6_hdr(skb2);
2248 		iph->version = 0;
2249 		iph->priority = 0;
2250 		iph->flow_lbl[0] = 0;
2251 		iph->flow_lbl[1] = 0;
2252 		iph->flow_lbl[2] = 0;
2253 		iph->payload_len = 0;
2254 		iph->nexthdr = IPPROTO_NONE;
2255 		iph->hop_limit = 0;
2256 		iph->saddr = rt->rt6i_src.addr;
2257 		iph->daddr = rt->rt6i_dst.addr;
2258 
2259 		err = ip6mr_cache_unresolved(mrt, vif, skb2);
2260 		read_unlock(&mrt_lock);
2261 
2262 		return err;
2263 	}
2264 
2265 	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2266 	read_unlock(&mrt_lock);
2267 	return err;
2268 }
2269 
2270 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2271 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2272 			     int flags)
2273 {
2274 	struct nlmsghdr *nlh;
2275 	struct rtmsg *rtm;
2276 	int err;
2277 
2278 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2279 	if (!nlh)
2280 		return -EMSGSIZE;
2281 
2282 	rtm = nlmsg_data(nlh);
2283 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2284 	rtm->rtm_dst_len  = 128;
2285 	rtm->rtm_src_len  = 128;
2286 	rtm->rtm_tos      = 0;
2287 	rtm->rtm_table    = mrt->id;
2288 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2289 		goto nla_put_failure;
2290 	rtm->rtm_type = RTN_MULTICAST;
2291 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2292 	if (c->_c.mfc_flags & MFC_STATIC)
2293 		rtm->rtm_protocol = RTPROT_STATIC;
2294 	else
2295 		rtm->rtm_protocol = RTPROT_MROUTED;
2296 	rtm->rtm_flags    = 0;
2297 
2298 	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2299 	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2300 		goto nla_put_failure;
2301 	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2302 	/* do not break the dump if cache is unresolved */
2303 	if (err < 0 && err != -ENOENT)
2304 		goto nla_put_failure;
2305 
2306 	nlmsg_end(skb, nlh);
2307 	return 0;
2308 
2309 nla_put_failure:
2310 	nlmsg_cancel(skb, nlh);
2311 	return -EMSGSIZE;
2312 }
2313 
2314 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2315 			      u32 portid, u32 seq, struct mr_mfc *c,
2316 			      int cmd, int flags)
2317 {
2318 	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2319 				 cmd, flags);
2320 }
2321 
2322 static int mr6_msgsize(bool unresolved, int maxvif)
2323 {
2324 	size_t len =
2325 		NLMSG_ALIGN(sizeof(struct rtmsg))
2326 		+ nla_total_size(4)	/* RTA_TABLE */
2327 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2328 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2329 		;
2330 
2331 	if (!unresolved)
2332 		len = len
2333 		      + nla_total_size(4)	/* RTA_IIF */
2334 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2335 		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2336 						/* RTA_MFC_STATS */
2337 		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2338 		;
2339 
2340 	return len;
2341 }
2342 
2343 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2344 			      int cmd)
2345 {
2346 	struct net *net = read_pnet(&mrt->net);
2347 	struct sk_buff *skb;
2348 	int err = -ENOBUFS;
2349 
2350 	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2351 			GFP_ATOMIC);
2352 	if (!skb)
2353 		goto errout;
2354 
2355 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2356 	if (err < 0)
2357 		goto errout;
2358 
2359 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2360 	return;
2361 
2362 errout:
2363 	kfree_skb(skb);
2364 	if (err < 0)
2365 		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2366 }
2367 
2368 static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2369 {
2370 	size_t len =
2371 		NLMSG_ALIGN(sizeof(struct rtgenmsg))
2372 		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
2373 		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
2374 					/* IP6MRA_CREPORT_SRC_ADDR */
2375 		+ nla_total_size(sizeof(struct in6_addr))
2376 					/* IP6MRA_CREPORT_DST_ADDR */
2377 		+ nla_total_size(sizeof(struct in6_addr))
2378 					/* IP6MRA_CREPORT_PKT */
2379 		+ nla_total_size(payloadlen)
2380 		;
2381 
2382 	return len;
2383 }
2384 
2385 static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
2386 {
2387 	struct net *net = read_pnet(&mrt->net);
2388 	struct nlmsghdr *nlh;
2389 	struct rtgenmsg *rtgenm;
2390 	struct mrt6msg *msg;
2391 	struct sk_buff *skb;
2392 	struct nlattr *nla;
2393 	int payloadlen;
2394 
2395 	payloadlen = pkt->len - sizeof(struct mrt6msg);
2396 	msg = (struct mrt6msg *)skb_transport_header(pkt);
2397 
2398 	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2399 	if (!skb)
2400 		goto errout;
2401 
2402 	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2403 			sizeof(struct rtgenmsg), 0);
2404 	if (!nlh)
2405 		goto errout;
2406 	rtgenm = nlmsg_data(nlh);
2407 	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2408 	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2409 	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2410 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2411 			     &msg->im6_src) ||
2412 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2413 			     &msg->im6_dst))
2414 		goto nla_put_failure;
2415 
2416 	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2417 	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2418 				  nla_data(nla), payloadlen))
2419 		goto nla_put_failure;
2420 
2421 	nlmsg_end(skb, nlh);
2422 
2423 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2424 	return;
2425 
2426 nla_put_failure:
2427 	nlmsg_cancel(skb, nlh);
2428 errout:
2429 	kfree_skb(skb);
2430 	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2431 }
2432 
2433 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2434 {
2435 	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2436 				_ip6mr_fill_mroute, &mfc_unres_lock);
2437 }
2438