xref: /openbmc/linux/net/ipv6/ip6mr.c (revision e5242c5f)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Linux IPv6 multicast routing support for BSD pim6sd
4  *	Based on net/ipv4/ipmr.c.
5  *
6  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
7  *		LSIIT Laboratory, Strasbourg, France
8  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
9  *		6WIND, Paris, France
10  *	Copyright (C)2007,2008 USAGI/WIDE Project
11  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
12  */
13 
14 #include <linux/uaccess.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/errno.h>
18 #include <linux/mm.h>
19 #include <linux/kernel.h>
20 #include <linux/fcntl.h>
21 #include <linux/stat.h>
22 #include <linux/socket.h>
23 #include <linux/inet.h>
24 #include <linux/netdevice.h>
25 #include <linux/inetdevice.h>
26 #include <linux/proc_fs.h>
27 #include <linux/seq_file.h>
28 #include <linux/init.h>
29 #include <linux/compat.h>
30 #include <linux/rhashtable.h>
31 #include <net/protocol.h>
32 #include <linux/skbuff.h>
33 #include <net/raw.h>
34 #include <linux/notifier.h>
35 #include <linux/if_arp.h>
36 #include <net/checksum.h>
37 #include <net/netlink.h>
38 #include <net/fib_rules.h>
39 
40 #include <net/ipv6.h>
41 #include <net/ip6_route.h>
42 #include <linux/mroute6.h>
43 #include <linux/pim.h>
44 #include <net/addrconf.h>
45 #include <linux/netfilter_ipv6.h>
46 #include <linux/export.h>
47 #include <net/ip6_checksum.h>
48 #include <linux/netconf.h>
49 #include <net/ip_tunnels.h>
50 
51 #include <linux/nospec.h>
52 
/* IPv6 multicast routing policy rule. It carries nothing beyond the generic
 * fib_rule; its size is what ip6mr_rules_ops_template reports as rule_size.
 */
struct ip6mr_rule {
	struct fib_rule		common;
};
56 
/* Result slot passed to fib_rules_lookup() via fib_lookup_arg.result:
 * ip6mr_rule_action() stores the selected table in it and
 * ip6mr_fib_lookup() hands that table back to its caller.
 */
struct ip6mr_result {
	struct mr_table	*mrt;
};
60 
/* Big lock, protecting the vif table, the mrt cache and the mroute socket
 * state. Note that changes are additionally serialized via rtnl_lock.
 */

static DEFINE_SPINLOCK(mrt_lock);
66 
67 static struct net_device *vif_dev_read(const struct vif_device *vif)
68 {
69 	return rcu_dereference(vif->dev);
70 }
71 
72 /* Multicast router control variables */
73 
/* Dedicated spinlock for the queue of unresolved cache entries
 * (mrt->mfc_unres_queue).
 */
static DEFINE_SPINLOCK(mfc_unres_lock);
76 
/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and is protected
   by the weak lock mrt_lock. The queue of unresolved entries is protected
   by the strong spinlock mfc_unres_lock.

   In this case the data path is entirely free of exclusive locks.
 */
84 
/* Slab cache from which struct mfc6_cache entries are allocated and freed. */
static struct kmem_cache *mrt_cachep __read_mostly;
86 
/* Forward declarations of functions that are referenced before they are
 * defined.
 */
static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr_table *mrt);

static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct net_device *dev, struct sk_buff *skb,
			   struct mfc6_cache *cache);
static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr_table *mrt, int flags);
static void ipmr_expire_process(struct timer_list *t);
104 
105 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/* Walk every mr_table linked on net->ipv6.mr6_tables. The trailing
 * expression is handed to list_for_each_entry_rcu() as its extra condition
 * argument: RTNL being held, or the list being empty.
 */
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \
				lockdep_rtnl_is_held() || \
				list_empty(&net->ipv6.mr6_tables))
110 
111 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
112 					    struct mr_table *mrt)
113 {
114 	struct mr_table *ret;
115 
116 	if (!mrt)
117 		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
118 				     struct mr_table, list);
119 	else
120 		ret = list_entry_rcu(mrt->list.next,
121 				     struct mr_table, list);
122 
123 	if (&ret->list == &net->ipv6.mr6_tables)
124 		return NULL;
125 	return ret;
126 }
127 
128 static struct mr_table *__ip6mr_get_table(struct net *net, u32 id)
129 {
130 	struct mr_table *mrt;
131 
132 	ip6mr_for_each_table(mrt, net) {
133 		if (mrt->id == id)
134 			return mrt;
135 	}
136 	return NULL;
137 }
138 
139 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
140 {
141 	struct mr_table *mrt;
142 
143 	rcu_read_lock();
144 	mrt = __ip6mr_get_table(net, id);
145 	rcu_read_unlock();
146 	return mrt;
147 }
148 
149 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
150 			    struct mr_table **mrt)
151 {
152 	int err;
153 	struct ip6mr_result res;
154 	struct fib_lookup_arg arg = {
155 		.result = &res,
156 		.flags = FIB_LOOKUP_NOREF,
157 	};
158 
159 	/* update flow if oif or iif point to device enslaved to l3mdev */
160 	l3mdev_update_flow(net, flowi6_to_flowi(flp6));
161 
162 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
163 			       flowi6_to_flowi(flp6), 0, &arg);
164 	if (err < 0)
165 		return err;
166 	*mrt = res.mrt;
167 	return 0;
168 }
169 
170 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
171 			     int flags, struct fib_lookup_arg *arg)
172 {
173 	struct ip6mr_result *res = arg->result;
174 	struct mr_table *mrt;
175 
176 	switch (rule->action) {
177 	case FR_ACT_TO_TBL:
178 		break;
179 	case FR_ACT_UNREACHABLE:
180 		return -ENETUNREACH;
181 	case FR_ACT_PROHIBIT:
182 		return -EACCES;
183 	case FR_ACT_BLACKHOLE:
184 	default:
185 		return -EINVAL;
186 	}
187 
188 	arg->table = fib_rule_get_table(rule, arg);
189 
190 	mrt = __ip6mr_get_table(rule->fr_net, arg->table);
191 	if (!mrt)
192 		return -EAGAIN;
193 	res->mrt = mrt;
194 	return 0;
195 }
196 
/* fib_rules .match callback: unconditionally returns 1, ignoring all of its
 * arguments.
 */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}
201 
/* fib_rules .configure callback: nothing is parsed or stored here; always
 * returns 0.
 */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb,
				struct netlink_ext_ack *extack)
{
	return 0;
}
208 
/* fib_rules .compare callback: unconditionally returns 1, ignoring all of
 * its arguments.
 */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}
214 
/* fib_rules .fill callback: zeroes the dst_len, src_len and tos fields of
 * @frh and returns 0. @rule and @skb are not used.
 */
static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}
223 
/* Template passed to fib_rules_register() by ip6mr_rules_init(); it wires
 * the ip6mr_rule_* callbacks above into the RTNL_FAMILY_IP6MR rule family.
 */
static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.owner		= THIS_MODULE,
};
236 
237 static int __net_init ip6mr_rules_init(struct net *net)
238 {
239 	struct fib_rules_ops *ops;
240 	struct mr_table *mrt;
241 	int err;
242 
243 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
244 	if (IS_ERR(ops))
245 		return PTR_ERR(ops);
246 
247 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
248 
249 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
250 	if (IS_ERR(mrt)) {
251 		err = PTR_ERR(mrt);
252 		goto err1;
253 	}
254 
255 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
256 	if (err < 0)
257 		goto err2;
258 
259 	net->ipv6.mr6_rules_ops = ops;
260 	return 0;
261 
262 err2:
263 	rtnl_lock();
264 	ip6mr_free_table(mrt);
265 	rtnl_unlock();
266 err1:
267 	fib_rules_unregister(ops);
268 	return err;
269 }
270 
271 static void __net_exit ip6mr_rules_exit(struct net *net)
272 {
273 	struct mr_table *mrt, *next;
274 
275 	ASSERT_RTNL();
276 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
277 		list_del(&mrt->list);
278 		ip6mr_free_table(mrt);
279 	}
280 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
281 }
282 
/* Forward to fib_rules_dump() for the RTNL_FAMILY_IP6MR family and return
 * its result.
 */
static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
			    struct netlink_ext_ack *extack)
{
	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
}
288 
/* Return whatever fib_rules_seq_read() reports for the RTNL_FAMILY_IP6MR
 * family in @net.
 */
static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
}
293 
294 bool ip6mr_rule_default(const struct fib_rule *rule)
295 {
296 	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
297 	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
298 }
299 EXPORT_SYMBOL(ip6mr_rule_default);
300 #else
/* Single-table variant: the body runs once with net->ipv6.mrt6, or not at
 * all when that pointer is NULL.
 */
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
303 
304 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
305 					    struct mr_table *mrt)
306 {
307 	if (!mrt)
308 		return net->ipv6.mrt6;
309 	return NULL;
310 }
311 
/* Single-table variant: @id is ignored and net->ipv6.mrt6 is returned. */
static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}
316 
/* With a single table the lock-free lookup is the same function. */
#define __ip6mr_get_table ip6mr_get_table
318 
/* Single-table variant: no rule lookup is done; *@mrt is set to
 * net->ipv6.mrt6 and 0 is returned. @flp6 is not used.
 */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}
325 
326 static int __net_init ip6mr_rules_init(struct net *net)
327 {
328 	struct mr_table *mrt;
329 
330 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
331 	if (IS_ERR(mrt))
332 		return PTR_ERR(mrt);
333 	net->ipv6.mrt6 = mrt;
334 	return 0;
335 }
336 
/* Per-netns teardown for the single-table configuration. Asserts that RTNL
 * is held, frees the table and clears net->ipv6.mrt6.
 */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	ASSERT_RTNL();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
}
343 
/* Single-table variant: there are no rules to dump; always returns 0. */
static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
			    struct netlink_ext_ack *extack)
{
	return 0;
}
349 
/* Single-table variant: always returns 0. */
static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	return 0;
}
354 #endif
355 
356 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
357 			  const void *ptr)
358 {
359 	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
360 	struct mfc6_cache *c = (struct mfc6_cache *)ptr;
361 
362 	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
363 	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
364 }
365 
/* Hash table parameters for the mfc cache: entries are linked through
 * mr_mfc.mnode, keyed by mfc6_cache.cmparg and compared with
 * ip6mr_hash_cmp().
 */
static const struct rhashtable_params ip6mr_rht_params = {
	.head_offset = offsetof(struct mr_mfc, mnode),
	.key_offset = offsetof(struct mfc6_cache, cmparg),
	.key_len = sizeof(struct mfc6_cache_cmp_arg),
	.nelem_hint = 3,
	.obj_cmpfn = ip6mr_hash_cmp,
	.automatic_shrinking = true,
};
374 
/* Callback handed to mr_table_alloc() by ip6mr_new_table(). With multiple
 * tables enabled it appends @mrt to net->ipv6.mr6_tables; otherwise it does
 * nothing.
 */
static void ip6mr_new_table_set(struct mr_table *mrt,
				struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
}
382 
/* Comparison key with both origin and group set to the unspecified address;
 * referenced by ip6mr_mr_table_ops.cmparg_any.
 */
static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
	.mf6c_origin = IN6ADDR_ANY_INIT,
	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};
387 
/* Table operations passed to mr_table_alloc(): the hash parameters and the
 * all-any comparison key defined above.
 */
static struct mr_table_ops ip6mr_mr_table_ops = {
	.rht_params = &ip6mr_rht_params,
	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
};
392 
393 static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
394 {
395 	struct mr_table *mrt;
396 
397 	mrt = __ip6mr_get_table(net, id);
398 	if (mrt)
399 		return mrt;
400 
401 	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
402 			      ipmr_expire_process, ip6mr_new_table_set);
403 }
404 
405 static void ip6mr_free_table(struct mr_table *mrt)
406 {
407 	timer_shutdown_sync(&mrt->ipmr_expire_timer);
408 	mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
409 				 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
410 	rhltable_destroy(&mrt->mfc_hash);
411 	kfree(mrt);
412 }
413 
414 #ifdef CONFIG_PROC_FS
415 /* The /proc interfaces to multicast routing
416  * /proc/ip6_mr_cache /proc/ip6_mr_vif
417  */
418 
419 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
420 	__acquires(RCU)
421 {
422 	struct mr_vif_iter *iter = seq->private;
423 	struct net *net = seq_file_net(seq);
424 	struct mr_table *mrt;
425 
426 	rcu_read_lock();
427 	mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
428 	if (!mrt) {
429 		rcu_read_unlock();
430 		return ERR_PTR(-ENOENT);
431 	}
432 
433 	iter->mrt = mrt;
434 
435 	return mr_vif_seq_start(seq, pos);
436 }
437 
/* seq_file .stop for the vif listing: drops the RCU read lock. */
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}
443 
444 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
445 {
446 	struct mr_vif_iter *iter = seq->private;
447 	struct mr_table *mrt = iter->mrt;
448 
449 	if (v == SEQ_START_TOKEN) {
450 		seq_puts(seq,
451 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
452 	} else {
453 		const struct vif_device *vif = v;
454 		const struct net_device *vif_dev;
455 		const char *name;
456 
457 		vif_dev = vif_dev_read(vif);
458 		name = vif_dev ? vif_dev->name : "none";
459 
460 		seq_printf(seq,
461 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
462 			   vif - mrt->vif_table,
463 			   name, vif->bytes_in, vif->pkt_in,
464 			   vif->bytes_out, vif->pkt_out,
465 			   vif->flags);
466 	}
467 	return 0;
468 }
469 
/* seq_file operations for the vif listing; iteration itself is delegated to
 * mr_vif_seq_next().
 */
static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};
476 
477 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
478 {
479 	struct net *net = seq_file_net(seq);
480 	struct mr_table *mrt;
481 
482 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
483 	if (!mrt)
484 		return ERR_PTR(-ENOENT);
485 
486 	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
487 }
488 
489 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
490 {
491 	int n;
492 
493 	if (v == SEQ_START_TOKEN) {
494 		seq_puts(seq,
495 			 "Group                            "
496 			 "Origin                           "
497 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
498 	} else {
499 		const struct mfc6_cache *mfc = v;
500 		const struct mr_mfc_iter *it = seq->private;
501 		struct mr_table *mrt = it->mrt;
502 
503 		seq_printf(seq, "%pI6 %pI6 %-3hd",
504 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
505 			   mfc->_c.mfc_parent);
506 
507 		if (it->cache != &mrt->mfc_unres_queue) {
508 			seq_printf(seq, " %8lu %8lu %8lu",
509 				   mfc->_c.mfc_un.res.pkt,
510 				   mfc->_c.mfc_un.res.bytes,
511 				   mfc->_c.mfc_un.res.wrong_if);
512 			for (n = mfc->_c.mfc_un.res.minvif;
513 			     n < mfc->_c.mfc_un.res.maxvif; n++) {
514 				if (VIF_EXISTS(mrt, n) &&
515 				    mfc->_c.mfc_un.res.ttls[n] < 255)
516 					seq_printf(seq,
517 						   " %2d:%-3d", n,
518 						   mfc->_c.mfc_un.res.ttls[n]);
519 			}
520 		} else {
521 			/* unresolved mfc_caches don't contain
522 			 * pkt, bytes and wrong_if values
523 			 */
524 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
525 		}
526 		seq_putc(seq, '\n');
527 	}
528 	return 0;
529 }
530 
/* seq_file operations for the mfc cache listing; .next and .stop come from
 * the shared mr_mfc_seq_* helpers.
 */
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = mr_mfc_seq_next,
	.stop  = mr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
537 #endif
538 
539 #ifdef CONFIG_IPV6_PIMSM_V2
540 
/* Receive handler installed in pim6_protocol.
 *
 * Accepts only PIM register messages that encapsulate an IPv6 multicast
 * packet, strips the outer headers and feeds the inner packet back into the
 * stack via netif_rx() as if it had arrived on the table's register vif
 * device.
 *
 * Always returns 0; a packet that fails any check is freed.
 */
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	/* Need the PIM register header plus the inner IPv6 header linear. */
	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	/* Drop unless this is a (non-null) register message. The checksum is
	 * accepted when it verifies either over the PIM header alone or over
	 * the whole packet; the packet is dropped only if both fail.
	 */
	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	/* The inner payload must be non-empty and fit inside this skb. */
	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;

	/* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */
	reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
	if (reg_vif_num >= 0)
		reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]);

	/* No register vif configured (or its device is gone). */
	if (!reg_dev)
		goto drop;

	/* Make the inner IPv6 header the new network header. */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
601 
/* inet6 protocol descriptor whose receive handler is pim6_rcv(). */
static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};
605 
606 /* Service routines creating virtual interfaces: PIMREG */
607 
608 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
609 				      struct net_device *dev)
610 {
611 	struct net *net = dev_net(dev);
612 	struct mr_table *mrt;
613 	struct flowi6 fl6 = {
614 		.flowi6_oif	= dev->ifindex,
615 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
616 		.flowi6_mark	= skb->mark,
617 	};
618 
619 	if (!pskb_inet_may_pull(skb))
620 		goto tx_err;
621 
622 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
623 		goto tx_err;
624 
625 	DEV_STATS_ADD(dev, tx_bytes, skb->len);
626 	DEV_STATS_INC(dev, tx_packets);
627 	rcu_read_lock();
628 	ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
629 			   MRT6MSG_WHOLEPKT);
630 	rcu_read_unlock();
631 	kfree_skb(skb);
632 	return NETDEV_TX_OK;
633 
634 tx_err:
635 	DEV_STATS_INC(dev, tx_errors);
636 	kfree_skb(skb);
637 	return NETDEV_TX_OK;
638 }
639 
/* ndo_get_iflink of the register vif device: always reports 0. */
static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}
644 
/* Device operations of the register vif device. */
static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};
649 
/* alloc_netdev() setup callback for the register vif device: an
 * ARPHRD_PIMREG, IFF_NOARP, netns-local device using reg_vif_netdev_ops.
 * The MTU leaves room for one IPv6 header plus 8 further bytes within 1500.
 */
static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->needs_free_netdev	= true;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
659 
660 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
661 {
662 	struct net_device *dev;
663 	char name[IFNAMSIZ];
664 
665 	if (mrt->id == RT6_TABLE_DFLT)
666 		sprintf(name, "pim6reg");
667 	else
668 		sprintf(name, "pim6reg%u", mrt->id);
669 
670 	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
671 	if (!dev)
672 		return NULL;
673 
674 	dev_net_set(dev, net);
675 
676 	if (register_netdevice(dev)) {
677 		free_netdev(dev);
678 		return NULL;
679 	}
680 
681 	if (dev_open(dev, NULL))
682 		goto failure;
683 
684 	dev_hold(dev);
685 	return dev;
686 
687 failure:
688 	unregister_netdevice(dev);
689 	return NULL;
690 }
691 #endif
692 
/* Forward a vif event to mr_call_vif_notifiers() for the RTNL_FAMILY_IP6MR
 * family, passing net->ipv6.ipmr_seq, and return its result.
 */
static int call_ip6mr_vif_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct vif_device *vif,
					  struct net_device *vif_dev,
					  mifi_t vif_index, u32 tb_id)
{
	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     vif, vif_dev, vif_index, tb_id,
				     &net->ipv6.ipmr_seq);
}
703 
/* Forward an mfc event to mr_call_mfc_notifiers() for the RTNL_FAMILY_IP6MR
 * family, passing the generic part of @mfc and net->ipv6.ipmr_seq, and
 * return its result.
 */
static int call_ip6mr_mfc_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct mfc6_cache *mfc, u32 tb_id)
{
	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
}
711 
/* Delete a VIF entry.
 *
 * @mrt:    table owning the vif
 * @vifi:   index of the vif to delete
 * @notify: when non-zero, a register vif's device is NOT queued for
 *          unregistration here.
 *          NOTE(review): presumably set when the device is already being
 *          unregistered by the caller - confirm against callers.
 * @head:   list on which a register vif's device is queued for
 *          unregistration
 *
 * Returns 0 on success, -EADDRNOTAVAIL when @vifi is out of range or has no
 * device attached. The device pointer is read with rtnl_dereference(), so
 * RTNL is expected to be held.
 */
static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
		       struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	dev = rtnl_dereference(v->dev);
	if (!dev)
		return -EADDRNOTAVAIL;

	/* Notifiers run before the vif is detached from its device. */
	call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
				       FIB_EVENT_VIF_DEL, v, dev,
				       vifi, mrt->id);
	spin_lock(&mrt_lock);
	RCU_INIT_POINTER(v->dev, NULL);

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num) {
		/* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */
		WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
	}
#endif

	/* If the highest vif was removed, shrink maxvif down to just above
	 * the highest vif that still exists (0 when none is left).
	 */
	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		WRITE_ONCE(mrt->maxvif, tmp + 1);
	}

	spin_unlock(&mrt_lock);

	/* Undo the allmulti and mc_forwarding increments done in mif6_add(). */
	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		atomic_dec(&in6_dev->cnf.mc_forwarding);
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if ((v->flags & MIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	/* Release the device reference tracked in v->dev_tracker. */
	netdev_put(dev, &v->dev_tracker);
	return 0;
}
769 
770 static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
771 {
772 	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
773 
774 	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
775 }
776 
/* Schedule @c to be freed by ip6mr_cache_free_rcu() via call_rcu(). */
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}
781 
782 /* Destroy an unresolved cache entry, killing queued skbs
783    and reporting error to netlink readers.
784  */
785 
786 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
787 {
788 	struct net *net = read_pnet(&mrt->net);
789 	struct sk_buff *skb;
790 
791 	atomic_dec(&mrt->cache_resolve_queue_len);
792 
793 	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
794 		if (ipv6_hdr(skb)->version == 0) {
795 			struct nlmsghdr *nlh = skb_pull(skb,
796 							sizeof(struct ipv6hdr));
797 			nlh->nlmsg_type = NLMSG_ERROR;
798 			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
799 			skb_trim(skb, nlh->nlmsg_len);
800 			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
801 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
802 		} else
803 			kfree_skb(skb);
804 	}
805 
806 	ip6mr_cache_free(c);
807 }
808 
809 
810 /* Timer process for all the unresolved queue. */
811 
812 static void ipmr_do_expire_process(struct mr_table *mrt)
813 {
814 	unsigned long now = jiffies;
815 	unsigned long expires = 10 * HZ;
816 	struct mr_mfc *c, *next;
817 
818 	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
819 		if (time_after(c->mfc_un.unres.expires, now)) {
820 			/* not yet... */
821 			unsigned long interval = c->mfc_un.unres.expires - now;
822 			if (interval < expires)
823 				expires = interval;
824 			continue;
825 		}
826 
827 		list_del(&c->list);
828 		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
829 		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
830 	}
831 
832 	if (!list_empty(&mrt->mfc_unres_queue))
833 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
834 }
835 
836 static void ipmr_expire_process(struct timer_list *t)
837 {
838 	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
839 
840 	if (!spin_trylock(&mfc_unres_lock)) {
841 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
842 		return;
843 	}
844 
845 	if (!list_empty(&mrt->mfc_unres_queue))
846 		ipmr_do_expire_process(mrt);
847 
848 	spin_unlock(&mfc_unres_lock);
849 }
850 
851 /* Fill oifs list. It is called under locked mrt_lock. */
852 
853 static void ip6mr_update_thresholds(struct mr_table *mrt,
854 				    struct mr_mfc *cache,
855 				    unsigned char *ttls)
856 {
857 	int vifi;
858 
859 	cache->mfc_un.res.minvif = MAXMIFS;
860 	cache->mfc_un.res.maxvif = 0;
861 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
862 
863 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
864 		if (VIF_EXISTS(mrt, vifi) &&
865 		    ttls[vifi] && ttls[vifi] < 255) {
866 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
867 			if (cache->mfc_un.res.minvif > vifi)
868 				cache->mfc_un.res.minvif = vifi;
869 			if (cache->mfc_un.res.maxvif <= vifi)
870 				cache->mfc_un.res.maxvif = vifi + 1;
871 		}
872 	}
873 	cache->mfc_un.res.lastuse = jiffies;
874 }
875 
/* Add a multicast interface (vif) to @mrt as described by @vifc.
 *
 * @net:     namespace in which the underlying device is looked up/created
 * @mrt:     table receiving the vif
 * @vifc:    user-supplied description (index, flags, physical ifindex,
 *           rate limit, threshold)
 * @mrtsock: when zero, the vif is additionally flagged VIFF_STATIC
 *
 * Returns 0 on success; -EADDRINUSE if the slot (or, for MIFF_REGISTER, the
 * register vif) is already taken; -ENOBUFS if the register device cannot be
 * created; -EADDRNOTAVAIL if the physical device does not exist; -EINVAL
 * for unsupported flags; or the error from dev_set_allmulti().
 *
 * NOTE(review): vifc->mif6c_mifi is used as an array index without a range
 * check here - presumably validated by the caller; confirm.
 */
static int mif6_add(struct net *net, struct mr_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			/* Tear down the freshly created register device. */
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		/* Ordinary vif bound to an existing device. */
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	/* Count this vif in the device's mc_forwarding and announce it. */
	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		atomic_inc(&in6_dev->cnf.mc_forwarding);
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
			MIFF_REGISTER);

	/* And finish update writing critical data */
	spin_lock(&mrt_lock);
	rcu_assign_pointer(v->dev, dev);
	netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
#endif
	/* Grow maxvif so that it covers the new vif. */
	if (vifi + 1 > mrt->maxvif)
		WRITE_ONCE(mrt->maxvif, vifi + 1);
	spin_unlock(&mrt_lock);
	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
				       v, dev, vifi, mrt->id);
	return 0;
}
951 
952 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
953 					   const struct in6_addr *origin,
954 					   const struct in6_addr *mcastgrp)
955 {
956 	struct mfc6_cache_cmp_arg arg = {
957 		.mf6c_origin = *origin,
958 		.mf6c_mcastgrp = *mcastgrp,
959 	};
960 
961 	return mr_mfc_find(mrt, &arg);
962 }
963 
964 /* Look for a (*,G) entry */
965 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
966 					       struct in6_addr *mcastgrp,
967 					       mifi_t mifi)
968 {
969 	struct mfc6_cache_cmp_arg arg = {
970 		.mf6c_origin = in6addr_any,
971 		.mf6c_mcastgrp = *mcastgrp,
972 	};
973 
974 	if (ipv6_addr_any(mcastgrp))
975 		return mr_mfc_find_any_parent(mrt, mifi);
976 	return mr_mfc_find_any(mrt, mifi, &arg);
977 }
978 
979 /* Look for a (S,G,iif) entry if parent != -1 */
980 static struct mfc6_cache *
981 ip6mr_cache_find_parent(struct mr_table *mrt,
982 			const struct in6_addr *origin,
983 			const struct in6_addr *mcastgrp,
984 			int parent)
985 {
986 	struct mfc6_cache_cmp_arg arg = {
987 		.mf6c_origin = *origin,
988 		.mf6c_mcastgrp = *mcastgrp,
989 	};
990 
991 	return mr_mfc_find_parent(mrt, &arg, parent);
992 }
993 
994 /* Allocate a multicast cache entry */
995 static struct mfc6_cache *ip6mr_cache_alloc(void)
996 {
997 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
998 	if (!c)
999 		return NULL;
1000 	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
1001 	c->_c.mfc_un.res.minvif = MAXMIFS;
1002 	c->_c.free = ip6mr_cache_free_rcu;
1003 	refcount_set(&c->_c.mfc_un.res.refcount, 1);
1004 	return c;
1005 }
1006 
1007 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1008 {
1009 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1010 	if (!c)
1011 		return NULL;
1012 	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
1013 	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
1014 	return c;
1015 }
1016 
1017 /*
1018  *	A cache entry has gone into a resolved state from queued
1019  */
1020 
1021 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
1022 				struct mfc6_cache *uc, struct mfc6_cache *c)
1023 {
1024 	struct sk_buff *skb;
1025 
1026 	/*
1027 	 *	Play the pending entries through our router
1028 	 */
1029 
1030 	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1031 		if (ipv6_hdr(skb)->version == 0) {
1032 			struct nlmsghdr *nlh = skb_pull(skb,
1033 							sizeof(struct ipv6hdr));
1034 
1035 			if (mr_fill_mroute(mrt, skb, &c->_c,
1036 					   nlmsg_data(nlh)) > 0) {
1037 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1038 			} else {
1039 				nlh->nlmsg_type = NLMSG_ERROR;
1040 				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1041 				skb_trim(skb, nlh->nlmsg_len);
1042 				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1043 			}
1044 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1045 		} else {
1046 			rcu_read_lock();
1047 			ip6_mr_forward(net, mrt, skb->dev, skb, c);
1048 			rcu_read_unlock();
1049 		}
1050 	}
1051 }
1052 
/*
 *	Bounce a cache query up to pim6sd and netlink.
 *
 *	Called under rcu_read_lock()
 *
 *	@mrt:    table whose mroute socket receives the report
 *	@pkt:    packet that triggered the report; it is not consumed here
 *	@mifi:   vif index to report (ignored for MRT6MSG_WHOLEPKT, where the
 *	         table's register vif number is reported instead)
 *	@assert: message type stored in im6_msgtype
 *
 *	Builds a new skb carrying a struct mrt6msg and queues it on the
 *	table's mroute socket, after also passing it to
 *	mrt6msg_netlink_event().
 *
 *	Returns -ENOBUFS if the skb cannot be allocated, -EINVAL if the table
 *	has no mroute socket, otherwise the result of sock_queue_rcv_skb().
 */

static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sock *mroute6_sk;
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

	/* Whole-packet reports copy @pkt with headroom for the mrt6msg;
	 * all other reports only need an IPv6 header plus the mrt6msg.
	 */
#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		__skb_pull(skb, skb_network_offset(pkt));

		/* The mrt6msg is placed in front of the copied packet. */
		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		if (assert == MRT6MSG_WRMIFWHOLE)
			msg->im6_mif = mifi;
		else
			msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num);
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	/* Without a listening mroute socket there is nobody to report to. */
	mroute6_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute6_sk) {
		kfree_skb(skb);
		return -EINVAL;
	}

	mrt6msg_netlink_event(mrt, skb);

	/* Deliver to user space multicast routing algorithms */
	ret = sock_queue_rcv_skb(mroute6_sk, skb);

	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}
1153 
1154 /* Queue a packet for resolution. It gets locked cache entry! */
1155 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
1156 				  struct sk_buff *skb, struct net_device *dev)
1157 {
1158 	struct mfc6_cache *c;
1159 	bool found = false;
1160 	int err;
1161 
1162 	spin_lock_bh(&mfc_unres_lock);
1163 	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1164 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1165 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1166 			found = true;
1167 			break;
1168 		}
1169 	}
1170 
1171 	if (!found) {
1172 		/*
1173 		 *	Create a new entry if allowable
1174 		 */
1175 
1176 		c = ip6mr_cache_alloc_unres();
1177 		if (!c) {
1178 			spin_unlock_bh(&mfc_unres_lock);
1179 
1180 			kfree_skb(skb);
1181 			return -ENOBUFS;
1182 		}
1183 
1184 		/* Fill in the new cache entry */
1185 		c->_c.mfc_parent = -1;
1186 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1187 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1188 
1189 		/*
1190 		 *	Reflect first query at pim6sd
1191 		 */
1192 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1193 		if (err < 0) {
1194 			/* If the report failed throw the cache entry
1195 			   out - Brad Parker
1196 			 */
1197 			spin_unlock_bh(&mfc_unres_lock);
1198 
1199 			ip6mr_cache_free(c);
1200 			kfree_skb(skb);
1201 			return err;
1202 		}
1203 
1204 		atomic_inc(&mrt->cache_resolve_queue_len);
1205 		list_add(&c->_c.list, &mrt->mfc_unres_queue);
1206 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1207 
1208 		ipmr_do_expire_process(mrt);
1209 	}
1210 
1211 	/* See if we can append the packet */
1212 	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1213 		kfree_skb(skb);
1214 		err = -ENOBUFS;
1215 	} else {
1216 		if (dev) {
1217 			skb->dev = dev;
1218 			skb->skb_iif = dev->ifindex;
1219 		}
1220 		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1221 		err = 0;
1222 	}
1223 
1224 	spin_unlock_bh(&mfc_unres_lock);
1225 	return err;
1226 }
1227 
1228 /*
1229  *	MFC6 cache manipulation by user space
1230  */
1231 
1232 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1233 			    int parent)
1234 {
1235 	struct mfc6_cache *c;
1236 
1237 	/* The entries are added/deleted only under RTNL */
1238 	rcu_read_lock();
1239 	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1240 				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1241 	rcu_read_unlock();
1242 	if (!c)
1243 		return -ENOENT;
1244 	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1245 	list_del_rcu(&c->_c.list);
1246 
1247 	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1248 				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
1249 	mr6_netlink_event(mrt, c, RTM_DELROUTE);
1250 	mr_cache_put(&c->_c);
1251 	return 0;
1252 }
1253 
1254 static int ip6mr_device_event(struct notifier_block *this,
1255 			      unsigned long event, void *ptr)
1256 {
1257 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1258 	struct net *net = dev_net(dev);
1259 	struct mr_table *mrt;
1260 	struct vif_device *v;
1261 	int ct;
1262 
1263 	if (event != NETDEV_UNREGISTER)
1264 		return NOTIFY_DONE;
1265 
1266 	ip6mr_for_each_table(mrt, net) {
1267 		v = &mrt->vif_table[0];
1268 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1269 			if (rcu_access_pointer(v->dev) == dev)
1270 				mif6_delete(mrt, ct, 1, NULL);
1271 		}
1272 	}
1273 
1274 	return NOTIFY_DONE;
1275 }
1276 
1277 static unsigned int ip6mr_seq_read(struct net *net)
1278 {
1279 	ASSERT_RTNL();
1280 
1281 	return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
1282 }
1283 
1284 static int ip6mr_dump(struct net *net, struct notifier_block *nb,
1285 		      struct netlink_ext_ack *extack)
1286 {
1287 	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1288 		       ip6mr_mr_table_iter, extack);
1289 }
1290 
1291 static struct notifier_block ip6_mr_notifier = {
1292 	.notifier_call = ip6mr_device_event
1293 };
1294 
1295 static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
1296 	.family		= RTNL_FAMILY_IP6MR,
1297 	.fib_seq_read	= ip6mr_seq_read,
1298 	.fib_dump	= ip6mr_dump,
1299 	.owner		= THIS_MODULE,
1300 };
1301 
1302 static int __net_init ip6mr_notifier_init(struct net *net)
1303 {
1304 	struct fib_notifier_ops *ops;
1305 
1306 	net->ipv6.ipmr_seq = 0;
1307 
1308 	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1309 	if (IS_ERR(ops))
1310 		return PTR_ERR(ops);
1311 
1312 	net->ipv6.ip6mr_notifier_ops = ops;
1313 
1314 	return 0;
1315 }
1316 
1317 static void __net_exit ip6mr_notifier_exit(struct net *net)
1318 {
1319 	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1320 	net->ipv6.ip6mr_notifier_ops = NULL;
1321 }
1322 
1323 /* Setup for IP multicast routing */
1324 static int __net_init ip6mr_net_init(struct net *net)
1325 {
1326 	int err;
1327 
1328 	err = ip6mr_notifier_init(net);
1329 	if (err)
1330 		return err;
1331 
1332 	err = ip6mr_rules_init(net);
1333 	if (err < 0)
1334 		goto ip6mr_rules_fail;
1335 
1336 #ifdef CONFIG_PROC_FS
1337 	err = -ENOMEM;
1338 	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
1339 			sizeof(struct mr_vif_iter)))
1340 		goto proc_vif_fail;
1341 	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
1342 			sizeof(struct mr_mfc_iter)))
1343 		goto proc_cache_fail;
1344 #endif
1345 
1346 	return 0;
1347 
1348 #ifdef CONFIG_PROC_FS
1349 proc_cache_fail:
1350 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1351 proc_vif_fail:
1352 	rtnl_lock();
1353 	ip6mr_rules_exit(net);
1354 	rtnl_unlock();
1355 #endif
1356 ip6mr_rules_fail:
1357 	ip6mr_notifier_exit(net);
1358 	return err;
1359 }
1360 
1361 static void __net_exit ip6mr_net_exit(struct net *net)
1362 {
1363 #ifdef CONFIG_PROC_FS
1364 	remove_proc_entry("ip6_mr_cache", net->proc_net);
1365 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1366 #endif
1367 	ip6mr_notifier_exit(net);
1368 }
1369 
1370 static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list)
1371 {
1372 	struct net *net;
1373 
1374 	rtnl_lock();
1375 	list_for_each_entry(net, net_list, exit_list)
1376 		ip6mr_rules_exit(net);
1377 	rtnl_unlock();
1378 }
1379 
1380 static struct pernet_operations ip6mr_net_ops = {
1381 	.init = ip6mr_net_init,
1382 	.exit = ip6mr_net_exit,
1383 	.exit_batch = ip6mr_net_exit_batch,
1384 };
1385 
1386 int __init ip6_mr_init(void)
1387 {
1388 	int err;
1389 
1390 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1391 				       sizeof(struct mfc6_cache),
1392 				       0, SLAB_HWCACHE_ALIGN,
1393 				       NULL);
1394 	if (!mrt_cachep)
1395 		return -ENOMEM;
1396 
1397 	err = register_pernet_subsys(&ip6mr_net_ops);
1398 	if (err)
1399 		goto reg_pernet_fail;
1400 
1401 	err = register_netdevice_notifier(&ip6_mr_notifier);
1402 	if (err)
1403 		goto reg_notif_fail;
1404 #ifdef CONFIG_IPV6_PIMSM_V2
1405 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1406 		pr_err("%s: can't add PIM protocol\n", __func__);
1407 		err = -EAGAIN;
1408 		goto add_proto_fail;
1409 	}
1410 #endif
1411 	err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
1412 				   ip6mr_rtm_getroute, ip6mr_rtm_dumproute, 0);
1413 	if (err == 0)
1414 		return 0;
1415 
1416 #ifdef CONFIG_IPV6_PIMSM_V2
1417 	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1418 add_proto_fail:
1419 	unregister_netdevice_notifier(&ip6_mr_notifier);
1420 #endif
1421 reg_notif_fail:
1422 	unregister_pernet_subsys(&ip6mr_net_ops);
1423 reg_pernet_fail:
1424 	kmem_cache_destroy(mrt_cachep);
1425 	return err;
1426 }
1427 
1428 void ip6_mr_cleanup(void)
1429 {
1430 	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
1431 #ifdef CONFIG_IPV6_PIMSM_V2
1432 	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1433 #endif
1434 	unregister_netdevice_notifier(&ip6_mr_notifier);
1435 	unregister_pernet_subsys(&ip6mr_net_ops);
1436 	kmem_cache_destroy(mrt_cachep);
1437 }
1438 
1439 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
1440 			 struct mf6cctl *mfc, int mrtsock, int parent)
1441 {
1442 	unsigned char ttls[MAXMIFS];
1443 	struct mfc6_cache *uc, *c;
1444 	struct mr_mfc *_uc;
1445 	bool found;
1446 	int i, err;
1447 
1448 	if (mfc->mf6cc_parent >= MAXMIFS)
1449 		return -ENFILE;
1450 
1451 	memset(ttls, 255, MAXMIFS);
1452 	for (i = 0; i < MAXMIFS; i++) {
1453 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1454 			ttls[i] = 1;
1455 	}
1456 
1457 	/* The entries are added/deleted only under RTNL */
1458 	rcu_read_lock();
1459 	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1460 				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1461 	rcu_read_unlock();
1462 	if (c) {
1463 		spin_lock(&mrt_lock);
1464 		c->_c.mfc_parent = mfc->mf6cc_parent;
1465 		ip6mr_update_thresholds(mrt, &c->_c, ttls);
1466 		if (!mrtsock)
1467 			c->_c.mfc_flags |= MFC_STATIC;
1468 		spin_unlock(&mrt_lock);
1469 		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1470 					       c, mrt->id);
1471 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1472 		return 0;
1473 	}
1474 
1475 	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1476 	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1477 		return -EINVAL;
1478 
1479 	c = ip6mr_cache_alloc();
1480 	if (!c)
1481 		return -ENOMEM;
1482 
1483 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1484 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1485 	c->_c.mfc_parent = mfc->mf6cc_parent;
1486 	ip6mr_update_thresholds(mrt, &c->_c, ttls);
1487 	if (!mrtsock)
1488 		c->_c.mfc_flags |= MFC_STATIC;
1489 
1490 	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
1491 				  ip6mr_rht_params);
1492 	if (err) {
1493 		pr_err("ip6mr: rhtable insert error %d\n", err);
1494 		ip6mr_cache_free(c);
1495 		return err;
1496 	}
1497 	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
1498 
1499 	/* Check to see if we resolved a queued list. If so we
1500 	 * need to send on the frames and tidy up.
1501 	 */
1502 	found = false;
1503 	spin_lock_bh(&mfc_unres_lock);
1504 	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
1505 		uc = (struct mfc6_cache *)_uc;
1506 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1507 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1508 			list_del(&_uc->list);
1509 			atomic_dec(&mrt->cache_resolve_queue_len);
1510 			found = true;
1511 			break;
1512 		}
1513 	}
1514 	if (list_empty(&mrt->mfc_unres_queue))
1515 		del_timer(&mrt->ipmr_expire_timer);
1516 	spin_unlock_bh(&mfc_unres_lock);
1517 
1518 	if (found) {
1519 		ip6mr_cache_resolve(net, mrt, uc, c);
1520 		ip6mr_cache_free(uc);
1521 	}
1522 	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
1523 				       c, mrt->id);
1524 	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1525 	return 0;
1526 }
1527 
1528 /*
1529  *	Close the multicast socket, and clear the vif tables etc
1530  */
1531 
1532 static void mroute_clean_tables(struct mr_table *mrt, int flags)
1533 {
1534 	struct mr_mfc *c, *tmp;
1535 	LIST_HEAD(list);
1536 	int i;
1537 
1538 	/* Shut down all active vif entries */
1539 	if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
1540 		for (i = 0; i < mrt->maxvif; i++) {
1541 			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
1542 			     !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
1543 			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
1544 				continue;
1545 			mif6_delete(mrt, i, 0, &list);
1546 		}
1547 		unregister_netdevice_many(&list);
1548 	}
1549 
1550 	/* Wipe the cache */
1551 	if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
1552 		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1553 			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
1554 			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
1555 				continue;
1556 			rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1557 			list_del_rcu(&c->list);
1558 			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1559 						       FIB_EVENT_ENTRY_DEL,
1560 						       (struct mfc6_cache *)c, mrt->id);
1561 			mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
1562 			mr_cache_put(c);
1563 		}
1564 	}
1565 
1566 	if (flags & MRT6_FLUSH_MFC) {
1567 		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1568 			spin_lock_bh(&mfc_unres_lock);
1569 			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1570 				list_del(&c->list);
1571 				mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1572 						  RTM_DELROUTE);
1573 				ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1574 			}
1575 			spin_unlock_bh(&mfc_unres_lock);
1576 		}
1577 	}
1578 }
1579 
1580 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1581 {
1582 	int err = 0;
1583 	struct net *net = sock_net(sk);
1584 
1585 	rtnl_lock();
1586 	spin_lock(&mrt_lock);
1587 	if (rtnl_dereference(mrt->mroute_sk)) {
1588 		err = -EADDRINUSE;
1589 	} else {
1590 		rcu_assign_pointer(mrt->mroute_sk, sk);
1591 		sock_set_flag(sk, SOCK_RCU_FREE);
1592 		atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
1593 	}
1594 	spin_unlock(&mrt_lock);
1595 
1596 	if (!err)
1597 		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1598 					     NETCONFA_MC_FORWARDING,
1599 					     NETCONFA_IFINDEX_ALL,
1600 					     net->ipv6.devconf_all);
1601 	rtnl_unlock();
1602 
1603 	return err;
1604 }
1605 
1606 int ip6mr_sk_done(struct sock *sk)
1607 {
1608 	struct net *net = sock_net(sk);
1609 	struct ipv6_devconf *devconf;
1610 	struct mr_table *mrt;
1611 	int err = -EACCES;
1612 
1613 	if (sk->sk_type != SOCK_RAW ||
1614 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1615 		return err;
1616 
1617 	devconf = net->ipv6.devconf_all;
1618 	if (!devconf || !atomic_read(&devconf->mc_forwarding))
1619 		return err;
1620 
1621 	rtnl_lock();
1622 	ip6mr_for_each_table(mrt, net) {
1623 		if (sk == rtnl_dereference(mrt->mroute_sk)) {
1624 			spin_lock(&mrt_lock);
1625 			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1626 			/* Note that mroute_sk had SOCK_RCU_FREE set,
1627 			 * so the RCU grace period before sk freeing
1628 			 * is guaranteed by sk_destruct()
1629 			 */
1630 			atomic_dec(&devconf->mc_forwarding);
1631 			spin_unlock(&mrt_lock);
1632 			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1633 						     NETCONFA_MC_FORWARDING,
1634 						     NETCONFA_IFINDEX_ALL,
1635 						     net->ipv6.devconf_all);
1636 
1637 			mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC);
1638 			err = 0;
1639 			break;
1640 		}
1641 	}
1642 	rtnl_unlock();
1643 
1644 	return err;
1645 }
1646 
1647 bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1648 {
1649 	struct mr_table *mrt;
1650 	struct flowi6 fl6 = {
1651 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
1652 		.flowi6_oif	= skb->dev->ifindex,
1653 		.flowi6_mark	= skb->mark,
1654 	};
1655 
1656 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1657 		return NULL;
1658 
1659 	return rcu_access_pointer(mrt->mroute_sk);
1660 }
1661 EXPORT_SYMBOL(mroute6_is_socket);
1662 
1663 /*
1664  *	Socket options and virtual interface manipulation. The whole
1665  *	virtual interface system is a complete heap, but unfortunately
1666  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1667  *	MOSPF/PIM router set up we can clean this up.
1668  */
1669 
1670 int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
1671 			  unsigned int optlen)
1672 {
1673 	int ret, parent = 0;
1674 	struct mif6ctl vif;
1675 	struct mf6cctl mfc;
1676 	mifi_t mifi;
1677 	struct net *net = sock_net(sk);
1678 	struct mr_table *mrt;
1679 
1680 	if (sk->sk_type != SOCK_RAW ||
1681 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1682 		return -EOPNOTSUPP;
1683 
1684 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1685 	if (!mrt)
1686 		return -ENOENT;
1687 
1688 	if (optname != MRT6_INIT) {
1689 		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1690 		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
1691 			return -EACCES;
1692 	}
1693 
1694 	switch (optname) {
1695 	case MRT6_INIT:
1696 		if (optlen < sizeof(int))
1697 			return -EINVAL;
1698 
1699 		return ip6mr_sk_init(mrt, sk);
1700 
1701 	case MRT6_DONE:
1702 		return ip6mr_sk_done(sk);
1703 
1704 	case MRT6_ADD_MIF:
1705 		if (optlen < sizeof(vif))
1706 			return -EINVAL;
1707 		if (copy_from_sockptr(&vif, optval, sizeof(vif)))
1708 			return -EFAULT;
1709 		if (vif.mif6c_mifi >= MAXMIFS)
1710 			return -ENFILE;
1711 		rtnl_lock();
1712 		ret = mif6_add(net, mrt, &vif,
1713 			       sk == rtnl_dereference(mrt->mroute_sk));
1714 		rtnl_unlock();
1715 		return ret;
1716 
1717 	case MRT6_DEL_MIF:
1718 		if (optlen < sizeof(mifi_t))
1719 			return -EINVAL;
1720 		if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t)))
1721 			return -EFAULT;
1722 		rtnl_lock();
1723 		ret = mif6_delete(mrt, mifi, 0, NULL);
1724 		rtnl_unlock();
1725 		return ret;
1726 
1727 	/*
1728 	 *	Manipulate the forwarding caches. These live
1729 	 *	in a sort of kernel/user symbiosis.
1730 	 */
1731 	case MRT6_ADD_MFC:
1732 	case MRT6_DEL_MFC:
1733 		parent = -1;
1734 		fallthrough;
1735 	case MRT6_ADD_MFC_PROXY:
1736 	case MRT6_DEL_MFC_PROXY:
1737 		if (optlen < sizeof(mfc))
1738 			return -EINVAL;
1739 		if (copy_from_sockptr(&mfc, optval, sizeof(mfc)))
1740 			return -EFAULT;
1741 		if (parent == 0)
1742 			parent = mfc.mf6cc_parent;
1743 		rtnl_lock();
1744 		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1745 			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1746 		else
1747 			ret = ip6mr_mfc_add(net, mrt, &mfc,
1748 					    sk ==
1749 					    rtnl_dereference(mrt->mroute_sk),
1750 					    parent);
1751 		rtnl_unlock();
1752 		return ret;
1753 
1754 	case MRT6_FLUSH:
1755 	{
1756 		int flags;
1757 
1758 		if (optlen != sizeof(flags))
1759 			return -EINVAL;
1760 		if (copy_from_sockptr(&flags, optval, sizeof(flags)))
1761 			return -EFAULT;
1762 		rtnl_lock();
1763 		mroute_clean_tables(mrt, flags);
1764 		rtnl_unlock();
1765 		return 0;
1766 	}
1767 
1768 	/*
1769 	 *	Control PIM assert (to activate pim will activate assert)
1770 	 */
1771 	case MRT6_ASSERT:
1772 	{
1773 		int v;
1774 
1775 		if (optlen != sizeof(v))
1776 			return -EINVAL;
1777 		if (copy_from_sockptr(&v, optval, sizeof(v)))
1778 			return -EFAULT;
1779 		mrt->mroute_do_assert = v;
1780 		return 0;
1781 	}
1782 
1783 #ifdef CONFIG_IPV6_PIMSM_V2
1784 	case MRT6_PIM:
1785 	{
1786 		bool do_wrmifwhole;
1787 		int v;
1788 
1789 		if (optlen != sizeof(v))
1790 			return -EINVAL;
1791 		if (copy_from_sockptr(&v, optval, sizeof(v)))
1792 			return -EFAULT;
1793 
1794 		do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE);
1795 		v = !!v;
1796 		rtnl_lock();
1797 		ret = 0;
1798 		if (v != mrt->mroute_do_pim) {
1799 			mrt->mroute_do_pim = v;
1800 			mrt->mroute_do_assert = v;
1801 			mrt->mroute_do_wrvifwhole = do_wrmifwhole;
1802 		}
1803 		rtnl_unlock();
1804 		return ret;
1805 	}
1806 
1807 #endif
1808 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1809 	case MRT6_TABLE:
1810 	{
1811 		u32 v;
1812 
1813 		if (optlen != sizeof(u32))
1814 			return -EINVAL;
1815 		if (copy_from_sockptr(&v, optval, sizeof(v)))
1816 			return -EFAULT;
1817 		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1818 		if (v != RT_TABLE_DEFAULT && v >= 100000000)
1819 			return -EINVAL;
1820 		if (sk == rcu_access_pointer(mrt->mroute_sk))
1821 			return -EBUSY;
1822 
1823 		rtnl_lock();
1824 		ret = 0;
1825 		mrt = ip6mr_new_table(net, v);
1826 		if (IS_ERR(mrt))
1827 			ret = PTR_ERR(mrt);
1828 		else
1829 			raw6_sk(sk)->ip6mr_table = v;
1830 		rtnl_unlock();
1831 		return ret;
1832 	}
1833 #endif
1834 	/*
1835 	 *	Spurious command, or MRT6_VERSION which you cannot
1836 	 *	set.
1837 	 */
1838 	default:
1839 		return -ENOPROTOOPT;
1840 	}
1841 }
1842 
1843 /*
1844  *	Getsock opt support for the multicast routing system.
1845  */
1846 
1847 int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
1848 			  sockptr_t optlen)
1849 {
1850 	int olr;
1851 	int val;
1852 	struct net *net = sock_net(sk);
1853 	struct mr_table *mrt;
1854 
1855 	if (sk->sk_type != SOCK_RAW ||
1856 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1857 		return -EOPNOTSUPP;
1858 
1859 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1860 	if (!mrt)
1861 		return -ENOENT;
1862 
1863 	switch (optname) {
1864 	case MRT6_VERSION:
1865 		val = 0x0305;
1866 		break;
1867 #ifdef CONFIG_IPV6_PIMSM_V2
1868 	case MRT6_PIM:
1869 		val = mrt->mroute_do_pim;
1870 		break;
1871 #endif
1872 	case MRT6_ASSERT:
1873 		val = mrt->mroute_do_assert;
1874 		break;
1875 	default:
1876 		return -ENOPROTOOPT;
1877 	}
1878 
1879 	if (copy_from_sockptr(&olr, optlen, sizeof(int)))
1880 		return -EFAULT;
1881 
1882 	olr = min_t(int, olr, sizeof(int));
1883 	if (olr < 0)
1884 		return -EINVAL;
1885 
1886 	if (copy_to_sockptr(optlen, &olr, sizeof(int)))
1887 		return -EFAULT;
1888 	if (copy_to_sockptr(optval, &val, olr))
1889 		return -EFAULT;
1890 	return 0;
1891 }
1892 
1893 /*
1894  *	The IP multicast ioctl support routines.
1895  */
1896 int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
1897 {
1898 	struct sioc_sg_req6 *sr;
1899 	struct sioc_mif_req6 *vr;
1900 	struct vif_device *vif;
1901 	struct mfc6_cache *c;
1902 	struct net *net = sock_net(sk);
1903 	struct mr_table *mrt;
1904 
1905 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1906 	if (!mrt)
1907 		return -ENOENT;
1908 
1909 	switch (cmd) {
1910 	case SIOCGETMIFCNT_IN6:
1911 		vr = (struct sioc_mif_req6 *)arg;
1912 		if (vr->mifi >= mrt->maxvif)
1913 			return -EINVAL;
1914 		vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif);
1915 		rcu_read_lock();
1916 		vif = &mrt->vif_table[vr->mifi];
1917 		if (VIF_EXISTS(mrt, vr->mifi)) {
1918 			vr->icount = READ_ONCE(vif->pkt_in);
1919 			vr->ocount = READ_ONCE(vif->pkt_out);
1920 			vr->ibytes = READ_ONCE(vif->bytes_in);
1921 			vr->obytes = READ_ONCE(vif->bytes_out);
1922 			rcu_read_unlock();
1923 			return 0;
1924 		}
1925 		rcu_read_unlock();
1926 		return -EADDRNOTAVAIL;
1927 	case SIOCGETSGCNT_IN6:
1928 		sr = (struct sioc_sg_req6 *)arg;
1929 
1930 		rcu_read_lock();
1931 		c = ip6mr_cache_find(mrt, &sr->src.sin6_addr,
1932 				     &sr->grp.sin6_addr);
1933 		if (c) {
1934 			sr->pktcnt = c->_c.mfc_un.res.pkt;
1935 			sr->bytecnt = c->_c.mfc_un.res.bytes;
1936 			sr->wrong_if = c->_c.mfc_un.res.wrong_if;
1937 			rcu_read_unlock();
1938 			return 0;
1939 		}
1940 		rcu_read_unlock();
1941 		return -EADDRNOTAVAIL;
1942 	default:
1943 		return -ENOIOCTLCMD;
1944 	}
1945 }
1946 
1947 #ifdef CONFIG_COMPAT
1948 struct compat_sioc_sg_req6 {
1949 	struct sockaddr_in6 src;
1950 	struct sockaddr_in6 grp;
1951 	compat_ulong_t pktcnt;
1952 	compat_ulong_t bytecnt;
1953 	compat_ulong_t wrong_if;
1954 };
1955 
1956 struct compat_sioc_mif_req6 {
1957 	mifi_t	mifi;
1958 	compat_ulong_t icount;
1959 	compat_ulong_t ocount;
1960 	compat_ulong_t ibytes;
1961 	compat_ulong_t obytes;
1962 };
1963 
1964 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1965 {
1966 	struct compat_sioc_sg_req6 sr;
1967 	struct compat_sioc_mif_req6 vr;
1968 	struct vif_device *vif;
1969 	struct mfc6_cache *c;
1970 	struct net *net = sock_net(sk);
1971 	struct mr_table *mrt;
1972 
1973 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1974 	if (!mrt)
1975 		return -ENOENT;
1976 
1977 	switch (cmd) {
1978 	case SIOCGETMIFCNT_IN6:
1979 		if (copy_from_user(&vr, arg, sizeof(vr)))
1980 			return -EFAULT;
1981 		if (vr.mifi >= mrt->maxvif)
1982 			return -EINVAL;
1983 		vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1984 		rcu_read_lock();
1985 		vif = &mrt->vif_table[vr.mifi];
1986 		if (VIF_EXISTS(mrt, vr.mifi)) {
1987 			vr.icount = READ_ONCE(vif->pkt_in);
1988 			vr.ocount = READ_ONCE(vif->pkt_out);
1989 			vr.ibytes = READ_ONCE(vif->bytes_in);
1990 			vr.obytes = READ_ONCE(vif->bytes_out);
1991 			rcu_read_unlock();
1992 
1993 			if (copy_to_user(arg, &vr, sizeof(vr)))
1994 				return -EFAULT;
1995 			return 0;
1996 		}
1997 		rcu_read_unlock();
1998 		return -EADDRNOTAVAIL;
1999 	case SIOCGETSGCNT_IN6:
2000 		if (copy_from_user(&sr, arg, sizeof(sr)))
2001 			return -EFAULT;
2002 
2003 		rcu_read_lock();
2004 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
2005 		if (c) {
2006 			sr.pktcnt = c->_c.mfc_un.res.pkt;
2007 			sr.bytecnt = c->_c.mfc_un.res.bytes;
2008 			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
2009 			rcu_read_unlock();
2010 
2011 			if (copy_to_user(arg, &sr, sizeof(sr)))
2012 				return -EFAULT;
2013 			return 0;
2014 		}
2015 		rcu_read_unlock();
2016 		return -EADDRNOTAVAIL;
2017 	default:
2018 		return -ENOIOCTLCMD;
2019 	}
2020 }
2021 #endif
2022 
2023 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
2024 {
2025 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
2026 		      IPSTATS_MIB_OUTFORWDATAGRAMS);
2027 	return dst_output(net, sk, skb);
2028 }
2029 
2030 /*
2031  *	Processing handlers for ip6mr_forward
2032  */
2033 
2034 static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
2035 			  struct sk_buff *skb, int vifi)
2036 {
2037 	struct vif_device *vif = &mrt->vif_table[vifi];
2038 	struct net_device *vif_dev;
2039 	struct ipv6hdr *ipv6h;
2040 	struct dst_entry *dst;
2041 	struct flowi6 fl6;
2042 
2043 	vif_dev = vif_dev_read(vif);
2044 	if (!vif_dev)
2045 		goto out_free;
2046 
2047 #ifdef CONFIG_IPV6_PIMSM_V2
2048 	if (vif->flags & MIFF_REGISTER) {
2049 		WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
2050 		WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
2051 		DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
2052 		DEV_STATS_INC(vif_dev, tx_packets);
2053 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2054 		goto out_free;
2055 	}
2056 #endif
2057 
2058 	ipv6h = ipv6_hdr(skb);
2059 
2060 	fl6 = (struct flowi6) {
2061 		.flowi6_oif = vif->link,
2062 		.daddr = ipv6h->daddr,
2063 	};
2064 
2065 	dst = ip6_route_output(net, NULL, &fl6);
2066 	if (dst->error) {
2067 		dst_release(dst);
2068 		goto out_free;
2069 	}
2070 
2071 	skb_dst_drop(skb);
2072 	skb_dst_set(skb, dst);
2073 
2074 	/*
2075 	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2076 	 * not only before forwarding, but after forwarding on all output
2077 	 * interfaces. It is clear, if mrouter runs a multicasting
2078 	 * program, it should receive packets not depending to what interface
2079 	 * program is joined.
2080 	 * If we will not make it, the program will have to join on all
2081 	 * interfaces. On the other hand, multihoming host (or router, but
2082 	 * not mrouter) cannot join to more than one interface - it will
2083 	 * result in receiving multiple packets.
2084 	 */
2085 	skb->dev = vif_dev;
2086 	WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
2087 	WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
2088 
2089 	/* We are about to write */
2090 	/* XXX: extension headers? */
2091 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev)))
2092 		goto out_free;
2093 
2094 	ipv6h = ipv6_hdr(skb);
2095 	ipv6h->hop_limit--;
2096 
2097 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2098 
2099 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2100 		       net, NULL, skb, skb->dev, vif_dev,
2101 		       ip6mr_forward2_finish);
2102 
2103 out_free:
2104 	kfree_skb(skb);
2105 	return 0;
2106 }
2107 
2108 /* Called with rcu_read_lock() */
2109 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2110 {
2111 	int ct;
2112 
2113 	/* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */
2114 	for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
2115 		if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
2116 			break;
2117 	}
2118 	return ct;
2119 }
2120 
2121 /* Called under rcu_read_lock() */
2122 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2123 			   struct net_device *dev, struct sk_buff *skb,
2124 			   struct mfc6_cache *c)
2125 {
2126 	int psend = -1;
2127 	int vif, ct;
2128 	int true_vifi = ip6mr_find_vif(mrt, dev);
2129 
2130 	vif = c->_c.mfc_parent;
2131 	c->_c.mfc_un.res.pkt++;
2132 	c->_c.mfc_un.res.bytes += skb->len;
2133 	c->_c.mfc_un.res.lastuse = jiffies;
2134 
2135 	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2136 		struct mfc6_cache *cache_proxy;
2137 
2138 		/* For an (*,G) entry, we only check that the incoming
2139 		 * interface is part of the static tree.
2140 		 */
2141 		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2142 		if (cache_proxy &&
2143 		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
2144 			goto forward;
2145 	}
2146 
2147 	/*
2148 	 * Wrong interface: drop packet and (maybe) send PIM assert.
2149 	 */
2150 	if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
2151 		c->_c.mfc_un.res.wrong_if++;
2152 
2153 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2154 		    /* pimsm uses asserts, when switching from RPT to SPT,
2155 		       so that we cannot check that packet arrived on an oif.
2156 		       It is bad, but otherwise we would need to move pretty
2157 		       large chunk of pimd to kernel. Ough... --ANK
2158 		     */
2159 		    (mrt->mroute_do_pim ||
2160 		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2161 		    time_after(jiffies,
2162 			       c->_c.mfc_un.res.last_assert +
2163 			       MFC_ASSERT_THRESH)) {
2164 			c->_c.mfc_un.res.last_assert = jiffies;
2165 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2166 			if (mrt->mroute_do_wrvifwhole)
2167 				ip6mr_cache_report(mrt, skb, true_vifi,
2168 						   MRT6MSG_WRMIFWHOLE);
2169 		}
2170 		goto dont_forward;
2171 	}
2172 
2173 forward:
2174 	WRITE_ONCE(mrt->vif_table[vif].pkt_in,
2175 		   mrt->vif_table[vif].pkt_in + 1);
2176 	WRITE_ONCE(mrt->vif_table[vif].bytes_in,
2177 		   mrt->vif_table[vif].bytes_in + skb->len);
2178 
2179 	/*
2180 	 *	Forward the frame
2181 	 */
2182 	if (ipv6_addr_any(&c->mf6c_origin) &&
2183 	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
2184 		if (true_vifi >= 0 &&
2185 		    true_vifi != c->_c.mfc_parent &&
2186 		    ipv6_hdr(skb)->hop_limit >
2187 				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2188 			/* It's an (*,*) entry and the packet is not coming from
2189 			 * the upstream: forward the packet to the upstream
2190 			 * only.
2191 			 */
2192 			psend = c->_c.mfc_parent;
2193 			goto last_forward;
2194 		}
2195 		goto dont_forward;
2196 	}
2197 	for (ct = c->_c.mfc_un.res.maxvif - 1;
2198 	     ct >= c->_c.mfc_un.res.minvif; ct--) {
2199 		/* For (*,G) entry, don't forward to the incoming interface */
2200 		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
2201 		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2202 			if (psend != -1) {
2203 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2204 				if (skb2)
2205 					ip6mr_forward2(net, mrt, skb2, psend);
2206 			}
2207 			psend = ct;
2208 		}
2209 	}
2210 last_forward:
2211 	if (psend != -1) {
2212 		ip6mr_forward2(net, mrt, skb, psend);
2213 		return;
2214 	}
2215 
2216 dont_forward:
2217 	kfree_skb(skb);
2218 }
2219 
2220 
2221 /*
2222  *	Multicast packets for forwarding arrive here
2223  */
2224 
2225 int ip6_mr_input(struct sk_buff *skb)
2226 {
2227 	struct mfc6_cache *cache;
2228 	struct net *net = dev_net(skb->dev);
2229 	struct mr_table *mrt;
2230 	struct flowi6 fl6 = {
2231 		.flowi6_iif	= skb->dev->ifindex,
2232 		.flowi6_mark	= skb->mark,
2233 	};
2234 	int err;
2235 	struct net_device *dev;
2236 
2237 	/* skb->dev passed in is the master dev for vrfs.
2238 	 * Get the proper interface that does have a vif associated with it.
2239 	 */
2240 	dev = skb->dev;
2241 	if (netif_is_l3_master(skb->dev)) {
2242 		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2243 		if (!dev) {
2244 			kfree_skb(skb);
2245 			return -ENODEV;
2246 		}
2247 	}
2248 
2249 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2250 	if (err < 0) {
2251 		kfree_skb(skb);
2252 		return err;
2253 	}
2254 
2255 	cache = ip6mr_cache_find(mrt,
2256 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2257 	if (!cache) {
2258 		int vif = ip6mr_find_vif(mrt, dev);
2259 
2260 		if (vif >= 0)
2261 			cache = ip6mr_cache_find_any(mrt,
2262 						     &ipv6_hdr(skb)->daddr,
2263 						     vif);
2264 	}
2265 
2266 	/*
2267 	 *	No usable cache entry
2268 	 */
2269 	if (!cache) {
2270 		int vif;
2271 
2272 		vif = ip6mr_find_vif(mrt, dev);
2273 		if (vif >= 0) {
2274 			int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
2275 
2276 			return err;
2277 		}
2278 		kfree_skb(skb);
2279 		return -ENODEV;
2280 	}
2281 
2282 	ip6_mr_forward(net, mrt, dev, skb, cache);
2283 
2284 	return 0;
2285 }
2286 
2287 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2288 		    u32 portid)
2289 {
2290 	int err;
2291 	struct mr_table *mrt;
2292 	struct mfc6_cache *cache;
2293 	struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
2294 
2295 	rcu_read_lock();
2296 	mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
2297 	if (!mrt) {
2298 		rcu_read_unlock();
2299 		return -ENOENT;
2300 	}
2301 
2302 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2303 	if (!cache && skb->dev) {
2304 		int vif = ip6mr_find_vif(mrt, skb->dev);
2305 
2306 		if (vif >= 0)
2307 			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2308 						     vif);
2309 	}
2310 
2311 	if (!cache) {
2312 		struct sk_buff *skb2;
2313 		struct ipv6hdr *iph;
2314 		struct net_device *dev;
2315 		int vif;
2316 
2317 		dev = skb->dev;
2318 		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2319 			rcu_read_unlock();
2320 			return -ENODEV;
2321 		}
2322 
2323 		/* really correct? */
2324 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2325 		if (!skb2) {
2326 			rcu_read_unlock();
2327 			return -ENOMEM;
2328 		}
2329 
2330 		NETLINK_CB(skb2).portid = portid;
2331 		skb_reset_transport_header(skb2);
2332 
2333 		skb_put(skb2, sizeof(struct ipv6hdr));
2334 		skb_reset_network_header(skb2);
2335 
2336 		iph = ipv6_hdr(skb2);
2337 		iph->version = 0;
2338 		iph->priority = 0;
2339 		iph->flow_lbl[0] = 0;
2340 		iph->flow_lbl[1] = 0;
2341 		iph->flow_lbl[2] = 0;
2342 		iph->payload_len = 0;
2343 		iph->nexthdr = IPPROTO_NONE;
2344 		iph->hop_limit = 0;
2345 		iph->saddr = rt->rt6i_src.addr;
2346 		iph->daddr = rt->rt6i_dst.addr;
2347 
2348 		err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
2349 		rcu_read_unlock();
2350 
2351 		return err;
2352 	}
2353 
2354 	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2355 	rcu_read_unlock();
2356 	return err;
2357 }
2358 
2359 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2360 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2361 			     int flags)
2362 {
2363 	struct nlmsghdr *nlh;
2364 	struct rtmsg *rtm;
2365 	int err;
2366 
2367 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2368 	if (!nlh)
2369 		return -EMSGSIZE;
2370 
2371 	rtm = nlmsg_data(nlh);
2372 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2373 	rtm->rtm_dst_len  = 128;
2374 	rtm->rtm_src_len  = 128;
2375 	rtm->rtm_tos      = 0;
2376 	rtm->rtm_table    = mrt->id;
2377 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2378 		goto nla_put_failure;
2379 	rtm->rtm_type = RTN_MULTICAST;
2380 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2381 	if (c->_c.mfc_flags & MFC_STATIC)
2382 		rtm->rtm_protocol = RTPROT_STATIC;
2383 	else
2384 		rtm->rtm_protocol = RTPROT_MROUTED;
2385 	rtm->rtm_flags    = 0;
2386 
2387 	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2388 	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2389 		goto nla_put_failure;
2390 	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2391 	/* do not break the dump if cache is unresolved */
2392 	if (err < 0 && err != -ENOENT)
2393 		goto nla_put_failure;
2394 
2395 	nlmsg_end(skb, nlh);
2396 	return 0;
2397 
2398 nla_put_failure:
2399 	nlmsg_cancel(skb, nlh);
2400 	return -EMSGSIZE;
2401 }
2402 
2403 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2404 			      u32 portid, u32 seq, struct mr_mfc *c,
2405 			      int cmd, int flags)
2406 {
2407 	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2408 				 cmd, flags);
2409 }
2410 
2411 static int mr6_msgsize(bool unresolved, int maxvif)
2412 {
2413 	size_t len =
2414 		NLMSG_ALIGN(sizeof(struct rtmsg))
2415 		+ nla_total_size(4)	/* RTA_TABLE */
2416 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2417 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2418 		;
2419 
2420 	if (!unresolved)
2421 		len = len
2422 		      + nla_total_size(4)	/* RTA_IIF */
2423 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2424 		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2425 						/* RTA_MFC_STATS */
2426 		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2427 		;
2428 
2429 	return len;
2430 }
2431 
2432 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2433 			      int cmd)
2434 {
2435 	struct net *net = read_pnet(&mrt->net);
2436 	struct sk_buff *skb;
2437 	int err = -ENOBUFS;
2438 
2439 	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2440 			GFP_ATOMIC);
2441 	if (!skb)
2442 		goto errout;
2443 
2444 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2445 	if (err < 0)
2446 		goto errout;
2447 
2448 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2449 	return;
2450 
2451 errout:
2452 	kfree_skb(skb);
2453 	if (err < 0)
2454 		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2455 }
2456 
2457 static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2458 {
2459 	size_t len =
2460 		NLMSG_ALIGN(sizeof(struct rtgenmsg))
2461 		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
2462 		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
2463 					/* IP6MRA_CREPORT_SRC_ADDR */
2464 		+ nla_total_size(sizeof(struct in6_addr))
2465 					/* IP6MRA_CREPORT_DST_ADDR */
2466 		+ nla_total_size(sizeof(struct in6_addr))
2467 					/* IP6MRA_CREPORT_PKT */
2468 		+ nla_total_size(payloadlen)
2469 		;
2470 
2471 	return len;
2472 }
2473 
2474 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
2475 {
2476 	struct net *net = read_pnet(&mrt->net);
2477 	struct nlmsghdr *nlh;
2478 	struct rtgenmsg *rtgenm;
2479 	struct mrt6msg *msg;
2480 	struct sk_buff *skb;
2481 	struct nlattr *nla;
2482 	int payloadlen;
2483 
2484 	payloadlen = pkt->len - sizeof(struct mrt6msg);
2485 	msg = (struct mrt6msg *)skb_transport_header(pkt);
2486 
2487 	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2488 	if (!skb)
2489 		goto errout;
2490 
2491 	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2492 			sizeof(struct rtgenmsg), 0);
2493 	if (!nlh)
2494 		goto errout;
2495 	rtgenm = nlmsg_data(nlh);
2496 	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2497 	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2498 	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2499 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2500 			     &msg->im6_src) ||
2501 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2502 			     &msg->im6_dst))
2503 		goto nla_put_failure;
2504 
2505 	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2506 	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2507 				  nla_data(nla), payloadlen))
2508 		goto nla_put_failure;
2509 
2510 	nlmsg_end(skb, nlh);
2511 
2512 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2513 	return;
2514 
2515 nla_put_failure:
2516 	nlmsg_cancel(skb, nlh);
2517 errout:
2518 	kfree_skb(skb);
2519 	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2520 }
2521 
2522 static const struct nla_policy ip6mr_getroute_policy[RTA_MAX + 1] = {
2523 	[RTA_SRC]		= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2524 	[RTA_DST]		= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2525 	[RTA_TABLE]		= { .type = NLA_U32 },
2526 };
2527 
2528 static int ip6mr_rtm_valid_getroute_req(struct sk_buff *skb,
2529 					const struct nlmsghdr *nlh,
2530 					struct nlattr **tb,
2531 					struct netlink_ext_ack *extack)
2532 {
2533 	struct rtmsg *rtm;
2534 	int err;
2535 
2536 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, ip6mr_getroute_policy,
2537 			  extack);
2538 	if (err)
2539 		return err;
2540 
2541 	rtm = nlmsg_data(nlh);
2542 	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
2543 	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
2544 	    rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
2545 	    rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
2546 		NL_SET_ERR_MSG_MOD(extack,
2547 				   "Invalid values in header for multicast route get request");
2548 		return -EINVAL;
2549 	}
2550 
2551 	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
2552 	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
2553 		NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
2554 		return -EINVAL;
2555 	}
2556 
2557 	return 0;
2558 }
2559 
2560 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2561 			      struct netlink_ext_ack *extack)
2562 {
2563 	struct net *net = sock_net(in_skb->sk);
2564 	struct in6_addr src = {}, grp = {};
2565 	struct nlattr *tb[RTA_MAX + 1];
2566 	struct mfc6_cache *cache;
2567 	struct mr_table *mrt;
2568 	struct sk_buff *skb;
2569 	u32 tableid;
2570 	int err;
2571 
2572 	err = ip6mr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
2573 	if (err < 0)
2574 		return err;
2575 
2576 	if (tb[RTA_SRC])
2577 		src = nla_get_in6_addr(tb[RTA_SRC]);
2578 	if (tb[RTA_DST])
2579 		grp = nla_get_in6_addr(tb[RTA_DST]);
2580 	tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
2581 
2582 	mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
2583 	if (!mrt) {
2584 		NL_SET_ERR_MSG_MOD(extack, "MR table does not exist");
2585 		return -ENOENT;
2586 	}
2587 
2588 	/* entries are added/deleted only under RTNL */
2589 	rcu_read_lock();
2590 	cache = ip6mr_cache_find(mrt, &src, &grp);
2591 	rcu_read_unlock();
2592 	if (!cache) {
2593 		NL_SET_ERR_MSG_MOD(extack, "MR cache entry not found");
2594 		return -ENOENT;
2595 	}
2596 
2597 	skb = nlmsg_new(mr6_msgsize(false, mrt->maxvif), GFP_KERNEL);
2598 	if (!skb)
2599 		return -ENOBUFS;
2600 
2601 	err = ip6mr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
2602 				nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0);
2603 	if (err < 0) {
2604 		kfree_skb(skb);
2605 		return err;
2606 	}
2607 
2608 	return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2609 }
2610 
2611 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2612 {
2613 	const struct nlmsghdr *nlh = cb->nlh;
2614 	struct fib_dump_filter filter = {};
2615 	int err;
2616 
2617 	if (cb->strict_check) {
2618 		err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
2619 					    &filter, cb);
2620 		if (err < 0)
2621 			return err;
2622 	}
2623 
2624 	if (filter.table_id) {
2625 		struct mr_table *mrt;
2626 
2627 		mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id);
2628 		if (!mrt) {
2629 			if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
2630 				return skb->len;
2631 
2632 			NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
2633 			return -ENOENT;
2634 		}
2635 		err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
2636 				    &mfc_unres_lock, &filter);
2637 		return skb->len ? : err;
2638 	}
2639 
2640 	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2641 				_ip6mr_fill_mroute, &mfc_unres_lock, &filter);
2642 }
2643