xref: /openbmc/linux/net/ipv6/ip6mr.c (revision 6853f21f764b04e58df5e44629fec1fb8f3cbf2e)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <linux/uaccess.h>
20 #include <linux/types.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
23 #include <linux/timer.h>
24 #include <linux/mm.h>
25 #include <linux/kernel.h>
26 #include <linux/fcntl.h>
27 #include <linux/stat.h>
28 #include <linux/socket.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/inetdevice.h>
32 #include <linux/proc_fs.h>
33 #include <linux/seq_file.h>
34 #include <linux/init.h>
35 #include <linux/slab.h>
36 #include <linux/compat.h>
37 #include <net/protocol.h>
38 #include <linux/skbuff.h>
39 #include <net/sock.h>
40 #include <net/raw.h>
41 #include <linux/notifier.h>
42 #include <linux/if_arp.h>
43 #include <net/checksum.h>
44 #include <net/netlink.h>
45 #include <net/fib_rules.h>
46 
47 #include <net/ipv6.h>
48 #include <net/ip6_route.h>
49 #include <linux/mroute6.h>
50 #include <linux/pim.h>
51 #include <net/addrconf.h>
52 #include <linux/netfilter_ipv6.h>
53 #include <linux/export.h>
54 #include <net/ip6_checksum.h>
55 #include <linux/netconf.h>
56 
57 struct mr6_table {
58 	struct list_head	list;
59 	possible_net_t		net;
60 	u32			id;
61 	struct sock		*mroute6_sk;
62 	struct timer_list	ipmr_expire_timer;
63 	struct list_head	mfc6_unres_queue;
64 	struct list_head	mfc6_cache_array[MFC6_LINES];
65 	struct vif_device	vif6_table[MAXMIFS];
66 	int			maxvif;
67 	atomic_t		cache_resolve_queue_len;
68 	bool			mroute_do_assert;
69 	bool			mroute_do_pim;
70 #ifdef CONFIG_IPV6_PIMSM_V2
71 	int			mroute_reg_vif_num;
72 #endif
73 };
74 
75 struct ip6mr_rule {
76 	struct fib_rule		common;
77 };
78 
79 struct ip6mr_result {
80 	struct mr6_table	*mrt;
81 };
82 
83 /* Big lock, protecting the vif table, mrt cache and mroute socket state.
84    Note that changes are serialized via rtnl_lock.
85  */
86 
87 static DEFINE_RWLOCK(mrt_lock);
88 
89 /*
90  *	Multicast router control variables
91  */
92 
93 #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
94 
95 /* Special spinlock for queue of unresolved entries */
96 static DEFINE_SPINLOCK(mfc_unres_lock);
97 
98 /* We revert to Alan's original scheme.  The hash table of resolved
99    entries is changed only in process context and protected by the
100    weak lock mrt_lock.  The queue of unresolved entries is protected
101    by the strong spinlock mfc_unres_lock.
102 
103    The data path is thus entirely free of exclusive locks.
104  */
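
/* A minimal illustrative sketch of the resulting pattern (uses the
 * helpers defined below; placeholder variables, not an authoritative
 * recipe):
 *
 *	read_lock(&mrt_lock);			// fast path, resolved cache
 *	c = ip6mr_cache_find(mrt, &src, &grp);
 *	...
 *	read_unlock(&mrt_lock);
 *
 *	spin_lock_bh(&mfc_unres_lock);		// unresolved queue, any context
 *	list_add(&c->list, &mrt->mfc6_unres_queue);
 *	spin_unlock_bh(&mfc_unres_lock);
 */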
105 
106 static struct kmem_cache *mrt_cachep __read_mostly;
107 
108 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
109 static void ip6mr_free_table(struct mr6_table *mrt);
110 
111 static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
112 			   struct sk_buff *skb, struct mfc6_cache *cache);
113 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
114 			      mifi_t mifi, int assert);
115 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
116 			       struct mfc6_cache *c, struct rtmsg *rtm);
117 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
118 			      int cmd);
119 static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt);
120 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
121 			       struct netlink_callback *cb);
122 static void mroute_clean_tables(struct mr6_table *mrt, bool all);
123 static void ipmr_expire_process(struct timer_list *t);
124 
125 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
126 #define ip6mr_for_each_table(mrt, net) \
127 	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
128 
129 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
130 {
131 	struct mr6_table *mrt;
132 
133 	ip6mr_for_each_table(mrt, net) {
134 		if (mrt->id == id)
135 			return mrt;
136 	}
137 	return NULL;
138 }
139 
140 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
141 			    struct mr6_table **mrt)
142 {
143 	int err;
144 	struct ip6mr_result res;
145 	struct fib_lookup_arg arg = {
146 		.result = &res,
147 		.flags = FIB_LOOKUP_NOREF,
148 	};
149 
150 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
151 			       flowi6_to_flowi(flp6), 0, &arg);
152 	if (err < 0)
153 		return err;
154 	*mrt = res.mrt;
155 	return 0;
156 }
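
/* Illustrative caller sketch (mirrors pim6_rcv() below): resolve the
 * mr6_table for a received skb from its incoming interface and mark:
 *
 *	struct flowi6 fl6 = {
 *		.flowi6_iif  = skb->dev->ifindex,
 *		.flowi6_mark = skb->mark,
 *	};
 *	struct mr6_table *mrt;
 *
 *	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 *		goto drop;	// no ip6mr rule matched
 */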
157 
158 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
159 			     int flags, struct fib_lookup_arg *arg)
160 {
161 	struct ip6mr_result *res = arg->result;
162 	struct mr6_table *mrt;
163 
164 	switch (rule->action) {
165 	case FR_ACT_TO_TBL:
166 		break;
167 	case FR_ACT_UNREACHABLE:
168 		return -ENETUNREACH;
169 	case FR_ACT_PROHIBIT:
170 		return -EACCES;
171 	case FR_ACT_BLACKHOLE:
172 	default:
173 		return -EINVAL;
174 	}
175 
176 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
177 	if (!mrt)
178 		return -EAGAIN;
179 	res->mrt = mrt;
180 	return 0;
181 }
182 
183 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
184 {
185 	return 1;
186 }
187 
188 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
189 	FRA_GENERIC_POLICY,
190 };
191 
192 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
193 				struct fib_rule_hdr *frh, struct nlattr **tb)
194 {
195 	return 0;
196 }
197 
198 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
199 			      struct nlattr **tb)
200 {
201 	return 1;
202 }
203 
204 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
205 			   struct fib_rule_hdr *frh)
206 {
207 	frh->dst_len = 0;
208 	frh->src_len = 0;
209 	frh->tos     = 0;
210 	return 0;
211 }
212 
213 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
214 	.family		= RTNL_FAMILY_IP6MR,
215 	.rule_size	= sizeof(struct ip6mr_rule),
216 	.addr_size	= sizeof(struct in6_addr),
217 	.action		= ip6mr_rule_action,
218 	.match		= ip6mr_rule_match,
219 	.configure	= ip6mr_rule_configure,
220 	.compare	= ip6mr_rule_compare,
221 	.fill		= ip6mr_rule_fill,
222 	.nlgroup	= RTNLGRP_IPV6_RULE,
223 	.policy		= ip6mr_rule_policy,
224 	.owner		= THIS_MODULE,
225 };
226 
227 static int __net_init ip6mr_rules_init(struct net *net)
228 {
229 	struct fib_rules_ops *ops;
230 	struct mr6_table *mrt;
231 	int err;
232 
233 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
234 	if (IS_ERR(ops))
235 		return PTR_ERR(ops);
236 
237 	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
238 
239 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
240 	if (!mrt) {
241 		err = -ENOMEM;
242 		goto err1;
243 	}
244 
245 	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
246 	if (err < 0)
247 		goto err2;
248 
249 	net->ipv6.mr6_rules_ops = ops;
250 	return 0;
251 
252 err2:
253 	ip6mr_free_table(mrt);
254 err1:
255 	fib_rules_unregister(ops);
256 	return err;
257 }
258 
259 static void __net_exit ip6mr_rules_exit(struct net *net)
260 {
261 	struct mr6_table *mrt, *next;
262 
263 	rtnl_lock();
264 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
265 		list_del(&mrt->list);
266 		ip6mr_free_table(mrt);
267 	}
268 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
269 	rtnl_unlock();
270 }
271 #else
272 #define ip6mr_for_each_table(mrt, net) \
273 	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
274 
275 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
276 {
277 	return net->ipv6.mrt6;
278 }
279 
280 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
281 			    struct mr6_table **mrt)
282 {
283 	*mrt = net->ipv6.mrt6;
284 	return 0;
285 }
286 
287 static int __net_init ip6mr_rules_init(struct net *net)
288 {
289 	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
290 	return net->ipv6.mrt6 ? 0 : -ENOMEM;
291 }
292 
293 static void __net_exit ip6mr_rules_exit(struct net *net)
294 {
295 	rtnl_lock();
296 	ip6mr_free_table(net->ipv6.mrt6);
297 	net->ipv6.mrt6 = NULL;
298 	rtnl_unlock();
299 }
300 #endif
301 
302 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
303 {
304 	struct mr6_table *mrt;
305 	unsigned int i;
306 
307 	mrt = ip6mr_get_table(net, id);
308 	if (mrt)
309 		return mrt;
310 
311 	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
312 	if (!mrt)
313 		return NULL;
314 	mrt->id = id;
315 	write_pnet(&mrt->net, net);
316 
317 	/* Forwarding cache */
318 	for (i = 0; i < MFC6_LINES; i++)
319 		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
320 
321 	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
322 
323 	timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
324 
325 #ifdef CONFIG_IPV6_PIMSM_V2
326 	mrt->mroute_reg_vif_num = -1;
327 #endif
328 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
329 	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
330 #endif
331 	return mrt;
332 }
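
/* Note that ip6mr_new_table() is "find or create": callers use it both
 * to look up an existing table and to instantiate one on first use.
 * Sketch, mirroring the MRT6_TABLE setsockopt path below ("table_id" is
 * a placeholder):
 *
 *	mrt = ip6mr_new_table(net, table_id);
 *	if (!mrt)
 *		return -ENOMEM;
 */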
333 
334 static void ip6mr_free_table(struct mr6_table *mrt)
335 {
336 	del_timer_sync(&mrt->ipmr_expire_timer);
337 	mroute_clean_tables(mrt, true);
338 	kfree(mrt);
339 }
340 
341 #ifdef CONFIG_PROC_FS
342 
343 struct ipmr_mfc_iter {
344 	struct seq_net_private p;
345 	struct mr6_table *mrt;
346 	struct list_head *cache;
347 	int ct;
348 };
349 
350 
351 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
352 					   struct ipmr_mfc_iter *it, loff_t pos)
353 {
354 	struct mr6_table *mrt = it->mrt;
355 	struct mfc6_cache *mfc;
356 
357 	read_lock(&mrt_lock);
358 	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
359 		it->cache = &mrt->mfc6_cache_array[it->ct];
360 		list_for_each_entry(mfc, it->cache, list)
361 			if (pos-- == 0)
362 				return mfc;
363 	}
364 	read_unlock(&mrt_lock);
365 
366 	spin_lock_bh(&mfc_unres_lock);
367 	it->cache = &mrt->mfc6_unres_queue;
368 	list_for_each_entry(mfc, it->cache, list)
369 		if (pos-- == 0)
370 			return mfc;
371 	spin_unlock_bh(&mfc_unres_lock);
372 
373 	it->cache = NULL;
374 	return NULL;
375 }
376 
377 /*
378  *	The /proc interfaces to multicast routing: /proc/ip6_mr_cache and /proc/ip6_mr_vif
379  */
380 
381 struct ipmr_vif_iter {
382 	struct seq_net_private p;
383 	struct mr6_table *mrt;
384 	int ct;
385 };
386 
387 static struct vif_device *ip6mr_vif_seq_idx(struct net *net,
388 					    struct ipmr_vif_iter *iter,
389 					    loff_t pos)
390 {
391 	struct mr6_table *mrt = iter->mrt;
392 
393 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
394 		if (!MIF_EXISTS(mrt, iter->ct))
395 			continue;
396 		if (pos-- == 0)
397 			return &mrt->vif6_table[iter->ct];
398 	}
399 	return NULL;
400 }
401 
402 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
403 	__acquires(mrt_lock)
404 {
405 	struct ipmr_vif_iter *iter = seq->private;
406 	struct net *net = seq_file_net(seq);
407 	struct mr6_table *mrt;
408 
409 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
410 	if (!mrt)
411 		return ERR_PTR(-ENOENT);
412 
413 	iter->mrt = mrt;
414 
415 	read_lock(&mrt_lock);
416 	return *pos ? ip6mr_vif_seq_idx(net, iter, *pos - 1)
417 		: SEQ_START_TOKEN;
418 }
419 
420 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
421 {
422 	struct ipmr_vif_iter *iter = seq->private;
423 	struct net *net = seq_file_net(seq);
424 	struct mr6_table *mrt = iter->mrt;
425 
426 	++*pos;
427 	if (v == SEQ_START_TOKEN)
428 		return ip6mr_vif_seq_idx(net, iter, 0);
429 
430 	while (++iter->ct < mrt->maxvif) {
431 		if (!MIF_EXISTS(mrt, iter->ct))
432 			continue;
433 		return &mrt->vif6_table[iter->ct];
434 	}
435 	return NULL;
436 }
437 
438 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
439 	__releases(mrt_lock)
440 {
441 	read_unlock(&mrt_lock);
442 }
443 
444 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
445 {
446 	struct ipmr_vif_iter *iter = seq->private;
447 	struct mr6_table *mrt = iter->mrt;
448 
449 	if (v == SEQ_START_TOKEN) {
450 		seq_puts(seq,
451 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
452 	} else {
453 		const struct vif_device *vif = v;
454 		const char *name = vif->dev ? vif->dev->name : "none";
455 
456 		seq_printf(seq,
457 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
458 			   vif - mrt->vif6_table,
459 			   name, vif->bytes_in, vif->pkt_in,
460 			   vif->bytes_out, vif->pkt_out,
461 			   vif->flags);
462 	}
463 	return 0;
464 }
465 
466 static const struct seq_operations ip6mr_vif_seq_ops = {
467 	.start = ip6mr_vif_seq_start,
468 	.next  = ip6mr_vif_seq_next,
469 	.stop  = ip6mr_vif_seq_stop,
470 	.show  = ip6mr_vif_seq_show,
471 };
472 
473 static int ip6mr_vif_open(struct inode *inode, struct file *file)
474 {
475 	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
476 			    sizeof(struct ipmr_vif_iter));
477 }
478 
479 static const struct file_operations ip6mr_vif_fops = {
480 	.open    = ip6mr_vif_open,
481 	.read    = seq_read,
482 	.llseek  = seq_lseek,
483 	.release = seq_release_net,
484 };
485 
486 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
487 {
488 	struct ipmr_mfc_iter *it = seq->private;
489 	struct net *net = seq_file_net(seq);
490 	struct mr6_table *mrt;
491 
492 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
493 	if (!mrt)
494 		return ERR_PTR(-ENOENT);
495 
496 	it->mrt = mrt;
497 	it->cache = NULL;
498 	return *pos ? ipmr_mfc_seq_idx(net, it, *pos - 1)
499 		: SEQ_START_TOKEN;
500 }
501 
502 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
503 {
504 	struct mfc6_cache *mfc = v;
505 	struct ipmr_mfc_iter *it = seq->private;
506 	struct net *net = seq_file_net(seq);
507 	struct mr6_table *mrt = it->mrt;
508 
509 	++*pos;
510 
511 	if (v == SEQ_START_TOKEN)
512 		return ipmr_mfc_seq_idx(net, it, 0);
513 
514 	if (mfc->list.next != it->cache)
515 		return list_entry(mfc->list.next, struct mfc6_cache, list);
516 
517 	if (it->cache == &mrt->mfc6_unres_queue)
518 		goto end_of_list;
519 
520 	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
521 
522 	while (++it->ct < MFC6_LINES) {
523 		it->cache = &mrt->mfc6_cache_array[it->ct];
524 		if (list_empty(it->cache))
525 			continue;
526 		return list_first_entry(it->cache, struct mfc6_cache, list);
527 	}
528 
529 	/* exhausted cache_array, show unresolved */
530 	read_unlock(&mrt_lock);
531 	it->cache = &mrt->mfc6_unres_queue;
532 	it->ct = 0;
533 
534 	spin_lock_bh(&mfc_unres_lock);
535 	if (!list_empty(it->cache))
536 		return list_first_entry(it->cache, struct mfc6_cache, list);
537 
538  end_of_list:
539 	spin_unlock_bh(&mfc_unres_lock);
540 	it->cache = NULL;
541 
542 	return NULL;
543 }
544 
545 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
546 {
547 	struct ipmr_mfc_iter *it = seq->private;
548 	struct mr6_table *mrt = it->mrt;
549 
550 	if (it->cache == &mrt->mfc6_unres_queue)
551 		spin_unlock_bh(&mfc_unres_lock);
552 	else if (it->cache == &mrt->mfc6_cache_array[it->ct])
553 		read_unlock(&mrt_lock);
554 }
555 
556 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
557 {
558 	int n;
559 
560 	if (v == SEQ_START_TOKEN) {
561 		seq_puts(seq,
562 			 "Group                            "
563 			 "Origin                           "
564 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
565 	} else {
566 		const struct mfc6_cache *mfc = v;
567 		const struct ipmr_mfc_iter *it = seq->private;
568 		struct mr6_table *mrt = it->mrt;
569 
570 		seq_printf(seq, "%pI6 %pI6 %-3hd",
571 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
572 			   mfc->mf6c_parent);
573 
574 		if (it->cache != &mrt->mfc6_unres_queue) {
575 			seq_printf(seq, " %8lu %8lu %8lu",
576 				   mfc->mfc_un.res.pkt,
577 				   mfc->mfc_un.res.bytes,
578 				   mfc->mfc_un.res.wrong_if);
579 			for (n = mfc->mfc_un.res.minvif;
580 			     n < mfc->mfc_un.res.maxvif; n++) {
581 				if (MIF_EXISTS(mrt, n) &&
582 				    mfc->mfc_un.res.ttls[n] < 255)
583 					seq_printf(seq,
584 						   " %2d:%-3d",
585 						   n, mfc->mfc_un.res.ttls[n]);
586 			}
587 		} else {
588 			/* unresolved mfc_caches don't contain
589 			 * pkt, bytes and wrong_if values
590 			 */
591 			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
592 		}
593 		seq_putc(seq, '\n');
594 	}
595 	return 0;
596 }
597 
598 static const struct seq_operations ipmr_mfc_seq_ops = {
599 	.start = ipmr_mfc_seq_start,
600 	.next  = ipmr_mfc_seq_next,
601 	.stop  = ipmr_mfc_seq_stop,
602 	.show  = ipmr_mfc_seq_show,
603 };
604 
605 static int ipmr_mfc_open(struct inode *inode, struct file *file)
606 {
607 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
608 			    sizeof(struct ipmr_mfc_iter));
609 }
610 
611 static const struct file_operations ip6mr_mfc_fops = {
612 	.open    = ipmr_mfc_open,
613 	.read    = seq_read,
614 	.llseek  = seq_lseek,
615 	.release = seq_release_net,
616 };
617 #endif
618 
619 #ifdef CONFIG_IPV6_PIMSM_V2
620 
621 static int pim6_rcv(struct sk_buff *skb)
622 {
623 	struct pimreghdr *pim;
624 	struct ipv6hdr   *encap;
625 	struct net_device  *reg_dev = NULL;
626 	struct net *net = dev_net(skb->dev);
627 	struct mr6_table *mrt;
628 	struct flowi6 fl6 = {
629 		.flowi6_iif	= skb->dev->ifindex,
630 		.flowi6_mark	= skb->mark,
631 	};
632 	int reg_vif_num;
633 
634 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
635 		goto drop;
636 
637 	pim = (struct pimreghdr *)skb_transport_header(skb);
638 	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
639 	    (pim->flags & PIM_NULL_REGISTER) ||
640 	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
641 			     sizeof(*pim), IPPROTO_PIM,
642 			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
643 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
644 		goto drop;
645 
646 	/* check if the inner packet is destined to a multicast group */
647 	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
648 				   sizeof(*pim));
649 
650 	if (!ipv6_addr_is_multicast(&encap->daddr) ||
651 	    encap->payload_len == 0 ||
652 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
653 		goto drop;
654 
655 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
656 		goto drop;
657 	reg_vif_num = mrt->mroute_reg_vif_num;
658 
659 	read_lock(&mrt_lock);
660 	if (reg_vif_num >= 0)
661 		reg_dev = mrt->vif6_table[reg_vif_num].dev;
662 	if (reg_dev)
663 		dev_hold(reg_dev);
664 	read_unlock(&mrt_lock);
665 
666 	if (!reg_dev)
667 		goto drop;
668 
669 	skb->mac_header = skb->network_header;
670 	skb_pull(skb, (u8 *)encap - skb->data);
671 	skb_reset_network_header(skb);
672 	skb->protocol = htons(ETH_P_IPV6);
673 	skb->ip_summed = CHECKSUM_NONE;
674 
675 	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
676 
677 	netif_rx(skb);
678 
679 	dev_put(reg_dev);
680 	return 0;
681  drop:
682 	kfree_skb(skb);
683 	return 0;
684 }
685 
686 static const struct inet6_protocol pim6_protocol = {
687 	.handler	=	pim6_rcv,
688 };
689 
690 /* Service routines creating virtual interfaces: PIMREG */
691 
692 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
693 				      struct net_device *dev)
694 {
695 	struct net *net = dev_net(dev);
696 	struct mr6_table *mrt;
697 	struct flowi6 fl6 = {
698 		.flowi6_oif	= dev->ifindex,
699 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
700 		.flowi6_mark	= skb->mark,
701 	};
702 	int err;
703 
704 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
705 	if (err < 0) {
706 		kfree_skb(skb);
707 		return err;
708 	}
709 
710 	read_lock(&mrt_lock);
711 	dev->stats.tx_bytes += skb->len;
712 	dev->stats.tx_packets++;
713 	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
714 	read_unlock(&mrt_lock);
715 	kfree_skb(skb);
716 	return NETDEV_TX_OK;
717 }
718 
719 static int reg_vif_get_iflink(const struct net_device *dev)
720 {
721 	return 0;
722 }
723 
724 static const struct net_device_ops reg_vif_netdev_ops = {
725 	.ndo_start_xmit	= reg_vif_xmit,
726 	.ndo_get_iflink = reg_vif_get_iflink,
727 };
728 
729 static void reg_vif_setup(struct net_device *dev)
730 {
731 	dev->type		= ARPHRD_PIMREG;
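	/* Ethernet MTU minus the IPv6 header and the 8-byte PIM
	 * register header added by encapsulation.
	 */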
732 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
733 	dev->flags		= IFF_NOARP;
734 	dev->netdev_ops		= &reg_vif_netdev_ops;
735 	dev->needs_free_netdev	= true;
736 	dev->features		|= NETIF_F_NETNS_LOCAL;
737 }
738 
739 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
740 {
741 	struct net_device *dev;
742 	char name[IFNAMSIZ];
743 
744 	if (mrt->id == RT6_TABLE_DFLT)
745 		sprintf(name, "pim6reg");
746 	else
747 		sprintf(name, "pim6reg%u", mrt->id);
748 
749 	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
750 	if (!dev)
751 		return NULL;
752 
753 	dev_net_set(dev, net);
754 
755 	if (register_netdevice(dev)) {
756 		free_netdev(dev);
757 		return NULL;
758 	}
759 
760 	if (dev_open(dev))
761 		goto failure;
762 
763 	dev_hold(dev);
764 	return dev;
765 
766 failure:
767 	unregister_netdevice(dev);
768 	return NULL;
769 }
770 #endif
771 
772 /*
773  *	Delete a VIF entry
774  */
775 
776 static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
777 		       struct list_head *head)
778 {
779 	struct vif_device *v;
780 	struct net_device *dev;
781 	struct inet6_dev *in6_dev;
782 
783 	if (vifi < 0 || vifi >= mrt->maxvif)
784 		return -EADDRNOTAVAIL;
785 
786 	v = &mrt->vif6_table[vifi];
787 
788 	write_lock_bh(&mrt_lock);
789 	dev = v->dev;
790 	v->dev = NULL;
791 
792 	if (!dev) {
793 		write_unlock_bh(&mrt_lock);
794 		return -EADDRNOTAVAIL;
795 	}
796 
797 #ifdef CONFIG_IPV6_PIMSM_V2
798 	if (vifi == mrt->mroute_reg_vif_num)
799 		mrt->mroute_reg_vif_num = -1;
800 #endif
801 
802 	if (vifi + 1 == mrt->maxvif) {
803 		int tmp;
804 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
805 			if (MIF_EXISTS(mrt, tmp))
806 				break;
807 		}
808 		mrt->maxvif = tmp + 1;
809 	}
810 
811 	write_unlock_bh(&mrt_lock);
812 
813 	dev_set_allmulti(dev, -1);
814 
815 	in6_dev = __in6_dev_get(dev);
816 	if (in6_dev) {
817 		in6_dev->cnf.mc_forwarding--;
818 		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
819 					     NETCONFA_MC_FORWARDING,
820 					     dev->ifindex, &in6_dev->cnf);
821 	}
822 
823 	if ((v->flags & MIFF_REGISTER) && !notify)
824 		unregister_netdevice_queue(dev, head);
825 
826 	dev_put(dev);
827 	return 0;
828 }
829 
830 static inline void ip6mr_cache_free(struct mfc6_cache *c)
831 {
832 	kmem_cache_free(mrt_cachep, c);
833 }
834 
835 /* Destroy an unresolved cache entry, killing queued skbs
836    and reporting an error to netlink readers.
837  */
838 
839 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
840 {
841 	struct net *net = read_pnet(&mrt->net);
842 	struct sk_buff *skb;
843 
844 	atomic_dec(&mrt->cache_resolve_queue_len);
845 
846 	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
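		/* An skb with a zero IPv6 version field is a pending
		 * netlink RTM_GETROUTE reply queued by ip6mr_get_route(),
		 * not a real packet; answer it with -ETIMEDOUT.
		 */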
847 		if (ipv6_hdr(skb)->version == 0) {
848 			struct nlmsghdr *nlh = skb_pull(skb,
849 							sizeof(struct ipv6hdr));
850 			nlh->nlmsg_type = NLMSG_ERROR;
851 			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
852 			skb_trim(skb, nlh->nlmsg_len);
853 			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
854 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
855 		} else
856 			kfree_skb(skb);
857 	}
858 
859 	ip6mr_cache_free(c);
860 }
861 
862 
863 /* Timer process for the unresolved queue. */
864 
865 static void ipmr_do_expire_process(struct mr6_table *mrt)
866 {
867 	unsigned long now = jiffies;
868 	unsigned long expires = 10 * HZ;
869 	struct mfc6_cache *c, *next;
870 
871 	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
872 		if (time_after(c->mfc_un.unres.expires, now)) {
873 			/* not yet... */
874 			unsigned long interval = c->mfc_un.unres.expires - now;
875 			if (interval < expires)
876 				expires = interval;
877 			continue;
878 		}
879 
880 		list_del(&c->list);
881 		mr6_netlink_event(mrt, c, RTM_DELROUTE);
882 		ip6mr_destroy_unres(mrt, c);
883 	}
884 
885 	if (!list_empty(&mrt->mfc6_unres_queue))
886 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
887 }
888 
889 static void ipmr_expire_process(struct timer_list *t)
890 {
891 	struct mr6_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
892 
893 	if (!spin_trylock(&mfc_unres_lock)) {
894 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
895 		return;
896 	}
897 
898 	if (!list_empty(&mrt->mfc6_unres_queue))
899 		ipmr_do_expire_process(mrt);
900 
901 	spin_unlock(&mfc_unres_lock);
902 }
903 
904 /* Fill the oif list.  Called with mrt_lock held for writing. */
905 
906 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
907 				    unsigned char *ttls)
908 {
909 	int vifi;
910 
911 	cache->mfc_un.res.minvif = MAXMIFS;
912 	cache->mfc_un.res.maxvif = 0;
913 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
914 
915 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
916 		if (MIF_EXISTS(mrt, vifi) &&
917 		    ttls[vifi] && ttls[vifi] < 255) {
918 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
919 			if (cache->mfc_un.res.minvif > vifi)
920 				cache->mfc_un.res.minvif = vifi;
921 			if (cache->mfc_un.res.maxvif <= vifi)
922 				cache->mfc_un.res.maxvif = vifi + 1;
923 		}
924 	}
925 	cache->mfc_un.res.lastuse = jiffies;
926 }
927 
928 static int mif6_add(struct net *net, struct mr6_table *mrt,
929 		    struct mif6ctl *vifc, int mrtsock)
930 {
931 	int vifi = vifc->mif6c_mifi;
932 	struct vif_device *v = &mrt->vif6_table[vifi];
933 	struct net_device *dev;
934 	struct inet6_dev *in6_dev;
935 	int err;
936 
937 	/* Is the vif busy? */
938 	if (MIF_EXISTS(mrt, vifi))
939 		return -EADDRINUSE;
940 
941 	switch (vifc->mif6c_flags) {
942 #ifdef CONFIG_IPV6_PIMSM_V2
943 	case MIFF_REGISTER:
944 		/*
945 		 * Special-purpose VIF in PIM:
946 		 * all packets will be sent to the daemon.
947 		 */
948 		if (mrt->mroute_reg_vif_num >= 0)
949 			return -EADDRINUSE;
950 		dev = ip6mr_reg_vif(net, mrt);
951 		if (!dev)
952 			return -ENOBUFS;
953 		err = dev_set_allmulti(dev, 1);
954 		if (err) {
955 			unregister_netdevice(dev);
956 			dev_put(dev);
957 			return err;
958 		}
959 		break;
960 #endif
961 	case 0:
962 		dev = dev_get_by_index(net, vifc->mif6c_pifi);
963 		if (!dev)
964 			return -EADDRNOTAVAIL;
965 		err = dev_set_allmulti(dev, 1);
966 		if (err) {
967 			dev_put(dev);
968 			return err;
969 		}
970 		break;
971 	default:
972 		return -EINVAL;
973 	}
974 
975 	in6_dev = __in6_dev_get(dev);
976 	if (in6_dev) {
977 		in6_dev->cnf.mc_forwarding++;
978 		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
979 					     NETCONFA_MC_FORWARDING,
980 					     dev->ifindex, &in6_dev->cnf);
981 	}
982 
983 	/* Fill in the VIF structures */
984 	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
985 			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
986 			MIFF_REGISTER);
987 
988 	/* And finish the update by writing the critical data */
989 	write_lock_bh(&mrt_lock);
990 	v->dev = dev;
991 #ifdef CONFIG_IPV6_PIMSM_V2
992 	if (v->flags & MIFF_REGISTER)
993 		mrt->mroute_reg_vif_num = vifi;
994 #endif
995 	if (vifi + 1 > mrt->maxvif)
996 		mrt->maxvif = vifi + 1;
997 	write_unlock_bh(&mrt_lock);
998 	return 0;
999 }
1000 
1001 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
1002 					   const struct in6_addr *origin,
1003 					   const struct in6_addr *mcastgrp)
1004 {
1005 	int line = MFC6_HASH(mcastgrp, origin);
1006 	struct mfc6_cache *c;
1007 
1008 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1009 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1010 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1011 			return c;
1012 	}
1013 	return NULL;
1014 }
1015 
1016 /* Look for a (*,*,oif) entry */
1017 static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
1018 						      mifi_t mifi)
1019 {
1020 	int line = MFC6_HASH(&in6addr_any, &in6addr_any);
1021 	struct mfc6_cache *c;
1022 
1023 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1024 		if (ipv6_addr_any(&c->mf6c_origin) &&
1025 		    ipv6_addr_any(&c->mf6c_mcastgrp) &&
1026 		    (c->mfc_un.res.ttls[mifi] < 255))
1027 			return c;
1028 
1029 	return NULL;
1030 }
1031 
1032 /* Look for a (*,G) entry */
1033 static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
1034 					       struct in6_addr *mcastgrp,
1035 					       mifi_t mifi)
1036 {
1037 	int line = MFC6_HASH(mcastgrp, &in6addr_any);
1038 	struct mfc6_cache *c, *proxy;
1039 
1040 	if (ipv6_addr_any(mcastgrp))
1041 		goto skip;
1042 
1043 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1044 		if (ipv6_addr_any(&c->mf6c_origin) &&
1045 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
1046 			if (c->mfc_un.res.ttls[mifi] < 255)
1047 				return c;
1048 
1049 			/* It's ok if the mifi is part of the static tree */
1050 			proxy = ip6mr_cache_find_any_parent(mrt,
1051 							    c->mf6c_parent);
1052 			if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
1053 				return c;
1054 		}
1055 
1056 skip:
1057 	return ip6mr_cache_find_any_parent(mrt, mifi);
1058 }
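
/* Lookup precedence on the input path (sketch; the actual caller is
 * ip6_mr_input()): an exact (S,G) entry wins, then a (*,G) entry
 * permitted on this mif, then the (*,*) wildcard:
 *
 *	c = ip6mr_cache_find(mrt, &hdr->saddr, &hdr->daddr);
 *	if (!c)
 *		c = ip6mr_cache_find_any(mrt, &hdr->daddr, mifi);
 */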
1059 
1060 /*
1061  *	Allocate a multicast cache entry
1062  */
1063 static struct mfc6_cache *ip6mr_cache_alloc(void)
1064 {
1065 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1066 	if (!c)
1067 		return NULL;
1068 	c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
1069 	c->mfc_un.res.minvif = MAXMIFS;
1070 	return c;
1071 }
1072 
1073 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1074 {
1075 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1076 	if (!c)
1077 		return NULL;
1078 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
1079 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
1080 	return c;
1081 }
1082 
1083 /*
1084  *	A cache entry has gone from the queued to the resolved state
1085  */
1086 
1087 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1088 				struct mfc6_cache *uc, struct mfc6_cache *c)
1089 {
1090 	struct sk_buff *skb;
1091 
1092 	/*
1093 	 *	Play the pending entries through our router
1094 	 */
1095 
1096 	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1097 		if (ipv6_hdr(skb)->version == 0) {
1098 			struct nlmsghdr *nlh = skb_pull(skb,
1099 							sizeof(struct ipv6hdr));
1100 
1101 			if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
1102 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1103 			} else {
1104 				nlh->nlmsg_type = NLMSG_ERROR;
1105 				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1106 				skb_trim(skb, nlh->nlmsg_len);
1107 				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1108 			}
1109 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1110 		} else
1111 			ip6_mr_forward(net, mrt, skb, c);
1112 	}
1113 }
1114 
1115 /*
1116  *	Bounce a cache query up to pim6sd and netlink.
1117  *
1118  *	Called under mrt_lock.
1119  */
1120 
1121 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1122 			      mifi_t mifi, int assert)
1123 {
1124 	struct sk_buff *skb;
1125 	struct mrt6msg *msg;
1126 	int ret;
1127 
1128 #ifdef CONFIG_IPV6_PIMSM_V2
1129 	if (assert == MRT6MSG_WHOLEPKT)
1130 		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1131 						+sizeof(*msg));
1132 	else
1133 #endif
1134 		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1135 
1136 	if (!skb)
1137 		return -ENOBUFS;
1138 
1139 	/* Internal messages do not require checksums */
1141 
1142 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1143 
1144 #ifdef CONFIG_IPV6_PIMSM_V2
1145 	if (assert == MRT6MSG_WHOLEPKT) {
1146 		/* Ugly, but we have no choice with this interface.
1147 		   Duplicate old header, fix length etc.
1148 		   And all this only to mangle msg->im6_msgtype and
1149 		   to set msg->im6_mbz to "mbz" :-)
1150 		 */
1151 		skb_push(skb, -skb_network_offset(pkt));
1152 
1153 		skb_push(skb, sizeof(*msg));
1154 		skb_reset_transport_header(skb);
1155 		msg = (struct mrt6msg *)skb_transport_header(skb);
1156 		msg->im6_mbz = 0;
1157 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1158 		msg->im6_mif = mrt->mroute_reg_vif_num;
1159 		msg->im6_pad = 0;
1160 		msg->im6_src = ipv6_hdr(pkt)->saddr;
1161 		msg->im6_dst = ipv6_hdr(pkt)->daddr;
1162 
1163 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1164 	} else
1165 #endif
1166 	{
1167 	/*
1168 	 *	Copy the IPv6 header
1169 	 */
1170 
1171 	skb_put(skb, sizeof(struct ipv6hdr));
1172 	skb_reset_network_header(skb);
1173 	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1174 
1175 	/*
1176 	 *	Add our header
1177 	 */
1178 	skb_put(skb, sizeof(*msg));
1179 	skb_reset_transport_header(skb);
1180 	msg = (struct mrt6msg *)skb_transport_header(skb);
1181 
1182 	msg->im6_mbz = 0;
1183 	msg->im6_msgtype = assert;
1184 	msg->im6_mif = mifi;
1185 	msg->im6_pad = 0;
1186 	msg->im6_src = ipv6_hdr(pkt)->saddr;
1187 	msg->im6_dst = ipv6_hdr(pkt)->daddr;
1188 
1189 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1190 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1191 	}
1192 
1193 	if (!mrt->mroute6_sk) {
1194 		kfree_skb(skb);
1195 		return -EINVAL;
1196 	}
1197 
1198 	mrt6msg_netlink_event(mrt, skb);
1199 
1200 	/*
1201 	 *	Deliver to user space multicast routing algorithms
1202 	 */
1203 	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1204 	if (ret < 0) {
1205 		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1206 		kfree_skb(skb);
1207 	}
1208 
1209 	return ret;
1210 }
1211 
1212 /*
1213  *	Queue a packet for resolution; creates the unresolved entry under mfc_unres_lock.
1214  */
1215 
1216 static int
1217 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1218 {
1219 	bool found = false;
1220 	int err;
1221 	struct mfc6_cache *c;
1222 
1223 	spin_lock_bh(&mfc_unres_lock);
1224 	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1225 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1226 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1227 			found = true;
1228 			break;
1229 		}
1230 	}
1231 
1232 	if (!found) {
1233 		/*
1234 		 *	Create a new entry if allowable
1235 		 */
1236 
1237 		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1238 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
1239 			spin_unlock_bh(&mfc_unres_lock);
1240 
1241 			kfree_skb(skb);
1242 			return -ENOBUFS;
1243 		}
1244 
1245 		/*
1246 		 *	Fill in the new cache entry
1247 		 */
1248 		c->mf6c_parent = -1;
1249 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
1250 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1251 
1252 		/*
1253 		 *	Report the first packet to pim6sd (MRT6MSG_NOCACHE)
1254 		 */
1255 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1256 		if (err < 0) {
1257 			/* If the report failed, throw the cache entry
1258 			   out. - Brad Parker
1259 			 */
1260 			spin_unlock_bh(&mfc_unres_lock);
1261 
1262 			ip6mr_cache_free(c);
1263 			kfree_skb(skb);
1264 			return err;
1265 		}
1266 
1267 		atomic_inc(&mrt->cache_resolve_queue_len);
1268 		list_add(&c->list, &mrt->mfc6_unres_queue);
1269 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1270 
1271 		ipmr_do_expire_process(mrt);
1272 	}
1273 
1274 	/*
1275 	 *	See if we can append the packet
1276 	 */
1277 	if (c->mfc_un.unres.unresolved.qlen > 3) {
1278 		kfree_skb(skb);
1279 		err = -ENOBUFS;
1280 	} else {
1281 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1282 		err = 0;
1283 	}
1284 
1285 	spin_unlock_bh(&mfc_unres_lock);
1286 	return err;
1287 }
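
/* Illustrative user-space sketch (not kernel code): the routing daemon
 * reads MRT6MSG_NOCACHE upcalls from its raw ICMPv6 socket and answers
 * with MRT6_ADD_MFC, which resolves the entry queued above ("mrtsock"
 * is a placeholder for the daemon's mroute socket):
 *
 *	unsigned char buf[8192];
 *	struct mrt6msg *msg = (struct mrt6msg *)buf;
 *	ssize_t n = read(mrtsock, buf, sizeof(buf));
 *
 *	if (n >= (ssize_t)sizeof(*msg) &&
 *	    msg->im6_msgtype == MRT6MSG_NOCACHE) {
 *		struct mf6cctl mfc = {};
 *
 *		mfc.mf6cc_origin.sin6_addr   = msg->im6_src;
 *		mfc.mf6cc_mcastgrp.sin6_addr = msg->im6_dst;
 *		mfc.mf6cc_parent = msg->im6_mif;
 *		// IF_SET() the desired output mifs in mfc.mf6cc_ifset here
 *		setsockopt(mrtsock, IPPROTO_IPV6, MRT6_ADD_MFC,
 *			   &mfc, sizeof(mfc));
 *	}
 */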
1288 
1289 /*
1290  *	MFC6 cache manipulation by user space
1291  */
1292 
1293 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
1294 			    int parent)
1295 {
1296 	int line;
1297 	struct mfc6_cache *c, *next;
1298 
1299 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1300 
1301 	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1302 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1303 		    ipv6_addr_equal(&c->mf6c_mcastgrp,
1304 				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1305 		    (parent == -1 || parent == c->mf6c_parent)) {
1306 			write_lock_bh(&mrt_lock);
1307 			list_del(&c->list);
1308 			write_unlock_bh(&mrt_lock);
1309 
1310 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1311 			ip6mr_cache_free(c);
1312 			return 0;
1313 		}
1314 	}
1315 	return -ENOENT;
1316 }
1317 
1318 static int ip6mr_device_event(struct notifier_block *this,
1319 			      unsigned long event, void *ptr)
1320 {
1321 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1322 	struct net *net = dev_net(dev);
1323 	struct mr6_table *mrt;
1324 	struct vif_device *v;
1325 	int ct;
1326 
1327 	if (event != NETDEV_UNREGISTER)
1328 		return NOTIFY_DONE;
1329 
1330 	ip6mr_for_each_table(mrt, net) {
1331 		v = &mrt->vif6_table[0];
1332 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1333 			if (v->dev == dev)
1334 				mif6_delete(mrt, ct, 1, NULL);
1335 		}
1336 	}
1337 
1338 	return NOTIFY_DONE;
1339 }
1340 
1341 static struct notifier_block ip6_mr_notifier = {
1342 	.notifier_call = ip6mr_device_event
1343 };
1344 
1345 /*
1346  *	Setup for IPv6 multicast routing
1347  */
1348 
1349 static int __net_init ip6mr_net_init(struct net *net)
1350 {
1351 	int err;
1352 
1353 	err = ip6mr_rules_init(net);
1354 	if (err < 0)
1355 		goto fail;
1356 
1357 #ifdef CONFIG_PROC_FS
1358 	err = -ENOMEM;
1359 	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
1360 		goto proc_vif_fail;
1361 	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
1362 		goto proc_cache_fail;
1363 #endif
1364 
1365 	return 0;
1366 
1367 #ifdef CONFIG_PROC_FS
1368 proc_cache_fail:
1369 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1370 proc_vif_fail:
1371 	ip6mr_rules_exit(net);
1372 #endif
1373 fail:
1374 	return err;
1375 }
1376 
1377 static void __net_exit ip6mr_net_exit(struct net *net)
1378 {
1379 #ifdef CONFIG_PROC_FS
1380 	remove_proc_entry("ip6_mr_cache", net->proc_net);
1381 	remove_proc_entry("ip6_mr_vif", net->proc_net);
1382 #endif
1383 	ip6mr_rules_exit(net);
1384 }
1385 
1386 static struct pernet_operations ip6mr_net_ops = {
1387 	.init = ip6mr_net_init,
1388 	.exit = ip6mr_net_exit,
1389 	.async = true,
1390 };
1391 
1392 int __init ip6_mr_init(void)
1393 {
1394 	int err;
1395 
1396 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1397 				       sizeof(struct mfc6_cache),
1398 				       0, SLAB_HWCACHE_ALIGN,
1399 				       NULL);
1400 	if (!mrt_cachep)
1401 		return -ENOMEM;
1402 
1403 	err = register_pernet_subsys(&ip6mr_net_ops);
1404 	if (err)
1405 		goto reg_pernet_fail;
1406 
1407 	err = register_netdevice_notifier(&ip6_mr_notifier);
1408 	if (err)
1409 		goto reg_notif_fail;
1410 #ifdef CONFIG_IPV6_PIMSM_V2
1411 	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1412 		pr_err("%s: can't add PIM protocol\n", __func__);
1413 		err = -EAGAIN;
1414 		goto add_proto_fail;
1415 	}
1416 #endif
1417 	err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
1418 				   NULL, ip6mr_rtm_dumproute, 0);
1419 	if (err == 0)
1420 		return 0;
1421 
1422 #ifdef CONFIG_IPV6_PIMSM_V2
1423 	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1424 add_proto_fail:
1425 	unregister_netdevice_notifier(&ip6_mr_notifier);
1426 #endif
1427 reg_notif_fail:
1428 	unregister_pernet_subsys(&ip6mr_net_ops);
1429 reg_pernet_fail:
1430 	kmem_cache_destroy(mrt_cachep);
1431 	return err;
1432 }
1433 
1434 void ip6_mr_cleanup(void)
1435 {
1436 	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
1437 #ifdef CONFIG_IPV6_PIMSM_V2
1438 	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1439 #endif
1440 	unregister_netdevice_notifier(&ip6_mr_notifier);
1441 	unregister_pernet_subsys(&ip6mr_net_ops);
1442 	kmem_cache_destroy(mrt_cachep);
1443 }
1444 
1445 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1446 			 struct mf6cctl *mfc, int mrtsock, int parent)
1447 {
1448 	bool found = false;
1449 	int line;
1450 	struct mfc6_cache *uc, *c;
1451 	unsigned char ttls[MAXMIFS];
1452 	int i;
1453 
1454 	if (mfc->mf6cc_parent >= MAXMIFS)
1455 		return -ENFILE;
1456 
1457 	memset(ttls, 255, MAXMIFS);
1458 	for (i = 0; i < MAXMIFS; i++) {
1459 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
1460 			ttls[i] = 1;
1461 
1462 	}
1463 
1464 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1465 
1466 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1467 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1468 		    ipv6_addr_equal(&c->mf6c_mcastgrp,
1469 				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1470 		    (parent == -1 || parent == mfc->mf6cc_parent)) {
1471 			found = true;
1472 			break;
1473 		}
1474 	}
1475 
1476 	if (found) {
1477 		write_lock_bh(&mrt_lock);
1478 		c->mf6c_parent = mfc->mf6cc_parent;
1479 		ip6mr_update_thresholds(mrt, c, ttls);
1480 		if (!mrtsock)
1481 			c->mfc_flags |= MFC_STATIC;
1482 		write_unlock_bh(&mrt_lock);
1483 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1484 		return 0;
1485 	}
1486 
1487 	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1488 	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1489 		return -EINVAL;
1490 
1491 	c = ip6mr_cache_alloc();
1492 	if (!c)
1493 		return -ENOMEM;
1494 
1495 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1496 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1497 	c->mf6c_parent = mfc->mf6cc_parent;
1498 	ip6mr_update_thresholds(mrt, c, ttls);
1499 	if (!mrtsock)
1500 		c->mfc_flags |= MFC_STATIC;
1501 
1502 	write_lock_bh(&mrt_lock);
1503 	list_add(&c->list, &mrt->mfc6_cache_array[line]);
1504 	write_unlock_bh(&mrt_lock);
1505 
1506 	/*
1507 	 *	Check to see if we resolved a queued entry.  If so, we
1508 	 *	need to send the queued frames on and tidy up.
1509 	 */
1510 	found = false;
1511 	spin_lock_bh(&mfc_unres_lock);
1512 	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1513 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1514 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1515 			list_del(&uc->list);
1516 			atomic_dec(&mrt->cache_resolve_queue_len);
1517 			found = true;
1518 			break;
1519 		}
1520 	}
1521 	if (list_empty(&mrt->mfc6_unres_queue))
1522 		del_timer(&mrt->ipmr_expire_timer);
1523 	spin_unlock_bh(&mfc_unres_lock);
1524 
1525 	if (found) {
1526 		ip6mr_cache_resolve(net, mrt, uc, c);
1527 		ip6mr_cache_free(uc);
1528 	}
1529 	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1530 	return 0;
1531 }
1532 
1533 /*
1534  *	Close the multicast socket and clear the vif tables, caches etc.
1535  */
1536 
1537 static void mroute_clean_tables(struct mr6_table *mrt, bool all)
1538 {
1539 	int i;
1540 	LIST_HEAD(list);
1541 	struct mfc6_cache *c, *next;
1542 
1543 	/*
1544 	 *	Shut down all active vif entries
1545 	 */
1546 	for (i = 0; i < mrt->maxvif; i++) {
1547 		if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
1548 			continue;
1549 		mif6_delete(mrt, i, 0, &list);
1550 	}
1551 	unregister_netdevice_many(&list);
1552 
1553 	/*
1554 	 *	Wipe the cache
1555 	 */
1556 	for (i = 0; i < MFC6_LINES; i++) {
1557 		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1558 			if (!all && (c->mfc_flags & MFC_STATIC))
1559 				continue;
1560 			write_lock_bh(&mrt_lock);
1561 			list_del(&c->list);
1562 			write_unlock_bh(&mrt_lock);
1563 
1564 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1565 			ip6mr_cache_free(c);
1566 		}
1567 	}
1568 
1569 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1570 		spin_lock_bh(&mfc_unres_lock);
1571 		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1572 			list_del(&c->list);
1573 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
1574 			ip6mr_destroy_unres(mrt, c);
1575 		}
1576 		spin_unlock_bh(&mfc_unres_lock);
1577 	}
1578 }
1579 
1580 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1581 {
1582 	int err = 0;
1583 	struct net *net = sock_net(sk);
1584 
1585 	rtnl_lock();
1586 	write_lock_bh(&mrt_lock);
1587 	if (likely(mrt->mroute6_sk == NULL)) {
1588 		mrt->mroute6_sk = sk;
1589 		net->ipv6.devconf_all->mc_forwarding++;
1590 	} else {
1591 		err = -EADDRINUSE;
1592 	}
1593 	write_unlock_bh(&mrt_lock);
1594 
1595 	if (!err)
1596 		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1597 					     NETCONFA_MC_FORWARDING,
1598 					     NETCONFA_IFINDEX_ALL,
1599 					     net->ipv6.devconf_all);
1600 	rtnl_unlock();
1601 
1602 	return err;
1603 }
1604 
1605 int ip6mr_sk_done(struct sock *sk)
1606 {
1607 	int err = -EACCES;
1608 	struct net *net = sock_net(sk);
1609 	struct mr6_table *mrt;
1610 
1611 	if (sk->sk_type != SOCK_RAW ||
1612 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1613 		return err;
1614 
1615 	rtnl_lock();
1616 	ip6mr_for_each_table(mrt, net) {
1617 		if (sk == mrt->mroute6_sk) {
1618 			write_lock_bh(&mrt_lock);
1619 			mrt->mroute6_sk = NULL;
1620 			net->ipv6.devconf_all->mc_forwarding--;
1621 			write_unlock_bh(&mrt_lock);
1622 			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1623 						     NETCONFA_MC_FORWARDING,
1624 						     NETCONFA_IFINDEX_ALL,
1625 						     net->ipv6.devconf_all);
1626 
1627 			mroute_clean_tables(mrt, false);
1628 			err = 0;
1629 			break;
1630 		}
1631 	}
1632 	rtnl_unlock();
1633 
1634 	return err;
1635 }
1636 
1637 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1638 {
1639 	struct mr6_table *mrt;
1640 	struct flowi6 fl6 = {
1641 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
1642 		.flowi6_oif	= skb->dev->ifindex,
1643 		.flowi6_mark	= skb->mark,
1644 	};
1645 
1646 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1647 		return NULL;
1648 
1649 	return mrt->mroute6_sk;
1650 }
1651 
1652 /*
1653  *	Socket options and virtual interface manipulation. The whole
1654  *	virtual interface system is a complete heap, but unfortunately
1655  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1656  *	MOSPF/PIM router set up we can clean this up.
1657  */
1658 
1659 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1660 {
1661 	int ret, parent = 0;
1662 	struct mif6ctl vif;
1663 	struct mf6cctl mfc;
1664 	mifi_t mifi;
1665 	struct net *net = sock_net(sk);
1666 	struct mr6_table *mrt;
1667 
1668 	if (sk->sk_type != SOCK_RAW ||
1669 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1670 		return -EOPNOTSUPP;
1671 
1672 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1673 	if (!mrt)
1674 		return -ENOENT;
1675 
1676 	if (optname != MRT6_INIT) {
1677 		if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
1678 			return -EACCES;
1679 	}
1680 
1681 	switch (optname) {
1682 	case MRT6_INIT:
1683 		if (optlen < sizeof(int))
1684 			return -EINVAL;
1685 
1686 		return ip6mr_sk_init(mrt, sk);
1687 
1688 	case MRT6_DONE:
1689 		return ip6mr_sk_done(sk);
1690 
1691 	case MRT6_ADD_MIF:
1692 		if (optlen < sizeof(vif))
1693 			return -EINVAL;
1694 		if (copy_from_user(&vif, optval, sizeof(vif)))
1695 			return -EFAULT;
1696 		if (vif.mif6c_mifi >= MAXMIFS)
1697 			return -ENFILE;
1698 		rtnl_lock();
1699 		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1700 		rtnl_unlock();
1701 		return ret;
1702 
1703 	case MRT6_DEL_MIF:
1704 		if (optlen < sizeof(mifi_t))
1705 			return -EINVAL;
1706 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1707 			return -EFAULT;
1708 		rtnl_lock();
1709 		ret = mif6_delete(mrt, mifi, 0, NULL);
1710 		rtnl_unlock();
1711 		return ret;
1712 
1713 	/*
1714 	 *	Manipulate the forwarding caches. These live
1715 	 *	in a sort of kernel/user symbiosis.
1716 	 */
1717 	case MRT6_ADD_MFC:
1718 	case MRT6_DEL_MFC:
1719 		parent = -1;
1720 		/* fall through */
1721 	case MRT6_ADD_MFC_PROXY:
1722 	case MRT6_DEL_MFC_PROXY:
1723 		if (optlen < sizeof(mfc))
1724 			return -EINVAL;
1725 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1726 			return -EFAULT;
1727 		if (parent == 0)
1728 			parent = mfc.mf6cc_parent;
1729 		rtnl_lock();
1730 		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1731 			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1732 		else
1733 			ret = ip6mr_mfc_add(net, mrt, &mfc,
1734 					    sk == mrt->mroute6_sk, parent);
1735 		rtnl_unlock();
1736 		return ret;
1737 
1738 	/*
1739 	 *	Control PIM assert (activating PIM also activates assert)
1740 	 */
1741 	case MRT6_ASSERT:
1742 	{
1743 		int v;
1744 
1745 		if (optlen != sizeof(v))
1746 			return -EINVAL;
1747 		if (get_user(v, (int __user *)optval))
1748 			return -EFAULT;
1749 		mrt->mroute_do_assert = v;
1750 		return 0;
1751 	}
1752 
1753 #ifdef CONFIG_IPV6_PIMSM_V2
1754 	case MRT6_PIM:
1755 	{
1756 		int v;
1757 
1758 		if (optlen != sizeof(v))
1759 			return -EINVAL;
1760 		if (get_user(v, (int __user *)optval))
1761 			return -EFAULT;
1762 		v = !!v;
1763 		rtnl_lock();
1764 		ret = 0;
1765 		if (v != mrt->mroute_do_pim) {
1766 			mrt->mroute_do_pim = v;
1767 			mrt->mroute_do_assert = v;
1768 		}
1769 		rtnl_unlock();
1770 		return ret;
1771 	}
1772 
1773 #endif
1774 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1775 	case MRT6_TABLE:
1776 	{
1777 		u32 v;
1778 
1779 		if (optlen != sizeof(u32))
1780 			return -EINVAL;
1781 		if (get_user(v, (u32 __user *)optval))
1782 			return -EFAULT;
1783 		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1784 		if (v != RT_TABLE_DEFAULT && v >= 100000000)
1785 			return -EINVAL;
1786 		if (sk == mrt->mroute6_sk)
1787 			return -EBUSY;
1788 
1789 		rtnl_lock();
1790 		ret = 0;
1791 		if (!ip6mr_new_table(net, v))
1792 			ret = -ENOMEM;
1793 		raw6_sk(sk)->ip6mr_table = v;
1794 		rtnl_unlock();
1795 		return ret;
1796 	}
1797 #endif
1798 	/*
1799 	 *	Spurious command, or MRT6_VERSION which you cannot
1800 	 *	set.
1801 	 */
1802 	default:
1803 		return -ENOPROTOOPT;
1804 	}
1805 }
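
/* Illustrative user-space sketch (not kernel code) of driving this
 * interface; the mif slot and "eth0" are assumptions for the example:
 *
 *	int one = 1;
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *
 *	struct mif6ctl mc = {};
 *	mc.mif6c_mifi = 0;			// mif slot to allocate
 *	mc.mif6c_pifi = if_nametoindex("eth0");	// physical interface
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mc, sizeof(mc));
 *
 *	// ... run the routing daemon loop, then tear down:
 *	setsockopt(s, IPPROTO_IPV6, MRT6_DONE, NULL, 0);
 */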
1806 
1807 /*
1808  *	getsockopt() support for the multicast routing system.
1809  */
1810 
1811 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1812 			  int __user *optlen)
1813 {
1814 	int olr;
1815 	int val;
1816 	struct net *net = sock_net(sk);
1817 	struct mr6_table *mrt;
1818 
1819 	if (sk->sk_type != SOCK_RAW ||
1820 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1821 		return -EOPNOTSUPP;
1822 
1823 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1824 	if (!mrt)
1825 		return -ENOENT;
1826 
1827 	switch (optname) {
1828 	case MRT6_VERSION:
1829 		val = 0x0305;
1830 		break;
1831 #ifdef CONFIG_IPV6_PIMSM_V2
1832 	case MRT6_PIM:
1833 		val = mrt->mroute_do_pim;
1834 		break;
1835 #endif
1836 	case MRT6_ASSERT:
1837 		val = mrt->mroute_do_assert;
1838 		break;
1839 	default:
1840 		return -ENOPROTOOPT;
1841 	}
1842 
1843 	if (get_user(olr, optlen))
1844 		return -EFAULT;
1845 
1846 	olr = min_t(int, olr, sizeof(int));
1847 	if (olr < 0)
1848 		return -EINVAL;
1849 
1850 	if (put_user(olr, optlen))
1851 		return -EFAULT;
1852 	if (copy_to_user(optval, &val, olr))
1853 		return -EFAULT;
1854 	return 0;
1855 }
1856 
1857 /*
1858  *	The IPv6 multicast ioctl support routines.
1859  */
1860 
1861 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1862 {
1863 	struct sioc_sg_req6 sr;
1864 	struct sioc_mif_req6 vr;
1865 	struct vif_device *vif;
1866 	struct mfc6_cache *c;
1867 	struct net *net = sock_net(sk);
1868 	struct mr6_table *mrt;
1869 
1870 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1871 	if (!mrt)
1872 		return -ENOENT;
1873 
1874 	switch (cmd) {
1875 	case SIOCGETMIFCNT_IN6:
1876 		if (copy_from_user(&vr, arg, sizeof(vr)))
1877 			return -EFAULT;
1878 		if (vr.mifi >= mrt->maxvif)
1879 			return -EINVAL;
1880 		read_lock(&mrt_lock);
1881 		vif = &mrt->vif6_table[vr.mifi];
1882 		if (MIF_EXISTS(mrt, vr.mifi)) {
1883 			vr.icount = vif->pkt_in;
1884 			vr.ocount = vif->pkt_out;
1885 			vr.ibytes = vif->bytes_in;
1886 			vr.obytes = vif->bytes_out;
1887 			read_unlock(&mrt_lock);
1888 
1889 			if (copy_to_user(arg, &vr, sizeof(vr)))
1890 				return -EFAULT;
1891 			return 0;
1892 		}
1893 		read_unlock(&mrt_lock);
1894 		return -EADDRNOTAVAIL;
1895 	case SIOCGETSGCNT_IN6:
1896 		if (copy_from_user(&sr, arg, sizeof(sr)))
1897 			return -EFAULT;
1898 
1899 		read_lock(&mrt_lock);
1900 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1901 		if (c) {
1902 			sr.pktcnt = c->mfc_un.res.pkt;
1903 			sr.bytecnt = c->mfc_un.res.bytes;
1904 			sr.wrong_if = c->mfc_un.res.wrong_if;
1905 			read_unlock(&mrt_lock);
1906 
1907 			if (copy_to_user(arg, &sr, sizeof(sr)))
1908 				return -EFAULT;
1909 			return 0;
1910 		}
1911 		read_unlock(&mrt_lock);
1912 		return -EADDRNOTAVAIL;
1913 	default:
1914 		return -ENOIOCTLCMD;
1915 	}
1916 }
1917 
1918 #ifdef CONFIG_COMPAT
1919 struct compat_sioc_sg_req6 {
1920 	struct sockaddr_in6 src;
1921 	struct sockaddr_in6 grp;
1922 	compat_ulong_t pktcnt;
1923 	compat_ulong_t bytecnt;
1924 	compat_ulong_t wrong_if;
1925 };
1926 
1927 struct compat_sioc_mif_req6 {
1928 	mifi_t	mifi;
1929 	compat_ulong_t icount;
1930 	compat_ulong_t ocount;
1931 	compat_ulong_t ibytes;
1932 	compat_ulong_t obytes;
1933 };
1934 
1935 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1936 {
1937 	struct compat_sioc_sg_req6 sr;
1938 	struct compat_sioc_mif_req6 vr;
1939 	struct vif_device *vif;
1940 	struct mfc6_cache *c;
1941 	struct net *net = sock_net(sk);
1942 	struct mr6_table *mrt;
1943 
1944 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1945 	if (!mrt)
1946 		return -ENOENT;
1947 
1948 	switch (cmd) {
1949 	case SIOCGETMIFCNT_IN6:
1950 		if (copy_from_user(&vr, arg, sizeof(vr)))
1951 			return -EFAULT;
1952 		if (vr.mifi >= mrt->maxvif)
1953 			return -EINVAL;
1954 		read_lock(&mrt_lock);
1955 		vif = &mrt->vif6_table[vr.mifi];
1956 		if (MIF_EXISTS(mrt, vr.mifi)) {
1957 			vr.icount = vif->pkt_in;
1958 			vr.ocount = vif->pkt_out;
1959 			vr.ibytes = vif->bytes_in;
1960 			vr.obytes = vif->bytes_out;
1961 			read_unlock(&mrt_lock);
1962 
1963 			if (copy_to_user(arg, &vr, sizeof(vr)))
1964 				return -EFAULT;
1965 			return 0;
1966 		}
1967 		read_unlock(&mrt_lock);
1968 		return -EADDRNOTAVAIL;
1969 	case SIOCGETSGCNT_IN6:
1970 		if (copy_from_user(&sr, arg, sizeof(sr)))
1971 			return -EFAULT;
1972 
1973 		read_lock(&mrt_lock);
1974 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1975 		if (c) {
1976 			sr.pktcnt = c->mfc_un.res.pkt;
1977 			sr.bytecnt = c->mfc_un.res.bytes;
1978 			sr.wrong_if = c->mfc_un.res.wrong_if;
1979 			read_unlock(&mrt_lock);
1980 
1981 			if (copy_to_user(arg, &sr, sizeof(sr)))
1982 				return -EFAULT;
1983 			return 0;
1984 		}
1985 		read_unlock(&mrt_lock);
1986 		return -EADDRNOTAVAIL;
1987 	default:
1988 		return -ENOIOCTLCMD;
1989 	}
1990 }
1991 #endif
1992 
1993 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
1994 {
1995 	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1996 			IPSTATS_MIB_OUTFORWDATAGRAMS);
1997 	__IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
1998 			IPSTATS_MIB_OUTOCTETS, skb->len);
1999 	return dst_output(net, sk, skb);
2000 }
2001 
2002 /*
2003  *	Processing handlers for ip6mr_forward
2004  */
2005 
2006 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
2007 			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
2008 {
2009 	struct ipv6hdr *ipv6h;
2010 	struct vif_device *vif = &mrt->vif6_table[vifi];
2011 	struct net_device *dev;
2012 	struct dst_entry *dst;
2013 	struct flowi6 fl6;
2014 
2015 	if (!vif->dev)
2016 		goto out_free;
2017 
2018 #ifdef CONFIG_IPV6_PIMSM_V2
2019 	if (vif->flags & MIFF_REGISTER) {
2020 		vif->pkt_out++;
2021 		vif->bytes_out += skb->len;
2022 		vif->dev->stats.tx_bytes += skb->len;
2023 		vif->dev->stats.tx_packets++;
2024 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2025 		goto out_free;
2026 	}
2027 #endif
2028 
2029 	ipv6h = ipv6_hdr(skb);
2030 
2031 	fl6 = (struct flowi6) {
2032 		.flowi6_oif = vif->link,
2033 		.daddr = ipv6h->daddr,
2034 	};
2035 
2036 	dst = ip6_route_output(net, NULL, &fl6);
2037 	if (dst->error) {
2038 		dst_release(dst);
2039 		goto out_free;
2040 	}
2041 
2042 	skb_dst_drop(skb);
2043 	skb_dst_set(skb, dst);
2044 
2045 	/*
2046 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
2047 	 * locally not only before forwarding, but also after forwarding
2048 	 * on all output interfaces.  Clearly, if the mrouter runs a
2049 	 * multicasting program, it should receive packets regardless of
2050 	 * which interface the program has joined on.
2051 	 * Without this, the program would have to join on all
2052 	 * interfaces.  On the other hand, a multihomed host (or router,
2053 	 * but not mrouter) must not join on more than one interface, as
2054 	 * that would result in receiving duplicate packets.
2055 	 */
2056 	dev = vif->dev;
2057 	skb->dev = dev;
2058 	vif->pkt_out++;
2059 	vif->bytes_out += skb->len;
2060 
2061 	/* We are about to modify the header; make sure the skb is writable */
2062 	/* XXX: extension headers? */
2063 	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2064 		goto out_free;
2065 
2066 	ipv6h = ipv6_hdr(skb);
2067 	ipv6h->hop_limit--;
2068 
2069 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2070 
2071 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2072 		       net, NULL, skb, skb->dev, dev,
2073 		       ip6mr_forward2_finish);
2074 
2075 out_free:
2076 	kfree_skb(skb);
2077 	return 0;
2078 }
2079 
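/*
 *	Map a net_device back to its MIF index in the vif table;
 *	returns -1 if the device is not a configured MIF.
 */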
2080 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2081 {
2082 	int ct;
2083 
2084 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2085 		if (mrt->vif6_table[ct].dev == dev)
2086 			break;
2087 	}
2088 	return ct;
2089 }
2090 
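/*
 *	Forwarding decision for one cache entry: verify the packet arrived
 *	on the expected input interface (reporting MRT6MSG_WRONGMIF, i.e. a
 *	PIM assert candidate, if not), special-case (*,G) and (*,*) wildcard
 *	entries, then clone the skb to every output MIF whose TTL threshold
 *	the packet's hop limit exceeds.
 */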
2091 static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
2092 			   struct sk_buff *skb, struct mfc6_cache *cache)
2093 {
2094 	int psend = -1;
2095 	int vif, ct;
2096 	int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2097 
2098 	vif = cache->mf6c_parent;
2099 	cache->mfc_un.res.pkt++;
2100 	cache->mfc_un.res.bytes += skb->len;
2101 	cache->mfc_un.res.lastuse = jiffies;
2102 
2103 	if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
2104 		struct mfc6_cache *cache_proxy;
2105 
2106 		/* For an (*,G) entry, we only check that the incoming
2107 		 * interface is part of the static tree.
2108 		 */
2109 		cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
2110 		if (cache_proxy &&
2111 		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
2112 			goto forward;
2113 	}
2114 
2115 	/*
2116 	 * Wrong interface: drop packet and (maybe) send PIM assert.
2117 	 */
2118 	if (mrt->vif6_table[vif].dev != skb->dev) {
2119 		cache->mfc_un.res.wrong_if++;
2120 
2121 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2122 		    /* PIM-SM uses asserts when switching from the RPT to the
2123 		       SPT, so we cannot insist that the packet arrived on an
2124 		       oif. That is unfortunate, but the alternative would be
2125 		       moving a pretty large chunk of pimd into the kernel. --ANK
2126 		     */
2127 		    (mrt->mroute_do_pim ||
2128 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
2129 		    time_after(jiffies,
2130 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2131 			cache->mfc_un.res.last_assert = jiffies;
2132 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2133 		}
2134 		goto dont_forward;
2135 	}
2136 
2137 forward:
2138 	mrt->vif6_table[vif].pkt_in++;
2139 	mrt->vif6_table[vif].bytes_in += skb->len;
2140 
2141 	/*
2142 	 *	Forward the frame
2143 	 */
2144 	if (ipv6_addr_any(&cache->mf6c_origin) &&
2145 	    ipv6_addr_any(&cache->mf6c_mcastgrp)) {
2146 		if (true_vifi >= 0 &&
2147 		    true_vifi != cache->mf6c_parent &&
2148 		    ipv6_hdr(skb)->hop_limit >
2149 				cache->mfc_un.res.ttls[cache->mf6c_parent]) {
2150 			/* It's an (*,*) entry and the packet is not coming from
2151 			 * the upstream: forward the packet to the upstream
2152 			 * only.
2153 			 */
2154 			psend = cache->mf6c_parent;
2155 			goto last_forward;
2156 		}
2157 		goto dont_forward;
2158 	}
2159 	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2160 		/* For (*,G) entry, don't forward to the incoming interface */
2161 		if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
2162 		    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2163 			if (psend != -1) {
2164 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2165 				if (skb2)
2166 					ip6mr_forward2(net, mrt, skb2, cache, psend);
2167 			}
2168 			psend = ct;
2169 		}
2170 	}
2171 last_forward:
2172 	if (psend != -1) {
2173 		ip6mr_forward2(net, mrt, skb, cache, psend);
2174 		return;
2175 	}
2176 
2177 dont_forward:
2178 	kfree_skb(skb);
2179 }
2180 
2181 
2182 /*
2183  *	Multicast packets for forwarding arrive here
2184  */
2185 
2186 int ip6_mr_input(struct sk_buff *skb)
2187 {
2188 	struct mfc6_cache *cache;
2189 	struct net *net = dev_net(skb->dev);
2190 	struct mr6_table *mrt;
2191 	struct flowi6 fl6 = {
2192 		.flowi6_iif	= skb->dev->ifindex,
2193 		.flowi6_mark	= skb->mark,
2194 	};
2195 	int err;
2196 
2197 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2198 	if (err < 0) {
2199 		kfree_skb(skb);
2200 		return err;
2201 	}
2202 
2203 	read_lock(&mrt_lock);
2204 	cache = ip6mr_cache_find(mrt,
2205 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2206 	if (!cache) {
2207 		int vif = ip6mr_find_vif(mrt, skb->dev);
2208 
2209 		if (vif >= 0)
2210 			cache = ip6mr_cache_find_any(mrt,
2211 						     &ipv6_hdr(skb)->daddr,
2212 						     vif);
2213 	}
2214 
2215 	/*
2216 	 *	No usable cache entry
2217 	 */
2218 	if (!cache) {
2219 		int vif;
2220 
2221 		vif = ip6mr_find_vif(mrt, skb->dev);
2222 		if (vif >= 0) {
2223 			int err = ip6mr_cache_unresolved(mrt, vif, skb);
2224 			read_unlock(&mrt_lock);
2225 
2226 			return err;
2227 		}
2228 		read_unlock(&mrt_lock);
2229 		kfree_skb(skb);
2230 		return -ENODEV;
2231 	}
2232 
2233 	ip6_mr_forward(net, mrt, skb, cache);
2234 
2235 	read_unlock(&mrt_lock);
2236 
2237 	return 0;
2238 }
2239 
2240 
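/*
 *	Fill RTA_IIF, the RTA_MULTIPATH nexthop list and the RTA_MFC_STATS/
 *	RTA_EXPIRES attributes for one cache entry. Returns -ENOENT (after
 *	setting RTNH_F_UNRESOLVED) for unresolved entries and -EMSGSIZE if
 *	the skb runs out of room.
 */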
2241 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2242 			       struct mfc6_cache *c, struct rtmsg *rtm)
2243 {
2244 	struct rta_mfc_stats mfcs;
2245 	struct nlattr *mp_attr;
2246 	struct rtnexthop *nhp;
2247 	unsigned long lastuse;
2248 	int ct;
2249 
2250 	/* If cache is unresolved, don't try to parse IIF and OIF */
2251 	if (c->mf6c_parent >= MAXMIFS) {
2252 		rtm->rtm_flags |= RTNH_F_UNRESOLVED;
2253 		return -ENOENT;
2254 	}
2255 
2256 	if (MIF_EXISTS(mrt, c->mf6c_parent) &&
2257 	    nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
2258 		return -EMSGSIZE;
2259 	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
2260 	if (!mp_attr)
2261 		return -EMSGSIZE;
2262 
2263 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2264 		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2265 			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
2266 			if (!nhp) {
2267 				nla_nest_cancel(skb, mp_attr);
2268 				return -EMSGSIZE;
2269 			}
2270 
2271 			nhp->rtnh_flags = 0;
2272 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2273 			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2274 			nhp->rtnh_len = sizeof(*nhp);
2275 		}
2276 	}
2277 
2278 	nla_nest_end(skb, mp_attr);
2279 
2280 	lastuse = READ_ONCE(c->mfc_un.res.lastuse);
2281 	lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
2282 
2283 	mfcs.mfcs_packets = c->mfc_un.res.pkt;
2284 	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2285 	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
2286 	if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
2287 	    nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
2288 			      RTA_PAD))
2289 		return -EMSGSIZE;
2290 
2291 	rtm->rtm_type = RTN_MULTICAST;
2292 	return 1;
2293 }
2294 
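/*
 *	RTM_GETROUTE support: look up the (S,G) cache entry matching the
 *	rt6_info attached to @skb and fill @rtm from it. If no entry exists,
 *	build a dummy IPv6 header and queue it as unresolved so a report is
 *	sent to the multicast routing daemon.
 */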
2295 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2296 		    u32 portid)
2297 {
2298 	int err;
2299 	struct mr6_table *mrt;
2300 	struct mfc6_cache *cache;
2301 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2302 
2303 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2304 	if (!mrt)
2305 		return -ENOENT;
2306 
2307 	read_lock(&mrt_lock);
2308 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2309 	if (!cache && skb->dev) {
2310 		int vif = ip6mr_find_vif(mrt, skb->dev);
2311 
2312 		if (vif >= 0)
2313 			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2314 						     vif);
2315 	}
2316 
2317 	if (!cache) {
2318 		struct sk_buff *skb2;
2319 		struct ipv6hdr *iph;
2320 		struct net_device *dev;
2321 		int vif;
2322 
2323 		dev = skb->dev;
2324 		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2325 			read_unlock(&mrt_lock);
2326 			return -ENODEV;
2327 		}
2328 
2329 		/* really correct? */
2330 		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2331 		if (!skb2) {
2332 			read_unlock(&mrt_lock);
2333 			return -ENOMEM;
2334 		}
2335 
2336 		NETLINK_CB(skb2).portid = portid;
2337 		skb_reset_transport_header(skb2);
2338 
2339 		skb_put(skb2, sizeof(struct ipv6hdr));
2340 		skb_reset_network_header(skb2);
2341 
2342 		iph = ipv6_hdr(skb2);
2343 		iph->version = 0;
2344 		iph->priority = 0;
2345 		iph->flow_lbl[0] = 0;
2346 		iph->flow_lbl[1] = 0;
2347 		iph->flow_lbl[2] = 0;
2348 		iph->payload_len = 0;
2349 		iph->nexthdr = IPPROTO_NONE;
2350 		iph->hop_limit = 0;
2351 		iph->saddr = rt->rt6i_src.addr;
2352 		iph->daddr = rt->rt6i_dst.addr;
2353 
2354 		err = ip6mr_cache_unresolved(mrt, vif, skb2);
2355 		read_unlock(&mrt_lock);
2356 
2357 		return err;
2358 	}
2359 
2360 	if (rtm->rtm_flags & RTM_F_NOTIFY)
2361 		cache->mfc_flags |= MFC_NOTIFY;
2362 
2363 	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2364 	read_unlock(&mrt_lock);
2365 	return err;
2366 }
2367 
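/*
 *	Build one RTM_NEWROUTE/RTM_DELROUTE message (family RTNL_FAMILY_IP6MR)
 *	describing cache entry @c; used for both dumps and notifications.
 */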
2368 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2369 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2370 			     int flags)
2371 {
2372 	struct nlmsghdr *nlh;
2373 	struct rtmsg *rtm;
2374 	int err;
2375 
2376 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2377 	if (!nlh)
2378 		return -EMSGSIZE;
2379 
2380 	rtm = nlmsg_data(nlh);
2381 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2382 	rtm->rtm_dst_len  = 128;
2383 	rtm->rtm_src_len  = 128;
2384 	rtm->rtm_tos      = 0;
2385 	rtm->rtm_table    = mrt->id;
2386 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2387 		goto nla_put_failure;
2388 	rtm->rtm_type = RTN_MULTICAST;
2389 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2390 	if (c->mfc_flags & MFC_STATIC)
2391 		rtm->rtm_protocol = RTPROT_STATIC;
2392 	else
2393 		rtm->rtm_protocol = RTPROT_MROUTED;
2394 	rtm->rtm_flags    = 0;
2395 
2396 	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2397 	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2398 		goto nla_put_failure;
2399 	err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
2400 	/* do not break the dump if cache is unresolved */
2401 	if (err < 0 && err != -ENOENT)
2402 		goto nla_put_failure;
2403 
2404 	nlmsg_end(skb, nlh);
2405 	return 0;
2406 
2407 nla_put_failure:
2408 	nlmsg_cancel(skb, nlh);
2409 	return -EMSGSIZE;
2410 }
2411 
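/*
 *	Worst-case netlink message size for one MFC entry; unresolved
 *	entries omit the IIF, MULTIPATH and STATS attributes.
 */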
2412 static int mr6_msgsize(bool unresolved, int maxvif)
2413 {
2414 	size_t len =
2415 		NLMSG_ALIGN(sizeof(struct rtmsg))
2416 		+ nla_total_size(4)	/* RTA_TABLE */
2417 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2418 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2419 		;
2420 
2421 	if (!unresolved)
2422 		len = len
2423 		      + nla_total_size(4)	/* RTA_IIF */
2424 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2425 		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2426 						/* RTA_MFC_STATS */
2427 		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2428 		;
2429 
2430 	return len;
2431 }
2432 
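/*
 *	Notify RTNLGRP_IPV6_MROUTE listeners that @mfc changed
 *	(@cmd is RTM_NEWROUTE or RTM_DELROUTE).
 */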
2433 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
2434 			      int cmd)
2435 {
2436 	struct net *net = read_pnet(&mrt->net);
2437 	struct sk_buff *skb;
2438 	int err = -ENOBUFS;
2439 
2440 	skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
2441 			GFP_ATOMIC);
2442 	if (!skb)
2443 		goto errout;
2444 
2445 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2446 	if (err < 0)
2447 		goto errout;
2448 
2449 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2450 	return;
2451 
2452 errout:
2453 	kfree_skb(skb);
2454 	if (err < 0)
2455 		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2456 }
2457 
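/*
 *	Netlink message size for a cache report carrying @payloadlen bytes
 *	of the triggering packet.
 */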
2458 static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2459 {
2460 	size_t len =
2461 		NLMSG_ALIGN(sizeof(struct rtgenmsg))
2462 		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
2463 		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
2464 					/* IP6MRA_CREPORT_SRC_ADDR */
2465 		+ nla_total_size(sizeof(struct in6_addr))
2466 					/* IP6MRA_CREPORT_DST_ADDR */
2467 		+ nla_total_size(sizeof(struct in6_addr))
2468 					/* IP6MRA_CREPORT_PKT */
2469 		+ nla_total_size(payloadlen)
2470 		;
2471 
2472 	return len;
2473 }
2474 
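/*
 *	Mirror a kernel cache report (struct mrt6msg) to userspace as an
 *	RTM_NEWCACHEREPORT notification on RTNLGRP_IPV6_MROUTE_R, attaching
 *	the triggering packet as IP6MRA_CREPORT_PKT.
 */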
2475 static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt)
2476 {
2477 	struct net *net = read_pnet(&mrt->net);
2478 	struct nlmsghdr *nlh;
2479 	struct rtgenmsg *rtgenm;
2480 	struct mrt6msg *msg;
2481 	struct sk_buff *skb;
2482 	struct nlattr *nla;
2483 	int payloadlen;
2484 
2485 	payloadlen = pkt->len - sizeof(struct mrt6msg);
2486 	msg = (struct mrt6msg *)skb_transport_header(pkt);
2487 
2488 	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2489 	if (!skb)
2490 		goto errout;
2491 
2492 	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2493 			sizeof(struct rtgenmsg), 0);
2494 	if (!nlh)
2495 		goto errout;
2496 	rtgenm = nlmsg_data(nlh);
2497 	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2498 	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2499 	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2500 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2501 			     &msg->im6_src) ||
2502 	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2503 			     &msg->im6_dst))
2504 		goto nla_put_failure;
2505 
2506 	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2507 	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2508 				  nla_data(nla), payloadlen))
2509 		goto nla_put_failure;
2510 
2511 	nlmsg_end(skb, nlh);
2512 
2513 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2514 	return;
2515 
2516 nla_put_failure:
2517 	nlmsg_cancel(skb, nlh);
2518 errout:
2519 	kfree_skb(skb);
2520 	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2521 }
2522 
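/*
 *	Dump all MFC entries - resolved caches first, then the unresolved
 *	queue - resuming from the table/hash/entry indices saved in
 *	cb->args[] on the previous pass.
 */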
2523 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2524 {
2525 	struct net *net = sock_net(skb->sk);
2526 	struct mr6_table *mrt;
2527 	struct mfc6_cache *mfc;
2528 	unsigned int t = 0, s_t;
2529 	unsigned int h = 0, s_h;
2530 	unsigned int e = 0, s_e;
2531 
2532 	s_t = cb->args[0];
2533 	s_h = cb->args[1];
2534 	s_e = cb->args[2];
2535 
2536 	read_lock(&mrt_lock);
2537 	ip6mr_for_each_table(mrt, net) {
2538 		if (t < s_t)
2539 			goto next_table;
2540 		if (t > s_t)
2541 			s_h = 0;
2542 		for (h = s_h; h < MFC6_LINES; h++) {
2543 			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2544 				if (e < s_e)
2545 					goto next_entry;
2546 				if (ip6mr_fill_mroute(mrt, skb,
2547 						      NETLINK_CB(cb->skb).portid,
2548 						      cb->nlh->nlmsg_seq,
2549 						      mfc, RTM_NEWROUTE,
2550 						      NLM_F_MULTI) < 0)
2551 					goto done;
2552 next_entry:
2553 				e++;
2554 			}
2555 			e = s_e = 0;
2556 		}
2557 		spin_lock_bh(&mfc_unres_lock);
2558 		list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
2559 			if (e < s_e)
2560 				goto next_entry2;
2561 			if (ip6mr_fill_mroute(mrt, skb,
2562 					      NETLINK_CB(cb->skb).portid,
2563 					      cb->nlh->nlmsg_seq,
2564 					      mfc, RTM_NEWROUTE,
2565 					      NLM_F_MULTI) < 0) {
2566 				spin_unlock_bh(&mfc_unres_lock);
2567 				goto done;
2568 			}
2569 next_entry2:
2570 			e++;
2571 		}
2572 		spin_unlock_bh(&mfc_unres_lock);
2573 		e = s_e = 0;
2574 		s_h = 0;
2575 next_table:
2576 		t++;
2577 	}
2578 done:
2579 	read_unlock(&mrt_lock);
2580 
2581 	cb->args[2] = e;
2582 	cb->args[1] = h;
2583 	cb->args[0] = t;
2584 
2585 	return skb->len;
2586 }
2587