xref: /openbmc/linux/net/ipv6/ip6mr.c (revision 4f139972b489f8bc2c821aa25ac65018d92af3f7)
1 /*
2  *	Linux IPv6 multicast routing support for BSD pim6sd
3  *	Based on net/ipv4/ipmr.c.
4  *
5  *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6  *		LSIIT Laboratory, Strasbourg, France
7  *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8  *		6WIND, Paris, France
9  *	Copyright (C)2007,2008 USAGI/WIDE Project
10  *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11  *
12  *	This program is free software; you can redistribute it and/or
13  *	modify it under the terms of the GNU General Public License
14  *	as published by the Free Software Foundation; either version
15  *	2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <linux/uaccess.h>
20 #include <linux/types.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
23 #include <linux/timer.h>
24 #include <linux/mm.h>
25 #include <linux/kernel.h>
26 #include <linux/fcntl.h>
27 #include <linux/stat.h>
28 #include <linux/socket.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/inetdevice.h>
32 #include <linux/proc_fs.h>
33 #include <linux/seq_file.h>
34 #include <linux/init.h>
35 #include <linux/slab.h>
36 #include <linux/compat.h>
37 #include <net/protocol.h>
38 #include <linux/skbuff.h>
39 #include <net/sock.h>
40 #include <net/raw.h>
41 #include <linux/notifier.h>
42 #include <linux/if_arp.h>
43 #include <net/checksum.h>
44 #include <net/netlink.h>
45 #include <net/fib_rules.h>
46 
47 #include <net/ipv6.h>
48 #include <net/ip6_route.h>
49 #include <linux/mroute6.h>
50 #include <linux/pim.h>
51 #include <net/addrconf.h>
52 #include <linux/netfilter_ipv6.h>
53 #include <linux/export.h>
54 #include <net/ip6_checksum.h>
55 #include <linux/netconf.h>
56 
/* One IPv6 multicast routing table; one instance per table id per netns. */
struct mr6_table {
	struct list_head	list;		/* entry in net->ipv6.mr6_tables */
	possible_net_t		net;		/* owning network namespace */
	u32			id;		/* table identifier */
	struct sock		*mroute6_sk;	/* user-space routing daemon's socket */
	struct timer_list	ipmr_expire_timer;	/* reaps unresolved entries */
	struct list_head	mfc6_unres_queue;	/* entries awaiting daemon resolution */
	struct list_head	mfc6_cache_array[MFC6_LINES];	/* resolved entry hash buckets */
	struct mif_device	vif6_table[MAXMIFS];	/* multicast virtual interfaces */
	int			maxvif;		/* highest used vif index + 1 */
	atomic_t		cache_resolve_queue_len;	/* length of mfc6_unres_queue */
	bool			mroute_do_assert;	/* report wrong-iif packets to daemon */
	bool			mroute_do_pim;		/* PIM mode enabled */
#ifdef CONFIG_IPV6_PIMSM_V2
	int			mroute_reg_vif_num;	/* register vif index, -1 if none */
#endif
};
74 
/* A policy rule of the RTNL_FAMILY_IP6MR family; no fields beyond fib_rule. */
struct ip6mr_rule {
	struct fib_rule		common;
};

/* Result cookie filled in by ip6mr_rule_action() during a rule lookup. */
struct ip6mr_result {
	struct mr6_table	*mrt;
};
82 
83 /* Big lock, protecting vif table, mrt cache and mroute socket state.
84    Note that the changes are semaphored via rtnl_lock.
85  */
86 
87 static DEFINE_RWLOCK(mrt_lock);
88 
89 /*
90  *	Multicast router control variables
91  */
92 
93 #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
94 
95 /* Special spinlock for queue of unresolved entries */
96 static DEFINE_SPINLOCK(mfc_unres_lock);
97 
98 /* We return to original Alan's scheme. Hash table of resolved
99    entries is changed only in process context and protected
100    with weak lock mrt_lock. Queue of unresolved entries is protected
101    with strong spinlock mfc_unres_lock.
102 
103    In this case data path is free of exclusive locks at all.
104  */
105 
106 static struct kmem_cache *mrt_cachep __read_mostly;
107 
108 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
109 static void ip6mr_free_table(struct mr6_table *mrt);
110 
111 static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
112 			   struct sk_buff *skb, struct mfc6_cache *cache);
113 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
114 			      mifi_t mifi, int assert);
115 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
116 			       struct mfc6_cache *c, struct rtmsg *rtm);
117 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
118 			      int cmd);
119 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
120 			       struct netlink_callback *cb);
121 static void mroute_clean_tables(struct mr6_table *mrt, bool all);
122 static void ipmr_expire_process(unsigned long arg);
123 
124 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
125 #define ip6mr_for_each_table(mrt, net) \
126 	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
127 
128 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
129 {
130 	struct mr6_table *mrt;
131 
132 	ip6mr_for_each_table(mrt, net) {
133 		if (mrt->id == id)
134 			return mrt;
135 	}
136 	return NULL;
137 }
138 
139 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
140 			    struct mr6_table **mrt)
141 {
142 	int err;
143 	struct ip6mr_result res;
144 	struct fib_lookup_arg arg = {
145 		.result = &res,
146 		.flags = FIB_LOOKUP_NOREF,
147 	};
148 
149 	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
150 			       flowi6_to_flowi(flp6), 0, &arg);
151 	if (err < 0)
152 		return err;
153 	*mrt = res.mrt;
154 	return 0;
155 }
156 
157 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
158 			     int flags, struct fib_lookup_arg *arg)
159 {
160 	struct ip6mr_result *res = arg->result;
161 	struct mr6_table *mrt;
162 
163 	switch (rule->action) {
164 	case FR_ACT_TO_TBL:
165 		break;
166 	case FR_ACT_UNREACHABLE:
167 		return -ENETUNREACH;
168 	case FR_ACT_PROHIBIT:
169 		return -EACCES;
170 	case FR_ACT_BLACKHOLE:
171 	default:
172 		return -EINVAL;
173 	}
174 
175 	mrt = ip6mr_get_table(rule->fr_net, rule->table);
176 	if (!mrt)
177 		return -EAGAIN;
178 	res->mrt = mrt;
179 	return 0;
180 }
181 
/* fib_rules match callback: every IP6MR rule matches every flow. */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}
186 
/* Netlink attribute policy: only the generic fib rule attributes. */
static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};
190 
/* fib_rules configure callback: IP6MR rules carry no private config. */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}
196 
/* fib_rules compare callback: all IP6MR rules are considered equal. */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}
202 
203 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
204 			   struct fib_rule_hdr *frh)
205 {
206 	frh->dst_len = 0;
207 	frh->src_len = 0;
208 	frh->tos     = 0;
209 	return 0;
210 }
211 
/* fib_rules ops template for the RTNL_FAMILY_IP6MR rule family;
 * instantiated per-namespace by ip6mr_rules_init().
 */
static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};
225 
/* Namespace init (multiple-tables build): register the IP6MR fib_rules
 * ops, create the default table and install a catch-all rule for it.
 */
static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr6_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (!mrt) {
		err = -ENOMEM;
		goto err1;
	}

	/* Lowest-priority rule sending everything to the default table */
	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	/* NOTE(review): ip6mr_free_table() does not unlink mrt from
	 * net->ipv6.mr6_tables, where ip6mr_new_table() just added it —
	 * confirm the stale list entry is harmless given namespace init
	 * is failing here.
	 */
	ip6mr_free_table(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}
257 
/* Namespace teardown (multiple-tables build): free every table, then
 * unregister the rules ops.  All done under RTNL.
 */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr6_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
	rtnl_unlock();
}
270 #else
271 #define ip6mr_for_each_table(mrt, net) \
272 	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
273 
/* Single-table build: @id is ignored; the one table is returned. */
static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}
278 
/* Single-table build: every flow resolves to the one table. */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}
285 
286 static int __net_init ip6mr_rules_init(struct net *net)
287 {
288 	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
289 	return net->ipv6.mrt6 ? 0 : -ENOMEM;
290 }
291 
/* Single-table build: free the default table at namespace exit. */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}
299 #endif
300 
/* Return the table with the given id, creating (and, in the
 * multiple-tables build, registering) it if it does not exist yet.
 * Returns NULL on allocation failure.
 */
static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;
	unsigned int i;

	mrt = ip6mr_get_table(net, id);
	if (mrt)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (!mrt)
		return NULL;
	mrt->id = id;
	write_pnet(&mrt->net, net);

	/* Forwarding cache */
	for (i = 0; i < MFC6_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);

	/* Timer that reaps entries the daemon never resolved */
	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IPV6_PIMSM_V2
	mrt->mroute_reg_vif_num = -1;	/* no register vif yet */
#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
	return mrt;
}
333 
/* Tear down a table: stop the expiry timer, flush all vifs and cache
 * entries, then free the table itself.
 */
static void ip6mr_free_table(struct mr6_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, true);
	kfree(mrt);
}
340 
341 #ifdef CONFIG_PROC_FS
342 
/* Iterator state for the multicast forwarding cache seq file */
struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;		/* table being dumped */
	struct list_head *cache;	/* list the current entry belongs to */
	int ct;				/* current hash bucket index */
};
349 
350 
/* Return the pos'th multicast cache entry, scanning the resolved hash
 * buckets first and then the unresolved queue.
 *
 * Locking is unusual: when an entry is found, the relevant lock
 * (mrt_lock for a resolved entry, mfc_unres_lock for an unresolved one)
 * is left held; it->cache records which list the entry came from so
 * that ipmr_mfc_seq_stop() can release the right lock.
 */
static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
					   struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr6_table *mrt = it->mrt;
	struct mfc6_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc6_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}
376 
377 /*
378  *	The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
379  */
380 
/* Iterator state for the vif table seq file */
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;	/* table being dumped */
	int ct;			/* current vif index */
};
386 
387 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
388 					    struct ipmr_vif_iter *iter,
389 					    loff_t pos)
390 {
391 	struct mr6_table *mrt = iter->mrt;
392 
393 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
394 		if (!MIF_EXISTS(mrt, iter->ct))
395 			continue;
396 		if (pos-- == 0)
397 			return &mrt->vif6_table[iter->ct];
398 	}
399 	return NULL;
400 }
401 
/* seq_file start for the vif table: takes mrt_lock for reading; it is
 * released in ip6mr_vif_seq_stop().
 */
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}
419 
420 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
421 {
422 	struct ipmr_vif_iter *iter = seq->private;
423 	struct net *net = seq_file_net(seq);
424 	struct mr6_table *mrt = iter->mrt;
425 
426 	++*pos;
427 	if (v == SEQ_START_TOKEN)
428 		return ip6mr_vif_seq_idx(net, iter, 0);
429 
430 	while (++iter->ct < mrt->maxvif) {
431 		if (!MIF_EXISTS(mrt, iter->ct))
432 			continue;
433 		return &mrt->vif6_table[iter->ct];
434 	}
435 	return NULL;
436 }
437 
/* seq_file stop: drop the read lock taken in ip6mr_vif_seq_start(). */
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}
443 
/* Emit one vif-table row (or the column header for SEQ_START_TOKEN). */
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr6_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		/* the vif index is its offset within vif6_table */
		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - mrt->vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}
465 
static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

/* open() for the vif seq file: per-net seq_file carrying the iterator */
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
486 
487 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
488 {
489 	struct ipmr_mfc_iter *it = seq->private;
490 	struct net *net = seq_file_net(seq);
491 	struct mr6_table *mrt;
492 
493 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
494 	if (!mrt)
495 		return ERR_PTR(-ENOENT);
496 
497 	it->mrt = mrt;
498 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
499 		: SEQ_START_TOKEN;
500 }
501 
/* seq_file next for the mfc cache: advance within the current list,
 * then to the next non-empty hash bucket, and finally hand over from
 * the resolved buckets (mrt_lock) to the unresolved queue
 * (mfc_unres_lock), swapping locks on the way.  Whichever lock is left
 * held is released by ipmr_mfc_seq_stop().
 */
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	/* more entries left in the current list? */
	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc6_cache, list);

	if (it->cache == &mrt->mfc6_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);

	while (++it->ct < MFC6_LINES) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc6_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc6_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc6_cache, list);

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}
544 
/* seq_file stop: release whichever lock ipmr_mfc_seq_idx()/next() left
 * held, inferred from the list it->cache points into.
 */
static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr6_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc6_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == &mrt->mfc6_cache_array[it->ct])
		read_unlock(&mrt_lock);
}
555 
/* Emit one mfc-cache row (or the column header for SEQ_START_TOKEN). */
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		struct mr6_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->mf6c_parent);

		if (it->cache != &mrt->mfc6_unres_queue) {
			/* resolved entry: counters plus per-oif TTL thresholds */
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}
597 
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

/* open() for the mfc cache seq file: per-net seq_file with iterator */
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
618 #endif
619 
620 #ifdef CONFIG_IPV6_PIMSM_V2
621 
/* Handler for IPPROTO_PIM packets: validate a PIMv2 REGISTER message
 * and re-inject the encapsulated IPv6 multicast packet into the stack
 * as if received on the register vif device.  Always consumes the skb.
 */
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	/* Accept only non-null REGISTER messages whose header-only or
	 * full-packet checksum verifies.
	 */
	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	/* hold a reference on the register vif device across the unlock */
	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (!reg_dev)
		goto drop;

	/* strip the outer headers and re-inject the inner IPv6 packet */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
686 
/* Registered for IPPROTO_PIM when PIMSM v2 support is enabled */
static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};
690 
691 /* Service routines creating virtual interfaces: PIMREG */
692 
/* ndo_start_xmit for the register vif: every packet routed to this
 * device is reported whole to the user-space daemon as
 * MRT6MSG_WHOLEPKT, counted in the device stats, and then freed.
 */
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		/* NOTE(review): returns a negative errno rather than a
		 * netdev_tx_t code on lookup failure — confirm callers
		 * tolerate this.
		 */
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}
719 
/* The register vif has no underlying link device */
static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};
729 
/* alloc_netdev() setup callback for the PIM register vif device */
static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	/* leave room for an outer IPv6 header plus the 8-byte PIM header */
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
739 
/* Create, register and open the PIM register vif device ("pim6reg" for
 * the default table, "pim6reg<id>" otherwise).  Returns the device with
 * a reference held, or NULL on failure.
 */
static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT6_TABLE_DFLT)
		sprintf(name, "pim6reg");
	else
		sprintf(name, "pim6reg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}
771 #endif
772 
/*
 *	Delete a VIF entry: clear the table slot under mrt_lock, shrink
 *	maxvif if the highest slot was freed, drop allmulti and the
 *	device reference, and queue register vifs for unregistration on
 *	@head.
 */

static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
{
	struct mif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	/* if the top slot was freed, shrink maxvif to the next used one */
	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	/* keep the per-device mc_forwarding count and netconf in sync */
	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding--;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}
829 
/* Return a cache entry to the slab */
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}
834 
835 /* Destroy an unresolved cache entry, killing queued skbs
836    and reporting error to netlink readers.
837  */
838 
static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			/* version == 0 marks a parked netlink request (a
			 * real IPv6 header has version 6): answer the
			 * requester with -ETIMEDOUT.
			 */
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}
860 
861 
862 /* Timer process for all the unresolved queue. */
863 
/* Destroy unresolved entries whose deadline has passed and re-arm the
 * timer for the earliest remaining expiry.  Called with mfc_unres_lock
 * held (see ipmr_expire_process()).
 */
static void ipmr_do_expire_process(struct mr6_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;	/* upper bound on the re-arm delay */
	struct mfc6_cache *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}
887 
/* Timer callback for mrt->ipmr_expire_timer.  If the unresolved-queue
 * lock is contended, retry one jiffy later instead of spinning.
 */
static void ipmr_expire_process(unsigned long arg)
{
	struct mr6_table *mrt = (struct mr6_table *)arg;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}
902 
903 /* Fill oifs list. It is called under write locked mrt_lock. */
904 
905 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
906 				    unsigned char *ttls)
907 {
908 	int vifi;
909 
910 	cache->mfc_un.res.minvif = MAXMIFS;
911 	cache->mfc_un.res.maxvif = 0;
912 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
913 
914 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
915 		if (MIF_EXISTS(mrt, vifi) &&
916 		    ttls[vifi] && ttls[vifi] < 255) {
917 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
918 			if (cache->mfc_un.res.minvif > vifi)
919 				cache->mfc_un.res.minvif = vifi;
920 			if (cache->mfc_un.res.maxvif <= vifi)
921 				cache->mfc_un.res.maxvif = vifi + 1;
922 		}
923 	}
924 	cache->mfc_un.res.lastuse = jiffies;
925 }
926 
/* Add (activate) the virtual interface described by @vifc.
 * @mrtsock: non-zero when requested via the mroute socket; otherwise
 * the vif is flagged VIFF_STATIC (presumably so it survives a flush of
 * the daemon's entries — TODO confirm against mroute_clean_tables()).
 */
static int mif6_add(struct net *net, struct mr6_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (MIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		/* ordinary vif bound to an existing interface */
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	/* account the new forwarder in netconf */
	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding++;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev_get_iflink(dev);

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
1010 
1011 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
1012 					   const struct in6_addr *origin,
1013 					   const struct in6_addr *mcastgrp)
1014 {
1015 	int line = MFC6_HASH(mcastgrp, origin);
1016 	struct mfc6_cache *c;
1017 
1018 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1019 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1020 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1021 			return c;
1022 	}
1023 	return NULL;
1024 }
1025 
1026 /* Look for a (*,*,oif) entry */
1027 static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
1028 						      mifi_t mifi)
1029 {
1030 	int line = MFC6_HASH(&in6addr_any, &in6addr_any);
1031 	struct mfc6_cache *c;
1032 
1033 	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1034 		if (ipv6_addr_any(&c->mf6c_origin) &&
1035 		    ipv6_addr_any(&c->mf6c_mcastgrp) &&
1036 		    (c->mfc_un.res.ttls[mifi] < 255))
1037 			return c;
1038 
1039 	return NULL;
1040 }
1041 
/* Look for a (*,G) entry that forwards on oif @mifi; an entry also
 * matches if its (*,*) proxy parent forwards on @mifi.  Falls back to
 * the (*,*) wildcard lookup when @mcastgrp is unspecified or nothing
 * matched.
 */
static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	int line = MFC6_HASH(mcastgrp, &in6addr_any);
	struct mfc6_cache *c, *proxy;

	if (ipv6_addr_any(mcastgrp))
		goto skip;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
		if (ipv6_addr_any(&c->mf6c_origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
			if (c->mfc_un.res.ttls[mifi] < 255)
				return c;

			/* It's ok if the mifi is part of the static tree */
			proxy = ip6mr_cache_find_any_parent(mrt,
							    c->mf6c_parent);
			if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
				return c;
		}

skip:
	return ip6mr_cache_find_any_parent(mrt, mifi);
}
1069 
1070 /*
1071  *	Allocate a multicast cache entry
1072  */
1073 static struct mfc6_cache *ip6mr_cache_alloc(void)
1074 {
1075 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1076 	if (!c)
1077 		return NULL;
1078 	c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
1079 	c->mfc_un.res.minvif = MAXMIFS;
1080 	return c;
1081 }
1082 
1083 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1084 {
1085 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1086 	if (!c)
1087 		return NULL;
1088 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
1089 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
1090 	return c;
1091 }
1092 
/*
 *	A cache entry has gone into a resolved state from queued: replay
 *	the packets and netlink requests parked on the unresolved entry
 *	@uc against the resolved entry @c.
 */

static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			/* version == 0 marks a parked netlink request:
			 * answer it with the now-resolved route, or an
			 * error if filling the message fails.
			 */
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			ip6_mr_forward(net, mrt, skb, c);
	}
}
1123 
1124 /*
1125  *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1126  *	expects the following bizarre scheme.
1127  *
1128  *	Called under mrt_lock.
1129  */
1130 
/* ip6mr_cache_report - bounce a cache query up to the pim6sd daemon.
 *
 * Builds an skb carrying a struct mrt6msg and queues it on the table's
 * control socket.  For MRT6MSG_WHOLEPKT (PIM register interface) the
 * whole original packet is duplicated behind the message header;
 * otherwise only the IPv6 header of @pkt is copied.
 *
 * Called under mrt_lock.  Returns 0 on success, -ENOBUFS if no skb
 * could be allocated, -EINVAL if the table has no control socket, or
 * the negative errno from sock_queue_rcv_skb() if queueing fails.
 */
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	if (!mrt->mroute6_sk) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}
1219 
1220 /*
1221  *	Queue a packet for resolution. It gets locked cache entry!
1222  */
1223 
/* ip6mr_cache_unresolved - queue a packet awaiting route resolution.
 *
 * Looks up (or creates) an unresolved cache entry matching the
 * packet's (S,G).  Creating a new entry triggers a MRT6MSG_NOCACHE
 * report to the daemon and arms the expiry timer.  At most 10
 * unresolved entries are kept per table, and at most 4 packets are
 * queued per entry; beyond either limit the packet is dropped with
 * -ENOBUFS.  Consumes @skb on every path.
 */
static int
ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc6_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		ipmr_do_expire_process(mrt);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
1296 
1297 /*
1298  *	MFC6 cache manipulation by user space
1299  */
1300 
/* ip6mr_mfc_delete - remove an (S,G) forwarding cache entry.
 * @parent: incoming MIF index that must match, or -1 to match any.
 *
 * Returns 0 on success, -ENOENT if no matching entry exists.
 */
static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	int line;
	struct mfc6_cache *c, *next;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp,
				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
		    (parent == -1 || parent == c->mf6c_parent)) {
			/* Unlink under mrt_lock so concurrent readers
			 * never see a half-removed entry.
			 */
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}
1325 
1326 static int ip6mr_device_event(struct notifier_block *this,
1327 			      unsigned long event, void *ptr)
1328 {
1329 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1330 	struct net *net = dev_net(dev);
1331 	struct mr6_table *mrt;
1332 	struct mif_device *v;
1333 	int ct;
1334 	LIST_HEAD(list);
1335 
1336 	if (event != NETDEV_UNREGISTER)
1337 		return NOTIFY_DONE;
1338 
1339 	ip6mr_for_each_table(mrt, net) {
1340 		v = &mrt->vif6_table[0];
1341 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1342 			if (v->dev == dev)
1343 				mif6_delete(mrt, ct, &list);
1344 		}
1345 	}
1346 	unregister_netdevice_many(&list);
1347 
1348 	return NOTIFY_DONE;
1349 }
1350 
/* Hooks device-unregister events so stale MIFs are cleaned up. */
static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
1354 
1355 /*
1356  *	Setup for IP multicast routing
1357  */
1358 
/* Per-netns initialisation: set up the routing table(s)/rules and,
 * when procfs is enabled, the "ip6_mr_vif" and "ip6_mr_cache" status
 * files.  Unwinds in reverse order on failure.
 */
static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
fail:
	return err;
}
1386 
/* Per-netns teardown: mirror image of ip6mr_net_init(). */
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
}
1395 
/* Registered in ip6_mr_init() so each netns gets its own mroute state. */
static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};
1400 
/* ip6_mr_init - subsystem init.
 *
 * Creates the mfc6_cache slab, registers the per-netns operations,
 * the netdevice notifier, the PIM protocol handler (when
 * CONFIG_IPV6_PIMSM_V2) and the RTNL route-dump handler.  Each
 * failure path unwinds everything registered before it, in reverse
 * order.
 */
int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
		      ip6mr_rtm_dumproute, NULL);
	return 0;
#ifdef CONFIG_IPV6_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}
1439 
/* Subsystem exit: unregister everything ip6_mr_init() set up, in
 * reverse registration order.
 */
void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}
1450 
/* ip6mr_mfc_add - add or update an (S,G) forwarding cache entry.
 * @mrtsock: true when the request comes from the daemon's own socket;
 *	     otherwise the entry is flagged MFC_STATIC so it survives
 *	     the daemon socket closing (see mroute_clean_tables()).
 * @parent:  incoming MIF to match when updating, or -1 for the
 *	     non-proxy MRT6_ADD_MFC case.
 *
 * If a matching entry already exists its parent MIF and TTL
 * thresholds are updated in place.  Otherwise a new entry is inserted
 * and, if packets were queued for this (S,G) while it was unresolved,
 * they are released through ip6mr_cache_resolve().
 */
static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	bool found = false;
	int line;
	struct mfc6_cache *uc, *c;
	unsigned char ttls[MAXMIFS];
	int i;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	/* TTL threshold 1 for MIFs in the set, 255 (never forward)
	 * for everything else.
	 */
	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;

	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp,
				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
		    (parent == -1 || parent == mfc->mf6cc_parent)) {
			found = true;
			break;
		}
	}

	if (found) {
		/* Update the existing entry in place. */
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc6_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc6_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}
1538 
1539 /*
1540  *	Close the multicast socket, and clear the vif tables etc
1541  */
1542 
/* mroute_clean_tables - tear down the forwarding state of one table.
 * @all: true to wipe everything; false to keep entries marked static
 *	 (VIFF_STATIC / MFC_STATIC), as done when the controlling
 *	 socket closes (see ip6mr_sk_done()).
 */
static void mroute_clean_tables(struct mr6_table *mrt, bool all)
{
	int i;
	LIST_HEAD(list);
	struct mfc6_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
			continue;
		mif6_delete(mrt, i, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC6_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
			if (!all && (c->mfc_flags & MFC_STATIC))
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_cache_free(c);
		}
	}

	/* Unresolved entries (and their queued skbs) go too. */
	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
			list_del(&c->list);
			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
1585 
/* Register @sk as the multicast-routing daemon socket for @mrt.
 * Only one control socket is allowed per table (-EADDRINUSE
 * otherwise).  On success, bumps the all-devices mc_forwarding
 * counter and notifies netconf listeners.  Called from
 * setsockopt(MRT6_INIT).
 */
static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(mrt->mroute6_sk == NULL)) {
		mrt->mroute6_sk = sk;
		net->ipv6.devconf_all->mc_forwarding++;
	} else {
		err = -EADDRINUSE;
	}
	write_unlock_bh(&mrt_lock);

	if (!err)
		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	rtnl_unlock();

	return err;
}
1610 
/* Undo ip6mr_sk_init(): detach @sk from whichever table it controls,
 * drop the mc_forwarding counter, notify netconf listeners, and flush
 * all non-static routing state via mroute_clean_tables().  Returns
 * -EACCES when @sk controls no table in its netns.
 */
int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == mrt->mroute6_sk) {
			write_lock_bh(&mrt_lock);
			mrt->mroute6_sk = NULL;
			net->ipv6.devconf_all->mc_forwarding--;
			write_unlock_bh(&mrt_lock);
			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);

			mroute_clean_tables(mrt, false);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}
1638 
/* Look up the routing table matching @skb's flow (iif/oif/mark) and
 * return its controlling daemon socket, or NULL if no table matches.
 */
struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
{
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_oif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		return NULL;

	return mrt->mroute6_sk;
}
1653 
1654 /*
1655  *	Socket options and virtual interface manipulation. The whole
1656  *	virtual interface system is a complete heap, but unfortunately
1657  *	that's how BSD mrouted happens to think. Maybe one day with a proper
1658  *	MOSPF/PIM router set up we can clean this up.
1659  */
1660 
1661 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1662 {
1663 	int ret, parent = 0;
1664 	struct mif6ctl vif;
1665 	struct mf6cctl mfc;
1666 	mifi_t mifi;
1667 	struct net *net = sock_net(sk);
1668 	struct mr6_table *mrt;
1669 
1670 	if (sk->sk_type != SOCK_RAW ||
1671 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1672 		return -EOPNOTSUPP;
1673 
1674 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1675 	if (!mrt)
1676 		return -ENOENT;
1677 
1678 	if (optname != MRT6_INIT) {
1679 		if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
1680 			return -EACCES;
1681 	}
1682 
1683 	switch (optname) {
1684 	case MRT6_INIT:
1685 		if (optlen < sizeof(int))
1686 			return -EINVAL;
1687 
1688 		return ip6mr_sk_init(mrt, sk);
1689 
1690 	case MRT6_DONE:
1691 		return ip6mr_sk_done(sk);
1692 
1693 	case MRT6_ADD_MIF:
1694 		if (optlen < sizeof(vif))
1695 			return -EINVAL;
1696 		if (copy_from_user(&vif, optval, sizeof(vif)))
1697 			return -EFAULT;
1698 		if (vif.mif6c_mifi >= MAXMIFS)
1699 			return -ENFILE;
1700 		rtnl_lock();
1701 		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1702 		rtnl_unlock();
1703 		return ret;
1704 
1705 	case MRT6_DEL_MIF:
1706 		if (optlen < sizeof(mifi_t))
1707 			return -EINVAL;
1708 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1709 			return -EFAULT;
1710 		rtnl_lock();
1711 		ret = mif6_delete(mrt, mifi, NULL);
1712 		rtnl_unlock();
1713 		return ret;
1714 
1715 	/*
1716 	 *	Manipulate the forwarding caches. These live
1717 	 *	in a sort of kernel/user symbiosis.
1718 	 */
1719 	case MRT6_ADD_MFC:
1720 	case MRT6_DEL_MFC:
1721 		parent = -1;
1722 	case MRT6_ADD_MFC_PROXY:
1723 	case MRT6_DEL_MFC_PROXY:
1724 		if (optlen < sizeof(mfc))
1725 			return -EINVAL;
1726 		if (copy_from_user(&mfc, optval, sizeof(mfc)))
1727 			return -EFAULT;
1728 		if (parent == 0)
1729 			parent = mfc.mf6cc_parent;
1730 		rtnl_lock();
1731 		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1732 			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1733 		else
1734 			ret = ip6mr_mfc_add(net, mrt, &mfc,
1735 					    sk == mrt->mroute6_sk, parent);
1736 		rtnl_unlock();
1737 		return ret;
1738 
1739 	/*
1740 	 *	Control PIM assert (to activate pim will activate assert)
1741 	 */
1742 	case MRT6_ASSERT:
1743 	{
1744 		int v;
1745 
1746 		if (optlen != sizeof(v))
1747 			return -EINVAL;
1748 		if (get_user(v, (int __user *)optval))
1749 			return -EFAULT;
1750 		mrt->mroute_do_assert = v;
1751 		return 0;
1752 	}
1753 
1754 #ifdef CONFIG_IPV6_PIMSM_V2
1755 	case MRT6_PIM:
1756 	{
1757 		int v;
1758 
1759 		if (optlen != sizeof(v))
1760 			return -EINVAL;
1761 		if (get_user(v, (int __user *)optval))
1762 			return -EFAULT;
1763 		v = !!v;
1764 		rtnl_lock();
1765 		ret = 0;
1766 		if (v != mrt->mroute_do_pim) {
1767 			mrt->mroute_do_pim = v;
1768 			mrt->mroute_do_assert = v;
1769 		}
1770 		rtnl_unlock();
1771 		return ret;
1772 	}
1773 
1774 #endif
1775 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1776 	case MRT6_TABLE:
1777 	{
1778 		u32 v;
1779 
1780 		if (optlen != sizeof(u32))
1781 			return -EINVAL;
1782 		if (get_user(v, (u32 __user *)optval))
1783 			return -EFAULT;
1784 		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1785 		if (v != RT_TABLE_DEFAULT && v >= 100000000)
1786 			return -EINVAL;
1787 		if (sk == mrt->mroute6_sk)
1788 			return -EBUSY;
1789 
1790 		rtnl_lock();
1791 		ret = 0;
1792 		if (!ip6mr_new_table(net, v))
1793 			ret = -ENOMEM;
1794 		raw6_sk(sk)->ip6mr_table = v;
1795 		rtnl_unlock();
1796 		return ret;
1797 	}
1798 #endif
1799 	/*
1800 	 *	Spurious command, or MRT6_VERSION which you cannot
1801 	 *	set.
1802 	 */
1803 	default:
1804 		return -ENOPROTOOPT;
1805 	}
1806 }
1807 
1808 /*
1809  *	Getsock opt support for the multicast routing system.
1810  */
1811 
/* ip6_mroute_getsockopt - read-only counterparts of the MRT6_* options.
 *
 * Only MRT6_VERSION, MRT6_PIM (when configured) and MRT6_ASSERT can
 * be queried.  The integer result is truncated to the caller-supplied
 * buffer length, which is written back through @optlen.
 */
int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}
1857 
1858 /*
1859  *	The IP multicast ioctl support routines.
1860  */
1861 
/* ip6mr_ioctl - per-MIF and per-(S,G) counters for userspace.
 *
 * SIOCGETMIFCNT_IN6 returns packet/byte in/out counters for one MIF;
 * SIOCGETSGCNT_IN6 returns packet/byte/wrong-if counters for one
 * cache entry.  Returns -EADDRNOTAVAIL when the MIF or entry does
 * not exist.
 */
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (MIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
1918 
1919 #ifdef CONFIG_COMPAT
/* 32-bit ABI layout of struct sioc_sg_req6: counters are
 * compat_ulong_t instead of native unsigned long.
 */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};
1927 
/* 32-bit ABI layout of struct sioc_mif_req6: counters are
 * compat_ulong_t instead of native unsigned long.
 */
struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};
1935 
/* Compat (32-bit userland) twin of ip6mr_ioctl(): same commands and
 * semantics, but copies the compat_* request structs instead of the
 * native ones.
 */
int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (MIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
1993 
/* Final output step for a forwarded copy: bump the forwarding MIB
 * counters and hand the skb to dst_output().
 */
static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTOCTETS, skb->len);
	return dst_output(net, sk, skb);
}
2002 
2003 /*
2004  *	Processing handlers for ip6mr_forward
2005  */
2006 
/* ip6mr_forward2 - transmit one copy of @skb on MIF @vifi.
 *
 * A register MIF (MIFF_REGISTER) never transmits: the whole packet is
 * bounced to the daemon as MRT6MSG_WHOLEPKT instead.  Otherwise a
 * route on the MIF's link is looked up, the hop limit is decremented,
 * and the packet goes through the NF_INET_FORWARD netfilter hook to
 * ip6mr_forward2_finish().  Consumes @skb on every path.
 */
static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (!vif->dev)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
2080 
2081 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2082 {
2083 	int ct;
2084 
2085 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2086 		if (mrt->vif6_table[ct].dev == dev)
2087 			break;
2088 	}
2089 	return ct;
2090 }
2091 
/* ip6_mr_forward - replicate @skb according to cache entry @cache.
 *
 * Bumps the entry's statistics, performs the wrong-interface check
 * (possibly raising a MRT6MSG_WRONGMIF assert to the daemon), then
 * clones the packet to every outgoing MIF whose TTL threshold passes.
 * The last copy consumes the original skb; (*,*) entries forward only
 * towards the upstream MIF.  Consumes @skb on every path.
 */
static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;
	int true_vifi = ip6mr_find_vif(mrt, skb->dev);

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;
	cache->mfc_un.res.lastuse = jiffies;

	if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
			goto forward;
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif6_table[vif].dev != skb->dev) {
		cache->mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			/* Rate-limited by MFC_ASSERT_THRESH per entry */
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

forward:
	mrt->vif6_table[vif].pkt_in++;
	mrt->vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&cache->mf6c_origin) &&
	    ipv6_addr_any(&cache->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != cache->mf6c_parent &&
		    ipv6_hdr(skb)->hop_limit >
				cache->mfc_un.res.ttls[cache->mf6c_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = cache->mf6c_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				/* Clone for every MIF but the last one */
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2, cache, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		/* Last target gets the original skb, no clone needed */
		ip6mr_forward2(net, mrt, skb, cache, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}
2181 
2182 
2183 /*
2184  *	Multicast packets for forwarding arrive here
2185  */
2186 
/* ip6_mr_input - entry point for multicast packets to be routed.
 *
 * Resolves the table from the skb's flow, looks up the (S,G) cache
 * entry — falling back to (*,G)/(*,*) entries via
 * ip6mr_cache_find_any() — then either forwards the packet or queues
 * it as unresolved so the daemon can install a route.  Consumes @skb.
 */
int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
	if (!cache) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt,
						     &ipv6_hdr(skb)->daddr,
						     vif);
	}

	/*
	 *	No usable cache entry
	 */
	if (!cache) {
		int vif;

		vif = ip6mr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(net, mrt, skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}
2240 
2241 
/* __ip6mr_fill_mroute - append IIF/OIF/statistics netlink attributes
 * describing cache entry @c to @skb.
 *
 * Returns 1 on success, -ENOENT for an unresolved entry (the rtmsg is
 * flagged RTNH_F_UNRESOLVED instead of filled), or -EMSGSIZE if the
 * skb ran out of room (partially written nests are cancelled).
 */
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm)
{
	struct rta_mfc_stats mfcs;
	struct nlattr *mp_attr;
	struct rtnexthop *nhp;
	unsigned long lastuse;
	int ct;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mf6c_parent >= MAXMIFS) {
		rtm->rtm_flags |= RTNH_F_UNRESOLVED;
		return -ENOENT;
	}

	if (MIF_EXISTS(mrt, c->mf6c_parent) &&
	    nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
		return -EMSGSIZE;
	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
	if (!mp_attr)
		return -EMSGSIZE;

	/* One rtnexthop per outgoing MIF whose TTL threshold allows
	 * forwarding.
	 */
	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
			if (!nhp) {
				nla_nest_cancel(skb, mp_attr);
				return -EMSGSIZE;
			}

			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}

	nla_nest_end(skb, mp_attr);

	lastuse = READ_ONCE(c->mfc_un.res.lastuse);
	lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;

	mfcs.mfcs_packets = c->mfc_un.res.pkt;
	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
	if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
	    nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
			      RTA_PAD))
		return -EMSGSIZE;

	rtm->rtm_type = RTN_MULTICAST;
	return 1;
}
2295 
/* RTM_GETROUTE back-end for IPv6 multicast destinations.
 *
 * Looks up the (rt6i_src, rt6i_dst) pair of the rt6_info attached to
 * @skb in the default multicast table and, on a hit, fills @rtm via
 * __ip6mr_fill_mroute().  On a miss, a fresh skb carrying a minimal
 * synthetic IPv6 header is handed to ip6mr_cache_unresolved() so the
 * lookup can be resolved asynchronously.
 *
 * Returns the __ip6mr_fill_mroute() result on a cache hit, otherwise
 * -ENOENT (no default table), -ENODEV (no mif for the ingress device),
 * -ENOMEM, or the ip6mr_cache_unresolved() result.
 */
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
		    u32 portid)
{
	int err;
	struct mr6_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		/* no exact (S,G) entry: fall back to an (*,G) entry scoped
		 * to the mif the packet arrived on
		 */
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		dev = skb->dev;
		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct?
		 * Build a throwaway skb holding only an IPv6 header with
		 * the looked-up addresses; payload is irrelevant for
		 * queueing the unresolved request.
		 */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		NETLINK_CB(skb2).portid = portid;
		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		/* Synthetic header: only saddr/daddr matter; everything
		 * else is zeroed and nexthdr marked as "no next header".
		 */
		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (rtm->rtm_flags & RTM_F_NOTIFY)
		cache->mfc_flags |= MFC_NOTIFY;

	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
2368 
2369 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2370 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2371 			     int flags)
2372 {
2373 	struct nlmsghdr *nlh;
2374 	struct rtmsg *rtm;
2375 	int err;
2376 
2377 	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2378 	if (!nlh)
2379 		return -EMSGSIZE;
2380 
2381 	rtm = nlmsg_data(nlh);
2382 	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2383 	rtm->rtm_dst_len  = 128;
2384 	rtm->rtm_src_len  = 128;
2385 	rtm->rtm_tos      = 0;
2386 	rtm->rtm_table    = mrt->id;
2387 	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2388 		goto nla_put_failure;
2389 	rtm->rtm_type = RTN_MULTICAST;
2390 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2391 	if (c->mfc_flags & MFC_STATIC)
2392 		rtm->rtm_protocol = RTPROT_STATIC;
2393 	else
2394 		rtm->rtm_protocol = RTPROT_MROUTED;
2395 	rtm->rtm_flags    = 0;
2396 
2397 	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2398 	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2399 		goto nla_put_failure;
2400 	err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
2401 	/* do not break the dump if cache is unresolved */
2402 	if (err < 0 && err != -ENOENT)
2403 		goto nla_put_failure;
2404 
2405 	nlmsg_end(skb, nlh);
2406 	return 0;
2407 
2408 nla_put_failure:
2409 	nlmsg_cancel(skb, nlh);
2410 	return -EMSGSIZE;
2411 }
2412 
2413 static int mr6_msgsize(bool unresolved, int maxvif)
2414 {
2415 	size_t len =
2416 		NLMSG_ALIGN(sizeof(struct rtmsg))
2417 		+ nla_total_size(4)	/* RTA_TABLE */
2418 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2419 		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2420 		;
2421 
2422 	if (!unresolved)
2423 		len = len
2424 		      + nla_total_size(4)	/* RTA_IIF */
2425 		      + nla_total_size(0)	/* RTA_MULTIPATH */
2426 		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2427 						/* RTA_MFC_STATS */
2428 		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2429 		;
2430 
2431 	return len;
2432 }
2433 
2434 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
2435 			      int cmd)
2436 {
2437 	struct net *net = read_pnet(&mrt->net);
2438 	struct sk_buff *skb;
2439 	int err = -ENOBUFS;
2440 
2441 	skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
2442 			GFP_ATOMIC);
2443 	if (!skb)
2444 		goto errout;
2445 
2446 	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2447 	if (err < 0)
2448 		goto errout;
2449 
2450 	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2451 	return;
2452 
2453 errout:
2454 	kfree_skb(skb);
2455 	if (err < 0)
2456 		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2457 }
2458 
/* RTM_GETROUTE dump callback: walk every mr6_table in @net, emitting one
 * message per resolved cache entry (per hash chain) and then per queued
 * unresolved entry.
 *
 * The dump is resumable: cb->args[0]/[1]/[2] hold the table index, hash
 * bucket and entry index reached so far, and the walk skips forward to
 * that position on re-entry.  mrt_lock is read-held for the whole walk;
 * the unresolved queue additionally needs mfc_unres_lock.
 */
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr6_table *mrt;
	struct mfc6_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	/* resume cursors saved by the previous invocation (0 on first) */
	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	read_lock(&mrt_lock);
	ip6mr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		if (t > s_t)
			s_h = 0;	/* fresh table: restart at bucket 0 */
		for (h = s_h; h < MFC6_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
				if (e < s_e)
					goto next_entry;
				if (ip6mr_fill_mroute(mrt, skb,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      mfc, RTM_NEWROUTE,
						      NLM_F_MULTI) < 0)
					goto done;	/* skb full: stop and save cursors */
next_entry:
				e++;
			}
			e = s_e = 0;
		}
		/* unresolved entries live on a separate spinlock-guarded list */
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
			if (e < s_e)
				goto next_entry2;
			if (ip6mr_fill_mroute(mrt, skb,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      mfc, RTM_NEWROUTE,
					      NLM_F_MULTI) < 0) {
				spin_unlock_bh(&mfc_unres_lock);
				goto done;
			}
next_entry2:
			e++;
		}
		spin_unlock_bh(&mfc_unres_lock);
		e = s_e = 0;
		s_h = 0;
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);

	/* store the position reached so the next call resumes here */
	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}
2523