xref: /openbmc/linux/net/ipv4/ipmr.c (revision 814137768b5a9504f758aa760e7b1ac355539783)
1  /*
2   *	IP multicast routing support for mrouted 3.6/3.8
3   *
4   *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5   *	  Linux Consultancy and Custom Driver Development
6   *
7   *	This program is free software; you can redistribute it and/or
8   *	modify it under the terms of the GNU General Public License
9   *	as published by the Free Software Foundation; either version
10   *	2 of the License, or (at your option) any later version.
11   *
12   *	Fixes:
13   *	Michael Chastain	:	Incorrect size of copying.
14   *	Alan Cox		:	Added the cache manager code
15   *	Alan Cox		:	Fixed the clone/copy bug and device race.
16   *	Mike McLagan		:	Routing by source
17   *	Malcolm Beattie		:	Buffer handling fixes.
18   *	Alexey Kuznetsov	:	Double buffer free and other fixes.
19   *	SVR Anand		:	Fixed several multicast bugs and problems.
20   *	Alexey Kuznetsov	:	Status, optimisations and more.
21   *	Brad Parker		:	Better behaviour on mrouted upcall
22   *					overflow.
23   *      Carlos Picoto           :       PIMv1 Support
24   *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
25   *					Relax this requirement to work with older peers.
26   *
27   */
28  
29  #include <linux/uaccess.h>
30  #include <linux/types.h>
31  #include <linux/cache.h>
32  #include <linux/capability.h>
33  #include <linux/errno.h>
34  #include <linux/mm.h>
35  #include <linux/kernel.h>
36  #include <linux/fcntl.h>
37  #include <linux/stat.h>
38  #include <linux/socket.h>
39  #include <linux/in.h>
40  #include <linux/inet.h>
41  #include <linux/netdevice.h>
42  #include <linux/inetdevice.h>
43  #include <linux/igmp.h>
44  #include <linux/proc_fs.h>
45  #include <linux/seq_file.h>
46  #include <linux/mroute.h>
47  #include <linux/init.h>
48  #include <linux/if_ether.h>
49  #include <linux/slab.h>
50  #include <net/net_namespace.h>
51  #include <net/ip.h>
52  #include <net/protocol.h>
53  #include <linux/skbuff.h>
54  #include <net/route.h>
55  #include <net/icmp.h>
56  #include <net/udp.h>
57  #include <net/raw.h>
58  #include <linux/notifier.h>
59  #include <linux/if_arp.h>
60  #include <linux/netfilter_ipv4.h>
61  #include <linux/compat.h>
62  #include <linux/export.h>
63  #include <linux/rhashtable.h>
64  #include <net/ip_tunnels.h>
65  #include <net/checksum.h>
66  #include <net/netlink.h>
67  #include <net/fib_rules.h>
68  #include <linux/netconf.h>
69  #include <net/rtnh.h>
70  
71  #include <linux/nospec.h>
72  
73  struct ipmr_rule {
74  	struct fib_rule		common;
75  };
76  
77  struct ipmr_result {
78  	struct mr_table		*mrt;
79  };
80  
81  /* Big lock, protecting vif table, mrt cache and mroute socket state.
82   * Note that the changes are serialized via rtnl_lock.
83   */
84  
85  static DEFINE_RWLOCK(mrt_lock);
86  
87  /* Multicast router control variables */
88  
89  /* Special spinlock for queue of unresolved entries */
90  static DEFINE_SPINLOCK(mfc_unres_lock);
91  
92  /* We return to Alan's original scheme. The hash table of resolved
93   * entries is changed only in process context and is protected
94   * by the weak lock mrt_lock. The queue of unresolved entries is
95   * protected by the strong spinlock mfc_unres_lock.
96   *
97   * With this split, the data path is entirely free of exclusive locks.
98   */
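/* Editorial sketch (not part of the original source; the pattern is taken
 * from vif_add()/vif_delete() and reg_vif_xmit()/__pim_rcv() below):
 * configuration runs under rtnl_lock() and takes mrt_lock for writing only
 * around the short sections that publish new state, while the data path
 * only ever takes it for reading, roughly:
 *
 *	write_lock_bh(&mrt_lock);		// config path: publish a vif change
 *	v->dev = dev;
 *	write_unlock_bh(&mrt_lock);
 *
 *	read_lock(&mrt_lock);			// data path: look up a vif
 *	reg_dev = mrt->vif_table[vifi].dev;
 *	read_unlock(&mrt_lock);
 */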
99  
100  static struct kmem_cache *mrt_cachep __ro_after_init;
101  
102  static struct mr_table *ipmr_new_table(struct net *net, u32 id);
103  static void ipmr_free_table(struct mr_table *mrt);
104  
105  static void ip_mr_forward(struct net *net, struct mr_table *mrt,
106  			  struct net_device *dev, struct sk_buff *skb,
107  			  struct mfc_cache *cache, int local);
108  static int ipmr_cache_report(struct mr_table *mrt,
109  			     struct sk_buff *pkt, vifi_t vifi, int assert);
110  static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
111  				 int cmd);
112  static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
113  static void mroute_clean_tables(struct mr_table *mrt, int flags);
114  static void ipmr_expire_process(struct timer_list *t);
115  
116  #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
117  #define ipmr_for_each_table(mrt, net) \
118  	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
119  
120  static struct mr_table *ipmr_mr_table_iter(struct net *net,
121  					   struct mr_table *mrt)
122  {
123  	struct mr_table *ret;
124  
125  	if (!mrt)
126  		ret = list_entry_rcu(net->ipv4.mr_tables.next,
127  				     struct mr_table, list);
128  	else
129  		ret = list_entry_rcu(mrt->list.next,
130  				     struct mr_table, list);
131  
132  	if (&ret->list == &net->ipv4.mr_tables)
133  		return NULL;
134  	return ret;
135  }
136  
137  static struct mr_table *ipmr_get_table(struct net *net, u32 id)
138  {
139  	struct mr_table *mrt;
140  
141  	ipmr_for_each_table(mrt, net) {
142  		if (mrt->id == id)
143  			return mrt;
144  	}
145  	return NULL;
146  }
147  
148  static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
149  			   struct mr_table **mrt)
150  {
151  	int err;
152  	struct ipmr_result res;
153  	struct fib_lookup_arg arg = {
154  		.result = &res,
155  		.flags = FIB_LOOKUP_NOREF,
156  	};
157  
158  	/* update flow if oif or iif point to device enslaved to l3mdev */
159  	l3mdev_update_flow(net, flowi4_to_flowi(flp4));
160  
161  	err = fib_rules_lookup(net->ipv4.mr_rules_ops,
162  			       flowi4_to_flowi(flp4), 0, &arg);
163  	if (err < 0)
164  		return err;
165  	*mrt = res.mrt;
166  	return 0;
167  }
168  
169  static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
170  			    int flags, struct fib_lookup_arg *arg)
171  {
172  	struct ipmr_result *res = arg->result;
173  	struct mr_table *mrt;
174  
175  	switch (rule->action) {
176  	case FR_ACT_TO_TBL:
177  		break;
178  	case FR_ACT_UNREACHABLE:
179  		return -ENETUNREACH;
180  	case FR_ACT_PROHIBIT:
181  		return -EACCES;
182  	case FR_ACT_BLACKHOLE:
183  	default:
184  		return -EINVAL;
185  	}
186  
187  	arg->table = fib_rule_get_table(rule, arg);
188  
189  	mrt = ipmr_get_table(rule->fr_net, arg->table);
190  	if (!mrt)
191  		return -EAGAIN;
192  	res->mrt = mrt;
193  	return 0;
194  }
195  
196  static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
197  {
198  	return 1;
199  }
200  
201  static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
202  	FRA_GENERIC_POLICY,
203  };
204  
205  static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
206  			       struct fib_rule_hdr *frh, struct nlattr **tb,
207  			       struct netlink_ext_ack *extack)
208  {
209  	return 0;
210  }
211  
212  static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
213  			     struct nlattr **tb)
214  {
215  	return 1;
216  }
217  
218  static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
219  			  struct fib_rule_hdr *frh)
220  {
221  	frh->dst_len = 0;
222  	frh->src_len = 0;
223  	frh->tos     = 0;
224  	return 0;
225  }
226  
227  static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = {
228  	.family		= RTNL_FAMILY_IPMR,
229  	.rule_size	= sizeof(struct ipmr_rule),
230  	.addr_size	= sizeof(u32),
231  	.action		= ipmr_rule_action,
232  	.match		= ipmr_rule_match,
233  	.configure	= ipmr_rule_configure,
234  	.compare	= ipmr_rule_compare,
235  	.fill		= ipmr_rule_fill,
236  	.nlgroup	= RTNLGRP_IPV4_RULE,
237  	.policy		= ipmr_rule_policy,
238  	.owner		= THIS_MODULE,
239  };
240  
241  static int __net_init ipmr_rules_init(struct net *net)
242  {
243  	struct fib_rules_ops *ops;
244  	struct mr_table *mrt;
245  	int err;
246  
247  	ops = fib_rules_register(&ipmr_rules_ops_template, net);
248  	if (IS_ERR(ops))
249  		return PTR_ERR(ops);
250  
251  	INIT_LIST_HEAD(&net->ipv4.mr_tables);
252  
253  	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
254  	if (IS_ERR(mrt)) {
255  		err = PTR_ERR(mrt);
256  		goto err1;
257  	}
258  
259  	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
260  	if (err < 0)
261  		goto err2;
262  
263  	net->ipv4.mr_rules_ops = ops;
264  	return 0;
265  
266  err2:
267  	ipmr_free_table(mrt);
268  err1:
269  	fib_rules_unregister(ops);
270  	return err;
271  }
272  
273  static void __net_exit ipmr_rules_exit(struct net *net)
274  {
275  	struct mr_table *mrt, *next;
276  
277  	rtnl_lock();
278  	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
279  		list_del(&mrt->list);
280  		ipmr_free_table(mrt);
281  	}
282  	fib_rules_unregister(net->ipv4.mr_rules_ops);
283  	rtnl_unlock();
284  }
285  
286  static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
287  {
288  	return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR);
289  }
290  
291  static unsigned int ipmr_rules_seq_read(struct net *net)
292  {
293  	return fib_rules_seq_read(net, RTNL_FAMILY_IPMR);
294  }
295  
296  bool ipmr_rule_default(const struct fib_rule *rule)
297  {
298  	return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT;
299  }
300  EXPORT_SYMBOL(ipmr_rule_default);
301  #else
302  #define ipmr_for_each_table(mrt, net) \
303  	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
304  
305  static struct mr_table *ipmr_mr_table_iter(struct net *net,
306  					   struct mr_table *mrt)
307  {
308  	if (!mrt)
309  		return net->ipv4.mrt;
310  	return NULL;
311  }
312  
313  static struct mr_table *ipmr_get_table(struct net *net, u32 id)
314  {
315  	return net->ipv4.mrt;
316  }
317  
318  static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
319  			   struct mr_table **mrt)
320  {
321  	*mrt = net->ipv4.mrt;
322  	return 0;
323  }
324  
325  static int __net_init ipmr_rules_init(struct net *net)
326  {
327  	struct mr_table *mrt;
328  
329  	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
330  	if (IS_ERR(mrt))
331  		return PTR_ERR(mrt);
332  	net->ipv4.mrt = mrt;
333  	return 0;
334  }
335  
336  static void __net_exit ipmr_rules_exit(struct net *net)
337  {
338  	rtnl_lock();
339  	ipmr_free_table(net->ipv4.mrt);
340  	net->ipv4.mrt = NULL;
341  	rtnl_unlock();
342  }
343  
344  static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
345  {
346  	return 0;
347  }
348  
349  static unsigned int ipmr_rules_seq_read(struct net *net)
350  {
351  	return 0;
352  }
353  
354  bool ipmr_rule_default(const struct fib_rule *rule)
355  {
356  	return true;
357  }
358  EXPORT_SYMBOL(ipmr_rule_default);
359  #endif
360  
361  static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
362  				const void *ptr)
363  {
364  	const struct mfc_cache_cmp_arg *cmparg = arg->key;
365  	struct mfc_cache *c = (struct mfc_cache *)ptr;
366  
367  	return cmparg->mfc_mcastgrp != c->mfc_mcastgrp ||
368  	       cmparg->mfc_origin != c->mfc_origin;
369  }
370  
371  static const struct rhashtable_params ipmr_rht_params = {
372  	.head_offset = offsetof(struct mr_mfc, mnode),
373  	.key_offset = offsetof(struct mfc_cache, cmparg),
374  	.key_len = sizeof(struct mfc_cache_cmp_arg),
375  	.nelem_hint = 3,
376  	.obj_cmpfn = ipmr_hash_cmp,
377  	.automatic_shrinking = true,
378  };
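/* Editorial note (derived from ipmr_hash_cmp() above, not part of the
 * original source): resolved MFC entries are keyed by the (origin S,
 * multicast group G) pair in struct mfc_cache_cmp_arg, so an (S,G) lookup
 * is a single rhashtable lookup.
 */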
379  
380  static void ipmr_new_table_set(struct mr_table *mrt,
381  			       struct net *net)
382  {
383  #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
384  	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
385  #endif
386  }
387  
388  static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = {
389  	.mfc_mcastgrp = htonl(INADDR_ANY),
390  	.mfc_origin = htonl(INADDR_ANY),
391  };
392  
393  static struct mr_table_ops ipmr_mr_table_ops = {
394  	.rht_params = &ipmr_rht_params,
395  	.cmparg_any = &ipmr_mr_table_ops_cmparg_any,
396  };
397  
398  static struct mr_table *ipmr_new_table(struct net *net, u32 id)
399  {
400  	struct mr_table *mrt;
401  
402  	/* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */
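	/* (Worked example: "pimreg" is 6 characters, any id >= 1000000000
	 * needs 10 decimal digits, and with the trailing NUL that makes
	 * 6 + 10 + 1 = 17 bytes, one more than IFNAMSIZ, hence this check.)
	 */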
403  	if (id != RT_TABLE_DEFAULT && id >= 1000000000)
404  		return ERR_PTR(-EINVAL);
405  
406  	mrt = ipmr_get_table(net, id);
407  	if (mrt)
408  		return mrt;
409  
410  	return mr_table_alloc(net, id, &ipmr_mr_table_ops,
411  			      ipmr_expire_process, ipmr_new_table_set);
412  }
413  
414  static void ipmr_free_table(struct mr_table *mrt)
415  {
416  	del_timer_sync(&mrt->ipmr_expire_timer);
417  	mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC |
418  				 MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC);
419  	rhltable_destroy(&mrt->mfc_hash);
420  	kfree(mrt);
421  }
422  
423  /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
424  
425  static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
426  {
427  	struct net *net = dev_net(dev);
428  
429  	dev_close(dev);
430  
431  	dev = __dev_get_by_name(net, "tunl0");
432  	if (dev) {
433  		const struct net_device_ops *ops = dev->netdev_ops;
434  		struct ifreq ifr;
435  		struct ip_tunnel_parm p;
436  
437  		memset(&p, 0, sizeof(p));
438  		p.iph.daddr = v->vifc_rmt_addr.s_addr;
439  		p.iph.saddr = v->vifc_lcl_addr.s_addr;
440  		p.iph.version = 4;
441  		p.iph.ihl = 5;
442  		p.iph.protocol = IPPROTO_IPIP;
443  		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
444  		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
445  
446  		if (ops->ndo_do_ioctl) {
447  			mm_segment_t oldfs = get_fs();
448  
449  			set_fs(KERNEL_DS);
450  			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
451  			set_fs(oldfs);
452  		}
453  	}
454  }
455  
456  /* Initialize ipmr pimreg/tunnel in_device */
457  static bool ipmr_init_vif_indev(const struct net_device *dev)
458  {
459  	struct in_device *in_dev;
460  
461  	ASSERT_RTNL();
462  
463  	in_dev = __in_dev_get_rtnl(dev);
464  	if (!in_dev)
465  		return false;
466  	ipv4_devconf_setall(in_dev);
467  	neigh_parms_data_state_setall(in_dev->arp_parms);
468  	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
469  
470  	return true;
471  }
472  
473  static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
474  {
475  	struct net_device  *dev;
476  
477  	dev = __dev_get_by_name(net, "tunl0");
478  
479  	if (dev) {
480  		const struct net_device_ops *ops = dev->netdev_ops;
481  		int err;
482  		struct ifreq ifr;
483  		struct ip_tunnel_parm p;
484  
485  		memset(&p, 0, sizeof(p));
486  		p.iph.daddr = v->vifc_rmt_addr.s_addr;
487  		p.iph.saddr = v->vifc_lcl_addr.s_addr;
488  		p.iph.version = 4;
489  		p.iph.ihl = 5;
490  		p.iph.protocol = IPPROTO_IPIP;
491  		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
492  		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
493  
494  		if (ops->ndo_do_ioctl) {
495  			mm_segment_t oldfs = get_fs();
496  
497  			set_fs(KERNEL_DS);
498  			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
499  			set_fs(oldfs);
500  		} else {
501  			err = -EOPNOTSUPP;
502  		}
503  		dev = NULL;
504  
505  		if (err == 0 &&
506  		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
507  			dev->flags |= IFF_MULTICAST;
508  			if (!ipmr_init_vif_indev(dev))
509  				goto failure;
510  			if (dev_open(dev, NULL))
511  				goto failure;
512  			dev_hold(dev);
513  		}
514  	}
515  	return dev;
516  
517  failure:
518  	unregister_netdevice(dev);
519  	return NULL;
520  }
521  
522  #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
523  static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
524  {
525  	struct net *net = dev_net(dev);
526  	struct mr_table *mrt;
527  	struct flowi4 fl4 = {
528  		.flowi4_oif	= dev->ifindex,
529  		.flowi4_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
530  		.flowi4_mark	= skb->mark,
531  	};
532  	int err;
533  
534  	err = ipmr_fib_lookup(net, &fl4, &mrt);
535  	if (err < 0) {
536  		kfree_skb(skb);
537  		return err;
538  	}
539  
540  	read_lock(&mrt_lock);
541  	dev->stats.tx_bytes += skb->len;
542  	dev->stats.tx_packets++;
543  	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
544  	read_unlock(&mrt_lock);
545  	kfree_skb(skb);
546  	return NETDEV_TX_OK;
547  }
548  
549  static int reg_vif_get_iflink(const struct net_device *dev)
550  {
551  	return 0;
552  }
553  
554  static const struct net_device_ops reg_vif_netdev_ops = {
555  	.ndo_start_xmit	= reg_vif_xmit,
556  	.ndo_get_iflink = reg_vif_get_iflink,
557  };
558  
559  static void reg_vif_setup(struct net_device *dev)
560  {
561  	dev->type		= ARPHRD_PIMREG;
562  	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
563  	dev->flags		= IFF_NOARP;
564  	dev->netdev_ops		= &reg_vif_netdev_ops;
565  	dev->needs_free_netdev	= true;
566  	dev->features		|= NETIF_F_NETNS_LOCAL;
567  }
568  
569  static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
570  {
571  	struct net_device *dev;
572  	char name[IFNAMSIZ];
573  
574  	if (mrt->id == RT_TABLE_DEFAULT)
575  		sprintf(name, "pimreg");
576  	else
577  		sprintf(name, "pimreg%u", mrt->id);
578  
579  	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
580  
581  	if (!dev)
582  		return NULL;
583  
584  	dev_net_set(dev, net);
585  
586  	if (register_netdevice(dev)) {
587  		free_netdev(dev);
588  		return NULL;
589  	}
590  
591  	if (!ipmr_init_vif_indev(dev))
592  		goto failure;
593  	if (dev_open(dev, NULL))
594  		goto failure;
595  
596  	dev_hold(dev);
597  
598  	return dev;
599  
600  failure:
601  	unregister_netdevice(dev);
602  	return NULL;
603  }
604  
605  /* called with rcu_read_lock() */
606  static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
607  		     unsigned int pimlen)
608  {
609  	struct net_device *reg_dev = NULL;
610  	struct iphdr *encap;
611  
612  	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
613  	/* Check that:
614  	 * a. packet is really sent to a multicast group
615  	 * b. packet is not a NULL-REGISTER
616  	 * c. packet is not truncated
617  	 */
618  	if (!ipv4_is_multicast(encap->daddr) ||
619  	    encap->tot_len == 0 ||
620  	    ntohs(encap->tot_len) + pimlen > skb->len)
621  		return 1;
622  
623  	read_lock(&mrt_lock);
624  	if (mrt->mroute_reg_vif_num >= 0)
625  		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
626  	read_unlock(&mrt_lock);
627  
628  	if (!reg_dev)
629  		return 1;
630  
631  	skb->mac_header = skb->network_header;
632  	skb_pull(skb, (u8 *)encap - skb->data);
633  	skb_reset_network_header(skb);
634  	skb->protocol = htons(ETH_P_IP);
635  	skb->ip_summed = CHECKSUM_NONE;
636  
637  	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
638  
639  	netif_rx(skb);
640  
641  	return NET_RX_SUCCESS;
642  }
643  #else
644  static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
645  {
646  	return NULL;
647  }
648  #endif
649  
650  static int call_ipmr_vif_entry_notifiers(struct net *net,
651  					 enum fib_event_type event_type,
652  					 struct vif_device *vif,
653  					 vifi_t vif_index, u32 tb_id)
654  {
655  	return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type,
656  				     vif, vif_index, tb_id,
657  				     &net->ipv4.ipmr_seq);
658  }
659  
660  static int call_ipmr_mfc_entry_notifiers(struct net *net,
661  					 enum fib_event_type event_type,
662  					 struct mfc_cache *mfc, u32 tb_id)
663  {
664  	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IPMR, event_type,
665  				     &mfc->_c, tb_id, &net->ipv4.ipmr_seq);
666  }
667  
668  /**
669   *	vif_delete - Delete a VIF entry
670   *	@notify: Set to 1 if the caller is a notifier_call
671   */
672  static int vif_delete(struct mr_table *mrt, int vifi, int notify,
673  		      struct list_head *head)
674  {
675  	struct net *net = read_pnet(&mrt->net);
676  	struct vif_device *v;
677  	struct net_device *dev;
678  	struct in_device *in_dev;
679  
680  	if (vifi < 0 || vifi >= mrt->maxvif)
681  		return -EADDRNOTAVAIL;
682  
683  	v = &mrt->vif_table[vifi];
684  
685  	if (VIF_EXISTS(mrt, vifi))
686  		call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi,
687  					      mrt->id);
688  
689  	write_lock_bh(&mrt_lock);
690  	dev = v->dev;
691  	v->dev = NULL;
692  
693  	if (!dev) {
694  		write_unlock_bh(&mrt_lock);
695  		return -EADDRNOTAVAIL;
696  	}
697  
698  	if (vifi == mrt->mroute_reg_vif_num)
699  		mrt->mroute_reg_vif_num = -1;
700  
701  	if (vifi + 1 == mrt->maxvif) {
702  		int tmp;
703  
704  		for (tmp = vifi - 1; tmp >= 0; tmp--) {
705  			if (VIF_EXISTS(mrt, tmp))
706  				break;
707  		}
708  		mrt->maxvif = tmp+1;
709  	}
710  
711  	write_unlock_bh(&mrt_lock);
712  
713  	dev_set_allmulti(dev, -1);
714  
715  	in_dev = __in_dev_get_rtnl(dev);
716  	if (in_dev) {
717  		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
718  		inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
719  					    NETCONFA_MC_FORWARDING,
720  					    dev->ifindex, &in_dev->cnf);
721  		ip_rt_multicast_event(in_dev);
722  	}
723  
724  	if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
725  		unregister_netdevice_queue(dev, head);
726  
727  	dev_put(dev);
728  	return 0;
729  }
730  
731  static void ipmr_cache_free_rcu(struct rcu_head *head)
732  {
733  	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
734  
735  	kmem_cache_free(mrt_cachep, (struct mfc_cache *)c);
736  }
737  
738  static void ipmr_cache_free(struct mfc_cache *c)
739  {
740  	call_rcu(&c->_c.rcu, ipmr_cache_free_rcu);
741  }
742  
743  /* Destroy an unresolved cache entry, killing queued skbs
744   * and reporting error to netlink readers.
745   */
746  static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
747  {
748  	struct net *net = read_pnet(&mrt->net);
749  	struct sk_buff *skb;
750  	struct nlmsgerr *e;
751  
752  	atomic_dec(&mrt->cache_resolve_queue_len);
753  
754  	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
755  		if (ip_hdr(skb)->version == 0) {
756  			struct nlmsghdr *nlh = skb_pull(skb,
757  							sizeof(struct iphdr));
758  			nlh->nlmsg_type = NLMSG_ERROR;
759  			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
760  			skb_trim(skb, nlh->nlmsg_len);
761  			e = nlmsg_data(nlh);
762  			e->error = -ETIMEDOUT;
763  			memset(&e->msg, 0, sizeof(e->msg));
764  
765  			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
766  		} else {
767  			kfree_skb(skb);
768  		}
769  	}
770  
771  	ipmr_cache_free(c);
772  }
773  
774  /* Timer process for the unresolved queue. */
775  static void ipmr_expire_process(struct timer_list *t)
776  {
777  	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
778  	struct mr_mfc *c, *next;
779  	unsigned long expires;
780  	unsigned long now;
781  
782  	if (!spin_trylock(&mfc_unres_lock)) {
783  		mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
784  		return;
785  	}
786  
787  	if (list_empty(&mrt->mfc_unres_queue))
788  		goto out;
789  
790  	now = jiffies;
791  	expires = 10*HZ;
792  
793  	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
794  		if (time_after(c->mfc_un.unres.expires, now)) {
795  			unsigned long interval = c->mfc_un.unres.expires - now;
796  			if (interval < expires)
797  				expires = interval;
798  			continue;
799  		}
800  
801  		list_del(&c->list);
802  		mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE);
803  		ipmr_destroy_unres(mrt, (struct mfc_cache *)c);
804  	}
805  
806  	if (!list_empty(&mrt->mfc_unres_queue))
807  		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
808  
809  out:
810  	spin_unlock(&mfc_unres_lock);
811  }
812  
813  /* Fill the oifs list. Called with mrt_lock held for writing. */
814  static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
815  				   unsigned char *ttls)
816  {
817  	int vifi;
818  
819  	cache->mfc_un.res.minvif = MAXVIFS;
820  	cache->mfc_un.res.maxvif = 0;
821  	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
822  
823  	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
824  		if (VIF_EXISTS(mrt, vifi) &&
825  		    ttls[vifi] && ttls[vifi] < 255) {
826  			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
827  			if (cache->mfc_un.res.minvif > vifi)
828  				cache->mfc_un.res.minvif = vifi;
829  			if (cache->mfc_un.res.maxvif <= vifi)
830  				cache->mfc_un.res.maxvif = vifi + 1;
831  		}
832  	}
833  	cache->mfc_un.res.lastuse = jiffies;
834  }
835  
836  static int vif_add(struct net *net, struct mr_table *mrt,
837  		   struct vifctl *vifc, int mrtsock)
838  {
839  	struct netdev_phys_item_id ppid = { };
840  	int vifi = vifc->vifc_vifi;
841  	struct vif_device *v = &mrt->vif_table[vifi];
842  	struct net_device *dev;
843  	struct in_device *in_dev;
844  	int err;
845  
846  	/* Is vif busy ? */
847  	if (VIF_EXISTS(mrt, vifi))
848  		return -EADDRINUSE;
849  
850  	switch (vifc->vifc_flags) {
851  	case VIFF_REGISTER:
852  		if (!ipmr_pimsm_enabled())
853  			return -EINVAL;
854  		/* Special Purpose VIF in PIM
855  		 * All the packets will be sent to the daemon
856  		 */
857  		if (mrt->mroute_reg_vif_num >= 0)
858  			return -EADDRINUSE;
859  		dev = ipmr_reg_vif(net, mrt);
860  		if (!dev)
861  			return -ENOBUFS;
862  		err = dev_set_allmulti(dev, 1);
863  		if (err) {
864  			unregister_netdevice(dev);
865  			dev_put(dev);
866  			return err;
867  		}
868  		break;
869  	case VIFF_TUNNEL:
870  		dev = ipmr_new_tunnel(net, vifc);
871  		if (!dev)
872  			return -ENOBUFS;
873  		err = dev_set_allmulti(dev, 1);
874  		if (err) {
875  			ipmr_del_tunnel(dev, vifc);
876  			dev_put(dev);
877  			return err;
878  		}
879  		break;
880  	case VIFF_USE_IFINDEX:
881  	case 0:
882  		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
883  			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
884  			if (dev && !__in_dev_get_rtnl(dev)) {
885  				dev_put(dev);
886  				return -EADDRNOTAVAIL;
887  			}
888  		} else {
889  			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
890  		}
891  		if (!dev)
892  			return -EADDRNOTAVAIL;
893  		err = dev_set_allmulti(dev, 1);
894  		if (err) {
895  			dev_put(dev);
896  			return err;
897  		}
898  		break;
899  	default:
900  		return -EINVAL;
901  	}
902  
903  	in_dev = __in_dev_get_rtnl(dev);
904  	if (!in_dev) {
905  		dev_put(dev);
906  		return -EADDRNOTAVAIL;
907  	}
908  	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
909  	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING,
910  				    dev->ifindex, &in_dev->cnf);
911  	ip_rt_multicast_event(in_dev);
912  
913  	/* Fill in the VIF structures */
914  	vif_device_init(v, dev, vifc->vifc_rate_limit,
915  			vifc->vifc_threshold,
916  			vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
917  			(VIFF_TUNNEL | VIFF_REGISTER));
918  
919  	err = dev_get_port_parent_id(dev, &ppid, true);
920  	if (err == 0) {
921  		memcpy(v->dev_parent_id.id, ppid.id, ppid.id_len);
922  		v->dev_parent_id.id_len = ppid.id_len;
923  	} else {
924  		v->dev_parent_id.id_len = 0;
925  	}
926  
927  	v->local = vifc->vifc_lcl_addr.s_addr;
928  	v->remote = vifc->vifc_rmt_addr.s_addr;
929  
930  	/* And finish the update by writing the critical data */
931  	write_lock_bh(&mrt_lock);
932  	v->dev = dev;
933  	if (v->flags & VIFF_REGISTER)
934  		mrt->mroute_reg_vif_num = vifi;
935  	if (vifi+1 > mrt->maxvif)
936  		mrt->maxvif = vifi+1;
937  	write_unlock_bh(&mrt_lock);
938  	call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id);
939  	return 0;
940  }
941  
942  /* called with rcu_read_lock() */
943  static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
944  					 __be32 origin,
945  					 __be32 mcastgrp)
946  {
947  	struct mfc_cache_cmp_arg arg = {
948  			.mfc_mcastgrp = mcastgrp,
949  			.mfc_origin = origin
950  	};
951  
952  	return mr_mfc_find(mrt, &arg);
953  }
954  
955  /* Look for a (*,G) entry */
956  static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
957  					     __be32 mcastgrp, int vifi)
958  {
959  	struct mfc_cache_cmp_arg arg = {
960  			.mfc_mcastgrp = mcastgrp,
961  			.mfc_origin = htonl(INADDR_ANY)
962  	};
963  
964  	if (mcastgrp == htonl(INADDR_ANY))
965  		return mr_mfc_find_any_parent(mrt, vifi);
966  	return mr_mfc_find_any(mrt, vifi, &arg);
967  }
968  
969  /* Look for a (S,G,iif) entry if parent != -1 */
970  static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
971  						__be32 origin, __be32 mcastgrp,
972  						int parent)
973  {
974  	struct mfc_cache_cmp_arg arg = {
975  			.mfc_mcastgrp = mcastgrp,
976  			.mfc_origin = origin,
977  	};
978  
979  	return mr_mfc_find_parent(mrt, &arg, parent);
980  }
981  
982  /* Allocate a multicast cache entry */
983  static struct mfc_cache *ipmr_cache_alloc(void)
984  {
985  	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
986  
987  	if (c) {
988  		c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
989  		c->_c.mfc_un.res.minvif = MAXVIFS;
990  		c->_c.free = ipmr_cache_free_rcu;
991  		refcount_set(&c->_c.mfc_un.res.refcount, 1);
992  	}
993  	return c;
994  }
995  
996  static struct mfc_cache *ipmr_cache_alloc_unres(void)
997  {
998  	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
999  
1000  	if (c) {
1001  		skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
1002  		c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
1003  	}
1004  	return c;
1005  }
1006  
1007  /* A cache entry has gone into a resolved state from queued */
1008  static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
1009  			       struct mfc_cache *uc, struct mfc_cache *c)
1010  {
1011  	struct sk_buff *skb;
1012  	struct nlmsgerr *e;
1013  
1014  	/* Play the pending entries through our router */
1015  	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1016  		if (ip_hdr(skb)->version == 0) {
1017  			struct nlmsghdr *nlh = skb_pull(skb,
1018  							sizeof(struct iphdr));
1019  
1020  			if (mr_fill_mroute(mrt, skb, &c->_c,
1021  					   nlmsg_data(nlh)) > 0) {
1022  				nlh->nlmsg_len = skb_tail_pointer(skb) -
1023  						 (u8 *)nlh;
1024  			} else {
1025  				nlh->nlmsg_type = NLMSG_ERROR;
1026  				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1027  				skb_trim(skb, nlh->nlmsg_len);
1028  				e = nlmsg_data(nlh);
1029  				e->error = -EMSGSIZE;
1030  				memset(&e->msg, 0, sizeof(e->msg));
1031  			}
1032  
1033  			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1034  		} else {
1035  			ip_mr_forward(net, mrt, skb->dev, skb, c, 0);
1036  		}
1037  	}
1038  }
1039  
1040  /* Bounce a cache query up to mrouted and netlink.
1041   *
1042   * Called under mrt_lock.
1043   */
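/* Editorial sketch of the two upcall layouts built below (derived from this
 * function, not part of the original source):
 *
 *   IGMPMSG_WHOLEPKT / IGMPMSG_WRVIFWHOLE:
 *	| copy of the IP header, overlaid as struct igmpmsg | whole original packet |
 *
 *   other asserts (e.g. IGMPMSG_NOCACHE):
 *	| copied IP header reused as struct igmpmsg (protocol = 0) | struct igmphdr |
 */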
1044  static int ipmr_cache_report(struct mr_table *mrt,
1045  			     struct sk_buff *pkt, vifi_t vifi, int assert)
1046  {
1047  	const int ihl = ip_hdrlen(pkt);
1048  	struct sock *mroute_sk;
1049  	struct igmphdr *igmp;
1050  	struct igmpmsg *msg;
1051  	struct sk_buff *skb;
1052  	int ret;
1053  
1054  	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE)
1055  		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
1056  	else
1057  		skb = alloc_skb(128, GFP_ATOMIC);
1058  
1059  	if (!skb)
1060  		return -ENOBUFS;
1061  
1062  	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) {
1063  		/* Ugly, but we have no choice with this interface.
1064  		 * Duplicate old header, fix ihl, length etc.
1065  		 * And all this only to mangle msg->im_msgtype and
1066  		 * to set msg->im_mbz to "mbz" :-)
1067  		 */
1068  		skb_push(skb, sizeof(struct iphdr));
1069  		skb_reset_network_header(skb);
1070  		skb_reset_transport_header(skb);
1071  		msg = (struct igmpmsg *)skb_network_header(skb);
1072  		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
1073  		msg->im_msgtype = assert;
1074  		msg->im_mbz = 0;
1075  		if (assert == IGMPMSG_WRVIFWHOLE)
1076  			msg->im_vif = vifi;
1077  		else
1078  			msg->im_vif = mrt->mroute_reg_vif_num;
1079  		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
1080  		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
1081  					     sizeof(struct iphdr));
1082  	} else {
1083  		/* Copy the IP header */
1084  		skb_set_network_header(skb, skb->len);
1085  		skb_put(skb, ihl);
1086  		skb_copy_to_linear_data(skb, pkt->data, ihl);
1087  		/* Flag to the kernel this is a route add */
1088  		ip_hdr(skb)->protocol = 0;
1089  		msg = (struct igmpmsg *)skb_network_header(skb);
1090  		msg->im_vif = vifi;
1091  		skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1092  		/* Add our header */
1093  		igmp = skb_put(skb, sizeof(struct igmphdr));
1094  		igmp->type = assert;
1095  		msg->im_msgtype = assert;
1096  		igmp->code = 0;
1097  		ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
1098  		skb->transport_header = skb->network_header;
1099  	}
1100  
1101  	rcu_read_lock();
1102  	mroute_sk = rcu_dereference(mrt->mroute_sk);
1103  	if (!mroute_sk) {
1104  		rcu_read_unlock();
1105  		kfree_skb(skb);
1106  		return -EINVAL;
1107  	}
1108  
1109  	igmpmsg_netlink_event(mrt, skb);
1110  
1111  	/* Deliver to mrouted */
1112  	ret = sock_queue_rcv_skb(mroute_sk, skb);
1113  	rcu_read_unlock();
1114  	if (ret < 0) {
1115  		net_warn_ratelimited("mroute: pending queue full, dropping entries\n");
1116  		kfree_skb(skb);
1117  	}
1118  
1119  	return ret;
1120  }
1121  
1122  /* Queue a packet for resolution. It obtains a locked cache entry! */
1123  static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
1124  				 struct sk_buff *skb, struct net_device *dev)
1125  {
1126  	const struct iphdr *iph = ip_hdr(skb);
1127  	struct mfc_cache *c;
1128  	bool found = false;
1129  	int err;
1130  
1131  	spin_lock_bh(&mfc_unres_lock);
1132  	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1133  		if (c->mfc_mcastgrp == iph->daddr &&
1134  		    c->mfc_origin == iph->saddr) {
1135  			found = true;
1136  			break;
1137  		}
1138  	}
1139  
1140  	if (!found) {
1141  		/* Create a new entry if allowable */
1142  		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1143  		    (c = ipmr_cache_alloc_unres()) == NULL) {
1144  			spin_unlock_bh(&mfc_unres_lock);
1145  
1146  			kfree_skb(skb);
1147  			return -ENOBUFS;
1148  		}
1149  
1150  		/* Fill in the new cache entry */
1151  		c->_c.mfc_parent = -1;
1152  		c->mfc_origin	= iph->saddr;
1153  		c->mfc_mcastgrp	= iph->daddr;
1154  
1155  		/* Reflect first query at mrouted. */
1156  		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
1157  
1158  		if (err < 0) {
1159  			/* If the report failed, throw the cache entry
1160  			 * out - Brad Parker
1161  			 */
1162  			spin_unlock_bh(&mfc_unres_lock);
1163  
1164  			ipmr_cache_free(c);
1165  			kfree_skb(skb);
1166  			return err;
1167  		}
1168  
1169  		atomic_inc(&mrt->cache_resolve_queue_len);
1170  		list_add(&c->_c.list, &mrt->mfc_unres_queue);
1171  		mroute_netlink_event(mrt, c, RTM_NEWROUTE);
1172  
1173  		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
1174  			mod_timer(&mrt->ipmr_expire_timer,
1175  				  c->_c.mfc_un.unres.expires);
1176  	}
1177  
1178  	/* See if we can append the packet */
1179  	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1180  		kfree_skb(skb);
1181  		err = -ENOBUFS;
1182  	} else {
1183  		if (dev) {
1184  			skb->dev = dev;
1185  			skb->skb_iif = dev->ifindex;
1186  		}
1187  		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1188  		err = 0;
1189  	}
1190  
1191  	spin_unlock_bh(&mfc_unres_lock);
1192  	return err;
1193  }
1194  
1195  /* MFC cache manipulation by user space mroute daemon */
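/* Editorial sketch, not part of the original source: an MRT_ADD_MFC request
 * from the daemon carries a struct mfcctl, roughly ("s", "src", "grp",
 * "in_vif" and "out_vif" are placeholders):
 *
 *	struct mfcctl mc = { .mfcc_origin.s_addr = src,
 *			     .mfcc_mcastgrp.s_addr = grp,
 *			     .mfcc_parent = in_vif };
 *
 *	mc.mfcc_ttls[out_vif] = 1;	// forward on out_vif, TTL threshold 1
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 */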
1196  
1197  static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
1198  {
1199  	struct net *net = read_pnet(&mrt->net);
1200  	struct mfc_cache *c;
1201  
1202  	/* The entries are added/deleted only under RTNL */
1203  	rcu_read_lock();
1204  	c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
1205  				   mfc->mfcc_mcastgrp.s_addr, parent);
1206  	rcu_read_unlock();
1207  	if (!c)
1208  		return -ENOENT;
1209  	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params);
1210  	list_del_rcu(&c->_c.list);
1211  	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
1212  	mroute_netlink_event(mrt, c, RTM_DELROUTE);
1213  	mr_cache_put(&c->_c);
1214  
1215  	return 0;
1216  }
1217  
1218  static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1219  			struct mfcctl *mfc, int mrtsock, int parent)
1220  {
1221  	struct mfc_cache *uc, *c;
1222  	struct mr_mfc *_uc;
1223  	bool found;
1224  	int ret;
1225  
1226  	if (mfc->mfcc_parent >= MAXVIFS)
1227  		return -ENFILE;
1228  
1229  	/* The entries are added/deleted only under RTNL */
1230  	rcu_read_lock();
1231  	c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
1232  				   mfc->mfcc_mcastgrp.s_addr, parent);
1233  	rcu_read_unlock();
1234  	if (c) {
1235  		write_lock_bh(&mrt_lock);
1236  		c->_c.mfc_parent = mfc->mfcc_parent;
1237  		ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
1238  		if (!mrtsock)
1239  			c->_c.mfc_flags |= MFC_STATIC;
1240  		write_unlock_bh(&mrt_lock);
1241  		call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
1242  					      mrt->id);
1243  		mroute_netlink_event(mrt, c, RTM_NEWROUTE);
1244  		return 0;
1245  	}
1246  
1247  	if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) &&
1248  	    !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
1249  		return -EINVAL;
1250  
1251  	c = ipmr_cache_alloc();
1252  	if (!c)
1253  		return -ENOMEM;
1254  
1255  	c->mfc_origin = mfc->mfcc_origin.s_addr;
1256  	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
1257  	c->_c.mfc_parent = mfc->mfcc_parent;
1258  	ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
1259  	if (!mrtsock)
1260  		c->_c.mfc_flags |= MFC_STATIC;
1261  
1262  	ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
1263  				  ipmr_rht_params);
1264  	if (ret) {
1265  		pr_err("ipmr: rhtable insert error %d\n", ret);
1266  		ipmr_cache_free(c);
1267  		return ret;
1268  	}
1269  	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
1270  	/* Check to see if this resolves a queued (unresolved) entry. If so we
1271  	 * need to send on the frames and tidy up.
1272  	 */
1273  	found = false;
1274  	spin_lock_bh(&mfc_unres_lock);
1275  	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
1276  		uc = (struct mfc_cache *)_uc;
1277  		if (uc->mfc_origin == c->mfc_origin &&
1278  		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
1279  			list_del(&_uc->list);
1280  			atomic_dec(&mrt->cache_resolve_queue_len);
1281  			found = true;
1282  			break;
1283  		}
1284  	}
1285  	if (list_empty(&mrt->mfc_unres_queue))
1286  		del_timer(&mrt->ipmr_expire_timer);
1287  	spin_unlock_bh(&mfc_unres_lock);
1288  
1289  	if (found) {
1290  		ipmr_cache_resolve(net, mrt, uc, c);
1291  		ipmr_cache_free(uc);
1292  	}
1293  	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id);
1294  	mroute_netlink_event(mrt, c, RTM_NEWROUTE);
1295  	return 0;
1296  }
1297  
1298  /* Close the multicast socket, and clear the vif tables etc */
1299  static void mroute_clean_tables(struct mr_table *mrt, int flags)
1300  {
1301  	struct net *net = read_pnet(&mrt->net);
1302  	struct mr_mfc *c, *tmp;
1303  	struct mfc_cache *cache;
1304  	LIST_HEAD(list);
1305  	int i;
1306  
1307  	/* Shut down all active vif entries */
1308  	if (flags & (MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC)) {
1309  		for (i = 0; i < mrt->maxvif; i++) {
1310  			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
1311  			     !(flags & MRT_FLUSH_VIFS_STATIC)) ||
1312  			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT_FLUSH_VIFS)))
1313  				continue;
1314  			vif_delete(mrt, i, 0, &list);
1315  		}
1316  		unregister_netdevice_many(&list);
1317  	}
1318  
1319  	/* Wipe the cache */
1320  	if (flags & (MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC)) {
1321  		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1322  			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC_STATIC)) ||
1323  			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC)))
1324  				continue;
1325  			rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
1326  			list_del_rcu(&c->list);
1327  			cache = (struct mfc_cache *)c;
1328  			call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
1329  						      mrt->id);
1330  			mroute_netlink_event(mrt, cache, RTM_DELROUTE);
1331  			mr_cache_put(c);
1332  		}
1333  	}
1334  
1335  	if (flags & MRT_FLUSH_MFC) {
1336  		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1337  			spin_lock_bh(&mfc_unres_lock);
1338  			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1339  				list_del(&c->list);
1340  				cache = (struct mfc_cache *)c;
1341  				mroute_netlink_event(mrt, cache, RTM_DELROUTE);
1342  				ipmr_destroy_unres(mrt, cache);
1343  			}
1344  			spin_unlock_bh(&mfc_unres_lock);
1345  		}
1346  	}
1347  }
1348  
1349  /* called from ip_ra_control(), before an RCU grace period;
1350   * we don't need to call synchronize_rcu() here
1351   */
1352  static void mrtsock_destruct(struct sock *sk)
1353  {
1354  	struct net *net = sock_net(sk);
1355  	struct mr_table *mrt;
1356  
1357  	rtnl_lock();
1358  	ipmr_for_each_table(mrt, net) {
1359  		if (sk == rtnl_dereference(mrt->mroute_sk)) {
1360  			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
1361  			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
1362  						    NETCONFA_MC_FORWARDING,
1363  						    NETCONFA_IFINDEX_ALL,
1364  						    net->ipv4.devconf_all);
1365  			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1366  			mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_MFC);
1367  		}
1368  	}
1369  	rtnl_unlock();
1370  }
1371  
1372  /* Socket options and virtual interface manipulation. The whole
1373   * virtual interface system is a complete heap, but unfortunately
1374   * that's how BSD mrouted happens to think. Maybe one day with a proper
1375   * MOSPF/PIM router set up we can clean this up.
1376   */
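/* Editorial sketch, not part of the original source: a userspace routing
 * daemon (e.g. mrouted or pimd) typically drives this interface roughly as
 * follows ("local_ip" is a placeholder, error handling omitted):
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *	struct vifctl vc = { .vifc_vifi = 0, .vifc_threshold = 1 };
 *
 *	vc.vifc_lcl_addr.s_addr = local_ip;
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *	// ... MRT_ADD_MFC for each route, read IGMPMSG_* upcalls from s ...
 *	setsockopt(s, IPPROTO_IP, MRT_DONE, NULL, 0);
 */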
1377  
1378  int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
1379  			 unsigned int optlen)
1380  {
1381  	struct net *net = sock_net(sk);
1382  	int val, ret = 0, parent = 0;
1383  	struct mr_table *mrt;
1384  	struct vifctl vif;
1385  	struct mfcctl mfc;
1386  	bool do_wrvifwhole;
1387  	u32 uval;
1388  
1389  	/* There's one exception to the lock - MRT_DONE which needs to unlock */
1390  	rtnl_lock();
1391  	if (sk->sk_type != SOCK_RAW ||
1392  	    inet_sk(sk)->inet_num != IPPROTO_IGMP) {
1393  		ret = -EOPNOTSUPP;
1394  		goto out_unlock;
1395  	}
1396  
1397  	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1398  	if (!mrt) {
1399  		ret = -ENOENT;
1400  		goto out_unlock;
1401  	}
1402  	if (optname != MRT_INIT) {
1403  		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1404  		    !ns_capable(net->user_ns, CAP_NET_ADMIN)) {
1405  			ret = -EACCES;
1406  			goto out_unlock;
1407  		}
1408  	}
1409  
1410  	switch (optname) {
1411  	case MRT_INIT:
1412  		if (optlen != sizeof(int)) {
1413  			ret = -EINVAL;
1414  			break;
1415  		}
1416  		if (rtnl_dereference(mrt->mroute_sk)) {
1417  			ret = -EADDRINUSE;
1418  			break;
1419  		}
1420  
1421  		ret = ip_ra_control(sk, 1, mrtsock_destruct);
1422  		if (ret == 0) {
1423  			rcu_assign_pointer(mrt->mroute_sk, sk);
1424  			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
1425  			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
1426  						    NETCONFA_MC_FORWARDING,
1427  						    NETCONFA_IFINDEX_ALL,
1428  						    net->ipv4.devconf_all);
1429  		}
1430  		break;
1431  	case MRT_DONE:
1432  		if (sk != rcu_access_pointer(mrt->mroute_sk)) {
1433  			ret = -EACCES;
1434  		} else {
1435  			/* We need to unlock here because mrtsock_destruct takes
1436  			 * care of rtnl itself and we can't change that due to
1437  			 * the IP_ROUTER_ALERT setsockopt which runs without it.
1438  			 */
1439  			rtnl_unlock();
1440  			ret = ip_ra_control(sk, 0, NULL);
1441  			goto out;
1442  		}
1443  		break;
1444  	case MRT_ADD_VIF:
1445  	case MRT_DEL_VIF:
1446  		if (optlen != sizeof(vif)) {
1447  			ret = -EINVAL;
1448  			break;
1449  		}
1450  		if (copy_from_user(&vif, optval, sizeof(vif))) {
1451  			ret = -EFAULT;
1452  			break;
1453  		}
1454  		if (vif.vifc_vifi >= MAXVIFS) {
1455  			ret = -ENFILE;
1456  			break;
1457  		}
1458  		if (optname == MRT_ADD_VIF) {
1459  			ret = vif_add(net, mrt, &vif,
1460  				      sk == rtnl_dereference(mrt->mroute_sk));
1461  		} else {
1462  			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
1463  		}
1464  		break;
1465  	/* Manipulate the forwarding caches. These live
1466  	 * in a sort of kernel/user symbiosis.
1467  	 */
1468  	case MRT_ADD_MFC:
1469  	case MRT_DEL_MFC:
1470  		parent = -1;
1471  		/* fall through */
1472  	case MRT_ADD_MFC_PROXY:
1473  	case MRT_DEL_MFC_PROXY:
1474  		if (optlen != sizeof(mfc)) {
1475  			ret = -EINVAL;
1476  			break;
1477  		}
1478  		if (copy_from_user(&mfc, optval, sizeof(mfc))) {
1479  			ret = -EFAULT;
1480  			break;
1481  		}
1482  		if (parent == 0)
1483  			parent = mfc.mfcc_parent;
1484  		if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY)
1485  			ret = ipmr_mfc_delete(mrt, &mfc, parent);
1486  		else
1487  			ret = ipmr_mfc_add(net, mrt, &mfc,
1488  					   sk == rtnl_dereference(mrt->mroute_sk),
1489  					   parent);
1490  		break;
1491  	case MRT_FLUSH:
1492  		if (optlen != sizeof(val)) {
1493  			ret = -EINVAL;
1494  			break;
1495  		}
1496  		if (get_user(val, (int __user *)optval)) {
1497  			ret = -EFAULT;
1498  			break;
1499  		}
1500  		mroute_clean_tables(mrt, val);
1501  		break;
1502  	/* Control PIM assert. */
1503  	case MRT_ASSERT:
1504  		if (optlen != sizeof(val)) {
1505  			ret = -EINVAL;
1506  			break;
1507  		}
1508  		if (get_user(val, (int __user *)optval)) {
1509  			ret = -EFAULT;
1510  			break;
1511  		}
1512  		mrt->mroute_do_assert = val;
1513  		break;
1514  	case MRT_PIM:
1515  		if (!ipmr_pimsm_enabled()) {
1516  			ret = -ENOPROTOOPT;
1517  			break;
1518  		}
1519  		if (optlen != sizeof(val)) {
1520  			ret = -EINVAL;
1521  			break;
1522  		}
1523  		if (get_user(val, (int __user *)optval)) {
1524  			ret = -EFAULT;
1525  			break;
1526  		}
1527  
1528  		do_wrvifwhole = (val == IGMPMSG_WRVIFWHOLE);
1529  		val = !!val;
1530  		if (val != mrt->mroute_do_pim) {
1531  			mrt->mroute_do_pim = val;
1532  			mrt->mroute_do_assert = val;
1533  			mrt->mroute_do_wrvifwhole = do_wrvifwhole;
1534  		}
1535  		break;
1536  	case MRT_TABLE:
1537  		if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) {
1538  			ret = -ENOPROTOOPT;
1539  			break;
1540  		}
1541  		if (optlen != sizeof(uval)) {
1542  			ret = -EINVAL;
1543  			break;
1544  		}
1545  		if (get_user(uval, (u32 __user *)optval)) {
1546  			ret = -EFAULT;
1547  			break;
1548  		}
1549  
1550  		if (sk == rtnl_dereference(mrt->mroute_sk)) {
1551  			ret = -EBUSY;
1552  		} else {
1553  			mrt = ipmr_new_table(net, uval);
1554  			if (IS_ERR(mrt))
1555  				ret = PTR_ERR(mrt);
1556  			else
1557  				raw_sk(sk)->ipmr_table = uval;
1558  		}
1559  		break;
1560  	/* Spurious command, or MRT_VERSION which you cannot set. */
1561  	default:
1562  		ret = -ENOPROTOOPT;
1563  	}
1564  out_unlock:
1565  	rtnl_unlock();
1566  out:
1567  	return ret;
1568  }
1569  
1570  /* getsockopt() support for the multicast routing system. */
1571  int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1572  {
1573  	int olr;
1574  	int val;
1575  	struct net *net = sock_net(sk);
1576  	struct mr_table *mrt;
1577  
1578  	if (sk->sk_type != SOCK_RAW ||
1579  	    inet_sk(sk)->inet_num != IPPROTO_IGMP)
1580  		return -EOPNOTSUPP;
1581  
1582  	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1583  	if (!mrt)
1584  		return -ENOENT;
1585  
1586  	switch (optname) {
1587  	case MRT_VERSION:
1588  		val = 0x0305;
1589  		break;
1590  	case MRT_PIM:
1591  		if (!ipmr_pimsm_enabled())
1592  			return -ENOPROTOOPT;
1593  		val = mrt->mroute_do_pim;
1594  		break;
1595  	case MRT_ASSERT:
1596  		val = mrt->mroute_do_assert;
1597  		break;
1598  	default:
1599  		return -ENOPROTOOPT;
1600  	}
1601  
1602  	if (get_user(olr, optlen))
1603  		return -EFAULT;
1604  	olr = min_t(unsigned int, olr, sizeof(int));
1605  	if (olr < 0)
1606  		return -EINVAL;
1607  	if (put_user(olr, optlen))
1608  		return -EFAULT;
1609  	if (copy_to_user(optval, &val, olr))
1610  		return -EFAULT;
1611  	return 0;
1612  }
1613  
1614  /* The IP multicast ioctl support routines. */
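/* Editorial sketch, not part of the original source: per-vif and per-(S,G)
 * counters can be read from userspace over the mroute socket ("s", "src"
 * and "grp" are placeholders):
 *
 *	struct sioc_vif_req vr = { .vifi = 0 };
 *	ioctl(s, SIOCGETVIFCNT, &vr);	// fills icount/ocount/ibytes/obytes
 *
 *	struct sioc_sg_req sr = { .src.s_addr = src, .grp.s_addr = grp };
 *	ioctl(s, SIOCGETSGCNT, &sr);	// fills pktcnt/bytecnt/wrong_if
 */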
1615  int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1616  {
1617  	struct sioc_sg_req sr;
1618  	struct sioc_vif_req vr;
1619  	struct vif_device *vif;
1620  	struct mfc_cache *c;
1621  	struct net *net = sock_net(sk);
1622  	struct mr_table *mrt;
1623  
1624  	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1625  	if (!mrt)
1626  		return -ENOENT;
1627  
1628  	switch (cmd) {
1629  	case SIOCGETVIFCNT:
1630  		if (copy_from_user(&vr, arg, sizeof(vr)))
1631  			return -EFAULT;
1632  		if (vr.vifi >= mrt->maxvif)
1633  			return -EINVAL;
1634  		vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
1635  		read_lock(&mrt_lock);
1636  		vif = &mrt->vif_table[vr.vifi];
1637  		if (VIF_EXISTS(mrt, vr.vifi)) {
1638  			vr.icount = vif->pkt_in;
1639  			vr.ocount = vif->pkt_out;
1640  			vr.ibytes = vif->bytes_in;
1641  			vr.obytes = vif->bytes_out;
1642  			read_unlock(&mrt_lock);
1643  
1644  			if (copy_to_user(arg, &vr, sizeof(vr)))
1645  				return -EFAULT;
1646  			return 0;
1647  		}
1648  		read_unlock(&mrt_lock);
1649  		return -EADDRNOTAVAIL;
1650  	case SIOCGETSGCNT:
1651  		if (copy_from_user(&sr, arg, sizeof(sr)))
1652  			return -EFAULT;
1653  
1654  		rcu_read_lock();
1655  		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
1656  		if (c) {
1657  			sr.pktcnt = c->_c.mfc_un.res.pkt;
1658  			sr.bytecnt = c->_c.mfc_un.res.bytes;
1659  			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1660  			rcu_read_unlock();
1661  
1662  			if (copy_to_user(arg, &sr, sizeof(sr)))
1663  				return -EFAULT;
1664  			return 0;
1665  		}
1666  		rcu_read_unlock();
1667  		return -EADDRNOTAVAIL;
1668  	default:
1669  		return -ENOIOCTLCMD;
1670  	}
1671  }
1672  
1673  #ifdef CONFIG_COMPAT
1674  struct compat_sioc_sg_req {
1675  	struct in_addr src;
1676  	struct in_addr grp;
1677  	compat_ulong_t pktcnt;
1678  	compat_ulong_t bytecnt;
1679  	compat_ulong_t wrong_if;
1680  };
1681  
1682  struct compat_sioc_vif_req {
1683  	vifi_t	vifi;		/* Which iface */
1684  	compat_ulong_t icount;
1685  	compat_ulong_t ocount;
1686  	compat_ulong_t ibytes;
1687  	compat_ulong_t obytes;
1688  };
1689  
1690  int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1691  {
1692  	struct compat_sioc_sg_req sr;
1693  	struct compat_sioc_vif_req vr;
1694  	struct vif_device *vif;
1695  	struct mfc_cache *c;
1696  	struct net *net = sock_net(sk);
1697  	struct mr_table *mrt;
1698  
1699  	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1700  	if (!mrt)
1701  		return -ENOENT;
1702  
1703  	switch (cmd) {
1704  	case SIOCGETVIFCNT:
1705  		if (copy_from_user(&vr, arg, sizeof(vr)))
1706  			return -EFAULT;
1707  		if (vr.vifi >= mrt->maxvif)
1708  			return -EINVAL;
1709  		vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
1710  		read_lock(&mrt_lock);
1711  		vif = &mrt->vif_table[vr.vifi];
1712  		if (VIF_EXISTS(mrt, vr.vifi)) {
1713  			vr.icount = vif->pkt_in;
1714  			vr.ocount = vif->pkt_out;
1715  			vr.ibytes = vif->bytes_in;
1716  			vr.obytes = vif->bytes_out;
1717  			read_unlock(&mrt_lock);
1718  
1719  			if (copy_to_user(arg, &vr, sizeof(vr)))
1720  				return -EFAULT;
1721  			return 0;
1722  		}
1723  		read_unlock(&mrt_lock);
1724  		return -EADDRNOTAVAIL;
1725  	case SIOCGETSGCNT:
1726  		if (copy_from_user(&sr, arg, sizeof(sr)))
1727  			return -EFAULT;
1728  
1729  		rcu_read_lock();
1730  		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
1731  		if (c) {
1732  			sr.pktcnt = c->_c.mfc_un.res.pkt;
1733  			sr.bytecnt = c->_c.mfc_un.res.bytes;
1734  			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1735  			rcu_read_unlock();
1736  
1737  			if (copy_to_user(arg, &sr, sizeof(sr)))
1738  				return -EFAULT;
1739  			return 0;
1740  		}
1741  		rcu_read_unlock();
1742  		return -EADDRNOTAVAIL;
1743  	default:
1744  		return -ENOIOCTLCMD;
1745  	}
1746  }
1747  #endif
1748  
1749  static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1750  {
1751  	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1752  	struct net *net = dev_net(dev);
1753  	struct mr_table *mrt;
1754  	struct vif_device *v;
1755  	int ct;
1756  
1757  	if (event != NETDEV_UNREGISTER)
1758  		return NOTIFY_DONE;
1759  
1760  	ipmr_for_each_table(mrt, net) {
1761  		v = &mrt->vif_table[0];
1762  		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1763  			if (v->dev == dev)
1764  				vif_delete(mrt, ct, 1, NULL);
1765  		}
1766  	}
1767  	return NOTIFY_DONE;
1768  }
1769  
1770  static struct notifier_block ip_mr_notifier = {
1771  	.notifier_call = ipmr_device_event,
1772  };
1773  
1774  /* Encapsulate a packet by attaching a valid IPIP header to it.
1775   * This avoids tunnel drivers and other mess and gives us the speed so
1776   * important for multicast video.
1777   */
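/* Editorial sketch of the result (derived from ipmr_queue_xmit() below,
 * which calls this with vif->local/vif->remote for VIFF_TUNNEL vifs):
 *
 *	| outer IPv4 header: saddr = vif->local, daddr = vif->remote,
 *	  protocol = IPPROTO_IPIP | original IP header | payload |
 */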
1778  static void ip_encap(struct net *net, struct sk_buff *skb,
1779  		     __be32 saddr, __be32 daddr)
1780  {
1781  	struct iphdr *iph;
1782  	const struct iphdr *old_iph = ip_hdr(skb);
1783  
1784  	skb_push(skb, sizeof(struct iphdr));
1785  	skb->transport_header = skb->network_header;
1786  	skb_reset_network_header(skb);
1787  	iph = ip_hdr(skb);
1788  
1789  	iph->version	=	4;
1790  	iph->tos	=	old_iph->tos;
1791  	iph->ttl	=	old_iph->ttl;
1792  	iph->frag_off	=	0;
1793  	iph->daddr	=	daddr;
1794  	iph->saddr	=	saddr;
1795  	iph->protocol	=	IPPROTO_IPIP;
1796  	iph->ihl	=	5;
1797  	iph->tot_len	=	htons(skb->len);
1798  	ip_select_ident(net, skb, NULL);
1799  	ip_send_check(iph);
1800  
1801  	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1802  	nf_reset(skb);
1803  }
1804  
1805  static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
1806  				      struct sk_buff *skb)
1807  {
1808  	struct ip_options *opt = &(IPCB(skb)->opt);
1809  
1810  	IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
1811  	IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
1812  
1813  	if (unlikely(opt->optlen))
1814  		ip_forward_options(skb);
1815  
1816  	return dst_output(net, sk, skb);
1817  }
1818  
1819  #ifdef CONFIG_NET_SWITCHDEV
1820  static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
1821  				   int in_vifi, int out_vifi)
1822  {
1823  	struct vif_device *out_vif = &mrt->vif_table[out_vifi];
1824  	struct vif_device *in_vif = &mrt->vif_table[in_vifi];
1825  
1826  	if (!skb->offload_l3_fwd_mark)
1827  		return false;
1828  	if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len)
1829  		return false;
1830  	return netdev_phys_item_id_same(&out_vif->dev_parent_id,
1831  					&in_vif->dev_parent_id);
1832  }
1833  #else
1834  static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
1835  				   int in_vifi, int out_vifi)
1836  {
1837  	return false;
1838  }
1839  #endif
1840  
1841  /* Processing handlers for ipmr_forward */
1842  
1843  static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1844  			    int in_vifi, struct sk_buff *skb, int vifi)
1845  {
1846  	const struct iphdr *iph = ip_hdr(skb);
1847  	struct vif_device *vif = &mrt->vif_table[vifi];
1848  	struct net_device *dev;
1849  	struct rtable *rt;
1850  	struct flowi4 fl4;
1851  	int    encap = 0;
1852  
1853  	if (!vif->dev)
1854  		goto out_free;
1855  
1856  	if (vif->flags & VIFF_REGISTER) {
1857  		vif->pkt_out++;
1858  		vif->bytes_out += skb->len;
1859  		vif->dev->stats.tx_bytes += skb->len;
1860  		vif->dev->stats.tx_packets++;
1861  		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
1862  		goto out_free;
1863  	}
1864  
1865  	if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi))
1866  		goto out_free;
1867  
1868  	if (vif->flags & VIFF_TUNNEL) {
1869  		rt = ip_route_output_ports(net, &fl4, NULL,
1870  					   vif->remote, vif->local,
1871  					   0, 0,
1872  					   IPPROTO_IPIP,
1873  					   RT_TOS(iph->tos), vif->link);
1874  		if (IS_ERR(rt))
1875  			goto out_free;
1876  		encap = sizeof(struct iphdr);
1877  	} else {
1878  		rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0,
1879  					   0, 0,
1880  					   IPPROTO_IPIP,
1881  					   RT_TOS(iph->tos), vif->link);
1882  		if (IS_ERR(rt))
1883  			goto out_free;
1884  	}
1885  
1886  	dev = rt->dst.dev;
1887  
1888  	if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
1889  		/* Do not fragment multicasts. Alas, IPv4 does not
1890  		 * allow us to send ICMP here, so the packets will simply
1891  		 * disappear into a blackhole.
1892  		 */
1893  		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
1894  		ip_rt_put(rt);
1895  		goto out_free;
1896  	}
1897  
1898  	encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;
1899  
1900  	if (skb_cow(skb, encap)) {
1901  		ip_rt_put(rt);
1902  		goto out_free;
1903  	}
1904  
1905  	vif->pkt_out++;
1906  	vif->bytes_out += skb->len;
1907  
1908  	skb_dst_drop(skb);
1909  	skb_dst_set(skb, &rt->dst);
1910  	ip_decrease_ttl(ip_hdr(skb));
1911  
1912  	/* FIXME: forward and output firewalls used to be called here.
1913  	 * What do we do with netfilter? -- RR
1914  	 */
1915  	if (vif->flags & VIFF_TUNNEL) {
1916  		ip_encap(net, skb, vif->local, vif->remote);
1917  		/* FIXME: extra output firewall step used to be here. --RR */
1918  		vif->dev->stats.tx_packets++;
1919  		vif->dev->stats.tx_bytes += skb->len;
1920  	}
1921  
1922  	IPCB(skb)->flags |= IPSKB_FORWARDED;
1923  
1924  	/* RFC 1584 teaches that a DVMRP/PIM router must deliver packets
1925  	 * locally not only before forwarding, but also after forwarding on
1926  	 * all output interfaces. Clearly, if the mrouter runs a multicast
1927  	 * application, that application should receive packets regardless of
1928  	 * which interface it joined on.
1929  	 * If we did not do this, the application would have to join on all
1930  	 * interfaces. On the other hand, a multihomed host (or a router that
1931  	 * is not an mrouter) cannot join on more than one interface - that
1932  	 * would result in receiving duplicate packets.
1933  	 */
1934  	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
1935  		net, NULL, skb, skb->dev, dev,
1936  		ipmr_forward_finish);
1937  	return;
1938  
1939  out_free:
1940  	kfree_skb(skb);
1941  }
1942  
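/* Map a net_device back to its VIF index in @mrt's vif_table;
 * returns -1 if the device is not configured as a VIF.
 */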
1943  static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1944  {
1945  	int ct;
1946  
1947  	for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1948  		if (mrt->vif_table[ct].dev == dev)
1949  			break;
1950  	}
1951  	return ct;
1952  }
1953  
1954  /* "local" means that we should preserve one skb (for local delivery) */
1955  static void ip_mr_forward(struct net *net, struct mr_table *mrt,
1956  			  struct net_device *dev, struct sk_buff *skb,
1957  			  struct mfc_cache *c, int local)
1958  {
1959  	int true_vifi = ipmr_find_vif(mrt, dev);
1960  	int psend = -1;
1961  	int vif, ct;
1962  
1963  	vif = c->_c.mfc_parent;
1964  	c->_c.mfc_un.res.pkt++;
1965  	c->_c.mfc_un.res.bytes += skb->len;
1966  	c->_c.mfc_un.res.lastuse = jiffies;
1967  
1968  	if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
1969  		struct mfc_cache *cache_proxy;
1970  
1971  		/* For an (*,G) entry, we only check that the incoming
1972  		 * interface is part of the static tree.
1973  		 */
1974  		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
1975  		if (cache_proxy &&
1976  		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
1977  			goto forward;
1978  	}
1979  
1980  	/* Wrong interface: drop packet and (maybe) send PIM assert. */
1981  	if (mrt->vif_table[vif].dev != dev) {
1982  		if (rt_is_output_route(skb_rtable(skb))) {
1983  			/* It is our own packet, looped back.
1984  			 * Very complicated situation...
1985  			 *
1986  			 * The best workaround until routing daemons are
1987  			 * fixed is not to redistribute a packet if it was
1988  			 * sent through the wrong interface. This means that
1989  			 * multicast applications WILL NOT work for
1990  			 * (S,G) entries whose default multicast route points
1991  			 * to the wrong oif. In any case, it is not a good
1992  			 * idea to run multicast applications on a router.
1993  			 */
1994  			goto dont_forward;
1995  		}
1996  
1997  		c->_c.mfc_un.res.wrong_if++;
1998  
1999  		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2000  		    /* PIM-SM uses asserts when switching from the RPT to the
2001  		     * SPT, so we cannot check that the packet arrived on an
2002  		     * oif. That is bad, but otherwise we would need to move a
2003  		     * pretty large chunk of pimd into the kernel. Ough... --ANK
2004  		     */
2005  		    (mrt->mroute_do_pim ||
2006  		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2007  		    time_after(jiffies,
2008  			       c->_c.mfc_un.res.last_assert +
2009  			       MFC_ASSERT_THRESH)) {
2010  			c->_c.mfc_un.res.last_assert = jiffies;
2011  			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
2012  			if (mrt->mroute_do_wrvifwhole)
2013  				ipmr_cache_report(mrt, skb, true_vifi,
2014  						  IGMPMSG_WRVIFWHOLE);
2015  		}
2016  		goto dont_forward;
2017  	}
2018  
2019  forward:
2020  	mrt->vif_table[vif].pkt_in++;
2021  	mrt->vif_table[vif].bytes_in += skb->len;
2022  
2023  	/* Forward the frame */
2024  	if (c->mfc_origin == htonl(INADDR_ANY) &&
2025  	    c->mfc_mcastgrp == htonl(INADDR_ANY)) {
2026  		if (true_vifi >= 0 &&
2027  		    true_vifi != c->_c.mfc_parent &&
2028  		    ip_hdr(skb)->ttl >
2029  				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2030  			/* It's an (*,*) entry and the packet is not coming from
2031  			 * the upstream: forward the packet to the upstream
2032  			 * only.
2033  			 */
2034  			psend = c->_c.mfc_parent;
2035  			goto last_forward;
2036  		}
2037  		goto dont_forward;
2038  	}
2039  	for (ct = c->_c.mfc_un.res.maxvif - 1;
2040  	     ct >= c->_c.mfc_un.res.minvif; ct--) {
2041  		/* For (*,G) entry, don't forward to the incoming interface */
2042  		if ((c->mfc_origin != htonl(INADDR_ANY) ||
2043  		     ct != true_vifi) &&
2044  		    ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) {
2045  			if (psend != -1) {
2046  				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2047  
2048  				if (skb2)
2049  					ipmr_queue_xmit(net, mrt, true_vifi,
2050  							skb2, psend);
2051  			}
2052  			psend = ct;
2053  		}
2054  	}
2055  last_forward:
2056  	if (psend != -1) {
2057  		if (local) {
2058  			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2059  
2060  			if (skb2)
2061  				ipmr_queue_xmit(net, mrt, true_vifi, skb2,
2062  						psend);
2063  		} else {
2064  			ipmr_queue_xmit(net, mrt, true_vifi, skb, psend);
2065  			return;
2066  		}
2067  	}
2068  
2069  dont_forward:
2070  	if (!local)
2071  		kfree_skb(skb);
2072  }
2073  
2074  static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
2075  {
2076  	struct rtable *rt = skb_rtable(skb);
2077  	struct iphdr *iph = ip_hdr(skb);
2078  	struct flowi4 fl4 = {
2079  		.daddr = iph->daddr,
2080  		.saddr = iph->saddr,
2081  		.flowi4_tos = RT_TOS(iph->tos),
2082  		.flowi4_oif = (rt_is_output_route(rt) ?
2083  			       skb->dev->ifindex : 0),
2084  		.flowi4_iif = (rt_is_output_route(rt) ?
2085  			       LOOPBACK_IFINDEX :
2086  			       skb->dev->ifindex),
2087  		.flowi4_mark = skb->mark,
2088  	};
2089  	struct mr_table *mrt;
2090  	int err;
2091  
2092  	err = ipmr_fib_lookup(net, &fl4, &mrt);
2093  	if (err)
2094  		return ERR_PTR(err);
2095  	return mrt;
2096  }
2097  
2098  /* Multicast packets for forwarding arrive here
2099   * Called with rcu_read_lock();
2100   */
2101  int ip_mr_input(struct sk_buff *skb)
2102  {
2103  	struct mfc_cache *cache;
2104  	struct net *net = dev_net(skb->dev);
2105  	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
2106  	struct mr_table *mrt;
2107  	struct net_device *dev;
2108  
2109  	/* skb->dev passed in is the loX master dev for vrfs.
2110  	 * As there are no vifs associated with loopback devices,
2111  	 * get the proper interface that does have a vif associated with it.
2112  	 */
2113  	dev = skb->dev;
2114  	if (netif_is_l3_master(skb->dev)) {
2115  		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2116  		if (!dev) {
2117  			kfree_skb(skb);
2118  			return -ENODEV;
2119  		}
2120  	}
2121  
2122  	/* A packet looped back after forwarding must not be
2123  	 * forwarded a second time, but it can still be delivered locally.
2124  	 */
2125  	if (IPCB(skb)->flags & IPSKB_FORWARDED)
2126  		goto dont_forward;
2127  
2128  	mrt = ipmr_rt_fib_lookup(net, skb);
2129  	if (IS_ERR(mrt)) {
2130  		kfree_skb(skb);
2131  		return PTR_ERR(mrt);
2132  	}
2133  	if (!local) {
2134  		if (IPCB(skb)->opt.router_alert) {
2135  			if (ip_call_ra_chain(skb))
2136  				return 0;
2137  		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
2138  			/* IGMPv1 (and broken IGMPv2 implementations such as
2139  			 * Cisco IOS <= 11.2(8)) do not put the Router Alert
2140  			 * option in IGMP packets destined to routable
2141  			 * groups. This is very bad, because it means
2142  			 * that we can forward NO IGMP messages.
2143  			 */
2144  			struct sock *mroute_sk;
2145  
2146  			mroute_sk = rcu_dereference(mrt->mroute_sk);
2147  			if (mroute_sk) {
2148  				nf_reset(skb);
2149  				raw_rcv(mroute_sk, skb);
2150  				return 0;
2151  			}
2152  		}
2153  	}
2154  
2155  	/* already under rcu_read_lock() */
2156  	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
2157  	if (!cache) {
2158  		int vif = ipmr_find_vif(mrt, dev);
2159  
2160  		if (vif >= 0)
2161  			cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr,
2162  						    vif);
2163  	}
2164  
2165  	/* No usable cache entry */
2166  	if (!cache) {
2167  		int vif;
2168  
2169  		if (local) {
2170  			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2171  			ip_local_deliver(skb);
2172  			if (!skb2)
2173  				return -ENOBUFS;
2174  			skb = skb2;
2175  		}
2176  
2177  		read_lock(&mrt_lock);
2178  		vif = ipmr_find_vif(mrt, dev);
2179  		if (vif >= 0) {
2180  			int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev);
2181  			read_unlock(&mrt_lock);
2182  
2183  			return err2;
2184  		}
2185  		read_unlock(&mrt_lock);
2186  		kfree_skb(skb);
2187  		return -ENODEV;
2188  	}
2189  
2190  	read_lock(&mrt_lock);
2191  	ip_mr_forward(net, mrt, dev, skb, cache, local);
2192  	read_unlock(&mrt_lock);
2193  
2194  	if (local)
2195  		return ip_local_deliver(skb);
2196  
2197  	return 0;
2198  
2199  dont_forward:
2200  	if (local)
2201  		return ip_local_deliver(skb);
2202  	kfree_skb(skb);
2203  	return 0;
2204  }
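
/* Illustrative sketch only (userspace, not part of this file): when the
 * unresolved path above queues a packet via ipmr_cache_unresolved(), the
 * multicast routing daemon sees an IGMPMSG_NOCACHE upcall at the head of
 * the data it reads from its MRT_INIT'ed control socket, roughly:
 *
 *	char buf[2048];
 *	ssize_t n = read(fd, buf, sizeof(buf));   // fd: mroute socket
 *	struct igmpmsg *im = (struct igmpmsg *)buf;
 *	if (n > 0 && im->im_mbz == 0 && im->im_msgtype == IGMPMSG_NOCACHE)
 *		;  // resolve (im->im_src, im->im_dst), then MRT_ADD_MFC
 */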
2205  
2206  #ifdef CONFIG_IP_PIMSM_V1
2207  /* Handle IGMP messages of PIMv1 */
2208  int pim_rcv_v1(struct sk_buff *skb)
2209  {
2210  	struct igmphdr *pim;
2211  	struct net *net = dev_net(skb->dev);
2212  	struct mr_table *mrt;
2213  
2214  	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
2215  		goto drop;
2216  
2217  	pim = igmp_hdr(skb);
2218  
2219  	mrt = ipmr_rt_fib_lookup(net, skb);
2220  	if (IS_ERR(mrt))
2221  		goto drop;
2222  	if (!mrt->mroute_do_pim ||
2223  	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
2224  		goto drop;
2225  
2226  	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
2227  drop:
2228  		kfree_skb(skb);
2229  	}
2230  	return 0;
2231  }
2232  #endif
2233  
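/* Sketch of the PIMv2 Register messages handled below: each message is
 * the 8-byte register header (struct pimreghdr: type/version, reserved
 * byte, checksum, flags) followed by the encapsulated multicast
 * datagram. The checksum is accepted if it covers either the header
 * alone or the whole message; __pim_rcv() strips sizeof(struct pimreghdr)
 * before the inner packet is processed further.
 */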
2234  #ifdef CONFIG_IP_PIMSM_V2
2235  static int pim_rcv(struct sk_buff *skb)
2236  {
2237  	struct pimreghdr *pim;
2238  	struct net *net = dev_net(skb->dev);
2239  	struct mr_table *mrt;
2240  
2241  	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
2242  		goto drop;
2243  
2244  	pim = (struct pimreghdr *)skb_transport_header(skb);
2245  	if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) ||
2246  	    (pim->flags & PIM_NULL_REGISTER) ||
2247  	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
2248  	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
2249  		goto drop;
2250  
2251  	mrt = ipmr_rt_fib_lookup(net, skb);
2252  	if (IS_ERR(mrt))
2253  		goto drop;
2254  	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
2255  drop:
2256  		kfree_skb(skb);
2257  	}
2258  	return 0;
2259  }
2260  #endif
2261  
2262  int ipmr_get_route(struct net *net, struct sk_buff *skb,
2263  		   __be32 saddr, __be32 daddr,
2264  		   struct rtmsg *rtm, u32 portid)
2265  {
2266  	struct mfc_cache *cache;
2267  	struct mr_table *mrt;
2268  	int err;
2269  
2270  	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2271  	if (!mrt)
2272  		return -ENOENT;
2273  
2274  	rcu_read_lock();
2275  	cache = ipmr_cache_find(mrt, saddr, daddr);
2276  	if (!cache && skb->dev) {
2277  		int vif = ipmr_find_vif(mrt, skb->dev);
2278  
2279  		if (vif >= 0)
2280  			cache = ipmr_cache_find_any(mrt, daddr, vif);
2281  	}
2282  	if (!cache) {
2283  		struct sk_buff *skb2;
2284  		struct iphdr *iph;
2285  		struct net_device *dev;
2286  		int vif = -1;
2287  
2288  		dev = skb->dev;
2289  		read_lock(&mrt_lock);
2290  		if (dev)
2291  			vif = ipmr_find_vif(mrt, dev);
2292  		if (vif < 0) {
2293  			read_unlock(&mrt_lock);
2294  			rcu_read_unlock();
2295  			return -ENODEV;
2296  		}
2297  		skb2 = skb_clone(skb, GFP_ATOMIC);
2298  		if (!skb2) {
2299  			read_unlock(&mrt_lock);
2300  			rcu_read_unlock();
2301  			return -ENOMEM;
2302  		}
2303  
2304  		NETLINK_CB(skb2).portid = portid;
2305  		skb_push(skb2, sizeof(struct iphdr));
2306  		skb_reset_network_header(skb2);
2307  		iph = ip_hdr(skb2);
2308  		iph->ihl = sizeof(struct iphdr) >> 2;
2309  		iph->saddr = saddr;
2310  		iph->daddr = daddr;
2311  		iph->version = 0;
2312  		err = ipmr_cache_unresolved(mrt, vif, skb2, dev);
2313  		read_unlock(&mrt_lock);
2314  		rcu_read_unlock();
2315  		return err;
2316  	}
2317  
2318  	read_lock(&mrt_lock);
2319  	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2320  	read_unlock(&mrt_lock);
2321  	rcu_read_unlock();
2322  	return err;
2323  }
2324  
2325  static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2326  			    u32 portid, u32 seq, struct mfc_cache *c, int cmd,
2327  			    int flags)
2328  {
2329  	struct nlmsghdr *nlh;
2330  	struct rtmsg *rtm;
2331  	int err;
2332  
2333  	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2334  	if (!nlh)
2335  		return -EMSGSIZE;
2336  
2337  	rtm = nlmsg_data(nlh);
2338  	rtm->rtm_family   = RTNL_FAMILY_IPMR;
2339  	rtm->rtm_dst_len  = 32;
2340  	rtm->rtm_src_len  = 32;
2341  	rtm->rtm_tos      = 0;
2342  	rtm->rtm_table    = mrt->id;
2343  	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2344  		goto nla_put_failure;
2345  	rtm->rtm_type     = RTN_MULTICAST;
2346  	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2347  	if (c->_c.mfc_flags & MFC_STATIC)
2348  		rtm->rtm_protocol = RTPROT_STATIC;
2349  	else
2350  		rtm->rtm_protocol = RTPROT_MROUTED;
2351  	rtm->rtm_flags    = 0;
2352  
2353  	if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
2354  	    nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
2355  		goto nla_put_failure;
2356  	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2357  	/* do not break the dump if cache is unresolved */
2358  	if (err < 0 && err != -ENOENT)
2359  		goto nla_put_failure;
2360  
2361  	nlmsg_end(skb, nlh);
2362  	return 0;
2363  
2364  nla_put_failure:
2365  	nlmsg_cancel(skb, nlh);
2366  	return -EMSGSIZE;
2367  }
2368  
2369  static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2370  			     u32 portid, u32 seq, struct mr_mfc *c, int cmd,
2371  			     int flags)
2372  {
2373  	return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c,
2374  				cmd, flags);
2375  }
2376  
2377  static size_t mroute_msgsize(bool unresolved, int maxvif)
2378  {
2379  	size_t len =
2380  		NLMSG_ALIGN(sizeof(struct rtmsg))
2381  		+ nla_total_size(4)	/* RTA_TABLE */
2382  		+ nla_total_size(4)	/* RTA_SRC */
2383  		+ nla_total_size(4)	/* RTA_DST */
2384  		;
2385  
2386  	if (!unresolved)
2387  		len = len
2388  		      + nla_total_size(4)	/* RTA_IIF */
2389  		      + nla_total_size(0)	/* RTA_MULTIPATH */
2390  		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2391  						/* RTA_MFC_STATS */
2392  		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2393  		;
2394  
2395  	return len;
2396  }
2397  
2398  static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
2399  				 int cmd)
2400  {
2401  	struct net *net = read_pnet(&mrt->net);
2402  	struct sk_buff *skb;
2403  	int err = -ENOBUFS;
2404  
2405  	skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS,
2406  				       mrt->maxvif),
2407  			GFP_ATOMIC);
2408  	if (!skb)
2409  		goto errout;
2410  
2411  	err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2412  	if (err < 0)
2413  		goto errout;
2414  
2415  	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC);
2416  	return;
2417  
2418  errout:
2419  	kfree_skb(skb);
2420  	if (err < 0)
2421  		rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err);
2422  }
2423  
2424  static size_t igmpmsg_netlink_msgsize(size_t payloadlen)
2425  {
2426  	size_t len =
2427  		NLMSG_ALIGN(sizeof(struct rtgenmsg))
2428  		+ nla_total_size(1)	/* IPMRA_CREPORT_MSGTYPE */
2429  		+ nla_total_size(4)	/* IPMRA_CREPORT_VIF_ID */
2430  		+ nla_total_size(4)	/* IPMRA_CREPORT_SRC_ADDR */
2431  		+ nla_total_size(4)	/* IPMRA_CREPORT_DST_ADDR */
2432  					/* IPMRA_CREPORT_PKT */
2433  		+ nla_total_size(payloadlen)
2434  		;
2435  
2436  	return len;
2437  }
2438  
2439  static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
2440  {
2441  	struct net *net = read_pnet(&mrt->net);
2442  	struct nlmsghdr *nlh;
2443  	struct rtgenmsg *rtgenm;
2444  	struct igmpmsg *msg;
2445  	struct sk_buff *skb;
2446  	struct nlattr *nla;
2447  	int payloadlen;
2448  
2449  	payloadlen = pkt->len - sizeof(struct igmpmsg);
2450  	msg = (struct igmpmsg *)skb_network_header(pkt);
2451  
2452  	skb = nlmsg_new(igmpmsg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2453  	if (!skb)
2454  		goto errout;
2455  
2456  	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2457  			sizeof(struct rtgenmsg), 0);
2458  	if (!nlh)
2459  		goto errout;
2460  	rtgenm = nlmsg_data(nlh);
2461  	rtgenm->rtgen_family = RTNL_FAMILY_IPMR;
2462  	if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) ||
2463  	    nla_put_u32(skb, IPMRA_CREPORT_VIF_ID, msg->im_vif) ||
2464  	    nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR,
2465  			    msg->im_src.s_addr) ||
2466  	    nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR,
2467  			    msg->im_dst.s_addr))
2468  		goto nla_put_failure;
2469  
2470  	nla = nla_reserve(skb, IPMRA_CREPORT_PKT, payloadlen);
2471  	if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg),
2472  				  nla_data(nla), payloadlen))
2473  		goto nla_put_failure;
2474  
2475  	nlmsg_end(skb, nlh);
2476  
2477  	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC);
2478  	return;
2479  
2480  nla_put_failure:
2481  	nlmsg_cancel(skb, nlh);
2482  errout:
2483  	kfree_skb(skb);
2484  	rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS);
2485  }
2486  
2487  static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb,
2488  				       const struct nlmsghdr *nlh,
2489  				       struct nlattr **tb,
2490  				       struct netlink_ext_ack *extack)
2491  {
2492  	struct rtmsg *rtm;
2493  	int i, err;
2494  
2495  	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
2496  		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for multicast route get request");
2497  		return -EINVAL;
2498  	}
2499  
2500  	if (!netlink_strict_get_check(skb))
2501  		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
2502  					      rtm_ipv4_policy, extack);
2503  
2504  	rtm = nlmsg_data(nlh);
2505  	if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
2506  	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
2507  	    rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
2508  	    rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
2509  		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for multicast route get request");
2510  		return -EINVAL;
2511  	}
2512  
2513  	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
2514  					    rtm_ipv4_policy, extack);
2515  	if (err)
2516  		return err;
2517  
2518  	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
2519  	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
2520  		NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4");
2521  		return -EINVAL;
2522  	}
2523  
2524  	for (i = 0; i <= RTA_MAX; i++) {
2525  		if (!tb[i])
2526  			continue;
2527  
2528  		switch (i) {
2529  		case RTA_SRC:
2530  		case RTA_DST:
2531  		case RTA_TABLE:
2532  			break;
2533  		default:
2534  			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in multicast route get request");
2535  			return -EINVAL;
2536  		}
2537  	}
2538  
2539  	return 0;
2540  }
2541  
2542  static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2543  			     struct netlink_ext_ack *extack)
2544  {
2545  	struct net *net = sock_net(in_skb->sk);
2546  	struct nlattr *tb[RTA_MAX + 1];
2547  	struct sk_buff *skb = NULL;
2548  	struct mfc_cache *cache;
2549  	struct mr_table *mrt;
2550  	__be32 src, grp;
2551  	u32 tableid;
2552  	int err;
2553  
2554  	err = ipmr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
2555  	if (err < 0)
2556  		goto errout;
2557  
2558  	src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
2559  	grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
2560  	tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
2561  
2562  	mrt = ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT);
2563  	if (!mrt) {
2564  		err = -ENOENT;
2565  		goto errout_free;
2566  	}
2567  
2568  	/* entries are added/deleted only under RTNL */
2569  	rcu_read_lock();
2570  	cache = ipmr_cache_find(mrt, src, grp);
2571  	rcu_read_unlock();
2572  	if (!cache) {
2573  		err = -ENOENT;
2574  		goto errout_free;
2575  	}
2576  
2577  	skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL);
2578  	if (!skb) {
2579  		err = -ENOBUFS;
2580  		goto errout_free;
2581  	}
2582  
2583  	err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
2584  			       nlh->nlmsg_seq, cache,
2585  			       RTM_NEWROUTE, 0);
2586  	if (err < 0)
2587  		goto errout_free;
2588  
2589  	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2590  
2591  errout:
2592  	return err;
2593  
2594  errout_free:
2595  	kfree_skb(skb);
2596  	goto errout;
2597  }
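
/* Illustration only (assumed typical userspace usage, not part of this
 * file): a single (S,G) entry can be queried with an RTM_GETROUTE
 * message of family RTNL_FAMILY_IPMR carrying RTA_SRC = S, RTA_DST = G
 * and, optionally, RTA_TABLE; bulk listings (e.g. "ip mroute show")
 * normally go through the dump handler below instead.
 */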
2598  
2599  static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2600  {
2601  	struct fib_dump_filter filter = {};
2602  	int err;
2603  
2604  	if (cb->strict_check) {
2605  		err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh,
2606  					    &filter, cb);
2607  		if (err < 0)
2608  			return err;
2609  	}
2610  
2611  	if (filter.table_id) {
2612  		struct mr_table *mrt;
2613  
2614  		mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id);
2615  		if (!mrt) {
2616  			if (filter.dump_all_families)
2617  				return skb->len;
2618  
2619  			NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist");
2620  			return -ENOENT;
2621  		}
2622  		err = mr_table_dump(mrt, skb, cb, _ipmr_fill_mroute,
2623  				    &mfc_unres_lock, &filter);
2624  		return skb->len ? : err;
2625  	}
2626  
2627  	return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
2628  				_ipmr_fill_mroute, &mfc_unres_lock, &filter);
2629  }
2630  
2631  static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
2632  	[RTA_SRC]	= { .type = NLA_U32 },
2633  	[RTA_DST]	= { .type = NLA_U32 },
2634  	[RTA_IIF]	= { .type = NLA_U32 },
2635  	[RTA_TABLE]	= { .type = NLA_U32 },
2636  	[RTA_MULTIPATH]	= { .len = sizeof(struct rtnexthop) },
2637  };
2638  
2639  static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol)
2640  {
2641  	switch (rtm_protocol) {
2642  	case RTPROT_STATIC:
2643  	case RTPROT_MROUTED:
2644  		return true;
2645  	}
2646  	return false;
2647  }
2648  
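/* Translate an RTA_MULTIPATH attribute into per-VIF TTL thresholds:
 * the rtnh_hops value of each successive rtnexthop becomes
 * mfcc_ttls[0], mfcc_ttls[1], ... in order. Returns the number of TTLs
 * filled in on success, or -EINVAL if nexthop data is left over
 * (malformed attribute or too many nexthops).
 */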
2649  static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc)
2650  {
2651  	struct rtnexthop *rtnh = nla_data(nla);
2652  	int remaining = nla_len(nla), vifi = 0;
2653  
2654  	while (rtnh_ok(rtnh, remaining)) {
2655  		mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops;
2656  		if (++vifi == MAXVIFS)
2657  			break;
2658  		rtnh = rtnh_next(rtnh, &remaining);
2659  	}
2660  
2661  	return remaining > 0 ? -EINVAL : vifi;
2662  }
2663  
2664  /* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */
2665  static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh,
2666  			    struct mfcctl *mfcc, int *mrtsock,
2667  			    struct mr_table **mrtret,
2668  			    struct netlink_ext_ack *extack)
2669  {
2670  	struct net_device *dev = NULL;
2671  	u32 tblid = RT_TABLE_DEFAULT;
2672  	struct mr_table *mrt;
2673  	struct nlattr *attr;
2674  	struct rtmsg *rtm;
2675  	int ret, rem;
2676  
2677  	ret = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX,
2678  					rtm_ipmr_policy, extack);
2679  	if (ret < 0)
2680  		goto out;
2681  	rtm = nlmsg_data(nlh);
2682  
2683  	ret = -EINVAL;
2684  	if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 ||
2685  	    rtm->rtm_type != RTN_MULTICAST ||
2686  	    rtm->rtm_scope != RT_SCOPE_UNIVERSE ||
2687  	    !ipmr_rtm_validate_proto(rtm->rtm_protocol))
2688  		goto out;
2689  
2690  	memset(mfcc, 0, sizeof(*mfcc));
2691  	mfcc->mfcc_parent = -1;
2692  	ret = 0;
2693  	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) {
2694  		switch (nla_type(attr)) {
2695  		case RTA_SRC:
2696  			mfcc->mfcc_origin.s_addr = nla_get_be32(attr);
2697  			break;
2698  		case RTA_DST:
2699  			mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr);
2700  			break;
2701  		case RTA_IIF:
2702  			dev = __dev_get_by_index(net, nla_get_u32(attr));
2703  			if (!dev) {
2704  				ret = -ENODEV;
2705  				goto out;
2706  			}
2707  			break;
2708  		case RTA_MULTIPATH:
2709  			if (ipmr_nla_get_ttls(attr, mfcc) < 0) {
2710  				ret = -EINVAL;
2711  				goto out;
2712  			}
2713  			break;
2714  		case RTA_PREFSRC:
2715  			ret = 1;
2716  			break;
2717  		case RTA_TABLE:
2718  			tblid = nla_get_u32(attr);
2719  			break;
2720  		}
2721  	}
2722  	mrt = ipmr_get_table(net, tblid);
2723  	if (!mrt) {
2724  		ret = -ENOENT;
2725  		goto out;
2726  	}
2727  	*mrtret = mrt;
2728  	*mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0;
2729  	if (dev)
2730  		mfcc->mfcc_parent = ipmr_find_vif(mrt, dev);
2731  
2732  out:
2733  	return ret;
2734  }
2735  
2736  /* takes care of both newroute and delroute */
2737  static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh,
2738  			  struct netlink_ext_ack *extack)
2739  {
2740  	struct net *net = sock_net(skb->sk);
2741  	int ret, mrtsock, parent;
2742  	struct mr_table *tbl;
2743  	struct mfcctl mfcc;
2744  
2745  	mrtsock = 0;
2746  	tbl = NULL;
2747  	ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack);
2748  	if (ret < 0)
2749  		return ret;
2750  
2751  	parent = ret ? mfcc.mfcc_parent : -1;
2752  	if (nlh->nlmsg_type == RTM_NEWROUTE)
2753  		return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent);
2754  	else
2755  		return ipmr_mfc_delete(tbl, &mfcc, parent);
2756  }
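
/* Illustration only (userspace, assumed typical usage): the same MFC
 * entry handled above via rtnetlink can also be installed through the
 * classic setsockopt() interface on an MRT_INIT'ed IGMP raw socket:
 *
 *	struct mfcctl mc = {0};
 *	mc.mfcc_origin.s_addr   = inet_addr("10.0.0.1");    // S
 *	mc.mfcc_mcastgrp.s_addr = inet_addr("239.1.1.1");   // G
 *	mc.mfcc_parent  = 0;     // incoming VIF
 *	mc.mfcc_ttls[1] = 1;     // forward on VIF 1 if TTL > 1
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 */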
2757  
2758  static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
2759  {
2760  	u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len);
2761  
2762  	if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) ||
2763  	    nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) ||
2764  	    nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM,
2765  			mrt->mroute_reg_vif_num) ||
2766  	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT,
2767  		       mrt->mroute_do_assert) ||
2768  	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim) ||
2769  	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE,
2770  		       mrt->mroute_do_wrvifwhole))
2771  		return false;
2772  
2773  	return true;
2774  }
2775  
2776  static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb)
2777  {
2778  	struct nlattr *vif_nest;
2779  	struct vif_device *vif;
2780  
2781  	/* if the VIF doesn't exist just continue */
2782  	if (!VIF_EXISTS(mrt, vifid))
2783  		return true;
2784  
2785  	vif = &mrt->vif_table[vifid];
2786  	vif_nest = nla_nest_start_noflag(skb, IPMRA_VIF);
2787  	if (!vif_nest)
2788  		return false;
2789  	if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) ||
2790  	    nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) ||
2791  	    nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) ||
2792  	    nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in,
2793  			      IPMRA_VIFA_PAD) ||
2794  	    nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, vif->bytes_out,
2795  			      IPMRA_VIFA_PAD) ||
2796  	    nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, vif->pkt_in,
2797  			      IPMRA_VIFA_PAD) ||
2798  	    nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, vif->pkt_out,
2799  			      IPMRA_VIFA_PAD) ||
2800  	    nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) ||
2801  	    nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) {
2802  		nla_nest_cancel(skb, vif_nest);
2803  		return false;
2804  	}
2805  	nla_nest_end(skb, vif_nest);
2806  
2807  	return true;
2808  }
2809  
2810  static int ipmr_valid_dumplink(const struct nlmsghdr *nlh,
2811  			       struct netlink_ext_ack *extack)
2812  {
2813  	struct ifinfomsg *ifm;
2814  
2815  	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
2816  		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for ipmr link dump");
2817  		return -EINVAL;
2818  	}
2819  
2820  	if (nlmsg_attrlen(nlh, sizeof(*ifm))) {
2821  		NL_SET_ERR_MSG(extack, "Invalid data after header in ipmr link dump");
2822  		return -EINVAL;
2823  	}
2824  
2825  	ifm = nlmsg_data(nlh);
2826  	if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
2827  	    ifm->ifi_change || ifm->ifi_index) {
2828  		NL_SET_ERR_MSG(extack, "Invalid values in header for ipmr link dump request");
2829  		return -EINVAL;
2830  	}
2831  
2832  	return 0;
2833  }
2834  
2835  static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb)
2836  {
2837  	struct net *net = sock_net(skb->sk);
2838  	struct nlmsghdr *nlh = NULL;
2839  	unsigned int t = 0, s_t;
2840  	unsigned int e = 0, s_e;
2841  	struct mr_table *mrt;
2842  
2843  	if (cb->strict_check) {
2844  		int err = ipmr_valid_dumplink(cb->nlh, cb->extack);
2845  
2846  		if (err < 0)
2847  			return err;
2848  	}
2849  
2850  	s_t = cb->args[0];
2851  	s_e = cb->args[1];
2852  
2853  	ipmr_for_each_table(mrt, net) {
2854  		struct nlattr *vifs, *af;
2855  		struct ifinfomsg *hdr;
2856  		u32 i;
2857  
2858  		if (t < s_t)
2859  			goto skip_table;
2860  		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
2861  				cb->nlh->nlmsg_seq, RTM_NEWLINK,
2862  				sizeof(*hdr), NLM_F_MULTI);
2863  		if (!nlh)
2864  			break;
2865  
2866  		hdr = nlmsg_data(nlh);
2867  		memset(hdr, 0, sizeof(*hdr));
2868  		hdr->ifi_family = RTNL_FAMILY_IPMR;
2869  
2870  		af = nla_nest_start_noflag(skb, IFLA_AF_SPEC);
2871  		if (!af) {
2872  			nlmsg_cancel(skb, nlh);
2873  			goto out;
2874  		}
2875  
2876  		if (!ipmr_fill_table(mrt, skb)) {
2877  			nlmsg_cancel(skb, nlh);
2878  			goto out;
2879  		}
2880  
2881  		vifs = nla_nest_start_noflag(skb, IPMRA_TABLE_VIFS);
2882  		if (!vifs) {
2883  			nla_nest_end(skb, af);
2884  			nlmsg_end(skb, nlh);
2885  			goto out;
2886  		}
2887  		for (i = 0; i < mrt->maxvif; i++) {
2888  			if (e < s_e)
2889  				goto skip_entry;
2890  			if (!ipmr_fill_vif(mrt, i, skb)) {
2891  				nla_nest_end(skb, vifs);
2892  				nla_nest_end(skb, af);
2893  				nlmsg_end(skb, nlh);
2894  				goto out;
2895  			}
2896  skip_entry:
2897  			e++;
2898  		}
2899  		s_e = 0;
2900  		e = 0;
2901  		nla_nest_end(skb, vifs);
2902  		nla_nest_end(skb, af);
2903  		nlmsg_end(skb, nlh);
2904  skip_table:
2905  		t++;
2906  	}
2907  
2908  out:
2909  	cb->args[1] = e;
2910  	cb->args[0] = t;
2911  
2912  	return skb->len;
2913  }
2914  
2915  #ifdef CONFIG_PROC_FS
2916  /* The /proc interfaces to multicast routing:
2917   * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
2918   */
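
/* Illustrative /proc/net/ip_mr_vif sample (format produced by
 * ipmr_vif_seq_show() below; the values are made up):
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *	 0 eth0         123456     789    654321     987 00000 0A000001 00000000
 */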
2919  
2920  static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
2921  	__acquires(mrt_lock)
2922  {
2923  	struct mr_vif_iter *iter = seq->private;
2924  	struct net *net = seq_file_net(seq);
2925  	struct mr_table *mrt;
2926  
2927  	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2928  	if (!mrt)
2929  		return ERR_PTR(-ENOENT);
2930  
2931  	iter->mrt = mrt;
2932  
2933  	read_lock(&mrt_lock);
2934  	return mr_vif_seq_start(seq, pos);
2935  }
2936  
2937  static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
2938  	__releases(mrt_lock)
2939  {
2940  	read_unlock(&mrt_lock);
2941  }
2942  
2943  static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
2944  {
2945  	struct mr_vif_iter *iter = seq->private;
2946  	struct mr_table *mrt = iter->mrt;
2947  
2948  	if (v == SEQ_START_TOKEN) {
2949  		seq_puts(seq,
2950  			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
2951  	} else {
2952  		const struct vif_device *vif = v;
2953  		const char *name =  vif->dev ?
2954  				    vif->dev->name : "none";
2955  
2956  		seq_printf(seq,
2957  			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
2958  			   vif - mrt->vif_table,
2959  			   name, vif->bytes_in, vif->pkt_in,
2960  			   vif->bytes_out, vif->pkt_out,
2961  			   vif->flags, vif->local, vif->remote);
2962  	}
2963  	return 0;
2964  }
2965  
2966  static const struct seq_operations ipmr_vif_seq_ops = {
2967  	.start = ipmr_vif_seq_start,
2968  	.next  = mr_vif_seq_next,
2969  	.stop  = ipmr_vif_seq_stop,
2970  	.show  = ipmr_vif_seq_show,
2971  };
2972  
2973  static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
2974  {
2975  	struct net *net = seq_file_net(seq);
2976  	struct mr_table *mrt;
2977  
2978  	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2979  	if (!mrt)
2980  		return ERR_PTR(-ENOENT);
2981  
2982  	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
2983  }
2984  
2985  static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
2986  {
2987  	int n;
2988  
2989  	if (v == SEQ_START_TOKEN) {
2990  		seq_puts(seq,
2991  		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
2992  	} else {
2993  		const struct mfc_cache *mfc = v;
2994  		const struct mr_mfc_iter *it = seq->private;
2995  		const struct mr_table *mrt = it->mrt;
2996  
2997  		seq_printf(seq, "%08X %08X %-3hd",
2998  			   (__force u32) mfc->mfc_mcastgrp,
2999  			   (__force u32) mfc->mfc_origin,
3000  			   mfc->_c.mfc_parent);
3001  
3002  		if (it->cache != &mrt->mfc_unres_queue) {
3003  			seq_printf(seq, " %8lu %8lu %8lu",
3004  				   mfc->_c.mfc_un.res.pkt,
3005  				   mfc->_c.mfc_un.res.bytes,
3006  				   mfc->_c.mfc_un.res.wrong_if);
3007  			for (n = mfc->_c.mfc_un.res.minvif;
3008  			     n < mfc->_c.mfc_un.res.maxvif; n++) {
3009  				if (VIF_EXISTS(mrt, n) &&
3010  				    mfc->_c.mfc_un.res.ttls[n] < 255)
3011  					seq_printf(seq,
3012  					   " %2d:%-3d",
3013  					   n, mfc->_c.mfc_un.res.ttls[n]);
3014  			}
3015  		} else {
3016  			/* unresolved mfc_caches don't contain
3017  			 * pkt, bytes and wrong_if values
3018  			 */
3019  			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
3020  		}
3021  		seq_putc(seq, '\n');
3022  	}
3023  	return 0;
3024  }
3025  
3026  static const struct seq_operations ipmr_mfc_seq_ops = {
3027  	.start = ipmr_mfc_seq_start,
3028  	.next  = mr_mfc_seq_next,
3029  	.stop  = mr_mfc_seq_stop,
3030  	.show  = ipmr_mfc_seq_show,
3031  };
3032  #endif
3033  
3034  #ifdef CONFIG_IP_PIMSM_V2
3035  static const struct net_protocol pim_protocol = {
3036  	.handler	=	pim_rcv,
3037  	.netns_ok	=	1,
3038  };
3039  #endif
3040  
3041  static unsigned int ipmr_seq_read(struct net *net)
3042  {
3043  	ASSERT_RTNL();
3044  
3045  	return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net);
3046  }
3047  
3048  static int ipmr_dump(struct net *net, struct notifier_block *nb)
3049  {
3050  	return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump,
3051  		       ipmr_mr_table_iter, &mrt_lock);
3052  }
3053  
3054  static const struct fib_notifier_ops ipmr_notifier_ops_template = {
3055  	.family		= RTNL_FAMILY_IPMR,
3056  	.fib_seq_read	= ipmr_seq_read,
3057  	.fib_dump	= ipmr_dump,
3058  	.owner		= THIS_MODULE,
3059  };
3060  
3061  static int __net_init ipmr_notifier_init(struct net *net)
3062  {
3063  	struct fib_notifier_ops *ops;
3064  
3065  	net->ipv4.ipmr_seq = 0;
3066  
3067  	ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
3068  	if (IS_ERR(ops))
3069  		return PTR_ERR(ops);
3070  	net->ipv4.ipmr_notifier_ops = ops;
3071  
3072  	return 0;
3073  }
3074  
3075  static void __net_exit ipmr_notifier_exit(struct net *net)
3076  {
3077  	fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops);
3078  	net->ipv4.ipmr_notifier_ops = NULL;
3079  }
3080  
3081  /* Setup for IP multicast routing */
3082  static int __net_init ipmr_net_init(struct net *net)
3083  {
3084  	int err;
3085  
3086  	err = ipmr_notifier_init(net);
3087  	if (err)
3088  		goto ipmr_notifier_fail;
3089  
3090  	err = ipmr_rules_init(net);
3091  	if (err < 0)
3092  		goto ipmr_rules_fail;
3093  
3094  #ifdef CONFIG_PROC_FS
3095  	err = -ENOMEM;
3096  	if (!proc_create_net("ip_mr_vif", 0, net->proc_net, &ipmr_vif_seq_ops,
3097  			sizeof(struct mr_vif_iter)))
3098  		goto proc_vif_fail;
3099  	if (!proc_create_net("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
3100  			sizeof(struct mr_mfc_iter)))
3101  		goto proc_cache_fail;
3102  #endif
3103  	return 0;
3104  
3105  #ifdef CONFIG_PROC_FS
3106  proc_cache_fail:
3107  	remove_proc_entry("ip_mr_vif", net->proc_net);
3108  proc_vif_fail:
3109  	ipmr_rules_exit(net);
3110  #endif
3111  ipmr_rules_fail:
3112  	ipmr_notifier_exit(net);
3113  ipmr_notifier_fail:
3114  	return err;
3115  }
3116  
3117  static void __net_exit ipmr_net_exit(struct net *net)
3118  {
3119  #ifdef CONFIG_PROC_FS
3120  	remove_proc_entry("ip_mr_cache", net->proc_net);
3121  	remove_proc_entry("ip_mr_vif", net->proc_net);
3122  #endif
3123  	ipmr_notifier_exit(net);
3124  	ipmr_rules_exit(net);
3125  }
3126  
3127  static struct pernet_operations ipmr_net_ops = {
3128  	.init = ipmr_net_init,
3129  	.exit = ipmr_net_exit,
3130  };
3131  
3132  int __init ip_mr_init(void)
3133  {
3134  	int err;
3135  
3136  	mrt_cachep = kmem_cache_create("ip_mrt_cache",
3137  				       sizeof(struct mfc_cache),
3138  				       0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
3139  				       NULL);
3140  
3141  	err = register_pernet_subsys(&ipmr_net_ops);
3142  	if (err)
3143  		goto reg_pernet_fail;
3144  
3145  	err = register_netdevice_notifier(&ip_mr_notifier);
3146  	if (err)
3147  		goto reg_notif_fail;
3148  #ifdef CONFIG_IP_PIMSM_V2
3149  	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
3150  		pr_err("%s: can't add PIM protocol\n", __func__);
3151  		err = -EAGAIN;
3152  		goto add_proto_fail;
3153  	}
3154  #endif
3155  	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
3156  		      ipmr_rtm_getroute, ipmr_rtm_dumproute, 0);
3157  	rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE,
3158  		      ipmr_rtm_route, NULL, 0);
3159  	rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE,
3160  		      ipmr_rtm_route, NULL, 0);
3161  
3162  	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK,
3163  		      NULL, ipmr_rtm_dumplink, 0);
3164  	return 0;
3165  
3166  #ifdef CONFIG_IP_PIMSM_V2
3167  add_proto_fail:
3168  	unregister_netdevice_notifier(&ip_mr_notifier);
3169  #endif
3170  reg_notif_fail:
3171  	unregister_pernet_subsys(&ipmr_net_ops);
3172  reg_pernet_fail:
3173  	kmem_cache_destroy(mrt_cachep);
3174  	return err;
3175  }
3176