1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Linux IPv6 multicast routing support for BSD pim6sd
4 * Based on net/ipv4/ipmr.c.
5 *
6 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
7 * LSIIT Laboratory, Strasbourg, France
8 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
9 * 6WIND, Paris, France
10 * Copyright (C)2007,2008 USAGI/WIDE Project
11 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
12 */
13
14 #include <linux/uaccess.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/errno.h>
18 #include <linux/mm.h>
19 #include <linux/kernel.h>
20 #include <linux/fcntl.h>
21 #include <linux/stat.h>
22 #include <linux/socket.h>
23 #include <linux/inet.h>
24 #include <linux/netdevice.h>
25 #include <linux/inetdevice.h>
26 #include <linux/proc_fs.h>
27 #include <linux/seq_file.h>
28 #include <linux/init.h>
29 #include <linux/compat.h>
30 #include <linux/rhashtable.h>
31 #include <net/protocol.h>
32 #include <linux/skbuff.h>
33 #include <net/raw.h>
34 #include <linux/notifier.h>
35 #include <linux/if_arp.h>
36 #include <net/checksum.h>
37 #include <net/netlink.h>
38 #include <net/fib_rules.h>
39
40 #include <net/ipv6.h>
41 #include <net/ip6_route.h>
42 #include <linux/mroute6.h>
43 #include <linux/pim.h>
44 #include <net/addrconf.h>
45 #include <linux/netfilter_ipv6.h>
46 #include <linux/export.h>
47 #include <net/ip6_checksum.h>
48 #include <linux/netconf.h>
49 #include <net/ip_tunnels.h>
50
51 #include <linux/nospec.h>
52
53 struct ip6mr_rule {
54 struct fib_rule common;
55 };
56
/* Result slot of a rules lookup: the multicast routing table that was chosen. */
struct ip6mr_result {
	struct mr_table		*mrt;
};
60
/*
 * Big lock protecting the vif table, the mrt cache and the mroute socket
 * state.  Note that the changes themselves are serialized via rtnl_lock.
 */

static DEFINE_SPINLOCK(mrt_lock);
66
vif_dev_read(const struct vif_device * vif)67 static struct net_device *vif_dev_read(const struct vif_device *vif)
68 {
69 return rcu_dereference(vif->dev);
70 }
71
72 /* Multicast router control variables */
73
/* Dedicated spinlock guarding the queue of unresolved cache entries. */
static DEFINE_SPINLOCK(mfc_unres_lock);
76
77 /* We return to original Alan's scheme. Hash table of resolved
78 entries is changed only in process context and protected
79 with weak lock mrt_lock. Queue of unresolved entries is protected
80 with strong spinlock mfc_unres_lock.
81
82 In this case data path is free of exclusive locks at all.
83 */
84
85 static struct kmem_cache *mrt_cachep __read_mostly;
86
87 static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
88 static void ip6mr_free_table(struct mr_table *mrt);
89
90 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
91 struct net_device *dev, struct sk_buff *skb,
92 struct mfc6_cache *cache);
93 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
94 mifi_t mifi, int assert);
95 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
96 int cmd);
97 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
98 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
99 struct netlink_ext_ack *extack);
100 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
101 struct netlink_callback *cb);
102 static void mroute_clean_tables(struct mr_table *mrt, int flags);
103 static void ipmr_expire_process(struct timer_list *t);
104
105 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/*
 * Walk every multicast routing table of @net.  The extra condition passed
 * to list_for_each_entry_rcu() is true when RTNL is held or the table
 * list is empty.
 */
#define ip6mr_for_each_table(mrt, net)					\
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list,	\
				lockdep_rtnl_is_held() ||		\
				list_empty(&net->ipv6.mr6_tables))
110
ip6mr_mr_table_iter(struct net * net,struct mr_table * mrt)111 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
112 struct mr_table *mrt)
113 {
114 struct mr_table *ret;
115
116 if (!mrt)
117 ret = list_entry_rcu(net->ipv6.mr6_tables.next,
118 struct mr_table, list);
119 else
120 ret = list_entry_rcu(mrt->list.next,
121 struct mr_table, list);
122
123 if (&ret->list == &net->ipv6.mr6_tables)
124 return NULL;
125 return ret;
126 }
127
/*
 * Find the table of @net whose id equals @id, or NULL if there is none.
 * Does not take the RCU read lock itself.
 */
static struct mr_table *__ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *found = NULL;
	struct mr_table *tbl;

	ip6mr_for_each_table(tbl, net) {
		if (tbl->id != id)
			continue;
		found = tbl;
		break;
	}

	return found;
}
138
/* Same lookup as __ip6mr_get_table(), wrapped in an RCU read-side section. */
static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *tbl;

	rcu_read_lock();
	tbl = __ip6mr_get_table(net, id);
	rcu_read_unlock();

	return tbl;
}
148
ip6mr_fib_lookup(struct net * net,struct flowi6 * flp6,struct mr_table ** mrt)149 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
150 struct mr_table **mrt)
151 {
152 int err;
153 struct ip6mr_result res;
154 struct fib_lookup_arg arg = {
155 .result = &res,
156 .flags = FIB_LOOKUP_NOREF,
157 };
158
159 /* update flow if oif or iif point to device enslaved to l3mdev */
160 l3mdev_update_flow(net, flowi6_to_flowi(flp6));
161
162 err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
163 flowi6_to_flowi(flp6), 0, &arg);
164 if (err < 0)
165 return err;
166 *mrt = res.mrt;
167 return 0;
168 }
169
ip6mr_rule_action(struct fib_rule * rule,struct flowi * flp,int flags,struct fib_lookup_arg * arg)170 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
171 int flags, struct fib_lookup_arg *arg)
172 {
173 struct ip6mr_result *res = arg->result;
174 struct mr_table *mrt;
175
176 switch (rule->action) {
177 case FR_ACT_TO_TBL:
178 break;
179 case FR_ACT_UNREACHABLE:
180 return -ENETUNREACH;
181 case FR_ACT_PROHIBIT:
182 return -EACCES;
183 case FR_ACT_BLACKHOLE:
184 default:
185 return -EINVAL;
186 }
187
188 arg->table = fib_rule_get_table(rule, arg);
189
190 mrt = __ip6mr_get_table(rule->fr_net, arg->table);
191 if (!mrt)
192 return -EAGAIN;
193 res->mrt = mrt;
194 return 0;
195 }
196
/* fib_rules "match" callback: unconditionally reports a match. */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp,
			    int flags)
{
	return 1;
}
201
/* fib_rules "configure" callback: nothing to set up, always succeeds. */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb,
				struct netlink_ext_ack *extack)
{
	return 0;
}
208
/* fib_rules "compare" callback: unconditionally returns 1. */
static int ip6mr_rule_compare(struct fib_rule *rule,
			      struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 1;
}
214
ip6mr_rule_fill(struct fib_rule * rule,struct sk_buff * skb,struct fib_rule_hdr * frh)215 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
216 struct fib_rule_hdr *frh)
217 {
218 frh->dst_len = 0;
219 frh->src_len = 0;
220 frh->tos = 0;
221 return 0;
222 }
223
224 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
225 .family = RTNL_FAMILY_IP6MR,
226 .rule_size = sizeof(struct ip6mr_rule),
227 .addr_size = sizeof(struct in6_addr),
228 .action = ip6mr_rule_action,
229 .match = ip6mr_rule_match,
230 .configure = ip6mr_rule_configure,
231 .compare = ip6mr_rule_compare,
232 .fill = ip6mr_rule_fill,
233 .nlgroup = RTNLGRP_IPV6_RULE,
234 .owner = THIS_MODULE,
235 };
236
ip6mr_rules_init(struct net * net)237 static int __net_init ip6mr_rules_init(struct net *net)
238 {
239 struct fib_rules_ops *ops;
240 struct mr_table *mrt;
241 int err;
242
243 ops = fib_rules_register(&ip6mr_rules_ops_template, net);
244 if (IS_ERR(ops))
245 return PTR_ERR(ops);
246
247 INIT_LIST_HEAD(&net->ipv6.mr6_tables);
248
249 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
250 if (IS_ERR(mrt)) {
251 err = PTR_ERR(mrt);
252 goto err1;
253 }
254
255 err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
256 if (err < 0)
257 goto err2;
258
259 net->ipv6.mr6_rules_ops = ops;
260 return 0;
261
262 err2:
263 rtnl_lock();
264 ip6mr_free_table(mrt);
265 rtnl_unlock();
266 err1:
267 fib_rules_unregister(ops);
268 return err;
269 }
270
ip6mr_rules_exit(struct net * net)271 static void __net_exit ip6mr_rules_exit(struct net *net)
272 {
273 struct mr_table *mrt, *next;
274
275 ASSERT_RTNL();
276 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
277 list_del(&mrt->list);
278 ip6mr_free_table(mrt);
279 }
280 fib_rules_unregister(net->ipv6.mr6_rules_ops);
281 }
282
ip6mr_rules_dump(struct net * net,struct notifier_block * nb,struct netlink_ext_ack * extack)283 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
284 struct netlink_ext_ack *extack)
285 {
286 return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
287 }
288
ip6mr_rules_seq_read(struct net * net)289 static unsigned int ip6mr_rules_seq_read(struct net *net)
290 {
291 return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
292 }
293
ip6mr_rule_default(const struct fib_rule * rule)294 bool ip6mr_rule_default(const struct fib_rule *rule)
295 {
296 return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
297 rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
298 }
299 EXPORT_SYMBOL(ip6mr_rule_default);
300 #else
/* Single-table build: the "loop" visits net->ipv6.mrt6 at most once. */
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
303
ip6mr_mr_table_iter(struct net * net,struct mr_table * mrt)304 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
305 struct mr_table *mrt)
306 {
307 if (!mrt)
308 return net->ipv6.mrt6;
309 return NULL;
310 }
311
/* Single-table build: @id is ignored, the one table is always returned. */
static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *only = net->ipv6.mrt6;

	return only;
}

/* The non-locking variant is the same function in this configuration. */
#define __ip6mr_get_table ip6mr_get_table
318
ip6mr_fib_lookup(struct net * net,struct flowi6 * flp6,struct mr_table ** mrt)319 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
320 struct mr_table **mrt)
321 {
322 *mrt = net->ipv6.mrt6;
323 return 0;
324 }
325
ip6mr_rules_init(struct net * net)326 static int __net_init ip6mr_rules_init(struct net *net)
327 {
328 struct mr_table *mrt;
329
330 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
331 if (IS_ERR(mrt))
332 return PTR_ERR(mrt);
333 net->ipv6.mrt6 = mrt;
334 return 0;
335 }
336
ip6mr_rules_exit(struct net * net)337 static void __net_exit ip6mr_rules_exit(struct net *net)
338 {
339 ASSERT_RTNL();
340 ip6mr_free_table(net->ipv6.mrt6);
341 net->ipv6.mrt6 = NULL;
342 }
343
/* Single-table build: there are no rules to dump; always returns 0. */
static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
			    struct netlink_ext_ack *extack)
{
	return 0;
}
349
/* Single-table build: no rules, so the reported sequence is always 0. */
static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	return 0;
}
354 #endif
355
ip6mr_hash_cmp(struct rhashtable_compare_arg * arg,const void * ptr)356 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
357 const void *ptr)
358 {
359 const struct mfc6_cache_cmp_arg *cmparg = arg->key;
360 struct mfc6_cache *c = (struct mfc6_cache *)ptr;
361
362 return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
363 !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
364 }
365
366 static const struct rhashtable_params ip6mr_rht_params = {
367 .head_offset = offsetof(struct mr_mfc, mnode),
368 .key_offset = offsetof(struct mfc6_cache, cmparg),
369 .key_len = sizeof(struct mfc6_cache_cmp_arg),
370 .nelem_hint = 3,
371 .obj_cmpfn = ip6mr_hash_cmp,
372 .automatic_shrinking = true,
373 };
374
/*
 * Callback passed to mr_table_alloc(): with multiple tables enabled the
 * new table is appended to the namespace's table list; otherwise this
 * does nothing.
 */
static void ip6mr_new_table_set(struct mr_table *mrt,
				struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	struct list_head *tables = &net->ipv6.mr6_tables;

	list_add_tail_rcu(&mrt->list, tables);
#endif
}
382
383 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
384 .mf6c_origin = IN6ADDR_ANY_INIT,
385 .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
386 };
387
388 static struct mr_table_ops ip6mr_mr_table_ops = {
389 .rht_params = &ip6mr_rht_params,
390 .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
391 };
392
/*
 * Return the table with id @id in @net, allocating it through
 * mr_table_alloc() when it does not exist yet.
 */
static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr_table *existing = __ip6mr_get_table(net, id);

	if (existing)
		return existing;

	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
			      ipmr_expire_process, ip6mr_new_table_set);
}
404
ip6mr_free_table(struct mr_table * mrt)405 static void ip6mr_free_table(struct mr_table *mrt)
406 {
407 timer_shutdown_sync(&mrt->ipmr_expire_timer);
408 mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
409 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
410 rhltable_destroy(&mrt->mfc_hash);
411 kfree(mrt);
412 }
413
414 #ifdef CONFIG_PROC_FS
415 /* The /proc interfaces to multicast routing
416 * /proc/ip6_mr_cache /proc/ip6_mr_vif
417 */
418
/*
 * seq_file start for the vif listing.  Takes the RCU read lock and keeps
 * it held when the default table exists (it is dropped by the stop
 * callback).  If the table is missing the lock is released here and
 * ERR_PTR(-ENOENT) is returned.
 */
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct net *net = seq_file_net(seq);
	struct mr_vif_iter *vif_iter = seq->private;
	struct mr_table *tbl;

	rcu_read_lock();
	tbl = __ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (tbl) {
		vif_iter->mrt = tbl;
		return mr_vif_seq_start(seq, pos);
	}

	rcu_read_unlock();
	return ERR_PTR(-ENOENT);
}
437
ip6mr_vif_seq_stop(struct seq_file * seq,void * v)438 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
439 __releases(RCU)
440 {
441 rcu_read_unlock();
442 }
443
ip6mr_vif_seq_show(struct seq_file * seq,void * v)444 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
445 {
446 struct mr_vif_iter *iter = seq->private;
447 struct mr_table *mrt = iter->mrt;
448
449 if (v == SEQ_START_TOKEN) {
450 seq_puts(seq,
451 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
452 } else {
453 const struct vif_device *vif = v;
454 const struct net_device *vif_dev;
455 const char *name;
456
457 vif_dev = vif_dev_read(vif);
458 name = vif_dev ? vif_dev->name : "none";
459
460 seq_printf(seq,
461 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
462 vif - mrt->vif_table,
463 name, vif->bytes_in, vif->pkt_in,
464 vif->bytes_out, vif->pkt_out,
465 vif->flags);
466 }
467 return 0;
468 }
469
470 static const struct seq_operations ip6mr_vif_seq_ops = {
471 .start = ip6mr_vif_seq_start,
472 .next = mr_vif_seq_next,
473 .stop = ip6mr_vif_seq_stop,
474 .show = ip6mr_vif_seq_show,
475 };
476
/*
 * seq_file start for the cache listing: looks up the default table and
 * delegates to mr_mfc_seq_start(), passing the unresolved-queue lock.
 * Returns ERR_PTR(-ENOENT) when the table does not exist.
 */
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct mr_table *tbl;

	tbl = ip6mr_get_table(seq_file_net(seq), RT6_TABLE_DFLT);
	if (tbl)
		return mr_mfc_seq_start(seq, pos, tbl, &mfc_unres_lock);

	return ERR_PTR(-ENOENT);
}
488
ipmr_mfc_seq_show(struct seq_file * seq,void * v)489 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
490 {
491 int n;
492
493 if (v == SEQ_START_TOKEN) {
494 seq_puts(seq,
495 "Group "
496 "Origin "
497 "Iif Pkts Bytes Wrong Oifs\n");
498 } else {
499 const struct mfc6_cache *mfc = v;
500 const struct mr_mfc_iter *it = seq->private;
501 struct mr_table *mrt = it->mrt;
502
503 seq_printf(seq, "%pI6 %pI6 %-3hd",
504 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
505 mfc->_c.mfc_parent);
506
507 if (it->cache != &mrt->mfc_unres_queue) {
508 seq_printf(seq, " %8lu %8lu %8lu",
509 mfc->_c.mfc_un.res.pkt,
510 mfc->_c.mfc_un.res.bytes,
511 mfc->_c.mfc_un.res.wrong_if);
512 for (n = mfc->_c.mfc_un.res.minvif;
513 n < mfc->_c.mfc_un.res.maxvif; n++) {
514 if (VIF_EXISTS(mrt, n) &&
515 mfc->_c.mfc_un.res.ttls[n] < 255)
516 seq_printf(seq,
517 " %2d:%-3d", n,
518 mfc->_c.mfc_un.res.ttls[n]);
519 }
520 } else {
521 /* unresolved mfc_caches don't contain
522 * pkt, bytes and wrong_if values
523 */
524 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
525 }
526 seq_putc(seq, '\n');
527 }
528 return 0;
529 }
530
531 static const struct seq_operations ipmr_mfc_seq_ops = {
532 .start = ipmr_mfc_seq_start,
533 .next = mr_mfc_seq_next,
534 .stop = mr_mfc_seq_stop,
535 .show = ipmr_mfc_seq_show,
536 };
537 #endif
538
539 #ifdef CONFIG_IPV6_PIMSM_V2
540
pim6_rcv(struct sk_buff * skb)541 static int pim6_rcv(struct sk_buff *skb)
542 {
543 struct pimreghdr *pim;
544 struct ipv6hdr *encap;
545 struct net_device *reg_dev = NULL;
546 struct net *net = dev_net(skb->dev);
547 struct mr_table *mrt;
548 struct flowi6 fl6 = {
549 .flowi6_iif = skb->dev->ifindex,
550 .flowi6_mark = skb->mark,
551 };
552 int reg_vif_num;
553
554 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
555 goto drop;
556
557 pim = (struct pimreghdr *)skb_transport_header(skb);
558 if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
559 (pim->flags & PIM_NULL_REGISTER) ||
560 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
561 sizeof(*pim), IPPROTO_PIM,
562 csum_partial((void *)pim, sizeof(*pim), 0)) &&
563 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
564 goto drop;
565
566 /* check if the inner packet is destined to mcast group */
567 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
568 sizeof(*pim));
569
570 if (!ipv6_addr_is_multicast(&encap->daddr) ||
571 encap->payload_len == 0 ||
572 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
573 goto drop;
574
575 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
576 goto drop;
577
578 /* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */
579 reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
580 if (reg_vif_num >= 0)
581 reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]);
582
583 if (!reg_dev)
584 goto drop;
585
586 skb->mac_header = skb->network_header;
587 skb_pull(skb, (u8 *)encap - skb->data);
588 skb_reset_network_header(skb);
589 skb->protocol = htons(ETH_P_IPV6);
590 skb->ip_summed = CHECKSUM_NONE;
591
592 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
593
594 netif_rx(skb);
595
596 return 0;
597 drop:
598 kfree_skb(skb);
599 return 0;
600 }
601
602 static const struct inet6_protocol pim6_protocol = {
603 .handler = pim6_rcv,
604 };
605
606 /* Service routines creating virtual interfaces: PIMREG */
607
reg_vif_xmit(struct sk_buff * skb,struct net_device * dev)608 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
609 struct net_device *dev)
610 {
611 struct net *net = dev_net(dev);
612 struct mr_table *mrt;
613 struct flowi6 fl6 = {
614 .flowi6_oif = dev->ifindex,
615 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
616 .flowi6_mark = skb->mark,
617 };
618
619 if (!pskb_inet_may_pull(skb))
620 goto tx_err;
621
622 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
623 goto tx_err;
624
625 DEV_STATS_ADD(dev, tx_bytes, skb->len);
626 DEV_STATS_INC(dev, tx_packets);
627 rcu_read_lock();
628 ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
629 MRT6MSG_WHOLEPKT);
630 rcu_read_unlock();
631 kfree_skb(skb);
632 return NETDEV_TX_OK;
633
634 tx_err:
635 DEV_STATS_INC(dev, tx_errors);
636 kfree_skb(skb);
637 return NETDEV_TX_OK;
638 }
639
/* ndo_get_iflink hook of the register device: always reports 0. */
static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}
644
645 static const struct net_device_ops reg_vif_netdev_ops = {
646 .ndo_start_xmit = reg_vif_xmit,
647 .ndo_get_iflink = reg_vif_get_iflink,
648 };
649
reg_vif_setup(struct net_device * dev)650 static void reg_vif_setup(struct net_device *dev)
651 {
652 dev->type = ARPHRD_PIMREG;
653 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
654 dev->flags = IFF_NOARP;
655 dev->netdev_ops = ®_vif_netdev_ops;
656 dev->needs_free_netdev = true;
657 dev->features |= NETIF_F_NETNS_LOCAL;
658 }
659
ip6mr_reg_vif(struct net * net,struct mr_table * mrt)660 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
661 {
662 struct net_device *dev;
663 char name[IFNAMSIZ];
664
665 if (mrt->id == RT6_TABLE_DFLT)
666 sprintf(name, "pim6reg");
667 else
668 sprintf(name, "pim6reg%u", mrt->id);
669
670 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
671 if (!dev)
672 return NULL;
673
674 dev_net_set(dev, net);
675
676 if (register_netdevice(dev)) {
677 free_netdev(dev);
678 return NULL;
679 }
680
681 if (dev_open(dev, NULL))
682 goto failure;
683
684 dev_hold(dev);
685 return dev;
686
687 failure:
688 unregister_netdevice(dev);
689 return NULL;
690 }
691 #endif
692
/*
 * Forward a vif event to mr_call_vif_notifiers() for the IP6MR family,
 * using the namespace's ipmr_seq counter.
 */
static int call_ip6mr_vif_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct vif_device *vif,
					  struct net_device *vif_dev,
					  mifi_t vif_index, u32 tb_id)
{
	unsigned int *seq = &net->ipv6.ipmr_seq;

	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     vif, vif_dev, vif_index, tb_id, seq);
}
703
/*
 * Forward an MFC event to mr_call_mfc_notifiers() for the IP6MR family,
 * using the namespace's ipmr_seq counter.
 */
static int call_ip6mr_mfc_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct mfc6_cache *mfc, u32 tb_id)
{
	unsigned int *seq = &net->ipv6.ipmr_seq;

	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     &mfc->_c, tb_id, seq);
}
711
712 /* Delete a VIF entry */
mif6_delete(struct mr_table * mrt,int vifi,int notify,struct list_head * head)713 static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
714 struct list_head *head)
715 {
716 struct vif_device *v;
717 struct net_device *dev;
718 struct inet6_dev *in6_dev;
719
720 if (vifi < 0 || vifi >= mrt->maxvif)
721 return -EADDRNOTAVAIL;
722
723 v = &mrt->vif_table[vifi];
724
725 dev = rtnl_dereference(v->dev);
726 if (!dev)
727 return -EADDRNOTAVAIL;
728
729 call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
730 FIB_EVENT_VIF_DEL, v, dev,
731 vifi, mrt->id);
732 spin_lock(&mrt_lock);
733 RCU_INIT_POINTER(v->dev, NULL);
734
735 #ifdef CONFIG_IPV6_PIMSM_V2
736 if (vifi == mrt->mroute_reg_vif_num) {
737 /* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */
738 WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
739 }
740 #endif
741
742 if (vifi + 1 == mrt->maxvif) {
743 int tmp;
744 for (tmp = vifi - 1; tmp >= 0; tmp--) {
745 if (VIF_EXISTS(mrt, tmp))
746 break;
747 }
748 WRITE_ONCE(mrt->maxvif, tmp + 1);
749 }
750
751 spin_unlock(&mrt_lock);
752
753 dev_set_allmulti(dev, -1);
754
755 in6_dev = __in6_dev_get(dev);
756 if (in6_dev) {
757 atomic_dec(&in6_dev->cnf.mc_forwarding);
758 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
759 NETCONFA_MC_FORWARDING,
760 dev->ifindex, &in6_dev->cnf);
761 }
762
763 if ((v->flags & MIFF_REGISTER) && !notify)
764 unregister_netdevice_queue(dev, head);
765
766 netdev_put(dev, &v->dev_tracker);
767 return 0;
768 }
769
ip6mr_cache_free_rcu(struct rcu_head * head)770 static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
771 {
772 struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
773
774 kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
775 }
776
ip6mr_cache_free(struct mfc6_cache * c)777 static inline void ip6mr_cache_free(struct mfc6_cache *c)
778 {
779 call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
780 }
781
782 /* Destroy an unresolved cache entry, killing queued skbs
783 and reporting error to netlink readers.
784 */
785
ip6mr_destroy_unres(struct mr_table * mrt,struct mfc6_cache * c)786 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
787 {
788 struct net *net = read_pnet(&mrt->net);
789 struct sk_buff *skb;
790
791 atomic_dec(&mrt->cache_resolve_queue_len);
792
793 while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
794 if (ipv6_hdr(skb)->version == 0) {
795 struct nlmsghdr *nlh = skb_pull(skb,
796 sizeof(struct ipv6hdr));
797 nlh->nlmsg_type = NLMSG_ERROR;
798 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
799 skb_trim(skb, nlh->nlmsg_len);
800 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
801 rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
802 } else
803 kfree_skb(skb);
804 }
805
806 ip6mr_cache_free(c);
807 }
808
809
810 /* Timer process for all the unresolved queue. */
811
ipmr_do_expire_process(struct mr_table * mrt)812 static void ipmr_do_expire_process(struct mr_table *mrt)
813 {
814 unsigned long now = jiffies;
815 unsigned long expires = 10 * HZ;
816 struct mr_mfc *c, *next;
817
818 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
819 if (time_after(c->mfc_un.unres.expires, now)) {
820 /* not yet... */
821 unsigned long interval = c->mfc_un.unres.expires - now;
822 if (interval < expires)
823 expires = interval;
824 continue;
825 }
826
827 list_del(&c->list);
828 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
829 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
830 }
831
832 if (!list_empty(&mrt->mfc_unres_queue))
833 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
834 }
835
ipmr_expire_process(struct timer_list * t)836 static void ipmr_expire_process(struct timer_list *t)
837 {
838 struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
839
840 if (!spin_trylock(&mfc_unres_lock)) {
841 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
842 return;
843 }
844
845 if (!list_empty(&mrt->mfc_unres_queue))
846 ipmr_do_expire_process(mrt);
847
848 spin_unlock(&mfc_unres_lock);
849 }
850
851 /* Fill oifs list. It is called under locked mrt_lock. */
852
ip6mr_update_thresholds(struct mr_table * mrt,struct mr_mfc * cache,unsigned char * ttls)853 static void ip6mr_update_thresholds(struct mr_table *mrt,
854 struct mr_mfc *cache,
855 unsigned char *ttls)
856 {
857 int vifi;
858
859 cache->mfc_un.res.minvif = MAXMIFS;
860 cache->mfc_un.res.maxvif = 0;
861 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
862
863 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
864 if (VIF_EXISTS(mrt, vifi) &&
865 ttls[vifi] && ttls[vifi] < 255) {
866 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
867 if (cache->mfc_un.res.minvif > vifi)
868 cache->mfc_un.res.minvif = vifi;
869 if (cache->mfc_un.res.maxvif <= vifi)
870 cache->mfc_un.res.maxvif = vifi + 1;
871 }
872 }
873 cache->mfc_un.res.lastuse = jiffies;
874 }
875
mif6_add(struct net * net,struct mr_table * mrt,struct mif6ctl * vifc,int mrtsock)876 static int mif6_add(struct net *net, struct mr_table *mrt,
877 struct mif6ctl *vifc, int mrtsock)
878 {
879 int vifi = vifc->mif6c_mifi;
880 struct vif_device *v = &mrt->vif_table[vifi];
881 struct net_device *dev;
882 struct inet6_dev *in6_dev;
883 int err;
884
885 /* Is vif busy ? */
886 if (VIF_EXISTS(mrt, vifi))
887 return -EADDRINUSE;
888
889 switch (vifc->mif6c_flags) {
890 #ifdef CONFIG_IPV6_PIMSM_V2
891 case MIFF_REGISTER:
892 /*
893 * Special Purpose VIF in PIM
894 * All the packets will be sent to the daemon
895 */
896 if (mrt->mroute_reg_vif_num >= 0)
897 return -EADDRINUSE;
898 dev = ip6mr_reg_vif(net, mrt);
899 if (!dev)
900 return -ENOBUFS;
901 err = dev_set_allmulti(dev, 1);
902 if (err) {
903 unregister_netdevice(dev);
904 dev_put(dev);
905 return err;
906 }
907 break;
908 #endif
909 case 0:
910 dev = dev_get_by_index(net, vifc->mif6c_pifi);
911 if (!dev)
912 return -EADDRNOTAVAIL;
913 err = dev_set_allmulti(dev, 1);
914 if (err) {
915 dev_put(dev);
916 return err;
917 }
918 break;
919 default:
920 return -EINVAL;
921 }
922
923 in6_dev = __in6_dev_get(dev);
924 if (in6_dev) {
925 atomic_inc(&in6_dev->cnf.mc_forwarding);
926 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
927 NETCONFA_MC_FORWARDING,
928 dev->ifindex, &in6_dev->cnf);
929 }
930
931 /* Fill in the VIF structures */
932 vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
933 vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
934 MIFF_REGISTER);
935
936 /* And finish update writing critical data */
937 spin_lock(&mrt_lock);
938 rcu_assign_pointer(v->dev, dev);
939 netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
940 #ifdef CONFIG_IPV6_PIMSM_V2
941 if (v->flags & MIFF_REGISTER)
942 WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
943 #endif
944 if (vifi + 1 > mrt->maxvif)
945 WRITE_ONCE(mrt->maxvif, vifi + 1);
946 spin_unlock(&mrt_lock);
947 call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
948 v, dev, vifi, mrt->id);
949 return 0;
950 }
951
ip6mr_cache_find(struct mr_table * mrt,const struct in6_addr * origin,const struct in6_addr * mcastgrp)952 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
953 const struct in6_addr *origin,
954 const struct in6_addr *mcastgrp)
955 {
956 struct mfc6_cache_cmp_arg arg = {
957 .mf6c_origin = *origin,
958 .mf6c_mcastgrp = *mcastgrp,
959 };
960
961 return mr_mfc_find(mrt, &arg);
962 }
963
964 /* Look for a (*,G) entry */
ip6mr_cache_find_any(struct mr_table * mrt,struct in6_addr * mcastgrp,mifi_t mifi)965 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
966 struct in6_addr *mcastgrp,
967 mifi_t mifi)
968 {
969 struct mfc6_cache_cmp_arg arg = {
970 .mf6c_origin = in6addr_any,
971 .mf6c_mcastgrp = *mcastgrp,
972 };
973
974 if (ipv6_addr_any(mcastgrp))
975 return mr_mfc_find_any_parent(mrt, mifi);
976 return mr_mfc_find_any(mrt, mifi, &arg);
977 }
978
979 /* Look for a (S,G,iif) entry if parent != -1 */
980 static struct mfc6_cache *
ip6mr_cache_find_parent(struct mr_table * mrt,const struct in6_addr * origin,const struct in6_addr * mcastgrp,int parent)981 ip6mr_cache_find_parent(struct mr_table *mrt,
982 const struct in6_addr *origin,
983 const struct in6_addr *mcastgrp,
984 int parent)
985 {
986 struct mfc6_cache_cmp_arg arg = {
987 .mf6c_origin = *origin,
988 .mf6c_mcastgrp = *mcastgrp,
989 };
990
991 return mr_mfc_find_parent(mrt, &arg, parent);
992 }
993
994 /* Allocate a multicast cache entry */
ip6mr_cache_alloc(void)995 static struct mfc6_cache *ip6mr_cache_alloc(void)
996 {
997 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
998 if (!c)
999 return NULL;
1000 c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
1001 c->_c.mfc_un.res.minvif = MAXMIFS;
1002 c->_c.free = ip6mr_cache_free_rcu;
1003 refcount_set(&c->_c.mfc_un.res.refcount, 1);
1004 return c;
1005 }
1006
ip6mr_cache_alloc_unres(void)1007 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1008 {
1009 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1010 if (!c)
1011 return NULL;
1012 skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
1013 c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
1014 return c;
1015 }
1016
1017 /*
1018 * A cache entry has gone into a resolved state from queued
1019 */
1020
ip6mr_cache_resolve(struct net * net,struct mr_table * mrt,struct mfc6_cache * uc,struct mfc6_cache * c)1021 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
1022 struct mfc6_cache *uc, struct mfc6_cache *c)
1023 {
1024 struct sk_buff *skb;
1025
1026 /*
1027 * Play the pending entries through our router
1028 */
1029
1030 while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1031 if (ipv6_hdr(skb)->version == 0) {
1032 struct nlmsghdr *nlh = skb_pull(skb,
1033 sizeof(struct ipv6hdr));
1034
1035 if (mr_fill_mroute(mrt, skb, &c->_c,
1036 nlmsg_data(nlh)) > 0) {
1037 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1038 } else {
1039 nlh->nlmsg_type = NLMSG_ERROR;
1040 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1041 skb_trim(skb, nlh->nlmsg_len);
1042 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1043 }
1044 rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1045 } else {
1046 rcu_read_lock();
1047 ip6_mr_forward(net, mrt, skb->dev, skb, c);
1048 rcu_read_unlock();
1049 }
1050 }
1051 }
1052
/*
 *	Bounce a cache query up to pim6sd and netlink.
 *
 *	Builds a new skb that carries a struct mrt6msg describing @pkt and
 *	queues it on the table's mroute socket. @pkt itself is not freed
 *	here.
 *
 *	@mrt:    table whose mroute socket receives the report
 *	@pkt:    packet that triggered the report
 *	@mifi:   interface index stored in im6_mif (MRT6MSG_WHOLEPKT uses
 *	         mrt->mroute_reg_vif_num instead)
 *	@assert: message type stored in im6_msgtype
 *
 *	Returns -ENOBUFS if no skb could be obtained, -EINVAL if the table
 *	has no mroute socket, otherwise the result of sock_queue_rcv_skb().
 *
 *	Called under rcu_read_lock()
 */

static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sock *mroute6_sk;
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	/* Whole-packet reports start from @pkt reallocated with room for
	 * the mrt6msg header in front of the network header.
	 */
	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		__skb_pull(skb, skb_network_offset(pkt));

		/* The mrt6msg header goes directly in front of the packet */
		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		if (assert == MRT6MSG_WRMIFWHOLE)
			msg->im6_mif = mifi;
		else
			msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num);
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
		/*
		 *	Copy the IP header
		 */

		skb_put(skb, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb);
		skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

		/*
		 *	Add our header
		 */
		skb_put(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);

		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		msg->im6_mif = mifi;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb_dst_set(skb, dst_clone(skb_dst(pkt)));
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	/* Without a listening daemon there is nobody to report to */
	mroute6_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute6_sk) {
		kfree_skb(skb);
		return -EINVAL;
	}

	mrt6msg_netlink_event(mrt, skb);

	/* Deliver to user space multicast routing algorithms */
	ret = sock_queue_rcv_skb(mroute6_sk, skb);

	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}
1153
1154 /* Queue a packet for resolution. It gets locked cache entry! */
ip6mr_cache_unresolved(struct mr_table * mrt,mifi_t mifi,struct sk_buff * skb,struct net_device * dev)1155 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
1156 struct sk_buff *skb, struct net_device *dev)
1157 {
1158 struct mfc6_cache *c;
1159 bool found = false;
1160 int err;
1161
1162 spin_lock_bh(&mfc_unres_lock);
1163 list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1164 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1165 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1166 found = true;
1167 break;
1168 }
1169 }
1170
1171 if (!found) {
1172 /*
1173 * Create a new entry if allowable
1174 */
1175
1176 c = ip6mr_cache_alloc_unres();
1177 if (!c) {
1178 spin_unlock_bh(&mfc_unres_lock);
1179
1180 kfree_skb(skb);
1181 return -ENOBUFS;
1182 }
1183
1184 /* Fill in the new cache entry */
1185 c->_c.mfc_parent = -1;
1186 c->mf6c_origin = ipv6_hdr(skb)->saddr;
1187 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1188
1189 /*
1190 * Reflect first query at pim6sd
1191 */
1192 err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1193 if (err < 0) {
1194 /* If the report failed throw the cache entry
1195 out - Brad Parker
1196 */
1197 spin_unlock_bh(&mfc_unres_lock);
1198
1199 ip6mr_cache_free(c);
1200 kfree_skb(skb);
1201 return err;
1202 }
1203
1204 atomic_inc(&mrt->cache_resolve_queue_len);
1205 list_add(&c->_c.list, &mrt->mfc_unres_queue);
1206 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1207
1208 ipmr_do_expire_process(mrt);
1209 }
1210
1211 /* See if we can append the packet */
1212 if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1213 kfree_skb(skb);
1214 err = -ENOBUFS;
1215 } else {
1216 if (dev) {
1217 skb->dev = dev;
1218 skb->skb_iif = dev->ifindex;
1219 }
1220 skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1221 err = 0;
1222 }
1223
1224 spin_unlock_bh(&mfc_unres_lock);
1225 return err;
1226 }
1227
1228 /*
1229 * MFC6 cache manipulation by user space
1230 */
1231
ip6mr_mfc_delete(struct mr_table * mrt,struct mf6cctl * mfc,int parent)1232 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1233 int parent)
1234 {
1235 struct mfc6_cache *c;
1236
1237 /* The entries are added/deleted only under RTNL */
1238 rcu_read_lock();
1239 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1240 &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1241 rcu_read_unlock();
1242 if (!c)
1243 return -ENOENT;
1244 rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1245 list_del_rcu(&c->_c.list);
1246
1247 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1248 FIB_EVENT_ENTRY_DEL, c, mrt->id);
1249 mr6_netlink_event(mrt, c, RTM_DELROUTE);
1250 mr_cache_put(&c->_c);
1251 return 0;
1252 }
1253
ip6mr_device_event(struct notifier_block * this,unsigned long event,void * ptr)1254 static int ip6mr_device_event(struct notifier_block *this,
1255 unsigned long event, void *ptr)
1256 {
1257 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1258 struct net *net = dev_net(dev);
1259 struct mr_table *mrt;
1260 struct vif_device *v;
1261 int ct;
1262
1263 if (event != NETDEV_UNREGISTER)
1264 return NOTIFY_DONE;
1265
1266 ip6mr_for_each_table(mrt, net) {
1267 v = &mrt->vif_table[0];
1268 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1269 if (rcu_access_pointer(v->dev) == dev)
1270 mif6_delete(mrt, ct, 1, NULL);
1271 }
1272 }
1273
1274 return NOTIFY_DONE;
1275 }
1276
ip6mr_seq_read(struct net * net)1277 static unsigned int ip6mr_seq_read(struct net *net)
1278 {
1279 ASSERT_RTNL();
1280
1281 return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
1282 }
1283
ip6mr_dump(struct net * net,struct notifier_block * nb,struct netlink_ext_ack * extack)1284 static int ip6mr_dump(struct net *net, struct notifier_block *nb,
1285 struct netlink_ext_ack *extack)
1286 {
1287 return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1288 ip6mr_mr_table_iter, extack);
1289 }
1290
/* Netdevice notifier block; every event is routed to ip6mr_device_event() */
static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
1294
/* Template handed to fib_notifier_ops_register() by ip6mr_notifier_init()
 * for each network namespace.
 */
static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.fib_seq_read	= ip6mr_seq_read,
	.fib_dump	= ip6mr_dump,
	.owner		= THIS_MODULE,
};
1301
ip6mr_notifier_init(struct net * net)1302 static int __net_init ip6mr_notifier_init(struct net *net)
1303 {
1304 struct fib_notifier_ops *ops;
1305
1306 net->ipv6.ipmr_seq = 0;
1307
1308 ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1309 if (IS_ERR(ops))
1310 return PTR_ERR(ops);
1311
1312 net->ipv6.ip6mr_notifier_ops = ops;
1313
1314 return 0;
1315 }
1316
ip6mr_notifier_exit(struct net * net)1317 static void __net_exit ip6mr_notifier_exit(struct net *net)
1318 {
1319 fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1320 net->ipv6.ip6mr_notifier_ops = NULL;
1321 }
1322
/* Setup for IP multicast routing: per-network-namespace initialisation.
 *
 * Registers the FIB notifier ops, initialises the ip6mr rules and, with
 * CONFIG_PROC_FS, creates the "ip6_mr_vif" and "ip6_mr_cache" proc entries.
 * On failure the steps that already succeeded are undone in reverse order.
 *
 * Returns 0 on success or a negative errno (-ENOMEM if a proc entry could
 * not be created).
 */
static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_notifier_init(net);
	if (err)
		return err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto ip6mr_rules_fail;

#ifdef CONFIG_PROC_FS
	/* proc_create_net() only reports failure as NULL; preset the errno */
	err = -ENOMEM;
	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
			sizeof(struct mr_vif_iter)))
		goto proc_vif_fail;
	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
			sizeof(struct mr_mfc_iter)))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	/* ip6mr_rules_exit() is called with RTNL held here */
	rtnl_lock();
	ip6mr_rules_exit(net);
	rtnl_unlock();
#endif
ip6mr_rules_fail:
	ip6mr_notifier_exit(net);
	return err;
}
1360
/* Per-namespace teardown: remove the proc entries (CONFIG_PROC_FS only) and
 * unregister the FIB notifier ops. The ip6mr rules are not touched here;
 * ip6mr_net_exit_batch() calls ip6mr_rules_exit() for them.
 */
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_notifier_exit(net);
}
1369
ip6mr_net_exit_batch(struct list_head * net_list)1370 static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list)
1371 {
1372 struct net *net;
1373
1374 rtnl_lock();
1375 list_for_each_entry(net, net_list, exit_list)
1376 ip6mr_rules_exit(net);
1377 rtnl_unlock();
1378 }
1379
/* Per-network-namespace hooks; registered by ip6_mr_init() through
 * register_pernet_subsys().
 */
static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
	.exit_batch = ip6mr_net_exit_batch,
};
1385
ip6_mr_init(void)1386 int __init ip6_mr_init(void)
1387 {
1388 int err;
1389
1390 mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1391 sizeof(struct mfc6_cache),
1392 0, SLAB_HWCACHE_ALIGN,
1393 NULL);
1394 if (!mrt_cachep)
1395 return -ENOMEM;
1396
1397 err = register_pernet_subsys(&ip6mr_net_ops);
1398 if (err)
1399 goto reg_pernet_fail;
1400
1401 err = register_netdevice_notifier(&ip6_mr_notifier);
1402 if (err)
1403 goto reg_notif_fail;
1404 #ifdef CONFIG_IPV6_PIMSM_V2
1405 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1406 pr_err("%s: can't add PIM protocol\n", __func__);
1407 err = -EAGAIN;
1408 goto add_proto_fail;
1409 }
1410 #endif
1411 err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
1412 ip6mr_rtm_getroute, ip6mr_rtm_dumproute, 0);
1413 if (err == 0)
1414 return 0;
1415
1416 #ifdef CONFIG_IPV6_PIMSM_V2
1417 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1418 add_proto_fail:
1419 unregister_netdevice_notifier(&ip6_mr_notifier);
1420 #endif
1421 reg_notif_fail:
1422 unregister_pernet_subsys(&ip6mr_net_ops);
1423 reg_pernet_fail:
1424 kmem_cache_destroy(mrt_cachep);
1425 return err;
1426 }
1427
/* Undo ip6_mr_init(): unregister the rtnetlink handler, the PIM protocol
 * handler (CONFIG_IPV6_PIMSM_V2 only), the netdevice notifier and the
 * per-namespace operations, then destroy the cache-entry slab.
 */
void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}
1438
/* Add or update the resolved cache entry described by @mfc in @mrt.
 *
 * If ip6mr_cache_find_parent() already finds an entry, its parent and
 * thresholds are updated in place under mrt_lock and a REPLACE event is
 * sent. Otherwise a new entry is allocated, hashed and linked, and any
 * unresolved entry for the same origin/group is removed from the
 * unresolved queue so that its pending packets can be replayed through
 * the new entry.
 *
 * @mrtsock: when zero, the entry is flagged MFC_STATIC
 * @parent:  passed to ip6mr_cache_find_parent() for the lookup
 *
 * Returns 0 on success, -ENFILE if mf6cc_parent is out of range, -EINVAL
 * if the group is neither the any-address nor multicast, -ENOMEM if no
 * entry could be allocated, or the rhltable insertion error.
 */
static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	unsigned char ttls[MAXMIFS];
	struct mfc6_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int i, err;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	/* Threshold 255 for every interface, 1 for those set in the ifset */
	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (c) {
		/* Existing entry: update it in place */
		spin_lock(&mrt_lock);
		c->_c.mfc_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, &c->_c, ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		spin_unlock(&mrt_lock);
		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
					       c, mrt->id);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	/* A new entry's group must be the any-address or a multicast one */
	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->_c.mfc_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, &c->_c, ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ip6mr_rht_params);
	if (err) {
		pr_err("ip6mr: rhtable insert error %d\n", err);
		ip6mr_cache_free(c);
		return err;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);

	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc6_cache *)_uc;
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	/* Nothing left unresolved: the expiry timer is no longer needed */
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		/* Replay the queued packets outside mfc_unres_lock */
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
				       c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}
1527
1528 /*
1529 * Close the multicast socket, and clear the vif tables etc
1530 */
1531
mroute_clean_tables(struct mr_table * mrt,int flags)1532 static void mroute_clean_tables(struct mr_table *mrt, int flags)
1533 {
1534 struct mr_mfc *c, *tmp;
1535 LIST_HEAD(list);
1536 int i;
1537
1538 /* Shut down all active vif entries */
1539 if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
1540 for (i = 0; i < mrt->maxvif; i++) {
1541 if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
1542 !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
1543 (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
1544 continue;
1545 mif6_delete(mrt, i, 0, &list);
1546 }
1547 unregister_netdevice_many(&list);
1548 }
1549
1550 /* Wipe the cache */
1551 if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
1552 list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1553 if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
1554 (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
1555 continue;
1556 rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1557 list_del_rcu(&c->list);
1558 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1559 FIB_EVENT_ENTRY_DEL,
1560 (struct mfc6_cache *)c, mrt->id);
1561 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
1562 mr_cache_put(c);
1563 }
1564 }
1565
1566 if (flags & MRT6_FLUSH_MFC) {
1567 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1568 spin_lock_bh(&mfc_unres_lock);
1569 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1570 list_del(&c->list);
1571 mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1572 RTM_DELROUTE);
1573 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1574 }
1575 spin_unlock_bh(&mfc_unres_lock);
1576 }
1577 }
1578 }
1579
ip6mr_sk_init(struct mr_table * mrt,struct sock * sk)1580 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1581 {
1582 int err = 0;
1583 struct net *net = sock_net(sk);
1584
1585 rtnl_lock();
1586 spin_lock(&mrt_lock);
1587 if (rtnl_dereference(mrt->mroute_sk)) {
1588 err = -EADDRINUSE;
1589 } else {
1590 rcu_assign_pointer(mrt->mroute_sk, sk);
1591 sock_set_flag(sk, SOCK_RCU_FREE);
1592 atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
1593 }
1594 spin_unlock(&mrt_lock);
1595
1596 if (!err)
1597 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1598 NETCONFA_MC_FORWARDING,
1599 NETCONFA_IFINDEX_ALL,
1600 net->ipv6.devconf_all);
1601 rtnl_unlock();
1602
1603 return err;
1604 }
1605
ip6mr_sk_done(struct sock * sk)1606 int ip6mr_sk_done(struct sock *sk)
1607 {
1608 struct net *net = sock_net(sk);
1609 struct ipv6_devconf *devconf;
1610 struct mr_table *mrt;
1611 int err = -EACCES;
1612
1613 if (sk->sk_type != SOCK_RAW ||
1614 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1615 return err;
1616
1617 devconf = net->ipv6.devconf_all;
1618 if (!devconf || !atomic_read(&devconf->mc_forwarding))
1619 return err;
1620
1621 rtnl_lock();
1622 ip6mr_for_each_table(mrt, net) {
1623 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1624 spin_lock(&mrt_lock);
1625 RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1626 /* Note that mroute_sk had SOCK_RCU_FREE set,
1627 * so the RCU grace period before sk freeing
1628 * is guaranteed by sk_destruct()
1629 */
1630 atomic_dec(&devconf->mc_forwarding);
1631 spin_unlock(&mrt_lock);
1632 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1633 NETCONFA_MC_FORWARDING,
1634 NETCONFA_IFINDEX_ALL,
1635 net->ipv6.devconf_all);
1636
1637 mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC);
1638 err = 0;
1639 break;
1640 }
1641 }
1642 rtnl_unlock();
1643
1644 return err;
1645 }
1646
mroute6_is_socket(struct net * net,struct sk_buff * skb)1647 bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1648 {
1649 struct mr_table *mrt;
1650 struct flowi6 fl6 = {
1651 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
1652 .flowi6_oif = skb->dev->ifindex,
1653 .flowi6_mark = skb->mark,
1654 };
1655
1656 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1657 return NULL;
1658
1659 return rcu_access_pointer(mrt->mroute_sk);
1660 }
1661 EXPORT_SYMBOL(mroute6_is_socket);
1662
1663 /*
1664 * Socket options and virtual interface manipulation. The whole
1665 * virtual interface system is a complete heap, but unfortunately
1666 * that's how BSD mrouted happens to think. Maybe one day with a proper
1667 * MOSPF/PIM router set up we can clean this up.
1668 */
1669
/* setsockopt() handler for the MRT6_* multicast routing options.
 *
 * Only raw ICMPv6 sockets are accepted (-EOPNOTSUPP otherwise). The table
 * is the one recorded on the socket, or RT6_TABLE_DFLT if none is set
 * (-ENOENT if it does not exist). Every option except MRT6_INIT requires
 * the caller to be the table's mroute socket or to hold CAP_NET_ADMIN in
 * the namespace's user namespace (-EACCES otherwise).
 *
 * Options handled here:
 *   MRT6_INIT                     attach @sk as the table's mroute socket
 *   MRT6_DONE                     detach @sk again
 *   MRT6_ADD_MIF / MRT6_DEL_MIF   add / delete a vif
 *   MRT6_ADD_MFC / MRT6_DEL_MFC   add / delete a cache entry (parent -1)
 *   MRT6_*_MFC_PROXY              same, with parent taken from the request
 *   MRT6_FLUSH                    flush according to the supplied flags
 *   MRT6_ASSERT                   set mroute_do_assert
 *   MRT6_PIM                      set the PIM-related table switches
 *                                 (CONFIG_IPV6_PIMSM_V2)
 *   MRT6_TABLE                    select/create the table used by @sk
 *                                 (CONFIG_IPV6_MROUTE_MULTIPLE_TABLES)
 *
 * Returns 0 or the handler's result on success paths, -EINVAL for a bad
 * option length, -EFAULT if the option value cannot be copied in, and
 * -ENOPROTOOPT for unknown options.
 */
int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
			  unsigned int optlen)
{
	int ret, parent = 0;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_sockptr(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif,
			       sk == rtnl_dereference(mrt->mroute_sk));
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, 0, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		/* Non-proxy variants always use parent -1 */
		parent = -1;
		fallthrough;
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_sockptr(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		/* parent is still 0 only for the _PROXY variants */
		if (parent == 0)
			parent = mfc.mf6cc_parent;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk ==
					    rtnl_dereference(mrt->mroute_sk),
					    parent);
		rtnl_unlock();
		return ret;

	case MRT6_FLUSH:
	{
		int flags;

		if (optlen != sizeof(flags))
			return -EINVAL;
		if (copy_from_sockptr(&flags, optval, sizeof(flags)))
			return -EFAULT;
		rtnl_lock();
		mroute_clean_tables(mrt, flags);
		rtnl_unlock();
		return 0;
	}

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;
		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		bool do_wrmifwhole;
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;

		/* Remember the exact request before v is reduced to 0/1 */
		do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE);
		v = !!v;
		rtnl_lock();
		ret = 0;
		/* Switches are only rewritten when the PIM state toggles */
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
			mrt->mroute_do_wrvifwhole = do_wrmifwhole;
		}
		rtnl_unlock();
		return ret;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		/* An active mroute socket may not switch tables */
		if (sk == rcu_access_pointer(mrt->mroute_sk))
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		mrt = ip6mr_new_table(net, v);
		if (IS_ERR(mrt))
			ret = PTR_ERR(mrt);
		else
			raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
1842
/*
 *	getsockopt() support for the multicast routing system.
 */
1846
/* getsockopt() handler for MRT6_VERSION, MRT6_PIM (CONFIG_IPV6_PIMSM_V2)
 * and MRT6_ASSERT.
 *
 * Only raw ICMPv6 sockets are accepted (-EOPNOTSUPP otherwise); the table
 * is the one recorded on the socket or RT6_TABLE_DFLT (-ENOENT if absent).
 * The caller-supplied length is clamped to sizeof(int), written back, and
 * that many bytes of the option value are copied out.
 *
 * Returns 0 on success, -ENOPROTOOPT for unknown options, -EINVAL for a
 * negative length and -EFAULT when user memory cannot be accessed.
 */
int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
			  sockptr_t optlen)
{
	struct net *net = sock_net(sk);
	struct mr_table *mrt;
	int user_len;
	int val;

	if (sk->sk_type != SOCK_RAW)
		return -EOPNOTSUPP;
	if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (copy_from_sockptr(&user_len, optlen, sizeof(int)))
		return -EFAULT;

	/* Never hand back more than the option value itself */
	if (user_len > (int)sizeof(int))
		user_len = sizeof(int);
	if (user_len < 0)
		return -EINVAL;

	if (copy_to_sockptr(optlen, &user_len, sizeof(int)))
		return -EFAULT;
	if (copy_to_sockptr(optval, &val, user_len))
		return -EFAULT;

	return 0;
}
1892
1893 /*
1894 * The IP multicast ioctl support routines.
1895 */
ip6mr_ioctl(struct sock * sk,int cmd,void * arg)1896 int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
1897 {
1898 struct sioc_sg_req6 *sr;
1899 struct sioc_mif_req6 *vr;
1900 struct vif_device *vif;
1901 struct mfc6_cache *c;
1902 struct net *net = sock_net(sk);
1903 struct mr_table *mrt;
1904
1905 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1906 if (!mrt)
1907 return -ENOENT;
1908
1909 switch (cmd) {
1910 case SIOCGETMIFCNT_IN6:
1911 vr = (struct sioc_mif_req6 *)arg;
1912 if (vr->mifi >= mrt->maxvif)
1913 return -EINVAL;
1914 vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif);
1915 rcu_read_lock();
1916 vif = &mrt->vif_table[vr->mifi];
1917 if (VIF_EXISTS(mrt, vr->mifi)) {
1918 vr->icount = READ_ONCE(vif->pkt_in);
1919 vr->ocount = READ_ONCE(vif->pkt_out);
1920 vr->ibytes = READ_ONCE(vif->bytes_in);
1921 vr->obytes = READ_ONCE(vif->bytes_out);
1922 rcu_read_unlock();
1923 return 0;
1924 }
1925 rcu_read_unlock();
1926 return -EADDRNOTAVAIL;
1927 case SIOCGETSGCNT_IN6:
1928 sr = (struct sioc_sg_req6 *)arg;
1929
1930 rcu_read_lock();
1931 c = ip6mr_cache_find(mrt, &sr->src.sin6_addr,
1932 &sr->grp.sin6_addr);
1933 if (c) {
1934 sr->pktcnt = c->_c.mfc_un.res.pkt;
1935 sr->bytecnt = c->_c.mfc_un.res.bytes;
1936 sr->wrong_if = c->_c.mfc_un.res.wrong_if;
1937 rcu_read_unlock();
1938 return 0;
1939 }
1940 rcu_read_unlock();
1941 return -EADDRNOTAVAIL;
1942 default:
1943 return -ENOIOCTLCMD;
1944 }
1945 }
1946
1947 #ifdef CONFIG_COMPAT
/* Request layout used by ip6mr_compat_ioctl() for SIOCGETSGCNT_IN6; the
 * counters are compat_ulong_t (presumably mirroring struct sioc_sg_req6
 * for compat callers - confirm against the uapi header).
 */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};
1955
/* Request layout used by ip6mr_compat_ioctl() for SIOCGETMIFCNT_IN6; the
 * counters are compat_ulong_t (presumably mirroring struct sioc_mif_req6
 * for compat callers - confirm against the uapi header).
 */
struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};
1963
/* Compat ioctl handler for the multicast routing counters.
 *
 * Handles the same two commands as ip6mr_ioctl() but copies the request
 * in from, and the result back out to, user memory at @arg using the
 * compat_* structure layouts. The copy back to user space is done after
 * the RCU read-side section has ended.
 *
 * Returns 0 on success, -ENOENT if the socket's table does not exist,
 * -EFAULT if user memory cannot be accessed, -EINVAL for an out-of-range
 * vif index, -EADDRNOTAVAIL if the vif or cache entry does not exist, and
 * -ENOIOCTLCMD for any other command.
 */
int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct net *net = sock_net(sk);
	struct compat_sioc_mif_req6 vr;
	struct compat_sioc_sg_req6 sr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct mr_table *mrt;
	bool found;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);

		rcu_read_lock();
		vif = &mrt->vif_table[vr.mifi];
		found = VIF_EXISTS(mrt, vr.mifi);
		if (found) {
			vr.icount = READ_ONCE(vif->pkt_in);
			vr.ocount = READ_ONCE(vif->pkt_out);
			vr.ibytes = READ_ONCE(vif->bytes_in);
			vr.obytes = READ_ONCE(vif->bytes_out);
		}
		rcu_read_unlock();

		if (!found)
			return -EADDRNOTAVAIL;
		if (copy_to_user(arg, &vr, sizeof(vr)))
			return -EFAULT;
		return 0;

	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		found = c != NULL;
		if (found) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
		}
		rcu_read_unlock();

		if (!found)
			return -EADDRNOTAVAIL;
		if (copy_to_user(arg, &sr, sizeof(sr)))
			return -EFAULT;
		return 0;

	default:
		return -ENOIOCTLCMD;
	}
}
2021 #endif
2022
/* Final step of ip6mr_forward2(), passed to NF_HOOK() as the continuation:
 * bump the OUTFORWDATAGRAMS counter for the skb's destination device and
 * pass the skb to dst_output(), whose result is returned.
 */
static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_OUTFORWDATAGRAMS);
	return dst_output(net, sk, skb);
}
2029
2030 /*
2031 * Processing handlers for ip6mr_forward
2032 */
2033
/* Send @skb out of vif number @vifi of @mrt. The skb is always consumed.
 *
 * With CONFIG_IPV6_PIMSM_V2, a vif flagged MIFF_REGISTER does not transmit:
 * the packet is accounted and reported to user space as MRT6MSG_WHOLEPKT,
 * then freed. Otherwise a route out of the vif's link is looked up, the
 * hop limit is decremented and the packet is run through the
 * NF_INET_FORWARD netfilter hook.
 *
 * Returns the NF_HOOK() result when the packet is handed on, 0 when it is
 * dropped here.
 */
static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
			  struct sk_buff *skb, int vifi)
{
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *vif_dev;
	struct ipv6hdr *ipv6h;
	struct dst_entry *dst;
	struct flowi6 fl6;

	/* The vif may have lost its device; drop the packet in that case */
	vif_dev = vif_dev_read(vif);
	if (!vif_dev)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
		WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
		DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
		DEV_STATS_INC(vif_dev, tx_packets);
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	/* Replace whatever route the skb carried with the one just found */
	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	skb->dev = vif_dev;
	WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
	WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev)))
		goto out_free;

	/* Re-read the header pointer after skb_cow() before writing to it */
	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, vif_dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
2107
2108 /* Called with rcu_read_lock() */
ip6mr_find_vif(struct mr_table * mrt,struct net_device * dev)2109 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2110 {
2111 int ct;
2112
2113 /* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */
2114 for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
2115 if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
2116 break;
2117 }
2118 return ct;
2119 }
2120
/* Called under rcu_read_lock()
 *
 * Forward @skb, which arrived on @dev, according to resolved cache entry
 * @c. The skb is always consumed.
 *
 * The entry's packet, byte and last-use counters are updated first. Unless
 * the (*,G) proxy check allows it, a packet that did not arrive on the
 * entry's parent vif is counted as wrong_if, possibly reported to user
 * space (MRT6MSG_WRONGMIF and, if enabled, MRT6MSG_WRMIFWHOLE) and
 * dropped. Otherwise a clone is sent out of every eligible vif except the
 * last one, which receives the original skb.
 */
static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct net_device *dev, struct sk_buff *skb,
			   struct mfc6_cache *c)
{
	int psend = -1;
	int vif, ct;
	int true_vifi = ip6mr_find_vif(mrt, dev);

	vif = c->_c.mfc_parent;
	c->_c.mfc_un.res.pkt++;
	c->_c.mfc_un.res.bytes += skb->len;
	c->_c.mfc_un.res.lastuse = jiffies;

	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
			goto forward;
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
		c->_c.mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       c->_c.mfc_un.res.last_assert +
			       MFC_ASSERT_THRESH)) {
			/* Rate limit: at most one report per MFC_ASSERT_THRESH */
			c->_c.mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
			if (mrt->mroute_do_wrvifwhole)
				ip6mr_cache_report(mrt, skb, true_vifi,
						   MRT6MSG_WRMIFWHOLE);
		}
		goto dont_forward;
	}

forward:
	/* Account the packet as input on the entry's parent vif */
	WRITE_ONCE(mrt->vif_table[vif].pkt_in,
		   mrt->vif_table[vif].pkt_in + 1);
	WRITE_ONCE(mrt->vif_table[vif].bytes_in,
		   mrt->vif_table[vif].bytes_in + skb->len);

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&c->mf6c_origin) &&
	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != c->_c.mfc_parent &&
		    ipv6_hdr(skb)->hop_limit >
				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = c->_c.mfc_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	/* psend trails one eligible vif behind ct, so that the final
	 * eligible vif can be given the original skb instead of a clone.
	 */
	for (ct = c->_c.mfc_un.res.maxvif - 1;
	     ct >= c->_c.mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}
2219
2220
2221 /*
2222 * Multicast packets for forwarding arrive here
2223 */
2224
ip6_mr_input(struct sk_buff * skb)2225 int ip6_mr_input(struct sk_buff *skb)
2226 {
2227 struct mfc6_cache *cache;
2228 struct net *net = dev_net(skb->dev);
2229 struct mr_table *mrt;
2230 struct flowi6 fl6 = {
2231 .flowi6_iif = skb->dev->ifindex,
2232 .flowi6_mark = skb->mark,
2233 };
2234 int err;
2235 struct net_device *dev;
2236
2237 /* skb->dev passed in is the master dev for vrfs.
2238 * Get the proper interface that does have a vif associated with it.
2239 */
2240 dev = skb->dev;
2241 if (netif_is_l3_master(skb->dev)) {
2242 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2243 if (!dev) {
2244 kfree_skb(skb);
2245 return -ENODEV;
2246 }
2247 }
2248
2249 err = ip6mr_fib_lookup(net, &fl6, &mrt);
2250 if (err < 0) {
2251 kfree_skb(skb);
2252 return err;
2253 }
2254
2255 cache = ip6mr_cache_find(mrt,
2256 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2257 if (!cache) {
2258 int vif = ip6mr_find_vif(mrt, dev);
2259
2260 if (vif >= 0)
2261 cache = ip6mr_cache_find_any(mrt,
2262 &ipv6_hdr(skb)->daddr,
2263 vif);
2264 }
2265
2266 /*
2267 * No usable cache entry
2268 */
2269 if (!cache) {
2270 int vif;
2271
2272 vif = ip6mr_find_vif(mrt, dev);
2273 if (vif >= 0) {
2274 int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
2275
2276 return err;
2277 }
2278 kfree_skb(skb);
2279 return -ENODEV;
2280 }
2281
2282 ip6_mr_forward(net, mrt, dev, skb, cache);
2283
2284 return 0;
2285 }
2286
/*
 * Fill multicast route information for the route attached to @skb.
 *
 * @net:    network namespace whose default mroute table is consulted
 * @skb:    netlink reply being built; its dst supplies source/destination
 * @rtm:    rtmsg header of the reply, passed to mr_fill_mroute()
 * @portid: netlink port id stored in the queued query skb
 *
 * If a cache entry exists (exact match, or the wildcard entry for the mif
 * of skb->dev) the result of mr_fill_mroute() is returned.  Otherwise a
 * bare IPv6 header carrying the source and destination is built in a new
 * skb and handed to ip6mr_cache_unresolved().
 *
 * Return: -ENOENT if the default table does not exist, -ENODEV if @skb has
 * no device or the device has no mif, -ENOMEM on allocation failure,
 * otherwise the result of mr_fill_mroute() or ip6mr_cache_unresolved().
 */
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
		    u32 portid)
{
	struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
	struct net_device *dev = skb->dev;
	struct mfc6_cache *cache;
	struct sk_buff *query;
	struct mr_table *mrt;
	struct ipv6hdr *hdr;
	int vif;
	int err;

	rcu_read_lock();

	mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt) {
		err = -ENOENT;
		goto unlock;
	}

	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && dev) {
		vif = ip6mr_find_vif(mrt, dev);
		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (cache) {
		err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
		goto unlock;
	}

	/* No entry: the query can only be queued against a known mif. */
	if (!dev) {
		err = -ENODEV;
		goto unlock;
	}
	vif = ip6mr_find_vif(mrt, dev);
	if (vif < 0) {
		err = -ENODEV;
		goto unlock;
	}

	/* really correct? */
	query = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
	if (!query) {
		err = -ENOMEM;
		goto unlock;
	}

	NETLINK_CB(query).portid = portid;
	skb_reset_transport_header(query);

	skb_put(query, sizeof(struct ipv6hdr));
	skb_reset_network_header(query);

	/* Header with version 0 and no payload: only the addresses matter. */
	hdr = ipv6_hdr(query);
	hdr->version = 0;
	hdr->priority = 0;
	hdr->flow_lbl[0] = 0;
	hdr->flow_lbl[1] = 0;
	hdr->flow_lbl[2] = 0;
	hdr->payload_len = 0;
	hdr->nexthdr = IPPROTO_NONE;
	hdr->hop_limit = 0;
	hdr->saddr = rt->rt6i_src.addr;
	hdr->daddr = rt->rt6i_dst.addr;

	err = ip6mr_cache_unresolved(mrt, vif, query, dev);

unlock:
	rcu_read_unlock();
	return err;
}
2358
/*
 * Append one multicast route message describing @c to @skb.
 *
 * A netlink header of type @cmd with an rtmsg payload is added, followed by
 * RTA_TABLE, RTA_SRC and RTA_DST and whatever mr_fill_mroute() emits.
 *
 * Return: 0 on success (also when mr_fill_mroute() reports -ENOENT, so that
 * an unresolved entry does not break a dump), -EMSGSIZE if the message
 * could not be built; in that case the partial message is cancelled.
 */
static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
			     int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int err;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	/* Fixed route header. */
	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
	rtm->rtm_dst_len  = 128;
	rtm->rtm_src_len  = 128;
	rtm->rtm_tos      = 0;
	rtm->rtm_table    = mrt->id;
	rtm->rtm_type     = RTN_MULTICAST;
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = (c->_c.mfc_flags & MFC_STATIC) ?
			    RTPROT_STATIC : RTPROT_MROUTED;
	rtm->rtm_flags    = 0;

	/* Attributes, in the order table, source, group. */
	if (nla_put_u32(skb, RTA_TABLE, mrt->id) ||
	    nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
		goto cancel;

	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
	/* do not break the dump if cache is unresolved */
	if (err < 0 && err != -ENOENT)
		goto cancel;

	nlmsg_end(skb, nlh);
	return 0;

cancel:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2402
/*
 * Adapter with the generic struct mr_mfc signature: converts @c to the
 * IPv6 cache entry type and forwards all arguments to ip6mr_fill_mroute().
 */
static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      u32 portid, u32 seq, struct mr_mfc *c,
			      int cmd, int flags)
{
	struct mfc6_cache *entry = (struct mfc6_cache *)c;

	return ip6mr_fill_mroute(mrt, skb, portid, seq, entry, cmd, flags);
}
2410
mr6_msgsize(bool unresolved,int maxvif)2411 static int mr6_msgsize(bool unresolved, int maxvif)
2412 {
2413 size_t len =
2414 NLMSG_ALIGN(sizeof(struct rtmsg))
2415 + nla_total_size(4) /* RTA_TABLE */
2416 + nla_total_size(sizeof(struct in6_addr)) /* RTA_SRC */
2417 + nla_total_size(sizeof(struct in6_addr)) /* RTA_DST */
2418 ;
2419
2420 if (!unresolved)
2421 len = len
2422 + nla_total_size(4) /* RTA_IIF */
2423 + nla_total_size(0) /* RTA_MULTIPATH */
2424 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2425 /* RTA_MFC_STATS */
2426 + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2427 ;
2428
2429 return len;
2430 }
2431
mr6_netlink_event(struct mr_table * mrt,struct mfc6_cache * mfc,int cmd)2432 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2433 int cmd)
2434 {
2435 struct net *net = read_pnet(&mrt->net);
2436 struct sk_buff *skb;
2437 int err = -ENOBUFS;
2438
2439 skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2440 GFP_ATOMIC);
2441 if (!skb)
2442 goto errout;
2443
2444 err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2445 if (err < 0)
2446 goto errout;
2447
2448 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2449 return;
2450
2451 errout:
2452 kfree_skb(skb);
2453 if (err < 0)
2454 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2455 }
2456
/*
 * Size of the netlink cache-report message carrying @payloadlen bytes of
 * packet data in IP6MRA_CREPORT_PKT.
 */
static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
{
	size_t len;

	len = NLMSG_ALIGN(sizeof(struct rtgenmsg));
	len += nla_total_size(1);	/* IP6MRA_CREPORT_MSGTYPE */
	len += nla_total_size(4);	/* IP6MRA_CREPORT_MIF_ID */
	/* IP6MRA_CREPORT_SRC_ADDR */
	len += nla_total_size(sizeof(struct in6_addr));
	/* IP6MRA_CREPORT_DST_ADDR */
	len += nla_total_size(sizeof(struct in6_addr));
	/* IP6MRA_CREPORT_PKT */
	len += nla_total_size(payloadlen);

	return len;
}
2473
mrt6msg_netlink_event(const struct mr_table * mrt,struct sk_buff * pkt)2474 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
2475 {
2476 struct net *net = read_pnet(&mrt->net);
2477 struct nlmsghdr *nlh;
2478 struct rtgenmsg *rtgenm;
2479 struct mrt6msg *msg;
2480 struct sk_buff *skb;
2481 struct nlattr *nla;
2482 int payloadlen;
2483
2484 payloadlen = pkt->len - sizeof(struct mrt6msg);
2485 msg = (struct mrt6msg *)skb_transport_header(pkt);
2486
2487 skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2488 if (!skb)
2489 goto errout;
2490
2491 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2492 sizeof(struct rtgenmsg), 0);
2493 if (!nlh)
2494 goto errout;
2495 rtgenm = nlmsg_data(nlh);
2496 rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2497 if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2498 nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2499 nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2500 &msg->im6_src) ||
2501 nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2502 &msg->im6_dst))
2503 goto nla_put_failure;
2504
2505 nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2506 if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2507 nla_data(nla), payloadlen))
2508 goto nla_put_failure;
2509
2510 nlmsg_end(skb, nlh);
2511
2512 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2513 return;
2514
2515 nla_put_failure:
2516 nlmsg_cancel(skb, nlh);
2517 errout:
2518 kfree_skb(skb);
2519 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2520 }
2521
2522 static const struct nla_policy ip6mr_getroute_policy[RTA_MAX + 1] = {
2523 [RTA_SRC] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2524 [RTA_DST] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2525 [RTA_TABLE] = { .type = NLA_U32 },
2526 };
2527
ip6mr_rtm_valid_getroute_req(struct sk_buff * skb,const struct nlmsghdr * nlh,struct nlattr ** tb,struct netlink_ext_ack * extack)2528 static int ip6mr_rtm_valid_getroute_req(struct sk_buff *skb,
2529 const struct nlmsghdr *nlh,
2530 struct nlattr **tb,
2531 struct netlink_ext_ack *extack)
2532 {
2533 struct rtmsg *rtm;
2534 int err;
2535
2536 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, ip6mr_getroute_policy,
2537 extack);
2538 if (err)
2539 return err;
2540
2541 rtm = nlmsg_data(nlh);
2542 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
2543 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
2544 rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
2545 rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
2546 NL_SET_ERR_MSG_MOD(extack,
2547 "Invalid values in header for multicast route get request");
2548 return -EINVAL;
2549 }
2550
2551 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
2552 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
2553 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
2554 return -EINVAL;
2555 }
2556
2557 return 0;
2558 }
2559
ip6mr_rtm_getroute(struct sk_buff * in_skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)2560 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2561 struct netlink_ext_ack *extack)
2562 {
2563 struct net *net = sock_net(in_skb->sk);
2564 struct in6_addr src = {}, grp = {};
2565 struct nlattr *tb[RTA_MAX + 1];
2566 struct mfc6_cache *cache;
2567 struct mr_table *mrt;
2568 struct sk_buff *skb;
2569 u32 tableid;
2570 int err;
2571
2572 err = ip6mr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
2573 if (err < 0)
2574 return err;
2575
2576 if (tb[RTA_SRC])
2577 src = nla_get_in6_addr(tb[RTA_SRC]);
2578 if (tb[RTA_DST])
2579 grp = nla_get_in6_addr(tb[RTA_DST]);
2580 tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
2581
2582 mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
2583 if (!mrt) {
2584 NL_SET_ERR_MSG_MOD(extack, "MR table does not exist");
2585 return -ENOENT;
2586 }
2587
2588 /* entries are added/deleted only under RTNL */
2589 rcu_read_lock();
2590 cache = ip6mr_cache_find(mrt, &src, &grp);
2591 rcu_read_unlock();
2592 if (!cache) {
2593 NL_SET_ERR_MSG_MOD(extack, "MR cache entry not found");
2594 return -ENOENT;
2595 }
2596
2597 skb = nlmsg_new(mr6_msgsize(false, mrt->maxvif), GFP_KERNEL);
2598 if (!skb)
2599 return -ENOBUFS;
2600
2601 err = ip6mr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
2602 nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0);
2603 if (err < 0) {
2604 kfree_skb(skb);
2605 return err;
2606 }
2607
2608 return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2609 }
2610
ip6mr_rtm_dumproute(struct sk_buff * skb,struct netlink_callback * cb)2611 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2612 {
2613 const struct nlmsghdr *nlh = cb->nlh;
2614 struct fib_dump_filter filter = {};
2615 int err;
2616
2617 if (cb->strict_check) {
2618 err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
2619 &filter, cb);
2620 if (err < 0)
2621 return err;
2622 }
2623
2624 if (filter.table_id) {
2625 struct mr_table *mrt;
2626
2627 mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id);
2628 if (!mrt) {
2629 if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
2630 return skb->len;
2631
2632 NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
2633 return -ENOENT;
2634 }
2635 err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
2636 &mfc_unres_lock, &filter);
2637 return skb->len ? : err;
2638 }
2639
2640 return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2641 _ip6mr_fill_mroute, &mfc_unres_lock, &filter);
2642 }
2643