1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Linux IPv6 multicast routing support for BSD pim6sd 4 * Based on net/ipv4/ipmr.c. 5 * 6 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr> 7 * LSIIT Laboratory, Strasbourg, France 8 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com> 9 * 6WIND, Paris, France 10 * Copyright (C)2007,2008 USAGI/WIDE Project 11 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> 12 */ 13 14 #include <linux/uaccess.h> 15 #include <linux/types.h> 16 #include <linux/sched.h> 17 #include <linux/errno.h> 18 #include <linux/mm.h> 19 #include <linux/kernel.h> 20 #include <linux/fcntl.h> 21 #include <linux/stat.h> 22 #include <linux/socket.h> 23 #include <linux/inet.h> 24 #include <linux/netdevice.h> 25 #include <linux/inetdevice.h> 26 #include <linux/proc_fs.h> 27 #include <linux/seq_file.h> 28 #include <linux/init.h> 29 #include <linux/compat.h> 30 #include <linux/rhashtable.h> 31 #include <net/protocol.h> 32 #include <linux/skbuff.h> 33 #include <net/raw.h> 34 #include <linux/notifier.h> 35 #include <linux/if_arp.h> 36 #include <net/checksum.h> 37 #include <net/netlink.h> 38 #include <net/fib_rules.h> 39 40 #include <net/ipv6.h> 41 #include <net/ip6_route.h> 42 #include <linux/mroute6.h> 43 #include <linux/pim.h> 44 #include <net/addrconf.h> 45 #include <linux/netfilter_ipv6.h> 46 #include <linux/export.h> 47 #include <net/ip6_checksum.h> 48 #include <linux/netconf.h> 49 #include <net/ip_tunnels.h> 50 51 #include <linux/nospec.h> 52 53 struct ip6mr_rule { 54 struct fib_rule common; 55 }; 56 57 struct ip6mr_result { 58 struct mr_table *mrt; 59 }; 60 61 /* Big lock, protecting vif table, mrt cache and mroute socket state. 62 Note that the changes are semaphored via rtnl_lock. 63 */ 64 65 static DEFINE_SPINLOCK(mrt_lock); 66 67 static struct net_device *vif_dev_read(const struct vif_device *vif) 68 { 69 return rcu_dereference(vif->dev); 70 } 71 72 /* Multicast router control variables */ 73 74 /* Special spinlock for queue of unresolved entries */ 75 static DEFINE_SPINLOCK(mfc_unres_lock); 76 77 /* We return to original Alan's scheme. Hash table of resolved 78 entries is changed only in process context and protected 79 with weak lock mrt_lock. Queue of unresolved entries is protected 80 with strong spinlock mfc_unres_lock. 81 82 In this case data path is free of exclusive locks at all. 83 */ 84 85 static struct kmem_cache *mrt_cachep __read_mostly; 86 87 static struct mr_table *ip6mr_new_table(struct net *net, u32 id); 88 static void ip6mr_free_table(struct mr_table *mrt); 89 90 static void ip6_mr_forward(struct net *net, struct mr_table *mrt, 91 struct net_device *dev, struct sk_buff *skb, 92 struct mfc6_cache *cache); 93 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, 94 mifi_t mifi, int assert); 95 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, 96 int cmd); 97 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt); 98 static int ip6mr_rtm_dumproute(struct sk_buff *skb, 99 struct netlink_callback *cb); 100 static void mroute_clean_tables(struct mr_table *mrt, int flags); 101 static void ipmr_expire_process(struct timer_list *t); 102 103 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 104 #define ip6mr_for_each_table(mrt, net) \ 105 list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \ 106 lockdep_rtnl_is_held() || \ 107 list_empty(&net->ipv6.mr6_tables)) 108 109 static struct mr_table *ip6mr_mr_table_iter(struct net *net, 110 struct mr_table *mrt) 111 { 112 struct mr_table *ret; 113 114 if (!mrt) 115 ret = list_entry_rcu(net->ipv6.mr6_tables.next, 116 struct mr_table, list); 117 else 118 ret = list_entry_rcu(mrt->list.next, 119 struct mr_table, list); 120 121 if (&ret->list == &net->ipv6.mr6_tables) 122 return NULL; 123 return ret; 124 } 125 126 static struct mr_table *ip6mr_get_table(struct net *net, u32 id) 127 { 128 struct mr_table *mrt; 129 130 ip6mr_for_each_table(mrt, net) { 131 if (mrt->id == id) 132 return mrt; 133 } 134 return NULL; 135 } 136 137 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, 138 struct mr_table **mrt) 139 { 140 int err; 141 struct ip6mr_result res; 142 struct fib_lookup_arg arg = { 143 .result = &res, 144 .flags = FIB_LOOKUP_NOREF, 145 }; 146 147 /* update flow if oif or iif point to device enslaved to l3mdev */ 148 l3mdev_update_flow(net, flowi6_to_flowi(flp6)); 149 150 err = fib_rules_lookup(net->ipv6.mr6_rules_ops, 151 flowi6_to_flowi(flp6), 0, &arg); 152 if (err < 0) 153 return err; 154 *mrt = res.mrt; 155 return 0; 156 } 157 158 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp, 159 int flags, struct fib_lookup_arg *arg) 160 { 161 struct ip6mr_result *res = arg->result; 162 struct mr_table *mrt; 163 164 switch (rule->action) { 165 case FR_ACT_TO_TBL: 166 break; 167 case FR_ACT_UNREACHABLE: 168 return -ENETUNREACH; 169 case FR_ACT_PROHIBIT: 170 return -EACCES; 171 case FR_ACT_BLACKHOLE: 172 default: 173 return -EINVAL; 174 } 175 176 arg->table = fib_rule_get_table(rule, arg); 177 178 mrt = ip6mr_get_table(rule->fr_net, arg->table); 179 if (!mrt) 180 return -EAGAIN; 181 res->mrt = mrt; 182 return 0; 183 } 184 185 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags) 186 { 187 return 1; 188 } 189 190 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, 191 struct fib_rule_hdr *frh, struct nlattr **tb, 192 struct netlink_ext_ack *extack) 193 { 194 return 0; 195 } 196 197 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, 198 struct nlattr **tb) 199 { 200 return 1; 201 } 202 203 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, 204 struct fib_rule_hdr *frh) 205 { 206 frh->dst_len = 0; 207 frh->src_len = 0; 208 frh->tos = 0; 209 return 0; 210 } 211 212 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = { 213 .family = RTNL_FAMILY_IP6MR, 214 .rule_size = sizeof(struct ip6mr_rule), 215 .addr_size = sizeof(struct in6_addr), 216 .action = ip6mr_rule_action, 217 .match = ip6mr_rule_match, 218 .configure = ip6mr_rule_configure, 219 .compare = ip6mr_rule_compare, 220 .fill = ip6mr_rule_fill, 221 .nlgroup = RTNLGRP_IPV6_RULE, 222 .owner = THIS_MODULE, 223 }; 224 225 static int __net_init ip6mr_rules_init(struct net *net) 226 { 227 struct fib_rules_ops *ops; 228 struct mr_table *mrt; 229 int err; 230 231 ops = fib_rules_register(&ip6mr_rules_ops_template, net); 232 if (IS_ERR(ops)) 233 return PTR_ERR(ops); 234 235 INIT_LIST_HEAD(&net->ipv6.mr6_tables); 236 237 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); 238 if (IS_ERR(mrt)) { 239 err = PTR_ERR(mrt); 240 goto err1; 241 } 242 243 err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0); 244 if (err < 0) 245 goto err2; 246 247 net->ipv6.mr6_rules_ops = ops; 248 return 0; 249 250 err2: 251 rtnl_lock(); 252 ip6mr_free_table(mrt); 253 rtnl_unlock(); 254 err1: 255 fib_rules_unregister(ops); 256 return err; 257 } 258 259 static void __net_exit ip6mr_rules_exit(struct net *net) 260 { 261 struct mr_table *mrt, *next; 262 263 ASSERT_RTNL(); 264 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { 265 list_del(&mrt->list); 266 ip6mr_free_table(mrt); 267 } 268 fib_rules_unregister(net->ipv6.mr6_rules_ops); 269 } 270 271 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, 272 struct netlink_ext_ack *extack) 273 { 274 return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack); 275 } 276 277 static unsigned int ip6mr_rules_seq_read(struct net *net) 278 { 279 return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR); 280 } 281 282 bool ip6mr_rule_default(const struct fib_rule *rule) 283 { 284 return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL && 285 rule->table == RT6_TABLE_DFLT && !rule->l3mdev; 286 } 287 EXPORT_SYMBOL(ip6mr_rule_default); 288 #else 289 #define ip6mr_for_each_table(mrt, net) \ 290 for (mrt = net->ipv6.mrt6; mrt; mrt = NULL) 291 292 static struct mr_table *ip6mr_mr_table_iter(struct net *net, 293 struct mr_table *mrt) 294 { 295 if (!mrt) 296 return net->ipv6.mrt6; 297 return NULL; 298 } 299 300 static struct mr_table *ip6mr_get_table(struct net *net, u32 id) 301 { 302 return net->ipv6.mrt6; 303 } 304 305 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, 306 struct mr_table **mrt) 307 { 308 *mrt = net->ipv6.mrt6; 309 return 0; 310 } 311 312 static int __net_init ip6mr_rules_init(struct net *net) 313 { 314 struct mr_table *mrt; 315 316 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); 317 if (IS_ERR(mrt)) 318 return PTR_ERR(mrt); 319 net->ipv6.mrt6 = mrt; 320 return 0; 321 } 322 323 static void __net_exit ip6mr_rules_exit(struct net *net) 324 { 325 ASSERT_RTNL(); 326 ip6mr_free_table(net->ipv6.mrt6); 327 net->ipv6.mrt6 = NULL; 328 } 329 330 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, 331 struct netlink_ext_ack *extack) 332 { 333 return 0; 334 } 335 336 static unsigned int ip6mr_rules_seq_read(struct net *net) 337 { 338 return 0; 339 } 340 #endif 341 342 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg, 343 const void *ptr) 344 { 345 const struct mfc6_cache_cmp_arg *cmparg = arg->key; 346 struct mfc6_cache *c = (struct mfc6_cache *)ptr; 347 348 return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) || 349 !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin); 350 } 351 352 static const struct rhashtable_params ip6mr_rht_params = { 353 .head_offset = offsetof(struct mr_mfc, mnode), 354 .key_offset = offsetof(struct mfc6_cache, cmparg), 355 .key_len = sizeof(struct mfc6_cache_cmp_arg), 356 .nelem_hint = 3, 357 .obj_cmpfn = ip6mr_hash_cmp, 358 .automatic_shrinking = true, 359 }; 360 361 static void ip6mr_new_table_set(struct mr_table *mrt, 362 struct net *net) 363 { 364 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 365 list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables); 366 #endif 367 } 368 369 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = { 370 .mf6c_origin = IN6ADDR_ANY_INIT, 371 .mf6c_mcastgrp = IN6ADDR_ANY_INIT, 372 }; 373 374 static struct mr_table_ops ip6mr_mr_table_ops = { 375 .rht_params = &ip6mr_rht_params, 376 .cmparg_any = &ip6mr_mr_table_ops_cmparg_any, 377 }; 378 379 static struct mr_table *ip6mr_new_table(struct net *net, u32 id) 380 { 381 struct mr_table *mrt; 382 383 mrt = ip6mr_get_table(net, id); 384 if (mrt) 385 return mrt; 386 387 return mr_table_alloc(net, id, &ip6mr_mr_table_ops, 388 ipmr_expire_process, ip6mr_new_table_set); 389 } 390 391 static void ip6mr_free_table(struct mr_table *mrt) 392 { 393 del_timer_sync(&mrt->ipmr_expire_timer); 394 mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC | 395 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC); 396 rhltable_destroy(&mrt->mfc_hash); 397 kfree(mrt); 398 } 399 400 #ifdef CONFIG_PROC_FS 401 /* The /proc interfaces to multicast routing 402 * /proc/ip6_mr_cache /proc/ip6_mr_vif 403 */ 404 405 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) 406 __acquires(RCU) 407 { 408 struct mr_vif_iter *iter = seq->private; 409 struct net *net = seq_file_net(seq); 410 struct mr_table *mrt; 411 412 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); 413 if (!mrt) 414 return ERR_PTR(-ENOENT); 415 416 iter->mrt = mrt; 417 418 rcu_read_lock(); 419 return mr_vif_seq_start(seq, pos); 420 } 421 422 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) 423 __releases(RCU) 424 { 425 rcu_read_unlock(); 426 } 427 428 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) 429 { 430 struct mr_vif_iter *iter = seq->private; 431 struct mr_table *mrt = iter->mrt; 432 433 if (v == SEQ_START_TOKEN) { 434 seq_puts(seq, 435 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n"); 436 } else { 437 const struct vif_device *vif = v; 438 const struct net_device *vif_dev; 439 const char *name; 440 441 vif_dev = vif_dev_read(vif); 442 name = vif_dev ? vif_dev->name : "none"; 443 444 seq_printf(seq, 445 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", 446 vif - mrt->vif_table, 447 name, vif->bytes_in, vif->pkt_in, 448 vif->bytes_out, vif->pkt_out, 449 vif->flags); 450 } 451 return 0; 452 } 453 454 static const struct seq_operations ip6mr_vif_seq_ops = { 455 .start = ip6mr_vif_seq_start, 456 .next = mr_vif_seq_next, 457 .stop = ip6mr_vif_seq_stop, 458 .show = ip6mr_vif_seq_show, 459 }; 460 461 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 462 { 463 struct net *net = seq_file_net(seq); 464 struct mr_table *mrt; 465 466 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); 467 if (!mrt) 468 return ERR_PTR(-ENOENT); 469 470 return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock); 471 } 472 473 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 474 { 475 int n; 476 477 if (v == SEQ_START_TOKEN) { 478 seq_puts(seq, 479 "Group " 480 "Origin " 481 "Iif Pkts Bytes Wrong Oifs\n"); 482 } else { 483 const struct mfc6_cache *mfc = v; 484 const struct mr_mfc_iter *it = seq->private; 485 struct mr_table *mrt = it->mrt; 486 487 seq_printf(seq, "%pI6 %pI6 %-3hd", 488 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, 489 mfc->_c.mfc_parent); 490 491 if (it->cache != &mrt->mfc_unres_queue) { 492 seq_printf(seq, " %8lu %8lu %8lu", 493 mfc->_c.mfc_un.res.pkt, 494 mfc->_c.mfc_un.res.bytes, 495 mfc->_c.mfc_un.res.wrong_if); 496 for (n = mfc->_c.mfc_un.res.minvif; 497 n < mfc->_c.mfc_un.res.maxvif; n++) { 498 if (VIF_EXISTS(mrt, n) && 499 mfc->_c.mfc_un.res.ttls[n] < 255) 500 seq_printf(seq, 501 " %2d:%-3d", n, 502 mfc->_c.mfc_un.res.ttls[n]); 503 } 504 } else { 505 /* unresolved mfc_caches don't contain 506 * pkt, bytes and wrong_if values 507 */ 508 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); 509 } 510 seq_putc(seq, '\n'); 511 } 512 return 0; 513 } 514 515 static const struct seq_operations ipmr_mfc_seq_ops = { 516 .start = ipmr_mfc_seq_start, 517 .next = mr_mfc_seq_next, 518 .stop = mr_mfc_seq_stop, 519 .show = ipmr_mfc_seq_show, 520 }; 521 #endif 522 523 #ifdef CONFIG_IPV6_PIMSM_V2 524 525 static int pim6_rcv(struct sk_buff *skb) 526 { 527 struct pimreghdr *pim; 528 struct ipv6hdr *encap; 529 struct net_device *reg_dev = NULL; 530 struct net *net = dev_net(skb->dev); 531 struct mr_table *mrt; 532 struct flowi6 fl6 = { 533 .flowi6_iif = skb->dev->ifindex, 534 .flowi6_mark = skb->mark, 535 }; 536 int reg_vif_num; 537 538 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) 539 goto drop; 540 541 pim = (struct pimreghdr *)skb_transport_header(skb); 542 if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) || 543 (pim->flags & PIM_NULL_REGISTER) || 544 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 545 sizeof(*pim), IPPROTO_PIM, 546 csum_partial((void *)pim, sizeof(*pim), 0)) && 547 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 548 goto drop; 549 550 /* check if the inner packet is destined to mcast group */ 551 encap = (struct ipv6hdr *)(skb_transport_header(skb) + 552 sizeof(*pim)); 553 554 if (!ipv6_addr_is_multicast(&encap->daddr) || 555 encap->payload_len == 0 || 556 ntohs(encap->payload_len) + sizeof(*pim) > skb->len) 557 goto drop; 558 559 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 560 goto drop; 561 562 /* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */ 563 reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num); 564 if (reg_vif_num >= 0) 565 reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]); 566 567 if (!reg_dev) 568 goto drop; 569 570 skb->mac_header = skb->network_header; 571 skb_pull(skb, (u8 *)encap - skb->data); 572 skb_reset_network_header(skb); 573 skb->protocol = htons(ETH_P_IPV6); 574 skb->ip_summed = CHECKSUM_NONE; 575 576 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); 577 578 netif_rx(skb); 579 580 return 0; 581 drop: 582 kfree_skb(skb); 583 return 0; 584 } 585 586 static const struct inet6_protocol pim6_protocol = { 587 .handler = pim6_rcv, 588 }; 589 590 /* Service routines creating virtual interfaces: PIMREG */ 591 592 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, 593 struct net_device *dev) 594 { 595 struct net *net = dev_net(dev); 596 struct mr_table *mrt; 597 struct flowi6 fl6 = { 598 .flowi6_oif = dev->ifindex, 599 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, 600 .flowi6_mark = skb->mark, 601 }; 602 603 if (!pskb_inet_may_pull(skb)) 604 goto tx_err; 605 606 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 607 goto tx_err; 608 609 dev->stats.tx_bytes += skb->len; 610 dev->stats.tx_packets++; 611 rcu_read_lock(); 612 ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num), 613 MRT6MSG_WHOLEPKT); 614 rcu_read_unlock(); 615 kfree_skb(skb); 616 return NETDEV_TX_OK; 617 618 tx_err: 619 dev->stats.tx_errors++; 620 kfree_skb(skb); 621 return NETDEV_TX_OK; 622 } 623 624 static int reg_vif_get_iflink(const struct net_device *dev) 625 { 626 return 0; 627 } 628 629 static const struct net_device_ops reg_vif_netdev_ops = { 630 .ndo_start_xmit = reg_vif_xmit, 631 .ndo_get_iflink = reg_vif_get_iflink, 632 }; 633 634 static void reg_vif_setup(struct net_device *dev) 635 { 636 dev->type = ARPHRD_PIMREG; 637 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; 638 dev->flags = IFF_NOARP; 639 dev->netdev_ops = ®_vif_netdev_ops; 640 dev->needs_free_netdev = true; 641 dev->features |= NETIF_F_NETNS_LOCAL; 642 } 643 644 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt) 645 { 646 struct net_device *dev; 647 char name[IFNAMSIZ]; 648 649 if (mrt->id == RT6_TABLE_DFLT) 650 sprintf(name, "pim6reg"); 651 else 652 sprintf(name, "pim6reg%u", mrt->id); 653 654 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); 655 if (!dev) 656 return NULL; 657 658 dev_net_set(dev, net); 659 660 if (register_netdevice(dev)) { 661 free_netdev(dev); 662 return NULL; 663 } 664 665 if (dev_open(dev, NULL)) 666 goto failure; 667 668 dev_hold(dev); 669 return dev; 670 671 failure: 672 unregister_netdevice(dev); 673 return NULL; 674 } 675 #endif 676 677 static int call_ip6mr_vif_entry_notifiers(struct net *net, 678 enum fib_event_type event_type, 679 struct vif_device *vif, 680 struct net_device *vif_dev, 681 mifi_t vif_index, u32 tb_id) 682 { 683 return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type, 684 vif, vif_dev, vif_index, tb_id, 685 &net->ipv6.ipmr_seq); 686 } 687 688 static int call_ip6mr_mfc_entry_notifiers(struct net *net, 689 enum fib_event_type event_type, 690 struct mfc6_cache *mfc, u32 tb_id) 691 { 692 return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type, 693 &mfc->_c, tb_id, &net->ipv6.ipmr_seq); 694 } 695 696 /* Delete a VIF entry */ 697 static int mif6_delete(struct mr_table *mrt, int vifi, int notify, 698 struct list_head *head) 699 { 700 struct vif_device *v; 701 struct net_device *dev; 702 struct inet6_dev *in6_dev; 703 704 if (vifi < 0 || vifi >= mrt->maxvif) 705 return -EADDRNOTAVAIL; 706 707 v = &mrt->vif_table[vifi]; 708 709 dev = rtnl_dereference(v->dev); 710 if (!dev) 711 return -EADDRNOTAVAIL; 712 713 call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net), 714 FIB_EVENT_VIF_DEL, v, dev, 715 vifi, mrt->id); 716 spin_lock(&mrt_lock); 717 RCU_INIT_POINTER(v->dev, NULL); 718 719 #ifdef CONFIG_IPV6_PIMSM_V2 720 if (vifi == mrt->mroute_reg_vif_num) { 721 /* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */ 722 WRITE_ONCE(mrt->mroute_reg_vif_num, -1); 723 } 724 #endif 725 726 if (vifi + 1 == mrt->maxvif) { 727 int tmp; 728 for (tmp = vifi - 1; tmp >= 0; tmp--) { 729 if (VIF_EXISTS(mrt, tmp)) 730 break; 731 } 732 WRITE_ONCE(mrt->maxvif, tmp + 1); 733 } 734 735 spin_unlock(&mrt_lock); 736 737 dev_set_allmulti(dev, -1); 738 739 in6_dev = __in6_dev_get(dev); 740 if (in6_dev) { 741 atomic_dec(&in6_dev->cnf.mc_forwarding); 742 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 743 NETCONFA_MC_FORWARDING, 744 dev->ifindex, &in6_dev->cnf); 745 } 746 747 if ((v->flags & MIFF_REGISTER) && !notify) 748 unregister_netdevice_queue(dev, head); 749 750 netdev_put(dev, &v->dev_tracker); 751 return 0; 752 } 753 754 static inline void ip6mr_cache_free_rcu(struct rcu_head *head) 755 { 756 struct mr_mfc *c = container_of(head, struct mr_mfc, rcu); 757 758 kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c); 759 } 760 761 static inline void ip6mr_cache_free(struct mfc6_cache *c) 762 { 763 call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu); 764 } 765 766 /* Destroy an unresolved cache entry, killing queued skbs 767 and reporting error to netlink readers. 768 */ 769 770 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c) 771 { 772 struct net *net = read_pnet(&mrt->net); 773 struct sk_buff *skb; 774 775 atomic_dec(&mrt->cache_resolve_queue_len); 776 777 while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) { 778 if (ipv6_hdr(skb)->version == 0) { 779 struct nlmsghdr *nlh = skb_pull(skb, 780 sizeof(struct ipv6hdr)); 781 nlh->nlmsg_type = NLMSG_ERROR; 782 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 783 skb_trim(skb, nlh->nlmsg_len); 784 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT; 785 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 786 } else 787 kfree_skb(skb); 788 } 789 790 ip6mr_cache_free(c); 791 } 792 793 794 /* Timer process for all the unresolved queue. */ 795 796 static void ipmr_do_expire_process(struct mr_table *mrt) 797 { 798 unsigned long now = jiffies; 799 unsigned long expires = 10 * HZ; 800 struct mr_mfc *c, *next; 801 802 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { 803 if (time_after(c->mfc_un.unres.expires, now)) { 804 /* not yet... */ 805 unsigned long interval = c->mfc_un.unres.expires - now; 806 if (interval < expires) 807 expires = interval; 808 continue; 809 } 810 811 list_del(&c->list); 812 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); 813 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); 814 } 815 816 if (!list_empty(&mrt->mfc_unres_queue)) 817 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); 818 } 819 820 static void ipmr_expire_process(struct timer_list *t) 821 { 822 struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer); 823 824 if (!spin_trylock(&mfc_unres_lock)) { 825 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1); 826 return; 827 } 828 829 if (!list_empty(&mrt->mfc_unres_queue)) 830 ipmr_do_expire_process(mrt); 831 832 spin_unlock(&mfc_unres_lock); 833 } 834 835 /* Fill oifs list. It is called under locked mrt_lock. */ 836 837 static void ip6mr_update_thresholds(struct mr_table *mrt, 838 struct mr_mfc *cache, 839 unsigned char *ttls) 840 { 841 int vifi; 842 843 cache->mfc_un.res.minvif = MAXMIFS; 844 cache->mfc_un.res.maxvif = 0; 845 memset(cache->mfc_un.res.ttls, 255, MAXMIFS); 846 847 for (vifi = 0; vifi < mrt->maxvif; vifi++) { 848 if (VIF_EXISTS(mrt, vifi) && 849 ttls[vifi] && ttls[vifi] < 255) { 850 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 851 if (cache->mfc_un.res.minvif > vifi) 852 cache->mfc_un.res.minvif = vifi; 853 if (cache->mfc_un.res.maxvif <= vifi) 854 cache->mfc_un.res.maxvif = vifi + 1; 855 } 856 } 857 cache->mfc_un.res.lastuse = jiffies; 858 } 859 860 static int mif6_add(struct net *net, struct mr_table *mrt, 861 struct mif6ctl *vifc, int mrtsock) 862 { 863 int vifi = vifc->mif6c_mifi; 864 struct vif_device *v = &mrt->vif_table[vifi]; 865 struct net_device *dev; 866 struct inet6_dev *in6_dev; 867 int err; 868 869 /* Is vif busy ? */ 870 if (VIF_EXISTS(mrt, vifi)) 871 return -EADDRINUSE; 872 873 switch (vifc->mif6c_flags) { 874 #ifdef CONFIG_IPV6_PIMSM_V2 875 case MIFF_REGISTER: 876 /* 877 * Special Purpose VIF in PIM 878 * All the packets will be sent to the daemon 879 */ 880 if (mrt->mroute_reg_vif_num >= 0) 881 return -EADDRINUSE; 882 dev = ip6mr_reg_vif(net, mrt); 883 if (!dev) 884 return -ENOBUFS; 885 err = dev_set_allmulti(dev, 1); 886 if (err) { 887 unregister_netdevice(dev); 888 dev_put(dev); 889 return err; 890 } 891 break; 892 #endif 893 case 0: 894 dev = dev_get_by_index(net, vifc->mif6c_pifi); 895 if (!dev) 896 return -EADDRNOTAVAIL; 897 err = dev_set_allmulti(dev, 1); 898 if (err) { 899 dev_put(dev); 900 return err; 901 } 902 break; 903 default: 904 return -EINVAL; 905 } 906 907 in6_dev = __in6_dev_get(dev); 908 if (in6_dev) { 909 atomic_inc(&in6_dev->cnf.mc_forwarding); 910 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 911 NETCONFA_MC_FORWARDING, 912 dev->ifindex, &in6_dev->cnf); 913 } 914 915 /* Fill in the VIF structures */ 916 vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold, 917 vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0), 918 MIFF_REGISTER); 919 920 /* And finish update writing critical data */ 921 spin_lock(&mrt_lock); 922 rcu_assign_pointer(v->dev, dev); 923 netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); 924 #ifdef CONFIG_IPV6_PIMSM_V2 925 if (v->flags & MIFF_REGISTER) 926 WRITE_ONCE(mrt->mroute_reg_vif_num, vifi); 927 #endif 928 if (vifi + 1 > mrt->maxvif) 929 WRITE_ONCE(mrt->maxvif, vifi + 1); 930 spin_unlock(&mrt_lock); 931 call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, 932 v, dev, vifi, mrt->id); 933 return 0; 934 } 935 936 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt, 937 const struct in6_addr *origin, 938 const struct in6_addr *mcastgrp) 939 { 940 struct mfc6_cache_cmp_arg arg = { 941 .mf6c_origin = *origin, 942 .mf6c_mcastgrp = *mcastgrp, 943 }; 944 945 return mr_mfc_find(mrt, &arg); 946 } 947 948 /* Look for a (*,G) entry */ 949 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt, 950 struct in6_addr *mcastgrp, 951 mifi_t mifi) 952 { 953 struct mfc6_cache_cmp_arg arg = { 954 .mf6c_origin = in6addr_any, 955 .mf6c_mcastgrp = *mcastgrp, 956 }; 957 958 if (ipv6_addr_any(mcastgrp)) 959 return mr_mfc_find_any_parent(mrt, mifi); 960 return mr_mfc_find_any(mrt, mifi, &arg); 961 } 962 963 /* Look for a (S,G,iif) entry if parent != -1 */ 964 static struct mfc6_cache * 965 ip6mr_cache_find_parent(struct mr_table *mrt, 966 const struct in6_addr *origin, 967 const struct in6_addr *mcastgrp, 968 int parent) 969 { 970 struct mfc6_cache_cmp_arg arg = { 971 .mf6c_origin = *origin, 972 .mf6c_mcastgrp = *mcastgrp, 973 }; 974 975 return mr_mfc_find_parent(mrt, &arg, parent); 976 } 977 978 /* Allocate a multicast cache entry */ 979 static struct mfc6_cache *ip6mr_cache_alloc(void) 980 { 981 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 982 if (!c) 983 return NULL; 984 c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; 985 c->_c.mfc_un.res.minvif = MAXMIFS; 986 c->_c.free = ip6mr_cache_free_rcu; 987 refcount_set(&c->_c.mfc_un.res.refcount, 1); 988 return c; 989 } 990 991 static struct mfc6_cache *ip6mr_cache_alloc_unres(void) 992 { 993 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 994 if (!c) 995 return NULL; 996 skb_queue_head_init(&c->_c.mfc_un.unres.unresolved); 997 c->_c.mfc_un.unres.expires = jiffies + 10 * HZ; 998 return c; 999 } 1000 1001 /* 1002 * A cache entry has gone into a resolved state from queued 1003 */ 1004 1005 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt, 1006 struct mfc6_cache *uc, struct mfc6_cache *c) 1007 { 1008 struct sk_buff *skb; 1009 1010 /* 1011 * Play the pending entries through our router 1012 */ 1013 1014 while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) { 1015 if (ipv6_hdr(skb)->version == 0) { 1016 struct nlmsghdr *nlh = skb_pull(skb, 1017 sizeof(struct ipv6hdr)); 1018 1019 if (mr_fill_mroute(mrt, skb, &c->_c, 1020 nlmsg_data(nlh)) > 0) { 1021 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; 1022 } else { 1023 nlh->nlmsg_type = NLMSG_ERROR; 1024 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 1025 skb_trim(skb, nlh->nlmsg_len); 1026 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE; 1027 } 1028 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 1029 } else 1030 ip6_mr_forward(net, mrt, skb->dev, skb, c); 1031 } 1032 } 1033 1034 /* 1035 * Bounce a cache query up to pim6sd and netlink. 1036 * 1037 * Called under rcu_read_lock() 1038 */ 1039 1040 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, 1041 mifi_t mifi, int assert) 1042 { 1043 struct sock *mroute6_sk; 1044 struct sk_buff *skb; 1045 struct mrt6msg *msg; 1046 int ret; 1047 1048 #ifdef CONFIG_IPV6_PIMSM_V2 1049 if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) 1050 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt) 1051 +sizeof(*msg)); 1052 else 1053 #endif 1054 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC); 1055 1056 if (!skb) 1057 return -ENOBUFS; 1058 1059 /* I suppose that internal messages 1060 * do not require checksums */ 1061 1062 skb->ip_summed = CHECKSUM_UNNECESSARY; 1063 1064 #ifdef CONFIG_IPV6_PIMSM_V2 1065 if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) { 1066 /* Ugly, but we have no choice with this interface. 1067 Duplicate old header, fix length etc. 1068 And all this only to mangle msg->im6_msgtype and 1069 to set msg->im6_mbz to "mbz" :-) 1070 */ 1071 skb_push(skb, -skb_network_offset(pkt)); 1072 1073 skb_push(skb, sizeof(*msg)); 1074 skb_reset_transport_header(skb); 1075 msg = (struct mrt6msg *)skb_transport_header(skb); 1076 msg->im6_mbz = 0; 1077 msg->im6_msgtype = assert; 1078 if (assert == MRT6MSG_WRMIFWHOLE) 1079 msg->im6_mif = mifi; 1080 else 1081 msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num); 1082 msg->im6_pad = 0; 1083 msg->im6_src = ipv6_hdr(pkt)->saddr; 1084 msg->im6_dst = ipv6_hdr(pkt)->daddr; 1085 1086 skb->ip_summed = CHECKSUM_UNNECESSARY; 1087 } else 1088 #endif 1089 { 1090 /* 1091 * Copy the IP header 1092 */ 1093 1094 skb_put(skb, sizeof(struct ipv6hdr)); 1095 skb_reset_network_header(skb); 1096 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr)); 1097 1098 /* 1099 * Add our header 1100 */ 1101 skb_put(skb, sizeof(*msg)); 1102 skb_reset_transport_header(skb); 1103 msg = (struct mrt6msg *)skb_transport_header(skb); 1104 1105 msg->im6_mbz = 0; 1106 msg->im6_msgtype = assert; 1107 msg->im6_mif = mifi; 1108 msg->im6_pad = 0; 1109 msg->im6_src = ipv6_hdr(pkt)->saddr; 1110 msg->im6_dst = ipv6_hdr(pkt)->daddr; 1111 1112 skb_dst_set(skb, dst_clone(skb_dst(pkt))); 1113 skb->ip_summed = CHECKSUM_UNNECESSARY; 1114 } 1115 1116 mroute6_sk = rcu_dereference(mrt->mroute_sk); 1117 if (!mroute6_sk) { 1118 kfree_skb(skb); 1119 return -EINVAL; 1120 } 1121 1122 mrt6msg_netlink_event(mrt, skb); 1123 1124 /* Deliver to user space multicast routing algorithms */ 1125 ret = sock_queue_rcv_skb(mroute6_sk, skb); 1126 1127 if (ret < 0) { 1128 net_warn_ratelimited("mroute6: pending queue full, dropping entries\n"); 1129 kfree_skb(skb); 1130 } 1131 1132 return ret; 1133 } 1134 1135 /* Queue a packet for resolution. It gets locked cache entry! */ 1136 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi, 1137 struct sk_buff *skb, struct net_device *dev) 1138 { 1139 struct mfc6_cache *c; 1140 bool found = false; 1141 int err; 1142 1143 spin_lock_bh(&mfc_unres_lock); 1144 list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) { 1145 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && 1146 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) { 1147 found = true; 1148 break; 1149 } 1150 } 1151 1152 if (!found) { 1153 /* 1154 * Create a new entry if allowable 1155 */ 1156 1157 c = ip6mr_cache_alloc_unres(); 1158 if (!c) { 1159 spin_unlock_bh(&mfc_unres_lock); 1160 1161 kfree_skb(skb); 1162 return -ENOBUFS; 1163 } 1164 1165 /* Fill in the new cache entry */ 1166 c->_c.mfc_parent = -1; 1167 c->mf6c_origin = ipv6_hdr(skb)->saddr; 1168 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr; 1169 1170 /* 1171 * Reflect first query at pim6sd 1172 */ 1173 err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE); 1174 if (err < 0) { 1175 /* If the report failed throw the cache entry 1176 out - Brad Parker 1177 */ 1178 spin_unlock_bh(&mfc_unres_lock); 1179 1180 ip6mr_cache_free(c); 1181 kfree_skb(skb); 1182 return err; 1183 } 1184 1185 atomic_inc(&mrt->cache_resolve_queue_len); 1186 list_add(&c->_c.list, &mrt->mfc_unres_queue); 1187 mr6_netlink_event(mrt, c, RTM_NEWROUTE); 1188 1189 ipmr_do_expire_process(mrt); 1190 } 1191 1192 /* See if we can append the packet */ 1193 if (c->_c.mfc_un.unres.unresolved.qlen > 3) { 1194 kfree_skb(skb); 1195 err = -ENOBUFS; 1196 } else { 1197 if (dev) { 1198 skb->dev = dev; 1199 skb->skb_iif = dev->ifindex; 1200 } 1201 skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); 1202 err = 0; 1203 } 1204 1205 spin_unlock_bh(&mfc_unres_lock); 1206 return err; 1207 } 1208 1209 /* 1210 * MFC6 cache manipulation by user space 1211 */ 1212 1213 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc, 1214 int parent) 1215 { 1216 struct mfc6_cache *c; 1217 1218 /* The entries are added/deleted only under RTNL */ 1219 rcu_read_lock(); 1220 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr, 1221 &mfc->mf6cc_mcastgrp.sin6_addr, parent); 1222 rcu_read_unlock(); 1223 if (!c) 1224 return -ENOENT; 1225 rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params); 1226 list_del_rcu(&c->_c.list); 1227 1228 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), 1229 FIB_EVENT_ENTRY_DEL, c, mrt->id); 1230 mr6_netlink_event(mrt, c, RTM_DELROUTE); 1231 mr_cache_put(&c->_c); 1232 return 0; 1233 } 1234 1235 static int ip6mr_device_event(struct notifier_block *this, 1236 unsigned long event, void *ptr) 1237 { 1238 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1239 struct net *net = dev_net(dev); 1240 struct mr_table *mrt; 1241 struct vif_device *v; 1242 int ct; 1243 1244 if (event != NETDEV_UNREGISTER) 1245 return NOTIFY_DONE; 1246 1247 ip6mr_for_each_table(mrt, net) { 1248 v = &mrt->vif_table[0]; 1249 for (ct = 0; ct < mrt->maxvif; ct++, v++) { 1250 if (rcu_access_pointer(v->dev) == dev) 1251 mif6_delete(mrt, ct, 1, NULL); 1252 } 1253 } 1254 1255 return NOTIFY_DONE; 1256 } 1257 1258 static unsigned int ip6mr_seq_read(struct net *net) 1259 { 1260 ASSERT_RTNL(); 1261 1262 return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net); 1263 } 1264 1265 static int ip6mr_dump(struct net *net, struct notifier_block *nb, 1266 struct netlink_ext_ack *extack) 1267 { 1268 return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump, 1269 ip6mr_mr_table_iter, extack); 1270 } 1271 1272 static struct notifier_block ip6_mr_notifier = { 1273 .notifier_call = ip6mr_device_event 1274 }; 1275 1276 static const struct fib_notifier_ops ip6mr_notifier_ops_template = { 1277 .family = RTNL_FAMILY_IP6MR, 1278 .fib_seq_read = ip6mr_seq_read, 1279 .fib_dump = ip6mr_dump, 1280 .owner = THIS_MODULE, 1281 }; 1282 1283 static int __net_init ip6mr_notifier_init(struct net *net) 1284 { 1285 struct fib_notifier_ops *ops; 1286 1287 net->ipv6.ipmr_seq = 0; 1288 1289 ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net); 1290 if (IS_ERR(ops)) 1291 return PTR_ERR(ops); 1292 1293 net->ipv6.ip6mr_notifier_ops = ops; 1294 1295 return 0; 1296 } 1297 1298 static void __net_exit ip6mr_notifier_exit(struct net *net) 1299 { 1300 fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops); 1301 net->ipv6.ip6mr_notifier_ops = NULL; 1302 } 1303 1304 /* Setup for IP multicast routing */ 1305 static int __net_init ip6mr_net_init(struct net *net) 1306 { 1307 int err; 1308 1309 err = ip6mr_notifier_init(net); 1310 if (err) 1311 return err; 1312 1313 err = ip6mr_rules_init(net); 1314 if (err < 0) 1315 goto ip6mr_rules_fail; 1316 1317 #ifdef CONFIG_PROC_FS 1318 err = -ENOMEM; 1319 if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops, 1320 sizeof(struct mr_vif_iter))) 1321 goto proc_vif_fail; 1322 if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops, 1323 sizeof(struct mr_mfc_iter))) 1324 goto proc_cache_fail; 1325 #endif 1326 1327 return 0; 1328 1329 #ifdef CONFIG_PROC_FS 1330 proc_cache_fail: 1331 remove_proc_entry("ip6_mr_vif", net->proc_net); 1332 proc_vif_fail: 1333 rtnl_lock(); 1334 ip6mr_rules_exit(net); 1335 rtnl_unlock(); 1336 #endif 1337 ip6mr_rules_fail: 1338 ip6mr_notifier_exit(net); 1339 return err; 1340 } 1341 1342 static void __net_exit ip6mr_net_exit(struct net *net) 1343 { 1344 #ifdef CONFIG_PROC_FS 1345 remove_proc_entry("ip6_mr_cache", net->proc_net); 1346 remove_proc_entry("ip6_mr_vif", net->proc_net); 1347 #endif 1348 ip6mr_notifier_exit(net); 1349 } 1350 1351 static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list) 1352 { 1353 struct net *net; 1354 1355 rtnl_lock(); 1356 list_for_each_entry(net, net_list, exit_list) 1357 ip6mr_rules_exit(net); 1358 rtnl_unlock(); 1359 } 1360 1361 static struct pernet_operations ip6mr_net_ops = { 1362 .init = ip6mr_net_init, 1363 .exit = ip6mr_net_exit, 1364 .exit_batch = ip6mr_net_exit_batch, 1365 }; 1366 1367 int __init ip6_mr_init(void) 1368 { 1369 int err; 1370 1371 mrt_cachep = kmem_cache_create("ip6_mrt_cache", 1372 sizeof(struct mfc6_cache), 1373 0, SLAB_HWCACHE_ALIGN, 1374 NULL); 1375 if (!mrt_cachep) 1376 return -ENOMEM; 1377 1378 err = register_pernet_subsys(&ip6mr_net_ops); 1379 if (err) 1380 goto reg_pernet_fail; 1381 1382 err = register_netdevice_notifier(&ip6_mr_notifier); 1383 if (err) 1384 goto reg_notif_fail; 1385 #ifdef CONFIG_IPV6_PIMSM_V2 1386 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) { 1387 pr_err("%s: can't add PIM protocol\n", __func__); 1388 err = -EAGAIN; 1389 goto add_proto_fail; 1390 } 1391 #endif 1392 err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE, 1393 NULL, ip6mr_rtm_dumproute, 0); 1394 if (err == 0) 1395 return 0; 1396 1397 #ifdef CONFIG_IPV6_PIMSM_V2 1398 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); 1399 add_proto_fail: 1400 unregister_netdevice_notifier(&ip6_mr_notifier); 1401 #endif 1402 reg_notif_fail: 1403 unregister_pernet_subsys(&ip6mr_net_ops); 1404 reg_pernet_fail: 1405 kmem_cache_destroy(mrt_cachep); 1406 return err; 1407 } 1408 1409 void ip6_mr_cleanup(void) 1410 { 1411 rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE); 1412 #ifdef CONFIG_IPV6_PIMSM_V2 1413 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); 1414 #endif 1415 unregister_netdevice_notifier(&ip6_mr_notifier); 1416 unregister_pernet_subsys(&ip6mr_net_ops); 1417 kmem_cache_destroy(mrt_cachep); 1418 } 1419 1420 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt, 1421 struct mf6cctl *mfc, int mrtsock, int parent) 1422 { 1423 unsigned char ttls[MAXMIFS]; 1424 struct mfc6_cache *uc, *c; 1425 struct mr_mfc *_uc; 1426 bool found; 1427 int i, err; 1428 1429 if (mfc->mf6cc_parent >= MAXMIFS) 1430 return -ENFILE; 1431 1432 memset(ttls, 255, MAXMIFS); 1433 for (i = 0; i < MAXMIFS; i++) { 1434 if (IF_ISSET(i, &mfc->mf6cc_ifset)) 1435 ttls[i] = 1; 1436 } 1437 1438 /* The entries are added/deleted only under RTNL */ 1439 rcu_read_lock(); 1440 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr, 1441 &mfc->mf6cc_mcastgrp.sin6_addr, parent); 1442 rcu_read_unlock(); 1443 if (c) { 1444 spin_lock(&mrt_lock); 1445 c->_c.mfc_parent = mfc->mf6cc_parent; 1446 ip6mr_update_thresholds(mrt, &c->_c, ttls); 1447 if (!mrtsock) 1448 c->_c.mfc_flags |= MFC_STATIC; 1449 spin_unlock(&mrt_lock); 1450 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, 1451 c, mrt->id); 1452 mr6_netlink_event(mrt, c, RTM_NEWROUTE); 1453 return 0; 1454 } 1455 1456 if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) && 1457 !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) 1458 return -EINVAL; 1459 1460 c = ip6mr_cache_alloc(); 1461 if (!c) 1462 return -ENOMEM; 1463 1464 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr; 1465 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr; 1466 c->_c.mfc_parent = mfc->mf6cc_parent; 1467 ip6mr_update_thresholds(mrt, &c->_c, ttls); 1468 if (!mrtsock) 1469 c->_c.mfc_flags |= MFC_STATIC; 1470 1471 err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode, 1472 ip6mr_rht_params); 1473 if (err) { 1474 pr_err("ip6mr: rhtable insert error %d\n", err); 1475 ip6mr_cache_free(c); 1476 return err; 1477 } 1478 list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list); 1479 1480 /* Check to see if we resolved a queued list. If so we 1481 * need to send on the frames and tidy up. 1482 */ 1483 found = false; 1484 spin_lock_bh(&mfc_unres_lock); 1485 list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) { 1486 uc = (struct mfc6_cache *)_uc; 1487 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && 1488 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { 1489 list_del(&_uc->list); 1490 atomic_dec(&mrt->cache_resolve_queue_len); 1491 found = true; 1492 break; 1493 } 1494 } 1495 if (list_empty(&mrt->mfc_unres_queue)) 1496 del_timer(&mrt->ipmr_expire_timer); 1497 spin_unlock_bh(&mfc_unres_lock); 1498 1499 if (found) { 1500 ip6mr_cache_resolve(net, mrt, uc, c); 1501 ip6mr_cache_free(uc); 1502 } 1503 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, 1504 c, mrt->id); 1505 mr6_netlink_event(mrt, c, RTM_NEWROUTE); 1506 return 0; 1507 } 1508 1509 /* 1510 * Close the multicast socket, and clear the vif tables etc 1511 */ 1512 1513 static void mroute_clean_tables(struct mr_table *mrt, int flags) 1514 { 1515 struct mr_mfc *c, *tmp; 1516 LIST_HEAD(list); 1517 int i; 1518 1519 /* Shut down all active vif entries */ 1520 if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) { 1521 for (i = 0; i < mrt->maxvif; i++) { 1522 if (((mrt->vif_table[i].flags & VIFF_STATIC) && 1523 !(flags & MRT6_FLUSH_MIFS_STATIC)) || 1524 (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS))) 1525 continue; 1526 mif6_delete(mrt, i, 0, &list); 1527 } 1528 unregister_netdevice_many(&list); 1529 } 1530 1531 /* Wipe the cache */ 1532 if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) { 1533 list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { 1534 if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) || 1535 (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC))) 1536 continue; 1537 rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params); 1538 list_del_rcu(&c->list); 1539 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), 1540 FIB_EVENT_ENTRY_DEL, 1541 (struct mfc6_cache *)c, mrt->id); 1542 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); 1543 mr_cache_put(c); 1544 } 1545 } 1546 1547 if (flags & MRT6_FLUSH_MFC) { 1548 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { 1549 spin_lock_bh(&mfc_unres_lock); 1550 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { 1551 list_del(&c->list); 1552 mr6_netlink_event(mrt, (struct mfc6_cache *)c, 1553 RTM_DELROUTE); 1554 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); 1555 } 1556 spin_unlock_bh(&mfc_unres_lock); 1557 } 1558 } 1559 } 1560 1561 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) 1562 { 1563 int err = 0; 1564 struct net *net = sock_net(sk); 1565 1566 rtnl_lock(); 1567 spin_lock(&mrt_lock); 1568 if (rtnl_dereference(mrt->mroute_sk)) { 1569 err = -EADDRINUSE; 1570 } else { 1571 rcu_assign_pointer(mrt->mroute_sk, sk); 1572 sock_set_flag(sk, SOCK_RCU_FREE); 1573 atomic_inc(&net->ipv6.devconf_all->mc_forwarding); 1574 } 1575 spin_unlock(&mrt_lock); 1576 1577 if (!err) 1578 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, 1579 NETCONFA_MC_FORWARDING, 1580 NETCONFA_IFINDEX_ALL, 1581 net->ipv6.devconf_all); 1582 rtnl_unlock(); 1583 1584 return err; 1585 } 1586 1587 int ip6mr_sk_done(struct sock *sk) 1588 { 1589 struct net *net = sock_net(sk); 1590 struct ipv6_devconf *devconf; 1591 struct mr_table *mrt; 1592 int err = -EACCES; 1593 1594 if (sk->sk_type != SOCK_RAW || 1595 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1596 return err; 1597 1598 devconf = net->ipv6.devconf_all; 1599 if (!devconf || !atomic_read(&devconf->mc_forwarding)) 1600 return err; 1601 1602 rtnl_lock(); 1603 ip6mr_for_each_table(mrt, net) { 1604 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1605 spin_lock(&mrt_lock); 1606 RCU_INIT_POINTER(mrt->mroute_sk, NULL); 1607 /* Note that mroute_sk had SOCK_RCU_FREE set, 1608 * so the RCU grace period before sk freeing 1609 * is guaranteed by sk_destruct() 1610 */ 1611 atomic_dec(&devconf->mc_forwarding); 1612 spin_unlock(&mrt_lock); 1613 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, 1614 NETCONFA_MC_FORWARDING, 1615 NETCONFA_IFINDEX_ALL, 1616 net->ipv6.devconf_all); 1617 1618 mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC); 1619 err = 0; 1620 break; 1621 } 1622 } 1623 rtnl_unlock(); 1624 1625 return err; 1626 } 1627 1628 bool mroute6_is_socket(struct net *net, struct sk_buff *skb) 1629 { 1630 struct mr_table *mrt; 1631 struct flowi6 fl6 = { 1632 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, 1633 .flowi6_oif = skb->dev->ifindex, 1634 .flowi6_mark = skb->mark, 1635 }; 1636 1637 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 1638 return NULL; 1639 1640 return rcu_access_pointer(mrt->mroute_sk); 1641 } 1642 EXPORT_SYMBOL(mroute6_is_socket); 1643 1644 /* 1645 * Socket options and virtual interface manipulation. The whole 1646 * virtual interface system is a complete heap, but unfortunately 1647 * that's how BSD mrouted happens to think. Maybe one day with a proper 1648 * MOSPF/PIM router set up we can clean this up. 1649 */ 1650 1651 int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, 1652 unsigned int optlen) 1653 { 1654 int ret, parent = 0; 1655 struct mif6ctl vif; 1656 struct mf6cctl mfc; 1657 mifi_t mifi; 1658 struct net *net = sock_net(sk); 1659 struct mr_table *mrt; 1660 1661 if (sk->sk_type != SOCK_RAW || 1662 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1663 return -EOPNOTSUPP; 1664 1665 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1666 if (!mrt) 1667 return -ENOENT; 1668 1669 if (optname != MRT6_INIT) { 1670 if (sk != rcu_access_pointer(mrt->mroute_sk) && 1671 !ns_capable(net->user_ns, CAP_NET_ADMIN)) 1672 return -EACCES; 1673 } 1674 1675 switch (optname) { 1676 case MRT6_INIT: 1677 if (optlen < sizeof(int)) 1678 return -EINVAL; 1679 1680 return ip6mr_sk_init(mrt, sk); 1681 1682 case MRT6_DONE: 1683 return ip6mr_sk_done(sk); 1684 1685 case MRT6_ADD_MIF: 1686 if (optlen < sizeof(vif)) 1687 return -EINVAL; 1688 if (copy_from_sockptr(&vif, optval, sizeof(vif))) 1689 return -EFAULT; 1690 if (vif.mif6c_mifi >= MAXMIFS) 1691 return -ENFILE; 1692 rtnl_lock(); 1693 ret = mif6_add(net, mrt, &vif, 1694 sk == rtnl_dereference(mrt->mroute_sk)); 1695 rtnl_unlock(); 1696 return ret; 1697 1698 case MRT6_DEL_MIF: 1699 if (optlen < sizeof(mifi_t)) 1700 return -EINVAL; 1701 if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t))) 1702 return -EFAULT; 1703 rtnl_lock(); 1704 ret = mif6_delete(mrt, mifi, 0, NULL); 1705 rtnl_unlock(); 1706 return ret; 1707 1708 /* 1709 * Manipulate the forwarding caches. These live 1710 * in a sort of kernel/user symbiosis. 1711 */ 1712 case MRT6_ADD_MFC: 1713 case MRT6_DEL_MFC: 1714 parent = -1; 1715 fallthrough; 1716 case MRT6_ADD_MFC_PROXY: 1717 case MRT6_DEL_MFC_PROXY: 1718 if (optlen < sizeof(mfc)) 1719 return -EINVAL; 1720 if (copy_from_sockptr(&mfc, optval, sizeof(mfc))) 1721 return -EFAULT; 1722 if (parent == 0) 1723 parent = mfc.mf6cc_parent; 1724 rtnl_lock(); 1725 if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY) 1726 ret = ip6mr_mfc_delete(mrt, &mfc, parent); 1727 else 1728 ret = ip6mr_mfc_add(net, mrt, &mfc, 1729 sk == 1730 rtnl_dereference(mrt->mroute_sk), 1731 parent); 1732 rtnl_unlock(); 1733 return ret; 1734 1735 case MRT6_FLUSH: 1736 { 1737 int flags; 1738 1739 if (optlen != sizeof(flags)) 1740 return -EINVAL; 1741 if (copy_from_sockptr(&flags, optval, sizeof(flags))) 1742 return -EFAULT; 1743 rtnl_lock(); 1744 mroute_clean_tables(mrt, flags); 1745 rtnl_unlock(); 1746 return 0; 1747 } 1748 1749 /* 1750 * Control PIM assert (to activate pim will activate assert) 1751 */ 1752 case MRT6_ASSERT: 1753 { 1754 int v; 1755 1756 if (optlen != sizeof(v)) 1757 return -EINVAL; 1758 if (copy_from_sockptr(&v, optval, sizeof(v))) 1759 return -EFAULT; 1760 mrt->mroute_do_assert = v; 1761 return 0; 1762 } 1763 1764 #ifdef CONFIG_IPV6_PIMSM_V2 1765 case MRT6_PIM: 1766 { 1767 bool do_wrmifwhole; 1768 int v; 1769 1770 if (optlen != sizeof(v)) 1771 return -EINVAL; 1772 if (copy_from_sockptr(&v, optval, sizeof(v))) 1773 return -EFAULT; 1774 1775 do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE); 1776 v = !!v; 1777 rtnl_lock(); 1778 ret = 0; 1779 if (v != mrt->mroute_do_pim) { 1780 mrt->mroute_do_pim = v; 1781 mrt->mroute_do_assert = v; 1782 mrt->mroute_do_wrvifwhole = do_wrmifwhole; 1783 } 1784 rtnl_unlock(); 1785 return ret; 1786 } 1787 1788 #endif 1789 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 1790 case MRT6_TABLE: 1791 { 1792 u32 v; 1793 1794 if (optlen != sizeof(u32)) 1795 return -EINVAL; 1796 if (copy_from_sockptr(&v, optval, sizeof(v))) 1797 return -EFAULT; 1798 /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */ 1799 if (v != RT_TABLE_DEFAULT && v >= 100000000) 1800 return -EINVAL; 1801 if (sk == rcu_access_pointer(mrt->mroute_sk)) 1802 return -EBUSY; 1803 1804 rtnl_lock(); 1805 ret = 0; 1806 mrt = ip6mr_new_table(net, v); 1807 if (IS_ERR(mrt)) 1808 ret = PTR_ERR(mrt); 1809 else 1810 raw6_sk(sk)->ip6mr_table = v; 1811 rtnl_unlock(); 1812 return ret; 1813 } 1814 #endif 1815 /* 1816 * Spurious command, or MRT6_VERSION which you cannot 1817 * set. 1818 */ 1819 default: 1820 return -ENOPROTOOPT; 1821 } 1822 } 1823 1824 /* 1825 * Getsock opt support for the multicast routing system. 1826 */ 1827 1828 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, 1829 int __user *optlen) 1830 { 1831 int olr; 1832 int val; 1833 struct net *net = sock_net(sk); 1834 struct mr_table *mrt; 1835 1836 if (sk->sk_type != SOCK_RAW || 1837 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1838 return -EOPNOTSUPP; 1839 1840 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1841 if (!mrt) 1842 return -ENOENT; 1843 1844 switch (optname) { 1845 case MRT6_VERSION: 1846 val = 0x0305; 1847 break; 1848 #ifdef CONFIG_IPV6_PIMSM_V2 1849 case MRT6_PIM: 1850 val = mrt->mroute_do_pim; 1851 break; 1852 #endif 1853 case MRT6_ASSERT: 1854 val = mrt->mroute_do_assert; 1855 break; 1856 default: 1857 return -ENOPROTOOPT; 1858 } 1859 1860 if (get_user(olr, optlen)) 1861 return -EFAULT; 1862 1863 olr = min_t(int, olr, sizeof(int)); 1864 if (olr < 0) 1865 return -EINVAL; 1866 1867 if (put_user(olr, optlen)) 1868 return -EFAULT; 1869 if (copy_to_user(optval, &val, olr)) 1870 return -EFAULT; 1871 return 0; 1872 } 1873 1874 /* 1875 * The IP multicast ioctl support routines. 1876 */ 1877 1878 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) 1879 { 1880 struct sioc_sg_req6 sr; 1881 struct sioc_mif_req6 vr; 1882 struct vif_device *vif; 1883 struct mfc6_cache *c; 1884 struct net *net = sock_net(sk); 1885 struct mr_table *mrt; 1886 1887 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1888 if (!mrt) 1889 return -ENOENT; 1890 1891 switch (cmd) { 1892 case SIOCGETMIFCNT_IN6: 1893 if (copy_from_user(&vr, arg, sizeof(vr))) 1894 return -EFAULT; 1895 if (vr.mifi >= mrt->maxvif) 1896 return -EINVAL; 1897 vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif); 1898 rcu_read_lock(); 1899 vif = &mrt->vif_table[vr.mifi]; 1900 if (VIF_EXISTS(mrt, vr.mifi)) { 1901 vr.icount = READ_ONCE(vif->pkt_in); 1902 vr.ocount = READ_ONCE(vif->pkt_out); 1903 vr.ibytes = READ_ONCE(vif->bytes_in); 1904 vr.obytes = READ_ONCE(vif->bytes_out); 1905 rcu_read_unlock(); 1906 1907 if (copy_to_user(arg, &vr, sizeof(vr))) 1908 return -EFAULT; 1909 return 0; 1910 } 1911 rcu_read_unlock(); 1912 return -EADDRNOTAVAIL; 1913 case SIOCGETSGCNT_IN6: 1914 if (copy_from_user(&sr, arg, sizeof(sr))) 1915 return -EFAULT; 1916 1917 rcu_read_lock(); 1918 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); 1919 if (c) { 1920 sr.pktcnt = c->_c.mfc_un.res.pkt; 1921 sr.bytecnt = c->_c.mfc_un.res.bytes; 1922 sr.wrong_if = c->_c.mfc_un.res.wrong_if; 1923 rcu_read_unlock(); 1924 1925 if (copy_to_user(arg, &sr, sizeof(sr))) 1926 return -EFAULT; 1927 return 0; 1928 } 1929 rcu_read_unlock(); 1930 return -EADDRNOTAVAIL; 1931 default: 1932 return -ENOIOCTLCMD; 1933 } 1934 } 1935 1936 #ifdef CONFIG_COMPAT 1937 struct compat_sioc_sg_req6 { 1938 struct sockaddr_in6 src; 1939 struct sockaddr_in6 grp; 1940 compat_ulong_t pktcnt; 1941 compat_ulong_t bytecnt; 1942 compat_ulong_t wrong_if; 1943 }; 1944 1945 struct compat_sioc_mif_req6 { 1946 mifi_t mifi; 1947 compat_ulong_t icount; 1948 compat_ulong_t ocount; 1949 compat_ulong_t ibytes; 1950 compat_ulong_t obytes; 1951 }; 1952 1953 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) 1954 { 1955 struct compat_sioc_sg_req6 sr; 1956 struct compat_sioc_mif_req6 vr; 1957 struct vif_device *vif; 1958 struct mfc6_cache *c; 1959 struct net *net = sock_net(sk); 1960 struct mr_table *mrt; 1961 1962 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1963 if (!mrt) 1964 return -ENOENT; 1965 1966 switch (cmd) { 1967 case SIOCGETMIFCNT_IN6: 1968 if (copy_from_user(&vr, arg, sizeof(vr))) 1969 return -EFAULT; 1970 if (vr.mifi >= mrt->maxvif) 1971 return -EINVAL; 1972 vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif); 1973 rcu_read_lock(); 1974 vif = &mrt->vif_table[vr.mifi]; 1975 if (VIF_EXISTS(mrt, vr.mifi)) { 1976 vr.icount = READ_ONCE(vif->pkt_in); 1977 vr.ocount = READ_ONCE(vif->pkt_out); 1978 vr.ibytes = READ_ONCE(vif->bytes_in); 1979 vr.obytes = READ_ONCE(vif->bytes_out); 1980 rcu_read_unlock(); 1981 1982 if (copy_to_user(arg, &vr, sizeof(vr))) 1983 return -EFAULT; 1984 return 0; 1985 } 1986 rcu_read_unlock(); 1987 return -EADDRNOTAVAIL; 1988 case SIOCGETSGCNT_IN6: 1989 if (copy_from_user(&sr, arg, sizeof(sr))) 1990 return -EFAULT; 1991 1992 rcu_read_lock(); 1993 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); 1994 if (c) { 1995 sr.pktcnt = c->_c.mfc_un.res.pkt; 1996 sr.bytecnt = c->_c.mfc_un.res.bytes; 1997 sr.wrong_if = c->_c.mfc_un.res.wrong_if; 1998 rcu_read_unlock(); 1999 2000 if (copy_to_user(arg, &sr, sizeof(sr))) 2001 return -EFAULT; 2002 return 0; 2003 } 2004 rcu_read_unlock(); 2005 return -EADDRNOTAVAIL; 2006 default: 2007 return -ENOIOCTLCMD; 2008 } 2009 } 2010 #endif 2011 2012 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 2013 { 2014 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 2015 IPSTATS_MIB_OUTFORWDATAGRAMS); 2016 IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)), 2017 IPSTATS_MIB_OUTOCTETS, skb->len); 2018 return dst_output(net, sk, skb); 2019 } 2020 2021 /* 2022 * Processing handlers for ip6mr_forward 2023 */ 2024 2025 static int ip6mr_forward2(struct net *net, struct mr_table *mrt, 2026 struct sk_buff *skb, int vifi) 2027 { 2028 struct vif_device *vif = &mrt->vif_table[vifi]; 2029 struct net_device *vif_dev; 2030 struct ipv6hdr *ipv6h; 2031 struct dst_entry *dst; 2032 struct flowi6 fl6; 2033 2034 vif_dev = vif_dev_read(vif); 2035 if (!vif_dev) 2036 goto out_free; 2037 2038 #ifdef CONFIG_IPV6_PIMSM_V2 2039 if (vif->flags & MIFF_REGISTER) { 2040 WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); 2041 WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); 2042 vif_dev->stats.tx_bytes += skb->len; 2043 vif_dev->stats.tx_packets++; 2044 ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT); 2045 goto out_free; 2046 } 2047 #endif 2048 2049 ipv6h = ipv6_hdr(skb); 2050 2051 fl6 = (struct flowi6) { 2052 .flowi6_oif = vif->link, 2053 .daddr = ipv6h->daddr, 2054 }; 2055 2056 dst = ip6_route_output(net, NULL, &fl6); 2057 if (dst->error) { 2058 dst_release(dst); 2059 goto out_free; 2060 } 2061 2062 skb_dst_drop(skb); 2063 skb_dst_set(skb, dst); 2064 2065 /* 2066 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally 2067 * not only before forwarding, but after forwarding on all output 2068 * interfaces. It is clear, if mrouter runs a multicasting 2069 * program, it should receive packets not depending to what interface 2070 * program is joined. 2071 * If we will not make it, the program will have to join on all 2072 * interfaces. On the other hand, multihoming host (or router, but 2073 * not mrouter) cannot join to more than one interface - it will 2074 * result in receiving multiple packets. 2075 */ 2076 skb->dev = vif_dev; 2077 WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); 2078 WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); 2079 2080 /* We are about to write */ 2081 /* XXX: extension headers? */ 2082 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev))) 2083 goto out_free; 2084 2085 ipv6h = ipv6_hdr(skb); 2086 ipv6h->hop_limit--; 2087 2088 IP6CB(skb)->flags |= IP6SKB_FORWARDED; 2089 2090 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, 2091 net, NULL, skb, skb->dev, vif_dev, 2092 ip6mr_forward2_finish); 2093 2094 out_free: 2095 kfree_skb(skb); 2096 return 0; 2097 } 2098 2099 /* Called with rcu_read_lock() */ 2100 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev) 2101 { 2102 int ct; 2103 2104 /* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */ 2105 for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) { 2106 if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev) 2107 break; 2108 } 2109 return ct; 2110 } 2111 2112 /* Called under rcu_read_lock() */ 2113 static void ip6_mr_forward(struct net *net, struct mr_table *mrt, 2114 struct net_device *dev, struct sk_buff *skb, 2115 struct mfc6_cache *c) 2116 { 2117 int psend = -1; 2118 int vif, ct; 2119 int true_vifi = ip6mr_find_vif(mrt, dev); 2120 2121 vif = c->_c.mfc_parent; 2122 c->_c.mfc_un.res.pkt++; 2123 c->_c.mfc_un.res.bytes += skb->len; 2124 c->_c.mfc_un.res.lastuse = jiffies; 2125 2126 if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) { 2127 struct mfc6_cache *cache_proxy; 2128 2129 /* For an (*,G) entry, we only check that the incoming 2130 * interface is part of the static tree. 2131 */ 2132 cache_proxy = mr_mfc_find_any_parent(mrt, vif); 2133 if (cache_proxy && 2134 cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) { 2135 rcu_read_unlock(); 2136 goto forward; 2137 } 2138 } 2139 2140 /* 2141 * Wrong interface: drop packet and (maybe) send PIM assert. 2142 */ 2143 if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) { 2144 c->_c.mfc_un.res.wrong_if++; 2145 2146 if (true_vifi >= 0 && mrt->mroute_do_assert && 2147 /* pimsm uses asserts, when switching from RPT to SPT, 2148 so that we cannot check that packet arrived on an oif. 2149 It is bad, but otherwise we would need to move pretty 2150 large chunk of pimd to kernel. Ough... --ANK 2151 */ 2152 (mrt->mroute_do_pim || 2153 c->_c.mfc_un.res.ttls[true_vifi] < 255) && 2154 time_after(jiffies, 2155 c->_c.mfc_un.res.last_assert + 2156 MFC_ASSERT_THRESH)) { 2157 c->_c.mfc_un.res.last_assert = jiffies; 2158 ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF); 2159 if (mrt->mroute_do_wrvifwhole) 2160 ip6mr_cache_report(mrt, skb, true_vifi, 2161 MRT6MSG_WRMIFWHOLE); 2162 } 2163 goto dont_forward; 2164 } 2165 2166 forward: 2167 WRITE_ONCE(mrt->vif_table[vif].pkt_in, 2168 mrt->vif_table[vif].pkt_in + 1); 2169 WRITE_ONCE(mrt->vif_table[vif].bytes_in, 2170 mrt->vif_table[vif].bytes_in + skb->len); 2171 2172 /* 2173 * Forward the frame 2174 */ 2175 if (ipv6_addr_any(&c->mf6c_origin) && 2176 ipv6_addr_any(&c->mf6c_mcastgrp)) { 2177 if (true_vifi >= 0 && 2178 true_vifi != c->_c.mfc_parent && 2179 ipv6_hdr(skb)->hop_limit > 2180 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { 2181 /* It's an (*,*) entry and the packet is not coming from 2182 * the upstream: forward the packet to the upstream 2183 * only. 2184 */ 2185 psend = c->_c.mfc_parent; 2186 goto last_forward; 2187 } 2188 goto dont_forward; 2189 } 2190 for (ct = c->_c.mfc_un.res.maxvif - 1; 2191 ct >= c->_c.mfc_un.res.minvif; ct--) { 2192 /* For (*,G) entry, don't forward to the incoming interface */ 2193 if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) && 2194 ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) { 2195 if (psend != -1) { 2196 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2197 if (skb2) 2198 ip6mr_forward2(net, mrt, skb2, psend); 2199 } 2200 psend = ct; 2201 } 2202 } 2203 last_forward: 2204 if (psend != -1) { 2205 ip6mr_forward2(net, mrt, skb, psend); 2206 return; 2207 } 2208 2209 dont_forward: 2210 kfree_skb(skb); 2211 } 2212 2213 2214 /* 2215 * Multicast packets for forwarding arrive here 2216 */ 2217 2218 int ip6_mr_input(struct sk_buff *skb) 2219 { 2220 struct mfc6_cache *cache; 2221 struct net *net = dev_net(skb->dev); 2222 struct mr_table *mrt; 2223 struct flowi6 fl6 = { 2224 .flowi6_iif = skb->dev->ifindex, 2225 .flowi6_mark = skb->mark, 2226 }; 2227 int err; 2228 struct net_device *dev; 2229 2230 /* skb->dev passed in is the master dev for vrfs. 2231 * Get the proper interface that does have a vif associated with it. 2232 */ 2233 dev = skb->dev; 2234 if (netif_is_l3_master(skb->dev)) { 2235 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); 2236 if (!dev) { 2237 kfree_skb(skb); 2238 return -ENODEV; 2239 } 2240 } 2241 2242 err = ip6mr_fib_lookup(net, &fl6, &mrt); 2243 if (err < 0) { 2244 kfree_skb(skb); 2245 return err; 2246 } 2247 2248 cache = ip6mr_cache_find(mrt, 2249 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); 2250 if (!cache) { 2251 int vif = ip6mr_find_vif(mrt, dev); 2252 2253 if (vif >= 0) 2254 cache = ip6mr_cache_find_any(mrt, 2255 &ipv6_hdr(skb)->daddr, 2256 vif); 2257 } 2258 2259 /* 2260 * No usable cache entry 2261 */ 2262 if (!cache) { 2263 int vif; 2264 2265 vif = ip6mr_find_vif(mrt, dev); 2266 if (vif >= 0) { 2267 int err = ip6mr_cache_unresolved(mrt, vif, skb, dev); 2268 2269 return err; 2270 } 2271 kfree_skb(skb); 2272 return -ENODEV; 2273 } 2274 2275 ip6_mr_forward(net, mrt, dev, skb, cache); 2276 2277 return 0; 2278 } 2279 2280 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, 2281 u32 portid) 2282 { 2283 int err; 2284 struct mr_table *mrt; 2285 struct mfc6_cache *cache; 2286 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); 2287 2288 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); 2289 if (!mrt) 2290 return -ENOENT; 2291 2292 rcu_read_lock(); 2293 cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); 2294 if (!cache && skb->dev) { 2295 int vif = ip6mr_find_vif(mrt, skb->dev); 2296 2297 if (vif >= 0) 2298 cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr, 2299 vif); 2300 } 2301 2302 if (!cache) { 2303 struct sk_buff *skb2; 2304 struct ipv6hdr *iph; 2305 struct net_device *dev; 2306 int vif; 2307 2308 dev = skb->dev; 2309 if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) { 2310 rcu_read_unlock(); 2311 return -ENODEV; 2312 } 2313 2314 /* really correct? */ 2315 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); 2316 if (!skb2) { 2317 rcu_read_unlock(); 2318 return -ENOMEM; 2319 } 2320 2321 NETLINK_CB(skb2).portid = portid; 2322 skb_reset_transport_header(skb2); 2323 2324 skb_put(skb2, sizeof(struct ipv6hdr)); 2325 skb_reset_network_header(skb2); 2326 2327 iph = ipv6_hdr(skb2); 2328 iph->version = 0; 2329 iph->priority = 0; 2330 iph->flow_lbl[0] = 0; 2331 iph->flow_lbl[1] = 0; 2332 iph->flow_lbl[2] = 0; 2333 iph->payload_len = 0; 2334 iph->nexthdr = IPPROTO_NONE; 2335 iph->hop_limit = 0; 2336 iph->saddr = rt->rt6i_src.addr; 2337 iph->daddr = rt->rt6i_dst.addr; 2338 2339 err = ip6mr_cache_unresolved(mrt, vif, skb2, dev); 2340 rcu_read_unlock(); 2341 2342 return err; 2343 } 2344 2345 err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); 2346 rcu_read_unlock(); 2347 return err; 2348 } 2349 2350 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2351 u32 portid, u32 seq, struct mfc6_cache *c, int cmd, 2352 int flags) 2353 { 2354 struct nlmsghdr *nlh; 2355 struct rtmsg *rtm; 2356 int err; 2357 2358 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); 2359 if (!nlh) 2360 return -EMSGSIZE; 2361 2362 rtm = nlmsg_data(nlh); 2363 rtm->rtm_family = RTNL_FAMILY_IP6MR; 2364 rtm->rtm_dst_len = 128; 2365 rtm->rtm_src_len = 128; 2366 rtm->rtm_tos = 0; 2367 rtm->rtm_table = mrt->id; 2368 if (nla_put_u32(skb, RTA_TABLE, mrt->id)) 2369 goto nla_put_failure; 2370 rtm->rtm_type = RTN_MULTICAST; 2371 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2372 if (c->_c.mfc_flags & MFC_STATIC) 2373 rtm->rtm_protocol = RTPROT_STATIC; 2374 else 2375 rtm->rtm_protocol = RTPROT_MROUTED; 2376 rtm->rtm_flags = 0; 2377 2378 if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) || 2379 nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp)) 2380 goto nla_put_failure; 2381 err = mr_fill_mroute(mrt, skb, &c->_c, rtm); 2382 /* do not break the dump if cache is unresolved */ 2383 if (err < 0 && err != -ENOENT) 2384 goto nla_put_failure; 2385 2386 nlmsg_end(skb, nlh); 2387 return 0; 2388 2389 nla_put_failure: 2390 nlmsg_cancel(skb, nlh); 2391 return -EMSGSIZE; 2392 } 2393 2394 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2395 u32 portid, u32 seq, struct mr_mfc *c, 2396 int cmd, int flags) 2397 { 2398 return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c, 2399 cmd, flags); 2400 } 2401 2402 static int mr6_msgsize(bool unresolved, int maxvif) 2403 { 2404 size_t len = 2405 NLMSG_ALIGN(sizeof(struct rtmsg)) 2406 + nla_total_size(4) /* RTA_TABLE */ 2407 + nla_total_size(sizeof(struct in6_addr)) /* RTA_SRC */ 2408 + nla_total_size(sizeof(struct in6_addr)) /* RTA_DST */ 2409 ; 2410 2411 if (!unresolved) 2412 len = len 2413 + nla_total_size(4) /* RTA_IIF */ 2414 + nla_total_size(0) /* RTA_MULTIPATH */ 2415 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) 2416 /* RTA_MFC_STATS */ 2417 + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) 2418 ; 2419 2420 return len; 2421 } 2422 2423 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, 2424 int cmd) 2425 { 2426 struct net *net = read_pnet(&mrt->net); 2427 struct sk_buff *skb; 2428 int err = -ENOBUFS; 2429 2430 skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif), 2431 GFP_ATOMIC); 2432 if (!skb) 2433 goto errout; 2434 2435 err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); 2436 if (err < 0) 2437 goto errout; 2438 2439 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC); 2440 return; 2441 2442 errout: 2443 kfree_skb(skb); 2444 if (err < 0) 2445 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err); 2446 } 2447 2448 static size_t mrt6msg_netlink_msgsize(size_t payloadlen) 2449 { 2450 size_t len = 2451 NLMSG_ALIGN(sizeof(struct rtgenmsg)) 2452 + nla_total_size(1) /* IP6MRA_CREPORT_MSGTYPE */ 2453 + nla_total_size(4) /* IP6MRA_CREPORT_MIF_ID */ 2454 /* IP6MRA_CREPORT_SRC_ADDR */ 2455 + nla_total_size(sizeof(struct in6_addr)) 2456 /* IP6MRA_CREPORT_DST_ADDR */ 2457 + nla_total_size(sizeof(struct in6_addr)) 2458 /* IP6MRA_CREPORT_PKT */ 2459 + nla_total_size(payloadlen) 2460 ; 2461 2462 return len; 2463 } 2464 2465 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt) 2466 { 2467 struct net *net = read_pnet(&mrt->net); 2468 struct nlmsghdr *nlh; 2469 struct rtgenmsg *rtgenm; 2470 struct mrt6msg *msg; 2471 struct sk_buff *skb; 2472 struct nlattr *nla; 2473 int payloadlen; 2474 2475 payloadlen = pkt->len - sizeof(struct mrt6msg); 2476 msg = (struct mrt6msg *)skb_transport_header(pkt); 2477 2478 skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC); 2479 if (!skb) 2480 goto errout; 2481 2482 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, 2483 sizeof(struct rtgenmsg), 0); 2484 if (!nlh) 2485 goto errout; 2486 rtgenm = nlmsg_data(nlh); 2487 rtgenm->rtgen_family = RTNL_FAMILY_IP6MR; 2488 if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) || 2489 nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) || 2490 nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR, 2491 &msg->im6_src) || 2492 nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR, 2493 &msg->im6_dst)) 2494 goto nla_put_failure; 2495 2496 nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen); 2497 if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg), 2498 nla_data(nla), payloadlen)) 2499 goto nla_put_failure; 2500 2501 nlmsg_end(skb, nlh); 2502 2503 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC); 2504 return; 2505 2506 nla_put_failure: 2507 nlmsg_cancel(skb, nlh); 2508 errout: 2509 kfree_skb(skb); 2510 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS); 2511 } 2512 2513 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2514 { 2515 const struct nlmsghdr *nlh = cb->nlh; 2516 struct fib_dump_filter filter = {}; 2517 int err; 2518 2519 if (cb->strict_check) { 2520 err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh, 2521 &filter, cb); 2522 if (err < 0) 2523 return err; 2524 } 2525 2526 if (filter.table_id) { 2527 struct mr_table *mrt; 2528 2529 mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id); 2530 if (!mrt) { 2531 if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR) 2532 return skb->len; 2533 2534 NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist"); 2535 return -ENOENT; 2536 } 2537 err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute, 2538 &mfc_unres_lock, &filter); 2539 return skb->len ? : err; 2540 } 2541 2542 return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter, 2543 _ip6mr_fill_mroute, &mfc_unres_lock, &filter); 2544 } 2545