/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/compat.h>
#include <linux/rhashtable.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
#include <linux/export.h>
#include <net/ip6_checksum.h>
#include <linux/netconf.h>

struct ip6mr_rule {
	struct fib_rule		common;
};

struct ip6mr_result {
	struct mr_table	*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */
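/* Slab cache for struct mfc6_cache entries; created in ip6_mr_init()
 * and used by ip6mr_cache_alloc()/ip6mr_cache_alloc_unres() below.
 */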
static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr_table *mrt);

static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct net_device *dev, struct sk_buff *skb,
			   struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(struct timer_list *t);

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)

static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	struct mr_table *ret;

	if (!mrt)
		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
				     struct mr_table, list);
	else
		ret = list_entry_rcu(mrt->list.next,
				     struct mr_table, list);

	if (&ret->list == &net->ipv6.mr6_tables)
		return NULL;
	return ret;
}

static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	/* update flow if oif or iif point to device enslaved to l3mdev */
	l3mdev_update_flow(net, flowi6_to_flowi(flp6));

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	arg->table = fib_rule_get_table(rule, arg);

	mrt = ip6mr_get_table(rule->fr_net, arg->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb,
				struct netlink_ext_ack *extack)
{
	return 0;
}

static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos = 0;
	return 0;
}
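/* ip6mr policy rules carry no selectors beyond the generic fib rule
 * ones, so matching always succeeds and configure/compare/fill are
 * essentially no-ops.
 */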
static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (IS_ERR(mrt)) {
		err = PTR_ERR(mrt);
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	ip6mr_free_table(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
	rtnl_unlock();
}

static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
}

static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
}

bool ip6mr_rule_default(const struct fib_rule *rule)
{
	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
}
EXPORT_SYMBOL(ip6mr_rule_default);
#else
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	if (!mrt)
		return net->ipv6.mrt6;
	return NULL;
}

static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}

static int __net_init ip6mr_rules_init(struct net *net)
{
	struct mr_table *mrt;

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (IS_ERR(mrt))
		return PTR_ERR(mrt);
	net->ipv6.mrt6 = mrt;
	return 0;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}

static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
	return 0;
}

static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	return 0;
}
#endif

static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
			  const void *ptr)
{
	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
	struct mfc6_cache *c = (struct mfc6_cache *)ptr;

	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
}
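/* Resolved (S,G) entries live in an rhltable keyed on (origin, group).
 * Note the rhashtable convention: obj_cmpfn returns 0 on a match.
 */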
static const struct rhashtable_params ip6mr_rht_params = {
	.head_offset = offsetof(struct mr_mfc, mnode),
	.key_offset = offsetof(struct mfc6_cache, cmparg),
	.key_len = sizeof(struct mfc6_cache_cmp_arg),
	.nelem_hint = 3,
	.locks_mul = 1,
	.obj_cmpfn = ip6mr_hash_cmp,
	.automatic_shrinking = true,
};

static void ip6mr_new_table_set(struct mr_table *mrt,
				struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
}

static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
	.mf6c_origin = IN6ADDR_ANY_INIT,
	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};

static struct mr_table_ops ip6mr_mr_table_ops = {
	.rht_params = &ip6mr_rht_params,
	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
};

static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, id);
	if (mrt)
		return mrt;

	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
			      ipmr_expire_process, ip6mr_new_table_set);
}

static void ip6mr_free_table(struct mr_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, true);
	rhltable_destroy(&mrt->mfc_hash);
	kfree(mrt);
}

#ifdef CONFIG_PROC_FS
/* The /proc interfaces to multicast routing
 * /proc/ip6_mr_cache /proc/ip6_mr_vif
 */

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct mr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return mr_vif_seq_start(seq, pos);
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}
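/* One row per mif: index, device name, bytes/packets in, bytes/packets
 * out and the mif flags, matching the header line printed for
 * SEQ_START_TOKEN.
 */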
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct mr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct mr_mfc_iter *it = seq->private;
		struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->_c.mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->_c.mfc_un.res.pkt,
				   mfc->_c.mfc_un.res.bytes,
				   mfc->_c.mfc_un.res.wrong_if);
			for (n = mfc->_c.mfc_un.res.minvif;
			     n < mfc->_c.mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->_c.mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d", n,
						   mfc->_c.mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = mr_mfc_seq_next,
	.stop  = mr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
#endif

#ifdef CONFIG_IPV6_PIMSM_V2
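/* PIM register decapsulation: validate the PIM REGISTER header and the
 * encapsulated IPv6 multicast packet, then feed the inner packet back
 * into the stack through the pim6reg virtual device.
 */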
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr *encap;
	struct net_device *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (!reg_dev)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
drop:
	kfree_skb(skb);
	return 0;
}

static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->needs_free_netdev	= true;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT6_TABLE_DFLT)
		sprintf(name, "pim6reg");
	else
		sprintf(name, "pim6reg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}
#endif

static int call_ip6mr_vif_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct vif_device *vif,
					  mifi_t vif_index, u32 tb_id)
{
	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     vif, vif_index, tb_id,
				     &net->ipv6.ipmr_seq);
}

static int call_ip6mr_mfc_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct mfc6_cache *mfc, u32 tb_id)
{
	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
}
/* Delete a VIF entry */
static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
		       struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	if (VIF_EXISTS(mrt, vifi))
		call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
					       FIB_EVENT_VIF_DEL, v, vifi,
					       mrt->id);

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding--;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if ((v->flags & MIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
{
	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);

	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
}

static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}


/* Timer process for all the unresolved queue. */

static void ipmr_do_expire_process(struct mr_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mr_mfc *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(struct timer_list *t)
{
	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */
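/* A ttls[] entry of 0 or 255 means "do not forward on this mif";
 * minvif/maxvif bound the oif scan performed in ip6_mr_forward().
 */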
static void ip6mr_update_thresholds(struct mr_table *mrt,
				    struct mr_mfc *cache,
				    unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
	cache->mfc_un.res.lastuse = jiffies;
}
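/* Register a new multicast interface. Called under RTNL; takes a
 * reference on the underlying device and bumps its mc_forwarding
 * state, which mif6_delete() undoes.
 */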
static int mif6_add(struct net *net, struct mr_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding++;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
			MIFF_REGISTER);

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
				       v, vifi, mrt->id);
	return 0;
}

static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find(mrt, &arg);
}

/* Look for a (*,G) entry */
static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = in6addr_any,
		.mf6c_mcastgrp = *mcastgrp,
	};

	if (ipv6_addr_any(mcastgrp))
		return mr_mfc_find_any_parent(mrt, mifi);
	return mr_mfc_find_any(mrt, mifi, &arg);
}

/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc6_cache *
ip6mr_cache_find_parent(struct mr_table *mrt,
			const struct in6_addr *origin,
			const struct in6_addr *mcastgrp,
			int parent)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find_parent(mrt, &arg, parent);
}

/* Allocate a multicast cache entry */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (!c)
		return NULL;
	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
	c->_c.mfc_un.res.minvif = MAXMIFS;
	c->_c.free = ip6mr_cache_free_rcu;
	refcount_set(&c->_c.mfc_un.res.refcount, 1);
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (!c)
		return NULL;
	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));

			if (mr_fill_mroute(mrt, skb, &c->_c,
					   nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			ip6_mr_forward(net, mrt, skb->dev, skb, c);
	}
}

/*
 *	Bounce a cache query up to pim6sd and netlink.
 *
 *	Called under mrt_lock.
 */
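/* assert is MRT6MSG_NOCACHE, MRT6MSG_WRONGMIF or MRT6MSG_WHOLEPKT;
 * the report reaches the daemon as a struct mrt6msg on the mroute
 * socket and is mirrored to netlink via mrt6msg_netlink_event().
 */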
static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sock *mroute6_sk;
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
		/*
		 *	Copy the IP header
		 */

		skb_put(skb, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb);
		skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

		/*
		 *	Add our header
		 */
		skb_put(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);

		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		msg->im6_mif = mifi;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb_dst_set(skb, dst_clone(skb_dst(pkt)));
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	rcu_read_lock();
	mroute6_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute6_sk) {
		rcu_read_unlock();
		kfree_skb(skb);
		return -EINVAL;
	}

	mrt6msg_netlink_event(mrt, skb);

	/* Deliver to user space multicast routing algorithms */
	ret = sock_queue_rcv_skb(mroute6_sk, skb);
	rcu_read_unlock();
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}

/* Queue a packet for resolution. It gets locked cache entry! */
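/* At most 10 entries may sit on the unresolved queue, and each entry
 * holds at most 4 queued skbs; anything beyond that is dropped with
 * -ENOBUFS.
 */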
static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
				  struct sk_buff *skb, struct net_device *dev)
{
	struct mfc6_cache *c;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		ipmr_do_expire_process(mrt);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		if (dev) {
			skb->dev = dev;
			skb->skb_iif = dev->ifindex;
		}
		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	struct mfc6_cache *c;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
	list_del_rcu(&c->_c.list);

	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_DELROUTE);
	mr_cache_put(&c->_c);
	return 0;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				mif6_delete(mrt, ct, 1, NULL);
		}
	}

	return NOTIFY_DONE;
}

static unsigned int ip6mr_seq_read(struct net *net)
{
	ASSERT_RTNL();

	return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
}

static int ip6mr_dump(struct net *net, struct notifier_block *nb)
{
	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
		       ip6mr_mr_table_iter, &mrt_lock);
}
static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.fib_seq_read	= ip6mr_seq_read,
	.fib_dump	= ip6mr_dump,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_notifier_init(struct net *net)
{
	struct fib_notifier_ops *ops;

	net->ipv6.ipmr_seq = 0;

	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	net->ipv6.ip6mr_notifier_ops = ops;

	return 0;
}

static void __net_exit ip6mr_notifier_exit(struct net *net)
{
	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
	net->ipv6.ip6mr_notifier_ops = NULL;
}

/* Setup for IP multicast routing */
static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_notifier_init(net);
	if (err)
		return err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto ip6mr_rules_fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
			     sizeof(struct mr_vif_iter)))
		goto proc_vif_fail;
	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
			     sizeof(struct mr_mfc_iter)))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
ip6mr_rules_fail:
	ip6mr_notifier_exit(net);
	return err;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
	ip6mr_notifier_exit(net);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};

int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
				   NULL, ip6mr_rtm_dumproute, 0);
	if (err == 0)
		return 0;

#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}
void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}

static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	unsigned char ttls[MAXMIFS];
	struct mfc6_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int i, err;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (c) {
		write_lock_bh(&mrt_lock);
		c->_c.mfc_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, &c->_c, ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
					       c, mrt->id);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->_c.mfc_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, &c->_c, ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ip6mr_rht_params);
	if (err) {
		pr_err("ip6mr: rhtable insert error %d\n", err);
		ip6mr_cache_free(c);
		return err;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);

	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc6_cache *)_uc;
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
				       c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
	struct mr_mfc *c, *tmp;
	LIST_HEAD(list);
	int i;

	/* Shut down all active vif entries */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
			continue;
		mif6_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/* Wipe the cache */
	list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
		if (!all && (c->mfc_flags & MFC_STATIC))
			continue;
		rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
		list_del_rcu(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		mr_cache_put(c);
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
						       FIB_EVENT_ENTRY_DEL,
						       (struct mfc6_cache *)c,
						       mrt->id);
			mr6_netlink_event(mrt, (struct mfc6_cache *)c,
					  RTM_DELROUTE);
			ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (rtnl_dereference(mrt->mroute_sk)) {
		err = -EADDRINUSE;
	} else {
		rcu_assign_pointer(mrt->mroute_sk, sk);
		sock_set_flag(sk, SOCK_RCU_FREE);
		net->ipv6.devconf_all->mc_forwarding++;
	}
	write_unlock_bh(&mrt_lock);

	if (!err)
		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	rtnl_unlock();

	return err;
}
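/* Tear down the mroute socket: the mirror image of ip6mr_sk_init(),
 * dropping the all-devices mc_forwarding count and flushing all
 * non-static mifs and cache entries.
 */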
int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return err;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			write_lock_bh(&mrt_lock);
			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
			/* Note that mroute_sk had SOCK_RCU_FREE set,
			 * so the RCU grace period before sk freeing
			 * is guaranteed by sk_destruct()
			 */
			net->ipv6.devconf_all->mc_forwarding--;
			write_unlock_bh(&mrt_lock);
			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);

			mroute_clean_tables(mrt, false);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}

bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_oif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		return NULL;

	return rcu_access_pointer(mrt->mroute_sk);
}
EXPORT_SYMBOL(mroute6_is_socket);

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret, parent = 0;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif,
			       sk == rtnl_dereference(mrt->mroute_sk));
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, 0, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
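	/* Plain MRT6_ADD_MFC/MRT6_DEL_MFC look the entry up with
	 * parent == -1 (match any incoming mif); the _PROXY variants
	 * use the parent mif given in the request (mf6cc_parent).
	 */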
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		parent = -1;
		/* fall through */
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		if (parent == 0)
			parent = mfc.mf6cc_parent;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk ==
					    rtnl_dereference(mrt->mroute_sk),
					    parent);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		if (sk == rcu_access_pointer(mrt->mroute_sk))
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		mrt = ip6mr_new_table(net, v);
		if (IS_ERR(mrt))
			ret = PTR_ERR(mrt);
		else
			raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsock opt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */
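/* Two ioctls are handled: SIOCGETMIFCNT_IN6 (per-mif packet and byte
 * counters) and SIOCGETSGCNT_IN6 (per-(S,G) forwarding counters).
 */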
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

#ifdef CONFIG_COMPAT
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

struct compat_sioc_mif_req6 {
	mifi_t mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};
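/* Same as ip6mr_ioctl(), but with compat_ulong_t counter fields so
 * that 32-bit userland gets the layout it expects.
 */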
int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif

static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTOCTETS, skb->len);
	return dst_output(net, sk, skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (!vif->dev)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding on
	 * all output interfaces. Clearly, if an mrouter runs a multicasting
	 * program, that program should receive packets regardless of which
	 * interface it joined on; otherwise it would have to join on all
	 * interfaces. On the other hand, a multihomed host (or router, but
	 * not an mrouter) cannot join on more than one interface - it would
	 * result in receiving multiple copies of each packet.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
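	/* skb_cow() guarantees a writable, private header (including
	 * link-layer headroom) before hop_limit is decremented in place.
	 */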
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct net_device *dev, struct sk_buff *skb,
			   struct mfc6_cache *c)
{
	int psend = -1;
	int vif, ct;
	int true_vifi = ip6mr_find_vif(mrt, dev);

	vif = c->_c.mfc_parent;
	c->_c.mfc_un.res.pkt++;
	c->_c.mfc_un.res.bytes += skb->len;
	c->_c.mfc_un.res.lastuse = jiffies;

	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		rcu_read_lock();
		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
			rcu_read_unlock();
			goto forward;
		}
		rcu_read_unlock();
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif_table[vif].dev != dev) {
		c->_c.mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       c->_c.mfc_un.res.last_assert +
			       MFC_ASSERT_THRESH)) {
			c->_c.mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

forward:
	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&c->mf6c_origin) &&
	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != c->_c.mfc_parent &&
		    ipv6_hdr(skb)->hop_limit >
				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = c->_c.mfc_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	for (ct = c->_c.mfc_un.res.maxvif - 1;
	     ct >= c->_c.mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2,
						       c, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, c, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int err;
	struct net_device *dev;

	/* skb->dev passed in is the master dev for vrfs.
	 * Get the proper interface that does have a vif associated with it.
	 */
	dev = skb->dev;
	if (netif_is_l3_master(skb->dev)) {
		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
		if (!dev) {
			kfree_skb(skb);
			return -ENODEV;
		}
	}

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
	if (!cache) {
		int vif = ip6mr_find_vif(mrt, dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt,
						     &ipv6_hdr(skb)->daddr,
						     vif);
	}

	/*
	 *	No usable cache entry
	 */
	if (!cache) {
		int vif;

		vif = ip6mr_find_vif(mrt, dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(net, mrt, dev, skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}

int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
		    u32 portid)
{
	int err;
	struct mr_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		dev = skb->dev;
		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
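		/* Build a dummy IPv6 header with version 0; the
		 * version == 0 test in ip6mr_cache_resolve() and
		 * ip6mr_destroy_unres() identifies this skb as a
		 * netlink query rather than a real packet.
		 */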

int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
		    u32 portid)
{
	int err;
	struct mr_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		dev = skb->dev;
		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* Synthesize a minimal IPv6 header carrying just the
		 * addresses, so the flow can be queued as an unresolved
		 * cache entry. Really correct?
		 */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		NETLINK_CB(skb2).portid = portid;
		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
		read_unlock(&mrt_lock);

		return err;
	}

	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
	read_unlock(&mrt_lock);
	return err;
}

static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mfc6_cache *c,
			     int cmd, int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int err;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
	rtm->rtm_dst_len  = 128;
	rtm->rtm_src_len  = 128;
	rtm->rtm_tos      = 0;
	rtm->rtm_table    = mrt->id;
	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto nla_put_failure;
	rtm->rtm_type     = RTN_MULTICAST;
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	if (c->_c.mfc_flags & MFC_STATIC)
		rtm->rtm_protocol = RTPROT_STATIC;
	else
		rtm->rtm_protocol = RTPROT_MROUTED;
	rtm->rtm_flags    = 0;

	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
		goto nla_put_failure;
	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
	/* do not break the dump if cache is unresolved */
	if (err < 0 && err != -ENOENT)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      u32 portid, u32 seq, struct mr_mfc *c,
			      int cmd, int flags)
{
	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
				 cmd, flags);
}

static int mr6_msgsize(bool unresolved, int maxvif)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4)	/* RTA_TABLE */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
		;

	if (!unresolved)
		len = len
		      + nla_total_size(4)	/* RTA_IIF */
		      + nla_total_size(0)	/* RTA_MULTIPATH */
		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
						/* RTA_MFC_STATS */
		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
		;

	return len;
}

static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
			GFP_ATOMIC);
	if (!skb)
		goto errout;

	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
	if (err < 0)
		goto errout;

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
	return;

errout:
	kfree_skb(skb);
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
}
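
/* Illustrative sketch (not kernel code): the notifications sent by
 * mr6_netlink_event() above can be observed from userspace with an
 * ordinary rtnetlink socket; 'open_mroute_monitor' is a hypothetical name.
 * Each notification is an RTM_NEWROUTE/RTM_DELROUTE message whose rtmsg
 * has rtm_family == RTNL_FAMILY_IP6MR, laid out by ip6mr_fill_mroute():
 *
 *	#include <unistd.h>
 *	#include <sys/socket.h>
 *	#include <linux/netlink.h>
 *	#include <linux/rtnetlink.h>
 *
 *	static int open_mroute_monitor(void)
 *	{
 *		int grp = RTNLGRP_IPV6_MROUTE;
 *		int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *
 *		if (fd < 0)
 *			return -1;
 *		if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
 *			       &grp, sizeof(grp)) < 0) {
 *			close(fd);
 *			return -1;
 *		}
 *		return fd;
 *	}
 */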

static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtgenmsg))
		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
					/* IP6MRA_CREPORT_SRC_ADDR */
		+ nla_total_size(sizeof(struct in6_addr))
					/* IP6MRA_CREPORT_DST_ADDR */
		+ nla_total_size(sizeof(struct in6_addr))
					/* IP6MRA_CREPORT_PKT */
		+ nla_total_size(payloadlen)
		;

	return len;
}

static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
{
	struct net *net = read_pnet(&mrt->net);
	struct nlmsghdr *nlh;
	struct rtgenmsg *rtgenm;
	struct mrt6msg *msg;
	struct sk_buff *skb;
	struct nlattr *nla;
	int payloadlen;

	payloadlen = pkt->len - sizeof(struct mrt6msg);
	msg = (struct mrt6msg *)skb_transport_header(pkt);

	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
	if (!skb)
		goto errout;

	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
			sizeof(struct rtgenmsg), 0);
	if (!nlh)
		goto errout;
	rtgenm = nlmsg_data(nlh);
	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
			     &msg->im6_src) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
			     &msg->im6_dst))
		goto nla_put_failure;

	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
				  nla_data(nla), payloadlen))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
	return;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
errout:
	kfree_skb(skb);
	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
}

static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct fib_dump_filter filter = {};
	int err;

	if (cb->strict_check) {
		err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
					    &filter, cb);
		if (err < 0)
			return err;
	}

	if (filter.table_id) {
		struct mr_table *mrt;

		mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
		if (!mrt) {
			if (filter.dump_all_families)
				return skb->len;

			NL_SET_ERR_MSG_MOD(cb->extack,
					   "MR table does not exist");
			return -ENOENT;
		}
		err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
				    &mfc_unres_lock, &filter);
		return skb->len ? : err;
	}

	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
				_ip6mr_fill_mroute, &mfc_unres_lock, &filter);
}
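
/* Illustrative sketch (not kernel code): ip6mr_rtm_dumproute() above is the
 * handler behind "ip -6 mroute show". The equivalent raw dump request, sent
 * on an already-open NETLINK_ROUTE socket 'fd' ('request_mroute_dump' is a
 * hypothetical name):
 *
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <linux/netlink.h>
 *	#include <linux/rtnetlink.h>
 *
 *	static int request_mroute_dump(int fd)
 *	{
 *		struct {
 *			struct nlmsghdr nlh;
 *			struct rtmsg rtm;
 *		} req;
 *
 *		memset(&req, 0, sizeof(req));
 *		req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
 *		req.nlh.nlmsg_type = RTM_GETROUTE;
 *		req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
 *		req.rtm.rtm_family = RTNL_FAMILY_IP6MR;
 *
 *		return send(fd, &req, req.nlh.nlmsg_len, 0);
 *	}
 */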