1 /* 2 * Linux IPv6 multicast routing support for BSD pim6sd 3 * Based on net/ipv4/ipmr.c. 4 * 5 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr> 6 * LSIIT Laboratory, Strasbourg, France 7 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com> 8 * 6WIND, Paris, France 9 * Copyright (C)2007,2008 USAGI/WIDE Project 10 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public License 14 * as published by the Free Software Foundation; either version 15 * 2 of the License, or (at your option) any later version. 16 * 17 */ 18 19 #include <linux/uaccess.h> 20 #include <linux/types.h> 21 #include <linux/sched.h> 22 #include <linux/errno.h> 23 #include <linux/mm.h> 24 #include <linux/kernel.h> 25 #include <linux/fcntl.h> 26 #include <linux/stat.h> 27 #include <linux/socket.h> 28 #include <linux/inet.h> 29 #include <linux/netdevice.h> 30 #include <linux/inetdevice.h> 31 #include <linux/proc_fs.h> 32 #include <linux/seq_file.h> 33 #include <linux/init.h> 34 #include <linux/compat.h> 35 #include <linux/rhashtable.h> 36 #include <net/protocol.h> 37 #include <linux/skbuff.h> 38 #include <net/raw.h> 39 #include <linux/notifier.h> 40 #include <linux/if_arp.h> 41 #include <net/checksum.h> 42 #include <net/netlink.h> 43 #include <net/fib_rules.h> 44 45 #include <net/ipv6.h> 46 #include <net/ip6_route.h> 47 #include <linux/mroute6.h> 48 #include <linux/pim.h> 49 #include <net/addrconf.h> 50 #include <linux/netfilter_ipv6.h> 51 #include <linux/export.h> 52 #include <net/ip6_checksum.h> 53 #include <linux/netconf.h> 54 55 struct ip6mr_rule { 56 struct fib_rule common; 57 }; 58 59 struct ip6mr_result { 60 struct mr_table *mrt; 61 }; 62 63 /* Big lock, protecting vif table, mrt cache and mroute socket state. 64 Note that the changes are semaphored via rtnl_lock. 65 */ 66 67 static DEFINE_RWLOCK(mrt_lock); 68 69 /* Multicast router control variables */ 70 71 /* Special spinlock for queue of unresolved entries */ 72 static DEFINE_SPINLOCK(mfc_unres_lock); 73 74 /* We return to original Alan's scheme. Hash table of resolved 75 entries is changed only in process context and protected 76 with weak lock mrt_lock. Queue of unresolved entries is protected 77 with strong spinlock mfc_unres_lock. 78 79 In this case data path is free of exclusive locks at all. 80 */ 81 82 static struct kmem_cache *mrt_cachep __read_mostly; 83 84 static struct mr_table *ip6mr_new_table(struct net *net, u32 id); 85 static void ip6mr_free_table(struct mr_table *mrt); 86 87 static void ip6_mr_forward(struct net *net, struct mr_table *mrt, 88 struct sk_buff *skb, struct mfc6_cache *cache); 89 static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, 90 mifi_t mifi, int assert); 91 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, 92 int cmd); 93 static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); 94 static int ip6mr_rtm_dumproute(struct sk_buff *skb, 95 struct netlink_callback *cb); 96 static void mroute_clean_tables(struct mr_table *mrt, bool all); 97 static void ipmr_expire_process(struct timer_list *t); 98 99 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 100 #define ip6mr_for_each_table(mrt, net) \ 101 list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list) 102 103 static struct mr_table *ip6mr_mr_table_iter(struct net *net, 104 struct mr_table *mrt) 105 { 106 struct mr_table *ret; 107 108 if (!mrt) 109 ret = list_entry_rcu(net->ipv6.mr6_tables.next, 110 struct mr_table, list); 111 else 112 ret = list_entry_rcu(mrt->list.next, 113 struct mr_table, list); 114 115 if (&ret->list == &net->ipv6.mr6_tables) 116 return NULL; 117 return ret; 118 } 119 120 static struct mr_table *ip6mr_get_table(struct net *net, u32 id) 121 { 122 struct mr_table *mrt; 123 124 ip6mr_for_each_table(mrt, net) { 125 if (mrt->id == id) 126 return mrt; 127 } 128 return NULL; 129 } 130 131 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, 132 struct mr_table **mrt) 133 { 134 int err; 135 struct ip6mr_result res; 136 struct fib_lookup_arg arg = { 137 .result = &res, 138 .flags = FIB_LOOKUP_NOREF, 139 }; 140 141 err = fib_rules_lookup(net->ipv6.mr6_rules_ops, 142 flowi6_to_flowi(flp6), 0, &arg); 143 if (err < 0) 144 return err; 145 *mrt = res.mrt; 146 return 0; 147 } 148 149 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp, 150 int flags, struct fib_lookup_arg *arg) 151 { 152 struct ip6mr_result *res = arg->result; 153 struct mr_table *mrt; 154 155 switch (rule->action) { 156 case FR_ACT_TO_TBL: 157 break; 158 case FR_ACT_UNREACHABLE: 159 return -ENETUNREACH; 160 case FR_ACT_PROHIBIT: 161 return -EACCES; 162 case FR_ACT_BLACKHOLE: 163 default: 164 return -EINVAL; 165 } 166 167 mrt = ip6mr_get_table(rule->fr_net, rule->table); 168 if (!mrt) 169 return -EAGAIN; 170 res->mrt = mrt; 171 return 0; 172 } 173 174 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags) 175 { 176 return 1; 177 } 178 179 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = { 180 FRA_GENERIC_POLICY, 181 }; 182 183 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, 184 struct fib_rule_hdr *frh, struct nlattr **tb, 185 struct netlink_ext_ack *extack) 186 { 187 return 0; 188 } 189 190 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, 191 struct nlattr **tb) 192 { 193 return 1; 194 } 195 196 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, 197 struct fib_rule_hdr *frh) 198 { 199 frh->dst_len = 0; 200 frh->src_len = 0; 201 frh->tos = 0; 202 return 0; 203 } 204 205 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = { 206 .family = RTNL_FAMILY_IP6MR, 207 .rule_size = sizeof(struct ip6mr_rule), 208 .addr_size = sizeof(struct in6_addr), 209 .action = ip6mr_rule_action, 210 .match = ip6mr_rule_match, 211 .configure = ip6mr_rule_configure, 212 .compare = ip6mr_rule_compare, 213 .fill = ip6mr_rule_fill, 214 .nlgroup = RTNLGRP_IPV6_RULE, 215 .policy = ip6mr_rule_policy, 216 .owner = THIS_MODULE, 217 }; 218 219 static int __net_init ip6mr_rules_init(struct net *net) 220 { 221 struct fib_rules_ops *ops; 222 struct mr_table *mrt; 223 int err; 224 225 ops = fib_rules_register(&ip6mr_rules_ops_template, net); 226 if (IS_ERR(ops)) 227 return PTR_ERR(ops); 228 229 INIT_LIST_HEAD(&net->ipv6.mr6_tables); 230 231 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); 232 if (IS_ERR(mrt)) { 233 err = PTR_ERR(mrt); 234 goto err1; 235 } 236 237 err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0); 238 if (err < 0) 239 goto err2; 240 241 net->ipv6.mr6_rules_ops = ops; 242 return 0; 243 244 err2: 245 ip6mr_free_table(mrt); 246 err1: 247 fib_rules_unregister(ops); 248 return err; 249 } 250 251 static void __net_exit ip6mr_rules_exit(struct net *net) 252 { 253 struct mr_table *mrt, *next; 254 255 rtnl_lock(); 256 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { 257 list_del(&mrt->list); 258 ip6mr_free_table(mrt); 259 } 260 fib_rules_unregister(net->ipv6.mr6_rules_ops); 261 rtnl_unlock(); 262 } 263 264 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb) 265 { 266 return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR); 267 } 268 269 static unsigned int ip6mr_rules_seq_read(struct net *net) 270 { 271 return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR); 272 } 273 274 bool ip6mr_rule_default(const struct fib_rule *rule) 275 { 276 return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL && 277 rule->table == RT6_TABLE_DFLT && !rule->l3mdev; 278 } 279 EXPORT_SYMBOL(ip6mr_rule_default); 280 #else 281 #define ip6mr_for_each_table(mrt, net) \ 282 for (mrt = net->ipv6.mrt6; mrt; mrt = NULL) 283 284 static struct mr_table *ip6mr_mr_table_iter(struct net *net, 285 struct mr_table *mrt) 286 { 287 if (!mrt) 288 return net->ipv6.mrt6; 289 return NULL; 290 } 291 292 static struct mr_table *ip6mr_get_table(struct net *net, u32 id) 293 { 294 return net->ipv6.mrt6; 295 } 296 297 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, 298 struct mr_table **mrt) 299 { 300 *mrt = net->ipv6.mrt6; 301 return 0; 302 } 303 304 static int __net_init ip6mr_rules_init(struct net *net) 305 { 306 struct mr_table *mrt; 307 308 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); 309 if (IS_ERR(mrt)) 310 return PTR_ERR(mrt); 311 net->ipv6.mrt6 = mrt; 312 return 0; 313 } 314 315 static void __net_exit ip6mr_rules_exit(struct net *net) 316 { 317 rtnl_lock(); 318 ip6mr_free_table(net->ipv6.mrt6); 319 net->ipv6.mrt6 = NULL; 320 rtnl_unlock(); 321 } 322 323 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb) 324 { 325 return 0; 326 } 327 328 static unsigned int ip6mr_rules_seq_read(struct net *net) 329 { 330 return 0; 331 } 332 #endif 333 334 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg, 335 const void *ptr) 336 { 337 const struct mfc6_cache_cmp_arg *cmparg = arg->key; 338 struct mfc6_cache *c = (struct mfc6_cache *)ptr; 339 340 return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) || 341 !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin); 342 } 343 344 static const struct rhashtable_params ip6mr_rht_params = { 345 .head_offset = offsetof(struct mr_mfc, mnode), 346 .key_offset = offsetof(struct mfc6_cache, cmparg), 347 .key_len = sizeof(struct mfc6_cache_cmp_arg), 348 .nelem_hint = 3, 349 .locks_mul = 1, 350 .obj_cmpfn = ip6mr_hash_cmp, 351 .automatic_shrinking = true, 352 }; 353 354 static void ip6mr_new_table_set(struct mr_table *mrt, 355 struct net *net) 356 { 357 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 358 list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables); 359 #endif 360 } 361 362 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = { 363 .mf6c_origin = IN6ADDR_ANY_INIT, 364 .mf6c_mcastgrp = IN6ADDR_ANY_INIT, 365 }; 366 367 static struct mr_table_ops ip6mr_mr_table_ops = { 368 .rht_params = &ip6mr_rht_params, 369 .cmparg_any = &ip6mr_mr_table_ops_cmparg_any, 370 }; 371 372 static struct mr_table *ip6mr_new_table(struct net *net, u32 id) 373 { 374 struct mr_table *mrt; 375 376 mrt = ip6mr_get_table(net, id); 377 if (mrt) 378 return mrt; 379 380 return mr_table_alloc(net, id, &ip6mr_mr_table_ops, 381 ipmr_expire_process, ip6mr_new_table_set); 382 } 383 384 static void ip6mr_free_table(struct mr_table *mrt) 385 { 386 del_timer_sync(&mrt->ipmr_expire_timer); 387 mroute_clean_tables(mrt, true); 388 rhltable_destroy(&mrt->mfc_hash); 389 kfree(mrt); 390 } 391 392 #ifdef CONFIG_PROC_FS 393 /* The /proc interfaces to multicast routing 394 * /proc/ip6_mr_cache /proc/ip6_mr_vif 395 */ 396 397 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) 398 __acquires(mrt_lock) 399 { 400 struct mr_vif_iter *iter = seq->private; 401 struct net *net = seq_file_net(seq); 402 struct mr_table *mrt; 403 404 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); 405 if (!mrt) 406 return ERR_PTR(-ENOENT); 407 408 iter->mrt = mrt; 409 410 read_lock(&mrt_lock); 411 return mr_vif_seq_start(seq, pos); 412 } 413 414 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) 415 __releases(mrt_lock) 416 { 417 read_unlock(&mrt_lock); 418 } 419 420 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) 421 { 422 struct mr_vif_iter *iter = seq->private; 423 struct mr_table *mrt = iter->mrt; 424 425 if (v == SEQ_START_TOKEN) { 426 seq_puts(seq, 427 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n"); 428 } else { 429 const struct vif_device *vif = v; 430 const char *name = vif->dev ? vif->dev->name : "none"; 431 432 seq_printf(seq, 433 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", 434 vif - mrt->vif_table, 435 name, vif->bytes_in, vif->pkt_in, 436 vif->bytes_out, vif->pkt_out, 437 vif->flags); 438 } 439 return 0; 440 } 441 442 static const struct seq_operations ip6mr_vif_seq_ops = { 443 .start = ip6mr_vif_seq_start, 444 .next = mr_vif_seq_next, 445 .stop = ip6mr_vif_seq_stop, 446 .show = ip6mr_vif_seq_show, 447 }; 448 449 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 450 { 451 struct net *net = seq_file_net(seq); 452 struct mr_table *mrt; 453 454 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); 455 if (!mrt) 456 return ERR_PTR(-ENOENT); 457 458 return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock); 459 } 460 461 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 462 { 463 int n; 464 465 if (v == SEQ_START_TOKEN) { 466 seq_puts(seq, 467 "Group " 468 "Origin " 469 "Iif Pkts Bytes Wrong Oifs\n"); 470 } else { 471 const struct mfc6_cache *mfc = v; 472 const struct mr_mfc_iter *it = seq->private; 473 struct mr_table *mrt = it->mrt; 474 475 seq_printf(seq, "%pI6 %pI6 %-3hd", 476 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, 477 mfc->_c.mfc_parent); 478 479 if (it->cache != &mrt->mfc_unres_queue) { 480 seq_printf(seq, " %8lu %8lu %8lu", 481 mfc->_c.mfc_un.res.pkt, 482 mfc->_c.mfc_un.res.bytes, 483 mfc->_c.mfc_un.res.wrong_if); 484 for (n = mfc->_c.mfc_un.res.minvif; 485 n < mfc->_c.mfc_un.res.maxvif; n++) { 486 if (VIF_EXISTS(mrt, n) && 487 mfc->_c.mfc_un.res.ttls[n] < 255) 488 seq_printf(seq, 489 " %2d:%-3d", n, 490 mfc->_c.mfc_un.res.ttls[n]); 491 } 492 } else { 493 /* unresolved mfc_caches don't contain 494 * pkt, bytes and wrong_if values 495 */ 496 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); 497 } 498 seq_putc(seq, '\n'); 499 } 500 return 0; 501 } 502 503 static const struct seq_operations ipmr_mfc_seq_ops = { 504 .start = ipmr_mfc_seq_start, 505 .next = mr_mfc_seq_next, 506 .stop = mr_mfc_seq_stop, 507 .show = ipmr_mfc_seq_show, 508 }; 509 #endif 510 511 #ifdef CONFIG_IPV6_PIMSM_V2 512 513 static int pim6_rcv(struct sk_buff *skb) 514 { 515 struct pimreghdr *pim; 516 struct ipv6hdr *encap; 517 struct net_device *reg_dev = NULL; 518 struct net *net = dev_net(skb->dev); 519 struct mr_table *mrt; 520 struct flowi6 fl6 = { 521 .flowi6_iif = skb->dev->ifindex, 522 .flowi6_mark = skb->mark, 523 }; 524 int reg_vif_num; 525 526 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) 527 goto drop; 528 529 pim = (struct pimreghdr *)skb_transport_header(skb); 530 if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) || 531 (pim->flags & PIM_NULL_REGISTER) || 532 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 533 sizeof(*pim), IPPROTO_PIM, 534 csum_partial((void *)pim, sizeof(*pim), 0)) && 535 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 536 goto drop; 537 538 /* check if the inner packet is destined to mcast group */ 539 encap = (struct ipv6hdr *)(skb_transport_header(skb) + 540 sizeof(*pim)); 541 542 if (!ipv6_addr_is_multicast(&encap->daddr) || 543 encap->payload_len == 0 || 544 ntohs(encap->payload_len) + sizeof(*pim) > skb->len) 545 goto drop; 546 547 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 548 goto drop; 549 reg_vif_num = mrt->mroute_reg_vif_num; 550 551 read_lock(&mrt_lock); 552 if (reg_vif_num >= 0) 553 reg_dev = mrt->vif_table[reg_vif_num].dev; 554 if (reg_dev) 555 dev_hold(reg_dev); 556 read_unlock(&mrt_lock); 557 558 if (!reg_dev) 559 goto drop; 560 561 skb->mac_header = skb->network_header; 562 skb_pull(skb, (u8 *)encap - skb->data); 563 skb_reset_network_header(skb); 564 skb->protocol = htons(ETH_P_IPV6); 565 skb->ip_summed = CHECKSUM_NONE; 566 567 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); 568 569 netif_rx(skb); 570 571 dev_put(reg_dev); 572 return 0; 573 drop: 574 kfree_skb(skb); 575 return 0; 576 } 577 578 static const struct inet6_protocol pim6_protocol = { 579 .handler = pim6_rcv, 580 }; 581 582 /* Service routines creating virtual interfaces: PIMREG */ 583 584 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, 585 struct net_device *dev) 586 { 587 struct net *net = dev_net(dev); 588 struct mr_table *mrt; 589 struct flowi6 fl6 = { 590 .flowi6_oif = dev->ifindex, 591 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, 592 .flowi6_mark = skb->mark, 593 }; 594 int err; 595 596 err = ip6mr_fib_lookup(net, &fl6, &mrt); 597 if (err < 0) { 598 kfree_skb(skb); 599 return err; 600 } 601 602 read_lock(&mrt_lock); 603 dev->stats.tx_bytes += skb->len; 604 dev->stats.tx_packets++; 605 ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT); 606 read_unlock(&mrt_lock); 607 kfree_skb(skb); 608 return NETDEV_TX_OK; 609 } 610 611 static int reg_vif_get_iflink(const struct net_device *dev) 612 { 613 return 0; 614 } 615 616 static const struct net_device_ops reg_vif_netdev_ops = { 617 .ndo_start_xmit = reg_vif_xmit, 618 .ndo_get_iflink = reg_vif_get_iflink, 619 }; 620 621 static void reg_vif_setup(struct net_device *dev) 622 { 623 dev->type = ARPHRD_PIMREG; 624 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; 625 dev->flags = IFF_NOARP; 626 dev->netdev_ops = ®_vif_netdev_ops; 627 dev->needs_free_netdev = true; 628 dev->features |= NETIF_F_NETNS_LOCAL; 629 } 630 631 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt) 632 { 633 struct net_device *dev; 634 char name[IFNAMSIZ]; 635 636 if (mrt->id == RT6_TABLE_DFLT) 637 sprintf(name, "pim6reg"); 638 else 639 sprintf(name, "pim6reg%u", mrt->id); 640 641 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); 642 if (!dev) 643 return NULL; 644 645 dev_net_set(dev, net); 646 647 if (register_netdevice(dev)) { 648 free_netdev(dev); 649 return NULL; 650 } 651 652 if (dev_open(dev)) 653 goto failure; 654 655 dev_hold(dev); 656 return dev; 657 658 failure: 659 unregister_netdevice(dev); 660 return NULL; 661 } 662 #endif 663 664 static int call_ip6mr_vif_entry_notifiers(struct net *net, 665 enum fib_event_type event_type, 666 struct vif_device *vif, 667 mifi_t vif_index, u32 tb_id) 668 { 669 return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type, 670 vif, vif_index, tb_id, 671 &net->ipv6.ipmr_seq); 672 } 673 674 static int call_ip6mr_mfc_entry_notifiers(struct net *net, 675 enum fib_event_type event_type, 676 struct mfc6_cache *mfc, u32 tb_id) 677 { 678 return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type, 679 &mfc->_c, tb_id, &net->ipv6.ipmr_seq); 680 } 681 682 /* Delete a VIF entry */ 683 static int mif6_delete(struct mr_table *mrt, int vifi, int notify, 684 struct list_head *head) 685 { 686 struct vif_device *v; 687 struct net_device *dev; 688 struct inet6_dev *in6_dev; 689 690 if (vifi < 0 || vifi >= mrt->maxvif) 691 return -EADDRNOTAVAIL; 692 693 v = &mrt->vif_table[vifi]; 694 695 if (VIF_EXISTS(mrt, vifi)) 696 call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net), 697 FIB_EVENT_VIF_DEL, v, vifi, 698 mrt->id); 699 700 write_lock_bh(&mrt_lock); 701 dev = v->dev; 702 v->dev = NULL; 703 704 if (!dev) { 705 write_unlock_bh(&mrt_lock); 706 return -EADDRNOTAVAIL; 707 } 708 709 #ifdef CONFIG_IPV6_PIMSM_V2 710 if (vifi == mrt->mroute_reg_vif_num) 711 mrt->mroute_reg_vif_num = -1; 712 #endif 713 714 if (vifi + 1 == mrt->maxvif) { 715 int tmp; 716 for (tmp = vifi - 1; tmp >= 0; tmp--) { 717 if (VIF_EXISTS(mrt, tmp)) 718 break; 719 } 720 mrt->maxvif = tmp + 1; 721 } 722 723 write_unlock_bh(&mrt_lock); 724 725 dev_set_allmulti(dev, -1); 726 727 in6_dev = __in6_dev_get(dev); 728 if (in6_dev) { 729 in6_dev->cnf.mc_forwarding--; 730 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 731 NETCONFA_MC_FORWARDING, 732 dev->ifindex, &in6_dev->cnf); 733 } 734 735 if ((v->flags & MIFF_REGISTER) && !notify) 736 unregister_netdevice_queue(dev, head); 737 738 dev_put(dev); 739 return 0; 740 } 741 742 static inline void ip6mr_cache_free_rcu(struct rcu_head *head) 743 { 744 struct mr_mfc *c = container_of(head, struct mr_mfc, rcu); 745 746 kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c); 747 } 748 749 static inline void ip6mr_cache_free(struct mfc6_cache *c) 750 { 751 call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu); 752 } 753 754 /* Destroy an unresolved cache entry, killing queued skbs 755 and reporting error to netlink readers. 756 */ 757 758 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c) 759 { 760 struct net *net = read_pnet(&mrt->net); 761 struct sk_buff *skb; 762 763 atomic_dec(&mrt->cache_resolve_queue_len); 764 765 while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) { 766 if (ipv6_hdr(skb)->version == 0) { 767 struct nlmsghdr *nlh = skb_pull(skb, 768 sizeof(struct ipv6hdr)); 769 nlh->nlmsg_type = NLMSG_ERROR; 770 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 771 skb_trim(skb, nlh->nlmsg_len); 772 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT; 773 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 774 } else 775 kfree_skb(skb); 776 } 777 778 ip6mr_cache_free(c); 779 } 780 781 782 /* Timer process for all the unresolved queue. */ 783 784 static void ipmr_do_expire_process(struct mr_table *mrt) 785 { 786 unsigned long now = jiffies; 787 unsigned long expires = 10 * HZ; 788 struct mr_mfc *c, *next; 789 790 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { 791 if (time_after(c->mfc_un.unres.expires, now)) { 792 /* not yet... */ 793 unsigned long interval = c->mfc_un.unres.expires - now; 794 if (interval < expires) 795 expires = interval; 796 continue; 797 } 798 799 list_del(&c->list); 800 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); 801 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); 802 } 803 804 if (!list_empty(&mrt->mfc_unres_queue)) 805 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); 806 } 807 808 static void ipmr_expire_process(struct timer_list *t) 809 { 810 struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer); 811 812 if (!spin_trylock(&mfc_unres_lock)) { 813 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1); 814 return; 815 } 816 817 if (!list_empty(&mrt->mfc_unres_queue)) 818 ipmr_do_expire_process(mrt); 819 820 spin_unlock(&mfc_unres_lock); 821 } 822 823 /* Fill oifs list. It is called under write locked mrt_lock. */ 824 825 static void ip6mr_update_thresholds(struct mr_table *mrt, 826 struct mr_mfc *cache, 827 unsigned char *ttls) 828 { 829 int vifi; 830 831 cache->mfc_un.res.minvif = MAXMIFS; 832 cache->mfc_un.res.maxvif = 0; 833 memset(cache->mfc_un.res.ttls, 255, MAXMIFS); 834 835 for (vifi = 0; vifi < mrt->maxvif; vifi++) { 836 if (VIF_EXISTS(mrt, vifi) && 837 ttls[vifi] && ttls[vifi] < 255) { 838 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 839 if (cache->mfc_un.res.minvif > vifi) 840 cache->mfc_un.res.minvif = vifi; 841 if (cache->mfc_un.res.maxvif <= vifi) 842 cache->mfc_un.res.maxvif = vifi + 1; 843 } 844 } 845 cache->mfc_un.res.lastuse = jiffies; 846 } 847 848 static int mif6_add(struct net *net, struct mr_table *mrt, 849 struct mif6ctl *vifc, int mrtsock) 850 { 851 int vifi = vifc->mif6c_mifi; 852 struct vif_device *v = &mrt->vif_table[vifi]; 853 struct net_device *dev; 854 struct inet6_dev *in6_dev; 855 int err; 856 857 /* Is vif busy ? */ 858 if (VIF_EXISTS(mrt, vifi)) 859 return -EADDRINUSE; 860 861 switch (vifc->mif6c_flags) { 862 #ifdef CONFIG_IPV6_PIMSM_V2 863 case MIFF_REGISTER: 864 /* 865 * Special Purpose VIF in PIM 866 * All the packets will be sent to the daemon 867 */ 868 if (mrt->mroute_reg_vif_num >= 0) 869 return -EADDRINUSE; 870 dev = ip6mr_reg_vif(net, mrt); 871 if (!dev) 872 return -ENOBUFS; 873 err = dev_set_allmulti(dev, 1); 874 if (err) { 875 unregister_netdevice(dev); 876 dev_put(dev); 877 return err; 878 } 879 break; 880 #endif 881 case 0: 882 dev = dev_get_by_index(net, vifc->mif6c_pifi); 883 if (!dev) 884 return -EADDRNOTAVAIL; 885 err = dev_set_allmulti(dev, 1); 886 if (err) { 887 dev_put(dev); 888 return err; 889 } 890 break; 891 default: 892 return -EINVAL; 893 } 894 895 in6_dev = __in6_dev_get(dev); 896 if (in6_dev) { 897 in6_dev->cnf.mc_forwarding++; 898 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 899 NETCONFA_MC_FORWARDING, 900 dev->ifindex, &in6_dev->cnf); 901 } 902 903 /* Fill in the VIF structures */ 904 vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold, 905 vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0), 906 MIFF_REGISTER); 907 908 /* And finish update writing critical data */ 909 write_lock_bh(&mrt_lock); 910 v->dev = dev; 911 #ifdef CONFIG_IPV6_PIMSM_V2 912 if (v->flags & MIFF_REGISTER) 913 mrt->mroute_reg_vif_num = vifi; 914 #endif 915 if (vifi + 1 > mrt->maxvif) 916 mrt->maxvif = vifi + 1; 917 write_unlock_bh(&mrt_lock); 918 call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, 919 v, vifi, mrt->id); 920 return 0; 921 } 922 923 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt, 924 const struct in6_addr *origin, 925 const struct in6_addr *mcastgrp) 926 { 927 struct mfc6_cache_cmp_arg arg = { 928 .mf6c_origin = *origin, 929 .mf6c_mcastgrp = *mcastgrp, 930 }; 931 932 return mr_mfc_find(mrt, &arg); 933 } 934 935 /* Look for a (*,G) entry */ 936 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt, 937 struct in6_addr *mcastgrp, 938 mifi_t mifi) 939 { 940 struct mfc6_cache_cmp_arg arg = { 941 .mf6c_origin = in6addr_any, 942 .mf6c_mcastgrp = *mcastgrp, 943 }; 944 945 if (ipv6_addr_any(mcastgrp)) 946 return mr_mfc_find_any_parent(mrt, mifi); 947 return mr_mfc_find_any(mrt, mifi, &arg); 948 } 949 950 /* Look for a (S,G,iif) entry if parent != -1 */ 951 static struct mfc6_cache * 952 ip6mr_cache_find_parent(struct mr_table *mrt, 953 const struct in6_addr *origin, 954 const struct in6_addr *mcastgrp, 955 int parent) 956 { 957 struct mfc6_cache_cmp_arg arg = { 958 .mf6c_origin = *origin, 959 .mf6c_mcastgrp = *mcastgrp, 960 }; 961 962 return mr_mfc_find_parent(mrt, &arg, parent); 963 } 964 965 /* Allocate a multicast cache entry */ 966 static struct mfc6_cache *ip6mr_cache_alloc(void) 967 { 968 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 969 if (!c) 970 return NULL; 971 c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; 972 c->_c.mfc_un.res.minvif = MAXMIFS; 973 c->_c.free = ip6mr_cache_free_rcu; 974 refcount_set(&c->_c.mfc_un.res.refcount, 1); 975 return c; 976 } 977 978 static struct mfc6_cache *ip6mr_cache_alloc_unres(void) 979 { 980 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 981 if (!c) 982 return NULL; 983 skb_queue_head_init(&c->_c.mfc_un.unres.unresolved); 984 c->_c.mfc_un.unres.expires = jiffies + 10 * HZ; 985 return c; 986 } 987 988 /* 989 * A cache entry has gone into a resolved state from queued 990 */ 991 992 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt, 993 struct mfc6_cache *uc, struct mfc6_cache *c) 994 { 995 struct sk_buff *skb; 996 997 /* 998 * Play the pending entries through our router 999 */ 1000 1001 while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) { 1002 if (ipv6_hdr(skb)->version == 0) { 1003 struct nlmsghdr *nlh = skb_pull(skb, 1004 sizeof(struct ipv6hdr)); 1005 1006 if (mr_fill_mroute(mrt, skb, &c->_c, 1007 nlmsg_data(nlh)) > 0) { 1008 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; 1009 } else { 1010 nlh->nlmsg_type = NLMSG_ERROR; 1011 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 1012 skb_trim(skb, nlh->nlmsg_len); 1013 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE; 1014 } 1015 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 1016 } else 1017 ip6_mr_forward(net, mrt, skb, c); 1018 } 1019 } 1020 1021 /* 1022 * Bounce a cache query up to pim6sd and netlink. 1023 * 1024 * Called under mrt_lock. 1025 */ 1026 1027 static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, 1028 mifi_t mifi, int assert) 1029 { 1030 struct sock *mroute6_sk; 1031 struct sk_buff *skb; 1032 struct mrt6msg *msg; 1033 int ret; 1034 1035 #ifdef CONFIG_IPV6_PIMSM_V2 1036 if (assert == MRT6MSG_WHOLEPKT) 1037 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt) 1038 +sizeof(*msg)); 1039 else 1040 #endif 1041 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC); 1042 1043 if (!skb) 1044 return -ENOBUFS; 1045 1046 /* I suppose that internal messages 1047 * do not require checksums */ 1048 1049 skb->ip_summed = CHECKSUM_UNNECESSARY; 1050 1051 #ifdef CONFIG_IPV6_PIMSM_V2 1052 if (assert == MRT6MSG_WHOLEPKT) { 1053 /* Ugly, but we have no choice with this interface. 1054 Duplicate old header, fix length etc. 1055 And all this only to mangle msg->im6_msgtype and 1056 to set msg->im6_mbz to "mbz" :-) 1057 */ 1058 skb_push(skb, -skb_network_offset(pkt)); 1059 1060 skb_push(skb, sizeof(*msg)); 1061 skb_reset_transport_header(skb); 1062 msg = (struct mrt6msg *)skb_transport_header(skb); 1063 msg->im6_mbz = 0; 1064 msg->im6_msgtype = MRT6MSG_WHOLEPKT; 1065 msg->im6_mif = mrt->mroute_reg_vif_num; 1066 msg->im6_pad = 0; 1067 msg->im6_src = ipv6_hdr(pkt)->saddr; 1068 msg->im6_dst = ipv6_hdr(pkt)->daddr; 1069 1070 skb->ip_summed = CHECKSUM_UNNECESSARY; 1071 } else 1072 #endif 1073 { 1074 /* 1075 * Copy the IP header 1076 */ 1077 1078 skb_put(skb, sizeof(struct ipv6hdr)); 1079 skb_reset_network_header(skb); 1080 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr)); 1081 1082 /* 1083 * Add our header 1084 */ 1085 skb_put(skb, sizeof(*msg)); 1086 skb_reset_transport_header(skb); 1087 msg = (struct mrt6msg *)skb_transport_header(skb); 1088 1089 msg->im6_mbz = 0; 1090 msg->im6_msgtype = assert; 1091 msg->im6_mif = mifi; 1092 msg->im6_pad = 0; 1093 msg->im6_src = ipv6_hdr(pkt)->saddr; 1094 msg->im6_dst = ipv6_hdr(pkt)->daddr; 1095 1096 skb_dst_set(skb, dst_clone(skb_dst(pkt))); 1097 skb->ip_summed = CHECKSUM_UNNECESSARY; 1098 } 1099 1100 rcu_read_lock(); 1101 mroute6_sk = rcu_dereference(mrt->mroute_sk); 1102 if (!mroute6_sk) { 1103 rcu_read_unlock(); 1104 kfree_skb(skb); 1105 return -EINVAL; 1106 } 1107 1108 mrt6msg_netlink_event(mrt, skb); 1109 1110 /* Deliver to user space multicast routing algorithms */ 1111 ret = sock_queue_rcv_skb(mroute6_sk, skb); 1112 rcu_read_unlock(); 1113 if (ret < 0) { 1114 net_warn_ratelimited("mroute6: pending queue full, dropping entries\n"); 1115 kfree_skb(skb); 1116 } 1117 1118 return ret; 1119 } 1120 1121 /* Queue a packet for resolution. It gets locked cache entry! */ 1122 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi, 1123 struct sk_buff *skb) 1124 { 1125 struct mfc6_cache *c; 1126 bool found = false; 1127 int err; 1128 1129 spin_lock_bh(&mfc_unres_lock); 1130 list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) { 1131 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && 1132 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) { 1133 found = true; 1134 break; 1135 } 1136 } 1137 1138 if (!found) { 1139 /* 1140 * Create a new entry if allowable 1141 */ 1142 1143 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || 1144 (c = ip6mr_cache_alloc_unres()) == NULL) { 1145 spin_unlock_bh(&mfc_unres_lock); 1146 1147 kfree_skb(skb); 1148 return -ENOBUFS; 1149 } 1150 1151 /* Fill in the new cache entry */ 1152 c->_c.mfc_parent = -1; 1153 c->mf6c_origin = ipv6_hdr(skb)->saddr; 1154 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr; 1155 1156 /* 1157 * Reflect first query at pim6sd 1158 */ 1159 err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE); 1160 if (err < 0) { 1161 /* If the report failed throw the cache entry 1162 out - Brad Parker 1163 */ 1164 spin_unlock_bh(&mfc_unres_lock); 1165 1166 ip6mr_cache_free(c); 1167 kfree_skb(skb); 1168 return err; 1169 } 1170 1171 atomic_inc(&mrt->cache_resolve_queue_len); 1172 list_add(&c->_c.list, &mrt->mfc_unres_queue); 1173 mr6_netlink_event(mrt, c, RTM_NEWROUTE); 1174 1175 ipmr_do_expire_process(mrt); 1176 } 1177 1178 /* See if we can append the packet */ 1179 if (c->_c.mfc_un.unres.unresolved.qlen > 3) { 1180 kfree_skb(skb); 1181 err = -ENOBUFS; 1182 } else { 1183 skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); 1184 err = 0; 1185 } 1186 1187 spin_unlock_bh(&mfc_unres_lock); 1188 return err; 1189 } 1190 1191 /* 1192 * MFC6 cache manipulation by user space 1193 */ 1194 1195 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc, 1196 int parent) 1197 { 1198 struct mfc6_cache *c; 1199 1200 /* The entries are added/deleted only under RTNL */ 1201 rcu_read_lock(); 1202 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr, 1203 &mfc->mf6cc_mcastgrp.sin6_addr, parent); 1204 rcu_read_unlock(); 1205 if (!c) 1206 return -ENOENT; 1207 rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params); 1208 list_del_rcu(&c->_c.list); 1209 1210 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), 1211 FIB_EVENT_ENTRY_DEL, c, mrt->id); 1212 mr6_netlink_event(mrt, c, RTM_DELROUTE); 1213 mr_cache_put(&c->_c); 1214 return 0; 1215 } 1216 1217 static int ip6mr_device_event(struct notifier_block *this, 1218 unsigned long event, void *ptr) 1219 { 1220 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1221 struct net *net = dev_net(dev); 1222 struct mr_table *mrt; 1223 struct vif_device *v; 1224 int ct; 1225 1226 if (event != NETDEV_UNREGISTER) 1227 return NOTIFY_DONE; 1228 1229 ip6mr_for_each_table(mrt, net) { 1230 v = &mrt->vif_table[0]; 1231 for (ct = 0; ct < mrt->maxvif; ct++, v++) { 1232 if (v->dev == dev) 1233 mif6_delete(mrt, ct, 1, NULL); 1234 } 1235 } 1236 1237 return NOTIFY_DONE; 1238 } 1239 1240 static unsigned int ip6mr_seq_read(struct net *net) 1241 { 1242 ASSERT_RTNL(); 1243 1244 return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net); 1245 } 1246 1247 static int ip6mr_dump(struct net *net, struct notifier_block *nb) 1248 { 1249 return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump, 1250 ip6mr_mr_table_iter, &mrt_lock); 1251 } 1252 1253 static struct notifier_block ip6_mr_notifier = { 1254 .notifier_call = ip6mr_device_event 1255 }; 1256 1257 static const struct fib_notifier_ops ip6mr_notifier_ops_template = { 1258 .family = RTNL_FAMILY_IP6MR, 1259 .fib_seq_read = ip6mr_seq_read, 1260 .fib_dump = ip6mr_dump, 1261 .owner = THIS_MODULE, 1262 }; 1263 1264 static int __net_init ip6mr_notifier_init(struct net *net) 1265 { 1266 struct fib_notifier_ops *ops; 1267 1268 net->ipv6.ipmr_seq = 0; 1269 1270 ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net); 1271 if (IS_ERR(ops)) 1272 return PTR_ERR(ops); 1273 1274 net->ipv6.ip6mr_notifier_ops = ops; 1275 1276 return 0; 1277 } 1278 1279 static void __net_exit ip6mr_notifier_exit(struct net *net) 1280 { 1281 fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops); 1282 net->ipv6.ip6mr_notifier_ops = NULL; 1283 } 1284 1285 /* Setup for IP multicast routing */ 1286 static int __net_init ip6mr_net_init(struct net *net) 1287 { 1288 int err; 1289 1290 err = ip6mr_notifier_init(net); 1291 if (err) 1292 return err; 1293 1294 err = ip6mr_rules_init(net); 1295 if (err < 0) 1296 goto ip6mr_rules_fail; 1297 1298 #ifdef CONFIG_PROC_FS 1299 err = -ENOMEM; 1300 if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops, 1301 sizeof(struct mr_vif_iter))) 1302 goto proc_vif_fail; 1303 if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops, 1304 sizeof(struct mr_mfc_iter))) 1305 goto proc_cache_fail; 1306 #endif 1307 1308 return 0; 1309 1310 #ifdef CONFIG_PROC_FS 1311 proc_cache_fail: 1312 remove_proc_entry("ip6_mr_vif", net->proc_net); 1313 proc_vif_fail: 1314 ip6mr_rules_exit(net); 1315 #endif 1316 ip6mr_rules_fail: 1317 ip6mr_notifier_exit(net); 1318 return err; 1319 } 1320 1321 static void __net_exit ip6mr_net_exit(struct net *net) 1322 { 1323 #ifdef CONFIG_PROC_FS 1324 remove_proc_entry("ip6_mr_cache", net->proc_net); 1325 remove_proc_entry("ip6_mr_vif", net->proc_net); 1326 #endif 1327 ip6mr_rules_exit(net); 1328 ip6mr_notifier_exit(net); 1329 } 1330 1331 static struct pernet_operations ip6mr_net_ops = { 1332 .init = ip6mr_net_init, 1333 .exit = ip6mr_net_exit, 1334 }; 1335 1336 int __init ip6_mr_init(void) 1337 { 1338 int err; 1339 1340 mrt_cachep = kmem_cache_create("ip6_mrt_cache", 1341 sizeof(struct mfc6_cache), 1342 0, SLAB_HWCACHE_ALIGN, 1343 NULL); 1344 if (!mrt_cachep) 1345 return -ENOMEM; 1346 1347 err = register_pernet_subsys(&ip6mr_net_ops); 1348 if (err) 1349 goto reg_pernet_fail; 1350 1351 err = register_netdevice_notifier(&ip6_mr_notifier); 1352 if (err) 1353 goto reg_notif_fail; 1354 #ifdef CONFIG_IPV6_PIMSM_V2 1355 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) { 1356 pr_err("%s: can't add PIM protocol\n", __func__); 1357 err = -EAGAIN; 1358 goto add_proto_fail; 1359 } 1360 #endif 1361 err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE, 1362 NULL, ip6mr_rtm_dumproute, 0); 1363 if (err == 0) 1364 return 0; 1365 1366 #ifdef CONFIG_IPV6_PIMSM_V2 1367 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); 1368 add_proto_fail: 1369 unregister_netdevice_notifier(&ip6_mr_notifier); 1370 #endif 1371 reg_notif_fail: 1372 unregister_pernet_subsys(&ip6mr_net_ops); 1373 reg_pernet_fail: 1374 kmem_cache_destroy(mrt_cachep); 1375 return err; 1376 } 1377 1378 void ip6_mr_cleanup(void) 1379 { 1380 rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE); 1381 #ifdef CONFIG_IPV6_PIMSM_V2 1382 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); 1383 #endif 1384 unregister_netdevice_notifier(&ip6_mr_notifier); 1385 unregister_pernet_subsys(&ip6mr_net_ops); 1386 kmem_cache_destroy(mrt_cachep); 1387 } 1388 1389 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt, 1390 struct mf6cctl *mfc, int mrtsock, int parent) 1391 { 1392 unsigned char ttls[MAXMIFS]; 1393 struct mfc6_cache *uc, *c; 1394 struct mr_mfc *_uc; 1395 bool found; 1396 int i, err; 1397 1398 if (mfc->mf6cc_parent >= MAXMIFS) 1399 return -ENFILE; 1400 1401 memset(ttls, 255, MAXMIFS); 1402 for (i = 0; i < MAXMIFS; i++) { 1403 if (IF_ISSET(i, &mfc->mf6cc_ifset)) 1404 ttls[i] = 1; 1405 } 1406 1407 /* The entries are added/deleted only under RTNL */ 1408 rcu_read_lock(); 1409 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr, 1410 &mfc->mf6cc_mcastgrp.sin6_addr, parent); 1411 rcu_read_unlock(); 1412 if (c) { 1413 write_lock_bh(&mrt_lock); 1414 c->_c.mfc_parent = mfc->mf6cc_parent; 1415 ip6mr_update_thresholds(mrt, &c->_c, ttls); 1416 if (!mrtsock) 1417 c->_c.mfc_flags |= MFC_STATIC; 1418 write_unlock_bh(&mrt_lock); 1419 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, 1420 c, mrt->id); 1421 mr6_netlink_event(mrt, c, RTM_NEWROUTE); 1422 return 0; 1423 } 1424 1425 if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) && 1426 !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) 1427 return -EINVAL; 1428 1429 c = ip6mr_cache_alloc(); 1430 if (!c) 1431 return -ENOMEM; 1432 1433 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr; 1434 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr; 1435 c->_c.mfc_parent = mfc->mf6cc_parent; 1436 ip6mr_update_thresholds(mrt, &c->_c, ttls); 1437 if (!mrtsock) 1438 c->_c.mfc_flags |= MFC_STATIC; 1439 1440 err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode, 1441 ip6mr_rht_params); 1442 if (err) { 1443 pr_err("ip6mr: rhtable insert error %d\n", err); 1444 ip6mr_cache_free(c); 1445 return err; 1446 } 1447 list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list); 1448 1449 /* Check to see if we resolved a queued list. If so we 1450 * need to send on the frames and tidy up. 1451 */ 1452 found = false; 1453 spin_lock_bh(&mfc_unres_lock); 1454 list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) { 1455 uc = (struct mfc6_cache *)_uc; 1456 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && 1457 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { 1458 list_del(&_uc->list); 1459 atomic_dec(&mrt->cache_resolve_queue_len); 1460 found = true; 1461 break; 1462 } 1463 } 1464 if (list_empty(&mrt->mfc_unres_queue)) 1465 del_timer(&mrt->ipmr_expire_timer); 1466 spin_unlock_bh(&mfc_unres_lock); 1467 1468 if (found) { 1469 ip6mr_cache_resolve(net, mrt, uc, c); 1470 ip6mr_cache_free(uc); 1471 } 1472 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, 1473 c, mrt->id); 1474 mr6_netlink_event(mrt, c, RTM_NEWROUTE); 1475 return 0; 1476 } 1477 1478 /* 1479 * Close the multicast socket, and clear the vif tables etc 1480 */ 1481 1482 static void mroute_clean_tables(struct mr_table *mrt, bool all) 1483 { 1484 struct mr_mfc *c, *tmp; 1485 LIST_HEAD(list); 1486 int i; 1487 1488 /* Shut down all active vif entries */ 1489 for (i = 0; i < mrt->maxvif; i++) { 1490 if (!all && (mrt->vif_table[i].flags & VIFF_STATIC)) 1491 continue; 1492 mif6_delete(mrt, i, 0, &list); 1493 } 1494 unregister_netdevice_many(&list); 1495 1496 /* Wipe the cache */ 1497 list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { 1498 if (!all && (c->mfc_flags & MFC_STATIC)) 1499 continue; 1500 rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params); 1501 list_del_rcu(&c->list); 1502 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); 1503 mr_cache_put(c); 1504 } 1505 1506 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { 1507 spin_lock_bh(&mfc_unres_lock); 1508 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { 1509 list_del(&c->list); 1510 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), 1511 FIB_EVENT_ENTRY_DEL, 1512 (struct mfc6_cache *)c, 1513 mrt->id); 1514 mr6_netlink_event(mrt, (struct mfc6_cache *)c, 1515 RTM_DELROUTE); 1516 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); 1517 } 1518 spin_unlock_bh(&mfc_unres_lock); 1519 } 1520 } 1521 1522 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) 1523 { 1524 int err = 0; 1525 struct net *net = sock_net(sk); 1526 1527 rtnl_lock(); 1528 write_lock_bh(&mrt_lock); 1529 if (rtnl_dereference(mrt->mroute_sk)) { 1530 err = -EADDRINUSE; 1531 } else { 1532 rcu_assign_pointer(mrt->mroute_sk, sk); 1533 sock_set_flag(sk, SOCK_RCU_FREE); 1534 net->ipv6.devconf_all->mc_forwarding++; 1535 } 1536 write_unlock_bh(&mrt_lock); 1537 1538 if (!err) 1539 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, 1540 NETCONFA_MC_FORWARDING, 1541 NETCONFA_IFINDEX_ALL, 1542 net->ipv6.devconf_all); 1543 rtnl_unlock(); 1544 1545 return err; 1546 } 1547 1548 int ip6mr_sk_done(struct sock *sk) 1549 { 1550 int err = -EACCES; 1551 struct net *net = sock_net(sk); 1552 struct mr_table *mrt; 1553 1554 if (sk->sk_type != SOCK_RAW || 1555 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1556 return err; 1557 1558 rtnl_lock(); 1559 ip6mr_for_each_table(mrt, net) { 1560 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1561 write_lock_bh(&mrt_lock); 1562 RCU_INIT_POINTER(mrt->mroute_sk, NULL); 1563 /* Note that mroute_sk had SOCK_RCU_FREE set, 1564 * so the RCU grace period before sk freeing 1565 * is guaranteed by sk_destruct() 1566 */ 1567 net->ipv6.devconf_all->mc_forwarding--; 1568 write_unlock_bh(&mrt_lock); 1569 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, 1570 NETCONFA_MC_FORWARDING, 1571 NETCONFA_IFINDEX_ALL, 1572 net->ipv6.devconf_all); 1573 1574 mroute_clean_tables(mrt, false); 1575 err = 0; 1576 break; 1577 } 1578 } 1579 rtnl_unlock(); 1580 1581 return err; 1582 } 1583 1584 bool mroute6_is_socket(struct net *net, struct sk_buff *skb) 1585 { 1586 struct mr_table *mrt; 1587 struct flowi6 fl6 = { 1588 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, 1589 .flowi6_oif = skb->dev->ifindex, 1590 .flowi6_mark = skb->mark, 1591 }; 1592 1593 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 1594 return NULL; 1595 1596 return rcu_access_pointer(mrt->mroute_sk); 1597 } 1598 EXPORT_SYMBOL(mroute6_is_socket); 1599 1600 /* 1601 * Socket options and virtual interface manipulation. The whole 1602 * virtual interface system is a complete heap, but unfortunately 1603 * that's how BSD mrouted happens to think. Maybe one day with a proper 1604 * MOSPF/PIM router set up we can clean this up. 1605 */ 1606 1607 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) 1608 { 1609 int ret, parent = 0; 1610 struct mif6ctl vif; 1611 struct mf6cctl mfc; 1612 mifi_t mifi; 1613 struct net *net = sock_net(sk); 1614 struct mr_table *mrt; 1615 1616 if (sk->sk_type != SOCK_RAW || 1617 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1618 return -EOPNOTSUPP; 1619 1620 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1621 if (!mrt) 1622 return -ENOENT; 1623 1624 if (optname != MRT6_INIT) { 1625 if (sk != rcu_access_pointer(mrt->mroute_sk) && 1626 !ns_capable(net->user_ns, CAP_NET_ADMIN)) 1627 return -EACCES; 1628 } 1629 1630 switch (optname) { 1631 case MRT6_INIT: 1632 if (optlen < sizeof(int)) 1633 return -EINVAL; 1634 1635 return ip6mr_sk_init(mrt, sk); 1636 1637 case MRT6_DONE: 1638 return ip6mr_sk_done(sk); 1639 1640 case MRT6_ADD_MIF: 1641 if (optlen < sizeof(vif)) 1642 return -EINVAL; 1643 if (copy_from_user(&vif, optval, sizeof(vif))) 1644 return -EFAULT; 1645 if (vif.mif6c_mifi >= MAXMIFS) 1646 return -ENFILE; 1647 rtnl_lock(); 1648 ret = mif6_add(net, mrt, &vif, 1649 sk == rtnl_dereference(mrt->mroute_sk)); 1650 rtnl_unlock(); 1651 return ret; 1652 1653 case MRT6_DEL_MIF: 1654 if (optlen < sizeof(mifi_t)) 1655 return -EINVAL; 1656 if (copy_from_user(&mifi, optval, sizeof(mifi_t))) 1657 return -EFAULT; 1658 rtnl_lock(); 1659 ret = mif6_delete(mrt, mifi, 0, NULL); 1660 rtnl_unlock(); 1661 return ret; 1662 1663 /* 1664 * Manipulate the forwarding caches. These live 1665 * in a sort of kernel/user symbiosis. 1666 */ 1667 case MRT6_ADD_MFC: 1668 case MRT6_DEL_MFC: 1669 parent = -1; 1670 /* fall through */ 1671 case MRT6_ADD_MFC_PROXY: 1672 case MRT6_DEL_MFC_PROXY: 1673 if (optlen < sizeof(mfc)) 1674 return -EINVAL; 1675 if (copy_from_user(&mfc, optval, sizeof(mfc))) 1676 return -EFAULT; 1677 if (parent == 0) 1678 parent = mfc.mf6cc_parent; 1679 rtnl_lock(); 1680 if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY) 1681 ret = ip6mr_mfc_delete(mrt, &mfc, parent); 1682 else 1683 ret = ip6mr_mfc_add(net, mrt, &mfc, 1684 sk == 1685 rtnl_dereference(mrt->mroute_sk), 1686 parent); 1687 rtnl_unlock(); 1688 return ret; 1689 1690 /* 1691 * Control PIM assert (to activate pim will activate assert) 1692 */ 1693 case MRT6_ASSERT: 1694 { 1695 int v; 1696 1697 if (optlen != sizeof(v)) 1698 return -EINVAL; 1699 if (get_user(v, (int __user *)optval)) 1700 return -EFAULT; 1701 mrt->mroute_do_assert = v; 1702 return 0; 1703 } 1704 1705 #ifdef CONFIG_IPV6_PIMSM_V2 1706 case MRT6_PIM: 1707 { 1708 int v; 1709 1710 if (optlen != sizeof(v)) 1711 return -EINVAL; 1712 if (get_user(v, (int __user *)optval)) 1713 return -EFAULT; 1714 v = !!v; 1715 rtnl_lock(); 1716 ret = 0; 1717 if (v != mrt->mroute_do_pim) { 1718 mrt->mroute_do_pim = v; 1719 mrt->mroute_do_assert = v; 1720 } 1721 rtnl_unlock(); 1722 return ret; 1723 } 1724 1725 #endif 1726 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 1727 case MRT6_TABLE: 1728 { 1729 u32 v; 1730 1731 if (optlen != sizeof(u32)) 1732 return -EINVAL; 1733 if (get_user(v, (u32 __user *)optval)) 1734 return -EFAULT; 1735 /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */ 1736 if (v != RT_TABLE_DEFAULT && v >= 100000000) 1737 return -EINVAL; 1738 if (sk == rcu_access_pointer(mrt->mroute_sk)) 1739 return -EBUSY; 1740 1741 rtnl_lock(); 1742 ret = 0; 1743 mrt = ip6mr_new_table(net, v); 1744 if (IS_ERR(mrt)) 1745 ret = PTR_ERR(mrt); 1746 else 1747 raw6_sk(sk)->ip6mr_table = v; 1748 rtnl_unlock(); 1749 return ret; 1750 } 1751 #endif 1752 /* 1753 * Spurious command, or MRT6_VERSION which you cannot 1754 * set. 1755 */ 1756 default: 1757 return -ENOPROTOOPT; 1758 } 1759 } 1760 1761 /* 1762 * Getsock opt support for the multicast routing system. 1763 */ 1764 1765 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, 1766 int __user *optlen) 1767 { 1768 int olr; 1769 int val; 1770 struct net *net = sock_net(sk); 1771 struct mr_table *mrt; 1772 1773 if (sk->sk_type != SOCK_RAW || 1774 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1775 return -EOPNOTSUPP; 1776 1777 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1778 if (!mrt) 1779 return -ENOENT; 1780 1781 switch (optname) { 1782 case MRT6_VERSION: 1783 val = 0x0305; 1784 break; 1785 #ifdef CONFIG_IPV6_PIMSM_V2 1786 case MRT6_PIM: 1787 val = mrt->mroute_do_pim; 1788 break; 1789 #endif 1790 case MRT6_ASSERT: 1791 val = mrt->mroute_do_assert; 1792 break; 1793 default: 1794 return -ENOPROTOOPT; 1795 } 1796 1797 if (get_user(olr, optlen)) 1798 return -EFAULT; 1799 1800 olr = min_t(int, olr, sizeof(int)); 1801 if (olr < 0) 1802 return -EINVAL; 1803 1804 if (put_user(olr, optlen)) 1805 return -EFAULT; 1806 if (copy_to_user(optval, &val, olr)) 1807 return -EFAULT; 1808 return 0; 1809 } 1810 1811 /* 1812 * The IP multicast ioctl support routines. 1813 */ 1814 1815 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) 1816 { 1817 struct sioc_sg_req6 sr; 1818 struct sioc_mif_req6 vr; 1819 struct vif_device *vif; 1820 struct mfc6_cache *c; 1821 struct net *net = sock_net(sk); 1822 struct mr_table *mrt; 1823 1824 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1825 if (!mrt) 1826 return -ENOENT; 1827 1828 switch (cmd) { 1829 case SIOCGETMIFCNT_IN6: 1830 if (copy_from_user(&vr, arg, sizeof(vr))) 1831 return -EFAULT; 1832 if (vr.mifi >= mrt->maxvif) 1833 return -EINVAL; 1834 read_lock(&mrt_lock); 1835 vif = &mrt->vif_table[vr.mifi]; 1836 if (VIF_EXISTS(mrt, vr.mifi)) { 1837 vr.icount = vif->pkt_in; 1838 vr.ocount = vif->pkt_out; 1839 vr.ibytes = vif->bytes_in; 1840 vr.obytes = vif->bytes_out; 1841 read_unlock(&mrt_lock); 1842 1843 if (copy_to_user(arg, &vr, sizeof(vr))) 1844 return -EFAULT; 1845 return 0; 1846 } 1847 read_unlock(&mrt_lock); 1848 return -EADDRNOTAVAIL; 1849 case SIOCGETSGCNT_IN6: 1850 if (copy_from_user(&sr, arg, sizeof(sr))) 1851 return -EFAULT; 1852 1853 rcu_read_lock(); 1854 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); 1855 if (c) { 1856 sr.pktcnt = c->_c.mfc_un.res.pkt; 1857 sr.bytecnt = c->_c.mfc_un.res.bytes; 1858 sr.wrong_if = c->_c.mfc_un.res.wrong_if; 1859 rcu_read_unlock(); 1860 1861 if (copy_to_user(arg, &sr, sizeof(sr))) 1862 return -EFAULT; 1863 return 0; 1864 } 1865 rcu_read_unlock(); 1866 return -EADDRNOTAVAIL; 1867 default: 1868 return -ENOIOCTLCMD; 1869 } 1870 } 1871 1872 #ifdef CONFIG_COMPAT 1873 struct compat_sioc_sg_req6 { 1874 struct sockaddr_in6 src; 1875 struct sockaddr_in6 grp; 1876 compat_ulong_t pktcnt; 1877 compat_ulong_t bytecnt; 1878 compat_ulong_t wrong_if; 1879 }; 1880 1881 struct compat_sioc_mif_req6 { 1882 mifi_t mifi; 1883 compat_ulong_t icount; 1884 compat_ulong_t ocount; 1885 compat_ulong_t ibytes; 1886 compat_ulong_t obytes; 1887 }; 1888 1889 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) 1890 { 1891 struct compat_sioc_sg_req6 sr; 1892 struct compat_sioc_mif_req6 vr; 1893 struct vif_device *vif; 1894 struct mfc6_cache *c; 1895 struct net *net = sock_net(sk); 1896 struct mr_table *mrt; 1897 1898 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); 1899 if (!mrt) 1900 return -ENOENT; 1901 1902 switch (cmd) { 1903 case SIOCGETMIFCNT_IN6: 1904 if (copy_from_user(&vr, arg, sizeof(vr))) 1905 return -EFAULT; 1906 if (vr.mifi >= mrt->maxvif) 1907 return -EINVAL; 1908 read_lock(&mrt_lock); 1909 vif = &mrt->vif_table[vr.mifi]; 1910 if (VIF_EXISTS(mrt, vr.mifi)) { 1911 vr.icount = vif->pkt_in; 1912 vr.ocount = vif->pkt_out; 1913 vr.ibytes = vif->bytes_in; 1914 vr.obytes = vif->bytes_out; 1915 read_unlock(&mrt_lock); 1916 1917 if (copy_to_user(arg, &vr, sizeof(vr))) 1918 return -EFAULT; 1919 return 0; 1920 } 1921 read_unlock(&mrt_lock); 1922 return -EADDRNOTAVAIL; 1923 case SIOCGETSGCNT_IN6: 1924 if (copy_from_user(&sr, arg, sizeof(sr))) 1925 return -EFAULT; 1926 1927 rcu_read_lock(); 1928 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); 1929 if (c) { 1930 sr.pktcnt = c->_c.mfc_un.res.pkt; 1931 sr.bytecnt = c->_c.mfc_un.res.bytes; 1932 sr.wrong_if = c->_c.mfc_un.res.wrong_if; 1933 rcu_read_unlock(); 1934 1935 if (copy_to_user(arg, &sr, sizeof(sr))) 1936 return -EFAULT; 1937 return 0; 1938 } 1939 rcu_read_unlock(); 1940 return -EADDRNOTAVAIL; 1941 default: 1942 return -ENOIOCTLCMD; 1943 } 1944 } 1945 #endif 1946 1947 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 1948 { 1949 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 1950 IPSTATS_MIB_OUTFORWDATAGRAMS); 1951 __IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)), 1952 IPSTATS_MIB_OUTOCTETS, skb->len); 1953 return dst_output(net, sk, skb); 1954 } 1955 1956 /* 1957 * Processing handlers for ip6mr_forward 1958 */ 1959 1960 static int ip6mr_forward2(struct net *net, struct mr_table *mrt, 1961 struct sk_buff *skb, struct mfc6_cache *c, int vifi) 1962 { 1963 struct ipv6hdr *ipv6h; 1964 struct vif_device *vif = &mrt->vif_table[vifi]; 1965 struct net_device *dev; 1966 struct dst_entry *dst; 1967 struct flowi6 fl6; 1968 1969 if (!vif->dev) 1970 goto out_free; 1971 1972 #ifdef CONFIG_IPV6_PIMSM_V2 1973 if (vif->flags & MIFF_REGISTER) { 1974 vif->pkt_out++; 1975 vif->bytes_out += skb->len; 1976 vif->dev->stats.tx_bytes += skb->len; 1977 vif->dev->stats.tx_packets++; 1978 ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT); 1979 goto out_free; 1980 } 1981 #endif 1982 1983 ipv6h = ipv6_hdr(skb); 1984 1985 fl6 = (struct flowi6) { 1986 .flowi6_oif = vif->link, 1987 .daddr = ipv6h->daddr, 1988 }; 1989 1990 dst = ip6_route_output(net, NULL, &fl6); 1991 if (dst->error) { 1992 dst_release(dst); 1993 goto out_free; 1994 } 1995 1996 skb_dst_drop(skb); 1997 skb_dst_set(skb, dst); 1998 1999 /* 2000 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally 2001 * not only before forwarding, but after forwarding on all output 2002 * interfaces. It is clear, if mrouter runs a multicasting 2003 * program, it should receive packets not depending to what interface 2004 * program is joined. 2005 * If we will not make it, the program will have to join on all 2006 * interfaces. On the other hand, multihoming host (or router, but 2007 * not mrouter) cannot join to more than one interface - it will 2008 * result in receiving multiple packets. 2009 */ 2010 dev = vif->dev; 2011 skb->dev = dev; 2012 vif->pkt_out++; 2013 vif->bytes_out += skb->len; 2014 2015 /* We are about to write */ 2016 /* XXX: extension headers? */ 2017 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev))) 2018 goto out_free; 2019 2020 ipv6h = ipv6_hdr(skb); 2021 ipv6h->hop_limit--; 2022 2023 IP6CB(skb)->flags |= IP6SKB_FORWARDED; 2024 2025 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, 2026 net, NULL, skb, skb->dev, dev, 2027 ip6mr_forward2_finish); 2028 2029 out_free: 2030 kfree_skb(skb); 2031 return 0; 2032 } 2033 2034 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev) 2035 { 2036 int ct; 2037 2038 for (ct = mrt->maxvif - 1; ct >= 0; ct--) { 2039 if (mrt->vif_table[ct].dev == dev) 2040 break; 2041 } 2042 return ct; 2043 } 2044 2045 static void ip6_mr_forward(struct net *net, struct mr_table *mrt, 2046 struct sk_buff *skb, struct mfc6_cache *c) 2047 { 2048 int psend = -1; 2049 int vif, ct; 2050 int true_vifi = ip6mr_find_vif(mrt, skb->dev); 2051 2052 vif = c->_c.mfc_parent; 2053 c->_c.mfc_un.res.pkt++; 2054 c->_c.mfc_un.res.bytes += skb->len; 2055 c->_c.mfc_un.res.lastuse = jiffies; 2056 2057 if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) { 2058 struct mfc6_cache *cache_proxy; 2059 2060 /* For an (*,G) entry, we only check that the incoming 2061 * interface is part of the static tree. 2062 */ 2063 rcu_read_lock(); 2064 cache_proxy = mr_mfc_find_any_parent(mrt, vif); 2065 if (cache_proxy && 2066 cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) { 2067 rcu_read_unlock(); 2068 goto forward; 2069 } 2070 rcu_read_unlock(); 2071 } 2072 2073 /* 2074 * Wrong interface: drop packet and (maybe) send PIM assert. 2075 */ 2076 if (mrt->vif_table[vif].dev != skb->dev) { 2077 c->_c.mfc_un.res.wrong_if++; 2078 2079 if (true_vifi >= 0 && mrt->mroute_do_assert && 2080 /* pimsm uses asserts, when switching from RPT to SPT, 2081 so that we cannot check that packet arrived on an oif. 2082 It is bad, but otherwise we would need to move pretty 2083 large chunk of pimd to kernel. Ough... --ANK 2084 */ 2085 (mrt->mroute_do_pim || 2086 c->_c.mfc_un.res.ttls[true_vifi] < 255) && 2087 time_after(jiffies, 2088 c->_c.mfc_un.res.last_assert + 2089 MFC_ASSERT_THRESH)) { 2090 c->_c.mfc_un.res.last_assert = jiffies; 2091 ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF); 2092 } 2093 goto dont_forward; 2094 } 2095 2096 forward: 2097 mrt->vif_table[vif].pkt_in++; 2098 mrt->vif_table[vif].bytes_in += skb->len; 2099 2100 /* 2101 * Forward the frame 2102 */ 2103 if (ipv6_addr_any(&c->mf6c_origin) && 2104 ipv6_addr_any(&c->mf6c_mcastgrp)) { 2105 if (true_vifi >= 0 && 2106 true_vifi != c->_c.mfc_parent && 2107 ipv6_hdr(skb)->hop_limit > 2108 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { 2109 /* It's an (*,*) entry and the packet is not coming from 2110 * the upstream: forward the packet to the upstream 2111 * only. 2112 */ 2113 psend = c->_c.mfc_parent; 2114 goto last_forward; 2115 } 2116 goto dont_forward; 2117 } 2118 for (ct = c->_c.mfc_un.res.maxvif - 1; 2119 ct >= c->_c.mfc_un.res.minvif; ct--) { 2120 /* For (*,G) entry, don't forward to the incoming interface */ 2121 if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) && 2122 ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) { 2123 if (psend != -1) { 2124 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2125 if (skb2) 2126 ip6mr_forward2(net, mrt, skb2, 2127 c, psend); 2128 } 2129 psend = ct; 2130 } 2131 } 2132 last_forward: 2133 if (psend != -1) { 2134 ip6mr_forward2(net, mrt, skb, c, psend); 2135 return; 2136 } 2137 2138 dont_forward: 2139 kfree_skb(skb); 2140 } 2141 2142 2143 /* 2144 * Multicast packets for forwarding arrive here 2145 */ 2146 2147 int ip6_mr_input(struct sk_buff *skb) 2148 { 2149 struct mfc6_cache *cache; 2150 struct net *net = dev_net(skb->dev); 2151 struct mr_table *mrt; 2152 struct flowi6 fl6 = { 2153 .flowi6_iif = skb->dev->ifindex, 2154 .flowi6_mark = skb->mark, 2155 }; 2156 int err; 2157 2158 err = ip6mr_fib_lookup(net, &fl6, &mrt); 2159 if (err < 0) { 2160 kfree_skb(skb); 2161 return err; 2162 } 2163 2164 read_lock(&mrt_lock); 2165 cache = ip6mr_cache_find(mrt, 2166 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); 2167 if (!cache) { 2168 int vif = ip6mr_find_vif(mrt, skb->dev); 2169 2170 if (vif >= 0) 2171 cache = ip6mr_cache_find_any(mrt, 2172 &ipv6_hdr(skb)->daddr, 2173 vif); 2174 } 2175 2176 /* 2177 * No usable cache entry 2178 */ 2179 if (!cache) { 2180 int vif; 2181 2182 vif = ip6mr_find_vif(mrt, skb->dev); 2183 if (vif >= 0) { 2184 int err = ip6mr_cache_unresolved(mrt, vif, skb); 2185 read_unlock(&mrt_lock); 2186 2187 return err; 2188 } 2189 read_unlock(&mrt_lock); 2190 kfree_skb(skb); 2191 return -ENODEV; 2192 } 2193 2194 ip6_mr_forward(net, mrt, skb, cache); 2195 2196 read_unlock(&mrt_lock); 2197 2198 return 0; 2199 } 2200 2201 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, 2202 u32 portid) 2203 { 2204 int err; 2205 struct mr_table *mrt; 2206 struct mfc6_cache *cache; 2207 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); 2208 2209 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); 2210 if (!mrt) 2211 return -ENOENT; 2212 2213 read_lock(&mrt_lock); 2214 cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); 2215 if (!cache && skb->dev) { 2216 int vif = ip6mr_find_vif(mrt, skb->dev); 2217 2218 if (vif >= 0) 2219 cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr, 2220 vif); 2221 } 2222 2223 if (!cache) { 2224 struct sk_buff *skb2; 2225 struct ipv6hdr *iph; 2226 struct net_device *dev; 2227 int vif; 2228 2229 dev = skb->dev; 2230 if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) { 2231 read_unlock(&mrt_lock); 2232 return -ENODEV; 2233 } 2234 2235 /* really correct? */ 2236 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); 2237 if (!skb2) { 2238 read_unlock(&mrt_lock); 2239 return -ENOMEM; 2240 } 2241 2242 NETLINK_CB(skb2).portid = portid; 2243 skb_reset_transport_header(skb2); 2244 2245 skb_put(skb2, sizeof(struct ipv6hdr)); 2246 skb_reset_network_header(skb2); 2247 2248 iph = ipv6_hdr(skb2); 2249 iph->version = 0; 2250 iph->priority = 0; 2251 iph->flow_lbl[0] = 0; 2252 iph->flow_lbl[1] = 0; 2253 iph->flow_lbl[2] = 0; 2254 iph->payload_len = 0; 2255 iph->nexthdr = IPPROTO_NONE; 2256 iph->hop_limit = 0; 2257 iph->saddr = rt->rt6i_src.addr; 2258 iph->daddr = rt->rt6i_dst.addr; 2259 2260 err = ip6mr_cache_unresolved(mrt, vif, skb2); 2261 read_unlock(&mrt_lock); 2262 2263 return err; 2264 } 2265 2266 err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); 2267 read_unlock(&mrt_lock); 2268 return err; 2269 } 2270 2271 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2272 u32 portid, u32 seq, struct mfc6_cache *c, int cmd, 2273 int flags) 2274 { 2275 struct nlmsghdr *nlh; 2276 struct rtmsg *rtm; 2277 int err; 2278 2279 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); 2280 if (!nlh) 2281 return -EMSGSIZE; 2282 2283 rtm = nlmsg_data(nlh); 2284 rtm->rtm_family = RTNL_FAMILY_IP6MR; 2285 rtm->rtm_dst_len = 128; 2286 rtm->rtm_src_len = 128; 2287 rtm->rtm_tos = 0; 2288 rtm->rtm_table = mrt->id; 2289 if (nla_put_u32(skb, RTA_TABLE, mrt->id)) 2290 goto nla_put_failure; 2291 rtm->rtm_type = RTN_MULTICAST; 2292 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2293 if (c->_c.mfc_flags & MFC_STATIC) 2294 rtm->rtm_protocol = RTPROT_STATIC; 2295 else 2296 rtm->rtm_protocol = RTPROT_MROUTED; 2297 rtm->rtm_flags = 0; 2298 2299 if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) || 2300 nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp)) 2301 goto nla_put_failure; 2302 err = mr_fill_mroute(mrt, skb, &c->_c, rtm); 2303 /* do not break the dump if cache is unresolved */ 2304 if (err < 0 && err != -ENOENT) 2305 goto nla_put_failure; 2306 2307 nlmsg_end(skb, nlh); 2308 return 0; 2309 2310 nla_put_failure: 2311 nlmsg_cancel(skb, nlh); 2312 return -EMSGSIZE; 2313 } 2314 2315 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2316 u32 portid, u32 seq, struct mr_mfc *c, 2317 int cmd, int flags) 2318 { 2319 return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c, 2320 cmd, flags); 2321 } 2322 2323 static int mr6_msgsize(bool unresolved, int maxvif) 2324 { 2325 size_t len = 2326 NLMSG_ALIGN(sizeof(struct rtmsg)) 2327 + nla_total_size(4) /* RTA_TABLE */ 2328 + nla_total_size(sizeof(struct in6_addr)) /* RTA_SRC */ 2329 + nla_total_size(sizeof(struct in6_addr)) /* RTA_DST */ 2330 ; 2331 2332 if (!unresolved) 2333 len = len 2334 + nla_total_size(4) /* RTA_IIF */ 2335 + nla_total_size(0) /* RTA_MULTIPATH */ 2336 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) 2337 /* RTA_MFC_STATS */ 2338 + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) 2339 ; 2340 2341 return len; 2342 } 2343 2344 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, 2345 int cmd) 2346 { 2347 struct net *net = read_pnet(&mrt->net); 2348 struct sk_buff *skb; 2349 int err = -ENOBUFS; 2350 2351 skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif), 2352 GFP_ATOMIC); 2353 if (!skb) 2354 goto errout; 2355 2356 err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); 2357 if (err < 0) 2358 goto errout; 2359 2360 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC); 2361 return; 2362 2363 errout: 2364 kfree_skb(skb); 2365 if (err < 0) 2366 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err); 2367 } 2368 2369 static size_t mrt6msg_netlink_msgsize(size_t payloadlen) 2370 { 2371 size_t len = 2372 NLMSG_ALIGN(sizeof(struct rtgenmsg)) 2373 + nla_total_size(1) /* IP6MRA_CREPORT_MSGTYPE */ 2374 + nla_total_size(4) /* IP6MRA_CREPORT_MIF_ID */ 2375 /* IP6MRA_CREPORT_SRC_ADDR */ 2376 + nla_total_size(sizeof(struct in6_addr)) 2377 /* IP6MRA_CREPORT_DST_ADDR */ 2378 + nla_total_size(sizeof(struct in6_addr)) 2379 /* IP6MRA_CREPORT_PKT */ 2380 + nla_total_size(payloadlen) 2381 ; 2382 2383 return len; 2384 } 2385 2386 static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt) 2387 { 2388 struct net *net = read_pnet(&mrt->net); 2389 struct nlmsghdr *nlh; 2390 struct rtgenmsg *rtgenm; 2391 struct mrt6msg *msg; 2392 struct sk_buff *skb; 2393 struct nlattr *nla; 2394 int payloadlen; 2395 2396 payloadlen = pkt->len - sizeof(struct mrt6msg); 2397 msg = (struct mrt6msg *)skb_transport_header(pkt); 2398 2399 skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC); 2400 if (!skb) 2401 goto errout; 2402 2403 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, 2404 sizeof(struct rtgenmsg), 0); 2405 if (!nlh) 2406 goto errout; 2407 rtgenm = nlmsg_data(nlh); 2408 rtgenm->rtgen_family = RTNL_FAMILY_IP6MR; 2409 if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) || 2410 nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) || 2411 nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR, 2412 &msg->im6_src) || 2413 nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR, 2414 &msg->im6_dst)) 2415 goto nla_put_failure; 2416 2417 nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen); 2418 if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg), 2419 nla_data(nla), payloadlen)) 2420 goto nla_put_failure; 2421 2422 nlmsg_end(skb, nlh); 2423 2424 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC); 2425 return; 2426 2427 nla_put_failure: 2428 nlmsg_cancel(skb, nlh); 2429 errout: 2430 kfree_skb(skb); 2431 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS); 2432 } 2433 2434 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2435 { 2436 return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter, 2437 _ip6mr_fill_mroute, &mfc_unres_lock); 2438 } 2439