/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
#include <linux/export.h>
#include <net/ip6_checksum.h>
#include <linux/netconf.h>

struct ip6mr_rule {
	struct fib_rule		common;
};

struct ip6mr_result {
	struct mr_table	*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */
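/* An illustrative sketch (not compiled here) of the pattern described
 * above: data-path readers take mrt_lock shared, process-context
 * writers take it exclusive, and only the unresolved queue needs the
 * stronger spinlock:
 *
 *	read_lock(&mrt_lock);		(forwarding path, lookups)
 *	... walk vif_table / resolved cache ...
 *	read_unlock(&mrt_lock);
 *
 *	spin_lock_bh(&mfc_unres_lock);	(unresolved queue only)
 *	... queue, resolve or expire entries ...
 *	spin_unlock_bh(&mfc_unres_lock);
 */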
static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr_table *mrt);

static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(struct timer_list *t);

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)

static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	struct mr_table *ret;

	if (!mrt)
		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
				     struct mr_table, list);
	else
		ret = list_entry_rcu(mrt->list.next,
				     struct mr_table, list);

	if (&ret->list == &net->ipv6.mr6_tables)
		return NULL;
	return ret;
}

static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ip6mr_get_table(rule->fr_net, rule->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb,
				struct netlink_ext_ack *extack)
{
	return 0;
}

static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos = 0;
	return 0;
}
static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (!mrt) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	ip6mr_free_table(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
	rtnl_unlock();
}

static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
}

static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
}

bool ip6mr_rule_default(const struct fib_rule *rule)
{
	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
}
EXPORT_SYMBOL(ip6mr_rule_default);
#else
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	if (!mrt)
		return net->ipv6.mrt6;
	return NULL;
}

static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}

static int __net_init ip6mr_rules_init(struct net *net)
{
	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
	return net->ipv6.mrt6 ? 0 : -ENOMEM;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}

static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
	return 0;
}

static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	return 0;
}
#endif
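/* Illustrative userspace sketch, assuming a kernel built with
 * CONFIG_IPV6_MROUTE_MULTIPLE_TABLES and <linux/mroute6.h> included
 * (error handling omitted): a daemon that wants its own table sets
 * MRT6_TABLE on its raw ICMPv6 socket *before* MRT6_INIT, which
 * creates the table on demand via ip6mr_new_table():
 *
 *	int fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	uint32_t table = 42;
 *	setsockopt(fd, SOL_IPV6, MRT6_TABLE, &table, sizeof(table));
 *	int one = 1;
 *	setsockopt(fd, SOL_IPV6, MRT6_INIT, &one, sizeof(one));
 *
 * Without the config option every lookup falls through to the single
 * RT6_TABLE_DFLT instance, as the #else stubs above show.
 */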
static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
			  const void *ptr)
{
	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
	struct mfc6_cache *c = (struct mfc6_cache *)ptr;

	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
}

static const struct rhashtable_params ip6mr_rht_params = {
	.head_offset = offsetof(struct mr_mfc, mnode),
	.key_offset = offsetof(struct mfc6_cache, cmparg),
	.key_len = sizeof(struct mfc6_cache_cmp_arg),
	.nelem_hint = 3,
	.locks_mul = 1,
	.obj_cmpfn = ip6mr_hash_cmp,
	.automatic_shrinking = true,
};

static void ip6mr_new_table_set(struct mr_table *mrt,
				struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
}

static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
	.mf6c_origin = IN6ADDR_ANY_INIT,
	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};

static struct mr_table_ops ip6mr_mr_table_ops = {
	.rht_params = &ip6mr_rht_params,
	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
};
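/* Note on the keying above: every cache entry hashes on the full
 * (origin, mcastgrp) pair, so an exact (S,G) lookup is a single hash
 * probe.  cmparg_any (both addresses ::) is the key of the (*,*)
 * wildcard entry; (*,G) proxy entries keep a real group with an
 * unspecified origin, see ip6mr_cache_find_any() further down.
 */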
static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, id);
	if (mrt)
		return mrt;

	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
			      ipmr_expire_process, ip6mr_new_table_set);
}

static void ip6mr_free_table(struct mr_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, true);
	rhltable_destroy(&mrt->mfc_hash);
	kfree(mrt);
}

#ifdef CONFIG_PROC_FS
/* The /proc interfaces to multicast routing
 * /proc/ip6_mr_cache /proc/ip6_mr_vif
 */

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct mr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return mr_vif_seq_start(seq, pos);
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct mr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct mr_vif_iter));
}

static const struct file_operations ip6mr_vif_fops = {
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct mr_mfc_iter *it = seq->private;
		struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->_c.mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->_c.mfc_un.res.pkt,
				   mfc->_c.mfc_un.res.bytes,
				   mfc->_c.mfc_un.res.wrong_if);
			for (n = mfc->_c.mfc_un.res.minvif;
			     n < mfc->_c.mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->_c.mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d", n,
						   mfc->_c.mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = mr_mfc_seq_next,
	.stop  = mr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct mr_mfc_iter));
}

static const struct file_operations ip6mr_mfc_fops = {
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif
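/* For reference, the two files created from these ops look roughly
 * like this (one illustrative entry each; all values made up):
 *
 *	# cat /proc/net/ip6_mr_vif
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0            9320      84      1204      11 00000
 *
 *	# cat /proc/net/ip6_mr_cache
 *	Group                            Origin                           Iif      Pkts  Bytes     Wrong  Oifs
 *	ff0e::114                        2001:db8::1                      0          84     9320        0  1:1
 */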
#ifdef CONFIG_IPV6_PIMSM_V2

static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr *encap;
	struct net_device *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (!reg_dev)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
drop:
	kfree_skb(skb);
	return 0;
}

static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};
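/* For orientation, the layout pim6_rcv() just validated, as a PIM
 * Register (RFC 7761) arrives from a remote DR:
 *
 *	+----------------------+ <- skb_transport_header()
 *	| struct pimreghdr     |    type must be Register; checksum
 *	+----------------------+    taken over the header alone or,
 *	| inner IPv6 header    |    failing that, the whole packet
 *	| inner payload ...    |    (inner daddr must be multicast)
 *	+----------------------+
 *
 * The code above strips the outer headers and re-injects the inner
 * packet through the register device, so the daemon observes it as
 * ordinary multicast input on the register vif.
 */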
/* Service routines creating virtual interfaces: PIMREG */

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->needs_free_netdev	= true;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT6_TABLE_DFLT)
		sprintf(name, "pim6reg");
	else
		sprintf(name, "pim6reg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}
#endif

static int call_ip6mr_vif_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct vif_device *vif,
					  mifi_t vif_index, u32 tb_id)
{
	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     vif, vif_index, tb_id,
				     &net->ipv6.ipmr_seq);
}

static int call_ip6mr_mfc_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct mfc6_cache *mfc, u32 tb_id)
{
	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
}

/* Delete a VIF entry */
static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
		       struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	if (VIF_EXISTS(mrt, vifi))
		call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
					       FIB_EVENT_VIF_DEL, v, vifi,
					       mrt->id);

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding--;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if ((v->flags & MIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
{
	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);

	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
}

static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}
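/* A subtlety relied on by ip6mr_destroy_unres() and
 * ip6mr_cache_resolve() below: skbs parked on an unresolved entry are
 * not all data packets.  ip6mr_get_route() parks netlink *request*
 * skbs there too, marked by a pseudo IPv6 header with version == 0
 * (a real packet always carries version 6), so the resolve and expire
 * paths can tell which queued skbs must be answered through
 * rtnl_unicast() instead of being forwarded.
 */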
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}


/* Timer process for all the unresolved queue. */

static void ipmr_do_expire_process(struct mr_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mr_mfc *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(struct timer_list *t)
{
	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ip6mr_update_thresholds(struct mr_table *mrt,
				    struct mr_mfc *cache,
				    unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
	cache->mfc_un.res.lastuse = jiffies;
}
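/* Example of the threshold semantics established above: if mif 2 was
 * added with vifc_threshold = 2, ttls[2] becomes 2 and the loop in
 * ip6_mr_forward() emits a copy on mif 2 only while the packet's
 * hop_limit is greater than 2.  A slot left at 255 can never match
 * (hop_limit is at most 255), which is how "do not forward to this
 * mif" is encoded.
 */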
static int mif6_add(struct net *net, struct mr_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding++;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
			MIFF_REGISTER);

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
				       v, vifi, mrt->id);
	return 0;
}

static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find(mrt, &arg);
}

/* Look for a (*,G) entry */
static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = in6addr_any,
		.mf6c_mcastgrp = *mcastgrp,
	};

	if (ipv6_addr_any(mcastgrp))
		return mr_mfc_find_any_parent(mrt, mifi);
	return mr_mfc_find_any(mrt, mifi, &arg);
}

/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc6_cache *
ip6mr_cache_find_parent(struct mr_table *mrt,
			const struct in6_addr *origin,
			const struct in6_addr *mcastgrp,
			int parent)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find_parent(mrt, &arg, parent);
}

/* Allocate a multicast cache entry */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (!c)
		return NULL;
	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
	c->_c.mfc_un.res.minvif = MAXMIFS;
	c->_c.free = ip6mr_cache_free_rcu;
	refcount_set(&c->_c.mfc_un.res.refcount, 1);
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (!c)
		return NULL;
	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));

			if (mr_fill_mroute(mrt, skb, &c->_c,
					   nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			ip6_mr_forward(net, mrt, skb, c);
	}
}
/*
 *	Bounce a cache query up to pim6sd and netlink.
 *
 *	Called under mrt_lock.
 */

static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sock *mroute6_sk;
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+ sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
		/*
		 *	Copy the IP header
		 */

		skb_put(skb, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb);
		skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

		/*
		 *	Add our header
		 */
		skb_put(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);

		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		msg->im6_mif = mifi;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb_dst_set(skb, dst_clone(skb_dst(pkt)));
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	rcu_read_lock();
	mroute6_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute6_sk) {
		rcu_read_unlock();
		kfree_skb(skb);
		return -EINVAL;
	}

	mrt6msg_netlink_event(mrt, skb);

	/* Deliver to user space multicast routing algorithms */
	ret = sock_queue_rcv_skb(mroute6_sk, skb);
	rcu_read_unlock();
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}
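/* Illustrative daemon-side counterpart (error handling omitted): the
 * skb queued on mroute_sk above is read from the same raw ICMPv6
 * socket that performed MRT6_INIT, and userspace interprets the start
 * of the buffer as a struct mrt6msg, the way pim6sd does:
 *
 *	char buf[8192];
 *	ssize_t n = recv(fd, buf, sizeof(buf), 0);
 *	struct mrt6msg *m = (struct mrt6msg *)buf;
 *	if (n >= (ssize_t)sizeof(*m) && m->im6_mbz == 0 &&
 *	    m->im6_msgtype == MRT6MSG_NOCACHE)
 *		... resolve (m->im6_src, m->im6_dst), then MRT6_ADD_MFC ...
 *
 * For MRT6MSG_WHOLEPKT (register vif) the original packet follows the
 * header, ready to be resent as a PIM Register.
 */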
/* Queue a packet for resolution. It gets locked cache entry! */
static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
				  struct sk_buff *skb)
{
	struct mfc6_cache *c;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		ipmr_do_expire_process(mrt);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	struct mfc6_cache *c;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
	list_del_rcu(&c->_c.list);

	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_DELROUTE);
	mr_cache_put(&c->_c);
	return 0;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				mif6_delete(mrt, ct, 1, NULL);
		}
	}

	return NOTIFY_DONE;
}

static unsigned int ip6mr_seq_read(struct net *net)
{
	ASSERT_RTNL();

	return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
}

static int ip6mr_dump(struct net *net, struct notifier_block *nb)
{
	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
		       ip6mr_mr_table_iter, &mrt_lock);
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.fib_seq_read	= ip6mr_seq_read,
	.fib_dump	= ip6mr_dump,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_notifier_init(struct net *net)
{
	struct fib_notifier_ops *ops;

	net->ipv6.ipmr_seq = 0;

	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	net->ipv6.ip6mr_notifier_ops = ops;

	return 0;
}

static void __net_exit ip6mr_notifier_exit(struct net *net)
{
	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
	net->ipv6.ip6mr_notifier_ops = NULL;
}

/* Setup for IP multicast routing */
static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_notifier_init(net);
	if (err)
		return err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto ip6mr_rules_fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
ip6mr_rules_fail:
	ip6mr_notifier_exit(net);
	return err;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
	ip6mr_notifier_exit(net);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};

int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
				   NULL, ip6mr_rtm_dumproute, 0);
	if (err == 0)
		return 0;

#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}

void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}
static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	unsigned char ttls[MAXMIFS];
	struct mfc6_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int i, err;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (c) {
		write_lock_bh(&mrt_lock);
		c->_c.mfc_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, &c->_c, ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
					       c, mrt->id);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->_c.mfc_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, &c->_c, ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ip6mr_rht_params);
	if (err) {
		pr_err("ip6mr: rhtable insert error %d\n", err);
		ip6mr_cache_free(c);
		return err;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);

	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc6_cache *)_uc;
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
				       c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}
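/* Illustrative userspace counterpart of ip6mr_mfc_add() (fd is the
 * MRT6_INIT socket from earlier; error handling omitted): install
 * (2001:db8::1, ff0e::114) with incoming mif 0 and outgoing mif 1:
 *
 *	struct mf6cctl mc = { 0 };
 *	inet_pton(AF_INET6, "2001:db8::1", &mc.mf6cc_origin.sin6_addr);
 *	inet_pton(AF_INET6, "ff0e::114", &mc.mf6cc_mcastgrp.sin6_addr);
 *	mc.mf6cc_parent = 0;
 *	IF_SET(1, &mc.mf6cc_ifset);
 *	setsockopt(fd, SOL_IPV6, MRT6_ADD_MFC, &mc, sizeof(mc));
 *
 * Note how the ifset collapses to a TTL threshold of 1 per selected
 * mif in the ttls[] loop above.
 */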
/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
	struct mr_mfc *c, *tmp;
	LIST_HEAD(list);
	int i;

	/* Shut down all active vif entries */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
			continue;
		mif6_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/* Wipe the cache */
	list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
		if (!all && (c->mfc_flags & MFC_STATIC))
			continue;
		rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
		list_del_rcu(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		mr_cache_put(c);
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
						       FIB_EVENT_ENTRY_DEL,
						       (struct mfc6_cache *)c,
						       mrt->id);
			mr6_netlink_event(mrt, (struct mfc6_cache *)c,
					  RTM_DELROUTE);
			ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (rtnl_dereference(mrt->mroute_sk)) {
		err = -EADDRINUSE;
	} else {
		rcu_assign_pointer(mrt->mroute_sk, sk);
		sock_set_flag(sk, SOCK_RCU_FREE);
		net->ipv6.devconf_all->mc_forwarding++;
	}
	write_unlock_bh(&mrt_lock);

	if (!err)
		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	rtnl_unlock();

	return err;
}
int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return err;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			write_lock_bh(&mrt_lock);
			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
			/* Note that mroute_sk had SOCK_RCU_FREE set,
			 * so the RCU grace period before sk freeing
			 * is guaranteed by sk_destruct()
			 */
			net->ipv6.devconf_all->mc_forwarding--;
			write_unlock_bh(&mrt_lock);
			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);

			mroute_clean_tables(mrt, false);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}

bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_oif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		return false;

	return rcu_access_pointer(mrt->mroute_sk);
}
EXPORT_SYMBOL(mroute6_is_socket);
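/* The usual sequence a daemon drives through the switch below,
 * sketched (fd as in the earlier examples; error handling omitted).
 * Enrolling a physical interface as mif 0:
 *
 *	struct mif6ctl mif = { 0 };
 *	mif.mif6c_mifi = 0;
 *	mif.mif6c_pifi = if_nametoindex("eth0");
 *	setsockopt(fd, SOL_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));
 *
 * MRT6_DONE, or simply closing the socket, unwinds everything through
 * ip6mr_sk_done().
 */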
/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret, parent = 0;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif,
			       sk == rtnl_dereference(mrt->mroute_sk));
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, 0, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		parent = -1;
		/* fall through */
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		if (parent == 0)
			parent = mfc.mf6cc_parent;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk ==
					    rtnl_dereference(mrt->mroute_sk),
					    parent);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		if (sk == rcu_access_pointer(mrt->mroute_sk))
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ip6mr_new_table(net, v))
			ret = -ENOMEM;
		else
			raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsock opt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}
/*
 *	The IP multicast ioctl support routines.
 */

int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
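/* Illustrative reader for the counters served above (fd as before;
 * error handling omitted):
 *
 *	struct sioc_sg_req6 sg = { 0 };
 *	inet_pton(AF_INET6, "2001:db8::1", &sg.src.sin6_addr);
 *	inet_pton(AF_INET6, "ff0e::114", &sg.grp.sin6_addr);
 *	if (ioctl(fd, SIOCGETSGCNT_IN6, &sg) == 0)
 *		printf("pkts %lu bytes %lu wrong-if %lu\n",
 *		       sg.pktcnt, sg.bytecnt, sg.wrong_if);
 */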
#ifdef CONFIG_COMPAT
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

struct compat_sioc_mif_req6 {
	mifi_t mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif

static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTOCTETS, skb->len);
	return dst_output(net, sk, skb);
}
/*
 *	Processing handlers for ip6mr_forward
 */

static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (!vif->dev)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding on
	 * all output interfaces. Clearly, if the mrouter runs a multicast
	 * program, that program should receive packets regardless of which
	 * interface it joined on.
	 * If we didn't do this, the program would have to join on all
	 * interfaces. On the other hand, a multihoming host (or router, but
	 * not mrouter) cannot join on more than one interface - that would
	 * result in receiving multiple copies of each packet.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}
static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *c)
{
	int psend = -1;
	int vif, ct;
	int true_vifi = ip6mr_find_vif(mrt, skb->dev);

	vif = c->_c.mfc_parent;
	c->_c.mfc_un.res.pkt++;
	c->_c.mfc_un.res.bytes += skb->len;
	c->_c.mfc_un.res.lastuse = jiffies;

	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		rcu_read_lock();
		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
			rcu_read_unlock();
			goto forward;
		}
		rcu_read_unlock();
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif_table[vif].dev != skb->dev) {
		c->_c.mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* PIM-SM uses asserts when switching from the RPT to
		       the SPT, so we cannot check that the packet arrived
		       on an oif.  That is unfortunate, but otherwise we
		       would have to move a pretty large chunk of pimd into
		       the kernel. --ANK
		     */
		    (mrt->mroute_do_pim ||
		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       c->_c.mfc_un.res.last_assert +
			       MFC_ASSERT_THRESH)) {
			c->_c.mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi,
					   MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

forward:
	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/*
	 * Forward the frame
	 */
	if (ipv6_addr_any(&c->mf6c_origin) &&
	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != c->_c.mfc_parent &&
		    ipv6_hdr(skb)->hop_limit >
				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
			/* It's an (*,*) entry and the packet is not coming
			 * from the upstream: forward the packet to the
			 * upstream only.
			 */
			psend = c->_c.mfc_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	for (ct = c->_c.mfc_un.res.maxvif - 1;
	     ct >= c->_c.mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 =
					skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2,
						       c, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, c, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
	if (!cache) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt,
						     &ipv6_hdr(skb)->daddr,
						     vif);
	}

	/*
	 * No usable cache entry
	 */
	if (!cache) {
		int vif;

		vif = ip6mr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(net, mrt, skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}
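
/*
 * ip6mr_get_route() below backs RTM_GETROUTE queries for multicast
 * destinations (it is reached from the IPv6 route dump path, e.g. via
 * rt6_fill_node()).  When no cache entry exists yet, it fabricates a
 * minimal dummy IPv6 header carrying only the (src, dst) pair and queues
 * it as an unresolved entry, so a later MFC add from the user-space
 * daemon can complete the answer.
 */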
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
		    u32 portid)
{
	int err;
	struct mr_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		dev = skb->dev;
		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		NETLINK_CB(skb2).portid = portid;
		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
	read_unlock(&mrt_lock);
	return err;
}

static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mfc6_cache *c,
			     int cmd, int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int err;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = RTNL_FAMILY_IP6MR;
	rtm->rtm_dst_len = 128;
	rtm->rtm_src_len = 128;
	rtm->rtm_tos = 0;
	rtm->rtm_table = mrt->id;
	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto nla_put_failure;
	rtm->rtm_type = RTN_MULTICAST;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	if (c->_c.mfc_flags & MFC_STATIC)
		rtm->rtm_protocol = RTPROT_STATIC;
	else
		rtm->rtm_protocol = RTPROT_MROUTED;
	rtm->rtm_flags = 0;

	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
		goto nla_put_failure;
	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
	/* do not break the dump if cache is unresolved */
	if (err < 0 && err != -ENOENT)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      u32 portid, u32 seq, struct mr_mfc *c,
			      int cmd, int flags)
{
	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
				 cmd, flags);
}

static int mr6_msgsize(bool unresolved, int maxvif)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4)	/* RTA_TABLE */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
		;

	if (!unresolved)
		len = len
		      + nla_total_size(4)	/* RTA_IIF */
		      + nla_total_size(0)	/* RTA_MULTIPATH */
		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
						/* RTA_MFC_STATS */
		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
		;

	return len;
}
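
/*
 * Size sketch (illustrative, assuming the usual 4-byte netlink alignment
 * with NLA_HDRLEN == 4): for an unresolved entry the estimate above is
 *
 *	NLMSG_ALIGN(sizeof(struct rtmsg))	12
 *	+ nla_total_size(4)			 8	RTA_TABLE
 *	+ nla_total_size(16)			20	RTA_SRC
 *	+ nla_total_size(16)			20	RTA_DST
 *						--
 *						60 bytes of payload,
 *
 * before nlmsg_new() adds the netlink message header itself.  Resolved
 * entries additionally account for RTA_IIF, RTA_MULTIPATH (one rtnexthop
 * per mif) and the 64-bit-aligned RTA_MFC_STATS block.
 */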
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
			GFP_ATOMIC);
	if (!skb)
		goto errout;

	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
	if (err < 0)
		goto errout;

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
	return;

errout:
	kfree_skb(skb);
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
}

static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtgenmsg))
		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
					/* IP6MRA_CREPORT_SRC_ADDR */
		+ nla_total_size(sizeof(struct in6_addr))
					/* IP6MRA_CREPORT_DST_ADDR */
		+ nla_total_size(sizeof(struct in6_addr))
					/* IP6MRA_CREPORT_PKT */
		+ nla_total_size(payloadlen)
		;

	return len;
}

static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
{
	struct net *net = read_pnet(&mrt->net);
	struct nlmsghdr *nlh;
	struct rtgenmsg *rtgenm;
	struct mrt6msg *msg;
	struct sk_buff *skb;
	struct nlattr *nla;
	int payloadlen;

	payloadlen = pkt->len - sizeof(struct mrt6msg);
	msg = (struct mrt6msg *)skb_transport_header(pkt);

	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
	if (!skb)
		goto errout;

	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
			sizeof(struct rtgenmsg), 0);
	if (!nlh)
		goto errout;
	rtgenm = nlmsg_data(nlh);
	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
			     &msg->im6_src) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
			     &msg->im6_dst))
		goto nla_put_failure;

	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
				  nla_data(nla), payloadlen))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
	return;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
errout:
	kfree_skb(skb);
	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
}

static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
				_ip6mr_fill_mroute, &mfc_unres_lock);
}
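
/*
 * For reference (illustrative, not part of this file): the dump handler
 * above answers an RTM_GETROUTE dump whose rtm_family is set to
 * RTNL_FAMILY_IP6MR, which is roughly what iproute2 issues for
 * "ip -6 mroute show".
 */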