/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
#include <linux/export.h>
#include <net/ip6_checksum.h>
#include <linux/netconf.h>

/* An IP6MR policy rule: only the common fib_rule state, no extra
 * IP6MR-specific selectors.
 */
struct ip6mr_rule {
	struct fib_rule		common;
};

/* Result of a fib-rules lookup: the multicast routing table to use. */
struct ip6mr_result {
	struct mr_table	*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

/* Slab cache for struct mfc6_cache entries */
static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr_table *mrt);

static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(struct timer_list *t);

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)

/* Walk the per-netns list of mr_tables: a NULL @mrt starts the walk,
 * a NULL return ends it.
 */
static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	struct mr_table *ret;

	if (!mrt)
		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
				     struct mr_table, list);
	else
		ret = list_entry_rcu(mrt->list.next,
				     struct mr_table, list);

	if (&ret->list == &net->ipv6.mr6_tables)
		return NULL;
	return ret;
}

/* Find the multicast routing table with the given id, or NULL. */
static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

/* Resolve a flow to its mr_table via the IP6MR fib rules.
 * Returns 0 and sets *mrt on success, a negative errno otherwise.
 */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

/* fib_rules .action callback: map a matching rule to its mr_table. */
static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ip6mr_get_table(rule->fr_net, rule->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

/* IP6MR rules carry no extra selectors, so every rule matches. */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

/* No private attributes to parse on rule creation. */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb,
				struct netlink_ext_ack *extack)
{
	return 0;
}

/* No private attributes, so any two IP6MR rules compare equal. */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

/* Fill the netlink rule header; IP6MR rules have no dst/src/tos. */
static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos = 0;
	return 0;
}

static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};

/* Per-netns init: register the rules ops, create the default table and
 * install the catch-all rule pointing at it.
 */
static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (IS_ERR(mrt)) {
		err = PTR_ERR(mrt);
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	ip6mr_free_table(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

/* Per-netns teardown: free every table, then drop the rules ops. */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
	rtnl_unlock();
}

static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
}

static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
}

/* True iff @rule is the auto-installed catch-all rule for the default
 * table (match-all, TO_TBL action, RT6_TABLE_DFLT, no l3mdev).
 */
bool ip6mr_rule_default(const struct fib_rule *rule)
{
	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
}
EXPORT_SYMBOL(ip6mr_rule_default);
#else
/* Single-table build: the "loop" visits net->ipv6.mrt6 exactly once. */
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	if (!mrt)
		return net->ipv6.mrt6;
	return NULL;
}

static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}

static int __net_init ip6mr_rules_init(struct net *net)
{
	struct mr_table *mrt;

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (IS_ERR(mrt))
		return PTR_ERR(mrt);
	net->ipv6.mrt6 = mrt;
	return 0;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}

static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
	return 0;
}

static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	return 0;
}
#endif

/* rhashtable compare: MFC entries are keyed by (origin, mcast group). */
static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
			  const void *ptr)
{
	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
	struct mfc6_cache *c = (struct mfc6_cache *)ptr;

	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
}

static const struct rhashtable_params ip6mr_rht_params = {
	.head_offset = offsetof(struct mr_mfc, mnode),
	.key_offset = offsetof(struct mfc6_cache, cmparg),
	.key_len = sizeof(struct mfc6_cache_cmp_arg),
	.nelem_hint = 3,
	.locks_mul = 1,
	.obj_cmpfn = ip6mr_hash_cmp,
	.automatic_shrinking = true,
};

/* Hook called by mr_table_alloc() to link a fresh table into the netns. */
static void ip6mr_new_table_set(struct mr_table *mrt,
				struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
}

/* Wildcard (*,*) compare key used for (*,G) lookups. */
static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
	.mf6c_origin = IN6ADDR_ANY_INIT,
	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};

static struct mr_table_ops ip6mr_mr_table_ops = {
	.rht_params = &ip6mr_rht_params,
	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
};

/* Return the table with the given id, creating it if necessary. */
static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, id);
	if (mrt)
		return mrt;

	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
			      ipmr_expire_process, ip6mr_new_table_set);
}

/* Free a table: stop its expiry timer, flush all entries and vifs,
 * destroy the MFC hash and release the memory.
 */
static void ip6mr_free_table(struct mr_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, true);
	rhltable_destroy(&mrt->mfc_hash);
	kfree(mrt);
}

#ifdef CONFIG_PROC_FS
/* The /proc interfaces to multicast routing
 * /proc/ip6_mr_cache /proc/ip6_mr_vif
 */

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct mr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	/* The /proc files only show the default table */
	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return mr_vif_seq_start(seq, pos);
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

/* Print one vif (or the header row) of /proc/net/ip6_mr_vif. */
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct mr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next = mr_vif_seq_next,
	.stop = ip6mr_vif_seq_stop,
	.show = ip6mr_vif_seq_show,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}

/* Print one MFC entry (or the header row) of /proc/net/ip6_mr_cache. */
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct mr_mfc_iter *it = seq->private;
		struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->_c.mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->_c.mfc_un.res.pkt,
				   mfc->_c.mfc_un.res.bytes,
				   mfc->_c.mfc_un.res.wrong_if);
			/* One "vif:ttl" column per forwarding interface */
			for (n = mfc->_c.mfc_un.res.minvif;
			     n < mfc->_c.mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->_c.mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d", n,
						   mfc->_c.mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next = mr_mfc_seq_next,
	.stop = mr_mfc_seq_stop,
	.show =
ipmr_mfc_seq_show, 507 }; 508 #endif 509 510 #ifdef CONFIG_IPV6_PIMSM_V2 511 512 static int pim6_rcv(struct sk_buff *skb) 513 { 514 struct pimreghdr *pim; 515 struct ipv6hdr *encap; 516 struct net_device *reg_dev = NULL; 517 struct net *net = dev_net(skb->dev); 518 struct mr_table *mrt; 519 struct flowi6 fl6 = { 520 .flowi6_iif = skb->dev->ifindex, 521 .flowi6_mark = skb->mark, 522 }; 523 int reg_vif_num; 524 525 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) 526 goto drop; 527 528 pim = (struct pimreghdr *)skb_transport_header(skb); 529 if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) || 530 (pim->flags & PIM_NULL_REGISTER) || 531 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 532 sizeof(*pim), IPPROTO_PIM, 533 csum_partial((void *)pim, sizeof(*pim), 0)) && 534 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 535 goto drop; 536 537 /* check if the inner packet is destined to mcast group */ 538 encap = (struct ipv6hdr *)(skb_transport_header(skb) + 539 sizeof(*pim)); 540 541 if (!ipv6_addr_is_multicast(&encap->daddr) || 542 encap->payload_len == 0 || 543 ntohs(encap->payload_len) + sizeof(*pim) > skb->len) 544 goto drop; 545 546 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 547 goto drop; 548 reg_vif_num = mrt->mroute_reg_vif_num; 549 550 read_lock(&mrt_lock); 551 if (reg_vif_num >= 0) 552 reg_dev = mrt->vif_table[reg_vif_num].dev; 553 if (reg_dev) 554 dev_hold(reg_dev); 555 read_unlock(&mrt_lock); 556 557 if (!reg_dev) 558 goto drop; 559 560 skb->mac_header = skb->network_header; 561 skb_pull(skb, (u8 *)encap - skb->data); 562 skb_reset_network_header(skb); 563 skb->protocol = htons(ETH_P_IPV6); 564 skb->ip_summed = CHECKSUM_NONE; 565 566 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); 567 568 netif_rx(skb); 569 570 dev_put(reg_dev); 571 return 0; 572 drop: 573 kfree_skb(skb); 574 return 0; 575 } 576 577 static const struct inet6_protocol pim6_protocol = { 578 .handler = pim6_rcv, 579 }; 580 581 /* Service routines creating virtual 
interfaces: PIMREG */ 582 583 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, 584 struct net_device *dev) 585 { 586 struct net *net = dev_net(dev); 587 struct mr_table *mrt; 588 struct flowi6 fl6 = { 589 .flowi6_oif = dev->ifindex, 590 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, 591 .flowi6_mark = skb->mark, 592 }; 593 int err; 594 595 err = ip6mr_fib_lookup(net, &fl6, &mrt); 596 if (err < 0) { 597 kfree_skb(skb); 598 return err; 599 } 600 601 read_lock(&mrt_lock); 602 dev->stats.tx_bytes += skb->len; 603 dev->stats.tx_packets++; 604 ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT); 605 read_unlock(&mrt_lock); 606 kfree_skb(skb); 607 return NETDEV_TX_OK; 608 } 609 610 static int reg_vif_get_iflink(const struct net_device *dev) 611 { 612 return 0; 613 } 614 615 static const struct net_device_ops reg_vif_netdev_ops = { 616 .ndo_start_xmit = reg_vif_xmit, 617 .ndo_get_iflink = reg_vif_get_iflink, 618 }; 619 620 static void reg_vif_setup(struct net_device *dev) 621 { 622 dev->type = ARPHRD_PIMREG; 623 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; 624 dev->flags = IFF_NOARP; 625 dev->netdev_ops = ®_vif_netdev_ops; 626 dev->needs_free_netdev = true; 627 dev->features |= NETIF_F_NETNS_LOCAL; 628 } 629 630 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt) 631 { 632 struct net_device *dev; 633 char name[IFNAMSIZ]; 634 635 if (mrt->id == RT6_TABLE_DFLT) 636 sprintf(name, "pim6reg"); 637 else 638 sprintf(name, "pim6reg%u", mrt->id); 639 640 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); 641 if (!dev) 642 return NULL; 643 644 dev_net_set(dev, net); 645 646 if (register_netdevice(dev)) { 647 free_netdev(dev); 648 return NULL; 649 } 650 651 if (dev_open(dev)) 652 goto failure; 653 654 dev_hold(dev); 655 return dev; 656 657 failure: 658 unregister_netdevice(dev); 659 return NULL; 660 } 661 #endif 662 663 static int call_ip6mr_vif_entry_notifiers(struct net *net, 664 enum fib_event_type event_type, 665 
					  struct vif_device *vif,
					  mifi_t vif_index, u32 tb_id)
{
	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     vif, vif_index, tb_id,
				     &net->ipv6.ipmr_seq);
}

static int call_ip6mr_mfc_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct mfc6_cache *mfc, u32 tb_id)
{
	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
}

/* Delete a VIF entry */
static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
		       struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	if (VIF_EXISTS(mrt, vifi))
		call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
					       FIB_EVENT_VIF_DEL, v, vifi,
					       mrt->id);

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	/* Removing the register vif invalidates the cached index */
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	/* Shrink maxvif if the highest-numbered vif was removed */
	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding--;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if ((v->flags & MIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
{
	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);

	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
}

/* Free a cache entry after the RCU grace period has elapsed. */
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			/* Netlink-originated skb: convert the queued
			 * request into an NLMSG_ERROR(-ETIMEDOUT) reply.
			 */
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}


/* Timer process for all the unresolved queue. */

static void ipmr_do_expire_process(struct mr_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mr_mfc *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet...
			 */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
	}

	/* Re-arm for the soonest remaining expiry */
	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(struct timer_list *t)
{
	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);

	/* Timer context must not spin on the lock; retry shortly instead */
	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ip6mr_update_thresholds(struct mr_table *mrt,
				    struct mr_mfc *cache,
				    unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	/* Record per-vif TTL thresholds; 255 means "do not forward" */
	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
	cache->mfc_un.res.lastuse = jiffies;
}

/* Add a multicast interface to @mrt from an MRT6_ADD_MIF request. */
static int mif6_add(struct net *net, struct mr_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding++;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
			MIFF_REGISTER);

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
				       v, vifi, mrt->id);
	return 0;
}

/* Exact (S,G) lookup in the MFC hash. */
static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find(mrt, &arg);
}

/* Look for a (*,G) entry */
static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = in6addr_any,
		.mf6c_mcastgrp = *mcastgrp,
	};

	if (ipv6_addr_any(mcastgrp))
		return mr_mfc_find_any_parent(mrt, mifi);
	return mr_mfc_find_any(mrt, mifi, &arg);
}

/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc6_cache *
ip6mr_cache_find_parent(struct mr_table *mrt,
			const struct in6_addr *origin,
			const struct in6_addr *mcastgrp,
			int parent)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find_parent(mrt, &arg, parent);
}

/* Allocate a multicast cache entry */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (!c)
		return NULL;
	/* Make the first assert eligible immediately */
	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
	c->_c.mfc_un.res.minvif = MAXMIFS;
	c->_c.free = ip6mr_cache_free_rcu;
	refcount_set(&c->_c.mfc_un.res.refcount, 1);
	return c;
}

static struct
mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	/* GFP_ATOMIC: called from packet-receive context */
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (!c)
		return NULL;
	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
	/* Unresolved entries live at most 10 seconds */
	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			/* Queued netlink request: answer it with the now
			 * resolved route, or an error if it doesn't fit.
			 */
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));

			if (mr_fill_mroute(mrt, skb, &c->_c,
					   nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			ip6_mr_forward(net, mrt, skb, c);
	}
}

/*
 *	Bounce a cache query up to pim6sd and netlink.
 *
 *	Called under mrt_lock.
 */

static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sock *mroute6_sk;
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
		/*
		 *	Copy the IP header
		 */

		skb_put(skb, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb);
		skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

		/*
		 *	Add our header
		 */
		skb_put(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);

		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		msg->im6_mif = mifi;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb_dst_set(skb, dst_clone(skb_dst(pkt)));
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	rcu_read_lock();
	mroute6_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute6_sk) {
		/* No daemon listening: nothing to report to */
		rcu_read_unlock();
		kfree_skb(skb);
		return -EINVAL;
	}

	mrt6msg_netlink_event(mrt, skb);

	/* Deliver to user space multicast routing algorithms */
	ret = sock_queue_rcv_skb(mroute6_sk, skb);
	rcu_read_unlock();
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}

/* Queue a packet for resolution. It gets locked cache entry! */
static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
				  struct sk_buff *skb)
{
	struct mfc6_cache *c;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list,
&mrt->mfc_unres_queue); 1172 mr6_netlink_event(mrt, c, RTM_NEWROUTE); 1173 1174 ipmr_do_expire_process(mrt); 1175 } 1176 1177 /* See if we can append the packet */ 1178 if (c->_c.mfc_un.unres.unresolved.qlen > 3) { 1179 kfree_skb(skb); 1180 err = -ENOBUFS; 1181 } else { 1182 skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); 1183 err = 0; 1184 } 1185 1186 spin_unlock_bh(&mfc_unres_lock); 1187 return err; 1188 } 1189 1190 /* 1191 * MFC6 cache manipulation by user space 1192 */ 1193 1194 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc, 1195 int parent) 1196 { 1197 struct mfc6_cache *c; 1198 1199 /* The entries are added/deleted only under RTNL */ 1200 rcu_read_lock(); 1201 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr, 1202 &mfc->mf6cc_mcastgrp.sin6_addr, parent); 1203 rcu_read_unlock(); 1204 if (!c) 1205 return -ENOENT; 1206 rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params); 1207 list_del_rcu(&c->_c.list); 1208 1209 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), 1210 FIB_EVENT_ENTRY_DEL, c, mrt->id); 1211 mr6_netlink_event(mrt, c, RTM_DELROUTE); 1212 mr_cache_put(&c->_c); 1213 return 0; 1214 } 1215 1216 static int ip6mr_device_event(struct notifier_block *this, 1217 unsigned long event, void *ptr) 1218 { 1219 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1220 struct net *net = dev_net(dev); 1221 struct mr_table *mrt; 1222 struct vif_device *v; 1223 int ct; 1224 1225 if (event != NETDEV_UNREGISTER) 1226 return NOTIFY_DONE; 1227 1228 ip6mr_for_each_table(mrt, net) { 1229 v = &mrt->vif_table[0]; 1230 for (ct = 0; ct < mrt->maxvif; ct++, v++) { 1231 if (v->dev == dev) 1232 mif6_delete(mrt, ct, 1, NULL); 1233 } 1234 } 1235 1236 return NOTIFY_DONE; 1237 } 1238 1239 static unsigned int ip6mr_seq_read(struct net *net) 1240 { 1241 ASSERT_RTNL(); 1242 1243 return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net); 1244 } 1245 1246 static int ip6mr_dump(struct net *net, struct notifier_block *nb) 1247 { 1248 
	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
		       ip6mr_mr_table_iter, &mrt_lock);
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
	.family = RTNL_FAMILY_IP6MR,
	.fib_seq_read = ip6mr_seq_read,
	.fib_dump = ip6mr_dump,
	.owner = THIS_MODULE,
};

/* Register this netns with the FIB notifier chain for IP6MR. */
static int __net_init ip6mr_notifier_init(struct net *net)
{
	struct fib_notifier_ops *ops;

	net->ipv6.ipmr_seq = 0;

	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	net->ipv6.ip6mr_notifier_ops = ops;

	return 0;
}

static void __net_exit ip6mr_notifier_exit(struct net *net)
{
	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
	net->ipv6.ip6mr_notifier_ops = NULL;
}

/* Setup for IP multicast routing: notifier, rule tables, proc files.
 * Error paths unwind in strict reverse order of setup.
 */
static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_notifier_init(net);
	if (err)
		return err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto ip6mr_rules_fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
			     sizeof(struct mr_vif_iter)))
		goto proc_vif_fail;
	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
			     sizeof(struct mr_mfc_iter)))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
ip6mr_rules_fail:
	ip6mr_notifier_exit(net);
	return err;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache",
			  net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
	ip6mr_notifier_exit(net);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};

/* Module init: slab cache, pernet state, netdev notifier, optional PIM
 * protocol handler, and the RTM_GETROUTE dump handler. Error paths
 * unwind in reverse order.
 */
int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
				   NULL, ip6mr_rtm_dumproute, 0);
	if (err == 0)
		return 0;

#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}

/* Module teardown: mirror of ip6_mr_init in reverse order. */
void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}

/* Add or update one MFC entry from a userspace MRT6_ADD_MFC{,_PROXY}
 * request; resolves any queued packets waiting on the new entry.
 */
static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	unsigned char ttls[MAXMIFS];
	struct mfc6_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int i, err;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	/* Translate the user's interface set into a TTL threshold array:
	 * 1 for member interfaces, 255 (never forward) for the rest.
	 */
	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (c) {
		/* Existing entry: update it in place under mrt_lock. */
		write_lock_bh(&mrt_lock);
		c->_c.mfc_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, &c->_c, ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
					       c, mrt->id);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	/* New entry: group must be multicast (or any, for proxy entries). */
	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->_c.mfc_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, &c->_c, ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ip6mr_rht_params);
	if (err) {
		pr_err("ip6mr: rhtable insert error %d\n", err);
		ip6mr_cache_free(c);
		return err;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);

	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc6_cache *)_uc;
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	/* Stop the expiry timer once no unresolved entries remain. */
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		/* Flush the packets queued on the unresolved entry. */
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
				       c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

/* Tear down a table's runtime state. When @all is false, entries marked
 * static (VIFF_STATIC / MFC_STATIC) are preserved.
 */
static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
	struct mr_mfc *c, *tmp;
	LIST_HEAD(list);
	int i;

	/* Shut down all active vif entries */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
			continue;
		mif6_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/* Wipe the cache */
	list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
		if (!all && (c->mfc_flags & MFC_STATIC))
			continue;
		rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
		list_del_rcu(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		mr_cache_put(c);
	}

	/* Drop any still-unresolved entries and their queued packets. */
	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
						       FIB_EVENT_ENTRY_DEL,
						       (struct mfc6_cache *)c,
						       mrt->id);
			mr6_netlink_event(mrt, (struct mfc6_cache *)c,
					  RTM_DELROUTE);
			ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

/* Attach @sk as the table's mroute control socket (MRT6_INIT).
 * Returns -EADDRINUSE if the table already has a control socket.
 */
static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (rtnl_dereference(mrt->mroute_sk)) {
		err = -EADDRINUSE;
	} else {
		rcu_assign_pointer(mrt->mroute_sk, sk);
		sock_set_flag(sk, SOCK_RCU_FREE);
		net->ipv6.devconf_all->mc_forwarding++;
	}
	write_unlock_bh(&mrt_lock);

	if (!err)
		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	rtnl_unlock();

	return err;
}

/* Detach @sk as an mroute control socket (MRT6_DONE or socket close)
 * and clean the non-static state of the table it controlled.
 * Returns -EACCES when @sk controls no table.
 */
int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return err;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			write_lock_bh(&mrt_lock);
			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
			/* Note that mroute_sk had SOCK_RCU_FREE set,
			 * so the RCU grace period before sk freeing
			 * is guaranteed by sk_destruct()
			 */
			net->ipv6.devconf_all->mc_forwarding--;
			write_unlock_bh(&mrt_lock);
			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);

			mroute_clean_tables(mrt, false);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}

/* True when the table matching @skb's flow has an mroute control socket. */
bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->skb_iif ?
: LOOPBACK_IFINDEX, 1588 .flowi6_oif = skb->dev->ifindex, 1589 .flowi6_mark = skb->mark, 1590 }; 1591 1592 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) 1593 return NULL; 1594 1595 return rcu_access_pointer(mrt->mroute_sk); 1596 } 1597 EXPORT_SYMBOL(mroute6_is_socket); 1598 1599 /* 1600 * Socket options and virtual interface manipulation. The whole 1601 * virtual interface system is a complete heap, but unfortunately 1602 * that's how BSD mrouted happens to think. Maybe one day with a proper 1603 * MOSPF/PIM router set up we can clean this up. 1604 */ 1605 1606 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) 1607 { 1608 int ret, parent = 0; 1609 struct mif6ctl vif; 1610 struct mf6cctl mfc; 1611 mifi_t mifi; 1612 struct net *net = sock_net(sk); 1613 struct mr_table *mrt; 1614 1615 if (sk->sk_type != SOCK_RAW || 1616 inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1617 return -EOPNOTSUPP; 1618 1619 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? 
				     : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	/* Everything except MRT6_INIT requires either ownership of the
	 * mroute control socket or CAP_NET_ADMIN in this netns.
	 */
	if (optname != MRT6_INIT) {
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif,
			       sk == rtnl_dereference(mrt->mroute_sk));
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, 0, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		parent = -1;
		/* fall through */
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		/* Proxy variants (parent still 0) match on the parent MIF. */
		if (parent == 0)
			parent = mfc.mf6cc_parent;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk ==
					    rtnl_dereference(mrt->mroute_sk),
					    parent);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		/* Enabling PIM mode also enables asserts. */
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		/* Cannot switch tables once bound as the control socket. */
		if (sk == rcu_access_pointer(mrt->mroute_sk))
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		mrt = ip6mr_new_table(net, v);
		if (IS_ERR(mrt))
			ret = PTR_ERR(mrt);
		else
			raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious
	 *	command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsock opt support for the multicast routing system.
 */

/* Read back MRT6_VERSION / MRT6_PIM / MRT6_ASSERT as an int. */
int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	/* Copy back at most sizeof(int); a negative length is invalid. */
	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

/* SIOCGETMIFCNT_IN6 / SIOCGETSGCNT_IN6: per-MIF and per-(S,G) counters. */
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ?
				     : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

#ifdef CONFIG_COMPAT
/* 32-bit layouts of sioc_sg_req6 / sioc_mif_req6 (ulong -> compat_ulong_t). */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

struct compat_sioc_mif_req6 {
	mifi_t mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

/* 32-bit compat counterpart of ip6mr_ioctl(). */
int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ?
				     : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif

/* Netfilter okfn: bump forwarding stats, then hand off to dst_output. */
static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTOCTETS, skb->len);
	return dst_output(net, sk, skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

/* Transmit one copy of @skb on MIF @vifi. Consumes @skb. */
static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (!vif->dev)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	/* Register MIF: report the whole packet to the daemon instead of
	 * transmitting it.
	 */
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers?
	 */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

/* Map a net_device to its MIF index in @mrt; returns -1 if not a MIF. */
static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* Replicate @skb to every MIF allowed by the cache entry's TTL
 * thresholds. Consumes @skb.
 */
static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *c)
{
	int psend = -1;
	int vif, ct;
	int true_vifi = ip6mr_find_vif(mrt, skb->dev);

	vif = c->_c.mfc_parent;
	c->_c.mfc_un.res.pkt++;
	c->_c.mfc_un.res.bytes += skb->len;
	c->_c.mfc_un.res.lastuse = jiffies;

	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		rcu_read_lock();
		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
			rcu_read_unlock();
			goto forward;
		}
		rcu_read_unlock();
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif_table[vif].dev != skb->dev) {
		c->_c.mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough...
		       --ANK
		     */
		    (mrt->mroute_do_pim ||
		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       c->_c.mfc_un.res.last_assert +
			       MFC_ASSERT_THRESH)) {
			/* Rate-limited WRONGMIF report to the daemon. */
			c->_c.mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

forward:
	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&c->mf6c_origin) &&
	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != c->_c.mfc_parent &&
		    ipv6_hdr(skb)->hop_limit >
				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = c->_c.mfc_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	for (ct = c->_c.mfc_un.res.maxvif - 1;
	     ct >= c->_c.mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2,
						       c, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		/* The last target consumes the original skb (no clone). */
		ip6mr_forward2(net, mrt, skb, c, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.flowi6_mark = skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
	if (!cache) {
		/* Fall back to an (*,G) / (*,*) proxy entry, if any. */
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt,
						     &ipv6_hdr(skb)->daddr,
						     vif);
	}

	/*
	 *	No usable cache entry
	 */
	if (!cache) {
		int vif;

		vif = ip6mr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			/* Queue the packet and ask the daemon to resolve. */
			int err = ip6mr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(net, mrt, skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}

/* Fill @rtm with the MFC entry matching the route attached to @skb;
 * unresolved flows are queued via a minimal pseudo header.
 */
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
		    u32 portid)
{
	int err;
	struct mr_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		dev = skb->dev;
		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct?
		 */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		NETLINK_CB(skb2).portid = portid;
		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		/* Build a minimal pseudo IPv6 header carrying only the
		 * src/dst addresses, for queueing on the unresolved list.
		 */
		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
	read_unlock(&mrt_lock);
	return err;
}

/* Emit one RTM message describing MFC entry @c into @skb. */
static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
			     int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int err;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = RTNL_FAMILY_IP6MR;
	rtm->rtm_dst_len = 128;
	rtm->rtm_src_len = 128;
	rtm->rtm_tos = 0;
	rtm->rtm_table = mrt->id;
	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto nla_put_failure;
	rtm->rtm_type = RTN_MULTICAST;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	if (c->_c.mfc_flags & MFC_STATIC)
		rtm->rtm_protocol = RTPROT_STATIC;
	else
		rtm->rtm_protocol = RTPROT_MROUTED;
	rtm->rtm_flags = 0;

	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
		goto nla_put_failure;
	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
	/* do not break the dump if cache is unresolved */
	if (err < 0 && err != -ENOENT)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/* Adapter so generic mr_* code can fill an IPv6 mroute message. */
static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      u32 portid, u32 seq, struct mr_mfc *c,
			      int cmd, int flags)
{
	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
				 cmd, flags);
}

/* Upper bound on the netlink message size for one MFC entry. */
static int mr6_msgsize(bool unresolved, int maxvif)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4)	/* RTA_TABLE */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
		;

	if (!unresolved)
		len = len
		      + nla_total_size(4)	/* RTA_IIF */
		      + nla_total_size(0)	/* RTA_MULTIPATH */
		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
						/* RTA_MFC_STATS */
		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
		;

	return len;
}

/* Notify RTNLGRP_IPV6_MROUTE listeners of an MFC entry change. */
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	/* mfc_parent >= MAXMIFS marks an unresolved (smaller) entry. */
	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
			GFP_ATOMIC);
	if (!skb)
		goto errout;

	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
	if (err < 0)
		goto errout;

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
	return;

errout:
	kfree_skb(skb);
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
}

/* Upper bound on the netlink size of one cache report message. */
static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtgenmsg))
		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
					/* IP6MRA_CREPORT_SRC_ADDR */
		+ nla_total_size(sizeof(struct in6_addr))
					/* IP6MRA_CREPORT_DST_ADDR */
		+
		  nla_total_size(sizeof(struct in6_addr))
					/* IP6MRA_CREPORT_PKT */
		+ nla_total_size(payloadlen)
		;

	return len;
}

/* Broadcast an MRT6MSG cache report to RTNLGRP_IPV6_MROUTE_R listeners. */
static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
{
	struct net *net = read_pnet(&mrt->net);
	struct nlmsghdr *nlh;
	struct rtgenmsg *rtgenm;
	struct mrt6msg *msg;
	struct sk_buff *skb;
	struct nlattr *nla;
	int payloadlen;

	/* @pkt carries a struct mrt6msg header followed by the payload. */
	payloadlen = pkt->len - sizeof(struct mrt6msg);
	msg = (struct mrt6msg *)skb_transport_header(pkt);

	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
	if (!skb)
		goto errout;

	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
			sizeof(struct rtgenmsg), 0);
	if (!nlh)
		goto errout;
	rtgenm = nlmsg_data(nlh);
	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
			     &msg->im6_src) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
			     &msg->im6_dst))
		goto nla_put_failure;

	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
				  nla_data(nla), payloadlen))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
	return;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
errout:
	kfree_skb(skb);
	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
}

/* RTM_GETROUTE dump entry point for the IP6MR family. */
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
				_ip6mr_fill_mroute, &mfc_unres_lock);
}