/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *	(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *      Carlos Picoto           :       PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/compat.h>
#include <linux/export.h>
#include <net/ip_tunnels.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
#include <linux/netconf.h>
#include <net/nexthop.h>
#include <net/switchdev.h>

struct ipmr_rule {
	struct fib_rule		common;
};

struct ipmr_result {
	struct mr_table		*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
 * Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
 * entries is changed only in process context and protected
 * with weak lock mrt_lock. Queue of unresolved entries is protected
 * with strong spinlock mfc_unres_lock.
 *
 * In this case data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static void ipmr_free_table(struct mr_table *mrt);

static void ip_mr_forward(struct net *net, struct mr_table *mrt,
			  struct net_device *dev, struct sk_buff *skb,
			  struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      struct mfc_cache *c, struct rtmsg *rtm);
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
				 int cmd);
static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(unsigned long arg);

#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
#define ipmr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ipmr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
			   struct mr_table **mrt)
{
	int err;
	struct ipmr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	/* update flow if oif or iif point to device enslaved to l3mdev */
	l3mdev_update_flow(net, flowi4_to_flowi(flp4));

	err = fib_rules_lookup(net->ipv4.mr_rules_ops,
			       flowi4_to_flowi(flp4), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
			    int flags, struct fib_lookup_arg *arg)
{
	struct ipmr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	arg->table = fib_rule_get_table(rule, arg);

	mrt = ipmr_get_table(rule->fr_net, arg->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}

static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}

static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			  struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos = 0;
	return 0;
}

static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = {
	.family		= RTNL_FAMILY_IPMR,
	.rule_size	= sizeof(struct ipmr_rule),
	.addr_size	= sizeof(u32),
	.action		= ipmr_rule_action,
	.match		= ipmr_rule_match,
	.configure	= ipmr_rule_configure,
	.compare	= ipmr_rule_compare,
	.fill		= ipmr_rule_fill,
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.policy		= ipmr_rule_policy,
	.owner		= THIS_MODULE,
};
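/* Usage sketch (illustrative, not part of the original source): with
 * CONFIG_IP_MROUTE_MULTIPLE_TABLES, the mr_table for a packet is resolved
 * through the policy rules registered above.  A caller fills a struct
 * flowi4 describing the flow and lets ipmr_fib_lookup() map it to a table,
 * roughly:
 *
 *	struct flowi4 fl4 = {
 *		.flowi4_iif  = skb->dev->ifindex,
 *		.flowi4_mark = skb->mark,
 *	};
 *	struct mr_table *mrt;
 *	int err = ipmr_fib_lookup(net, &fl4, &mrt);
 *
 * A negative err means no rule matched or the matching rule rejects the
 * flow; otherwise mrt points at the table to use, as done in
 * reg_vif_xmit() and ipmr_rt_fib_lookup() below.
 */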
223 224 static int __net_init ipmr_rules_init(struct net *net) 225 { 226 struct fib_rules_ops *ops; 227 struct mr_table *mrt; 228 int err; 229 230 ops = fib_rules_register(&ipmr_rules_ops_template, net); 231 if (IS_ERR(ops)) 232 return PTR_ERR(ops); 233 234 INIT_LIST_HEAD(&net->ipv4.mr_tables); 235 236 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 237 if (IS_ERR(mrt)) { 238 err = PTR_ERR(mrt); 239 goto err1; 240 } 241 242 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0); 243 if (err < 0) 244 goto err2; 245 246 net->ipv4.mr_rules_ops = ops; 247 return 0; 248 249 err2: 250 ipmr_free_table(mrt); 251 err1: 252 fib_rules_unregister(ops); 253 return err; 254 } 255 256 static void __net_exit ipmr_rules_exit(struct net *net) 257 { 258 struct mr_table *mrt, *next; 259 260 rtnl_lock(); 261 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { 262 list_del(&mrt->list); 263 ipmr_free_table(mrt); 264 } 265 fib_rules_unregister(net->ipv4.mr_rules_ops); 266 rtnl_unlock(); 267 } 268 269 static int ipmr_rules_dump(struct net *net, struct notifier_block *nb) 270 { 271 return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR); 272 } 273 274 static unsigned int ipmr_rules_seq_read(struct net *net) 275 { 276 return fib_rules_seq_read(net, RTNL_FAMILY_IPMR); 277 } 278 279 bool ipmr_rule_default(const struct fib_rule *rule) 280 { 281 return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT; 282 } 283 EXPORT_SYMBOL(ipmr_rule_default); 284 #else 285 #define ipmr_for_each_table(mrt, net) \ 286 for (mrt = net->ipv4.mrt; mrt; mrt = NULL) 287 288 static struct mr_table *ipmr_get_table(struct net *net, u32 id) 289 { 290 return net->ipv4.mrt; 291 } 292 293 static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, 294 struct mr_table **mrt) 295 { 296 *mrt = net->ipv4.mrt; 297 return 0; 298 } 299 300 static int __net_init ipmr_rules_init(struct net *net) 301 { 302 struct mr_table *mrt; 303 304 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 305 if (IS_ERR(mrt)) 306 return PTR_ERR(mrt); 307 net->ipv4.mrt = mrt; 308 return 0; 309 } 310 311 static void __net_exit ipmr_rules_exit(struct net *net) 312 { 313 rtnl_lock(); 314 ipmr_free_table(net->ipv4.mrt); 315 net->ipv4.mrt = NULL; 316 rtnl_unlock(); 317 } 318 319 static int ipmr_rules_dump(struct net *net, struct notifier_block *nb) 320 { 321 return 0; 322 } 323 324 static unsigned int ipmr_rules_seq_read(struct net *net) 325 { 326 return 0; 327 } 328 329 bool ipmr_rule_default(const struct fib_rule *rule) 330 { 331 return true; 332 } 333 EXPORT_SYMBOL(ipmr_rule_default); 334 #endif 335 336 static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, 337 const void *ptr) 338 { 339 const struct mfc_cache_cmp_arg *cmparg = arg->key; 340 struct mfc_cache *c = (struct mfc_cache *)ptr; 341 342 return cmparg->mfc_mcastgrp != c->mfc_mcastgrp || 343 cmparg->mfc_origin != c->mfc_origin; 344 } 345 346 static const struct rhashtable_params ipmr_rht_params = { 347 .head_offset = offsetof(struct mfc_cache, mnode), 348 .key_offset = offsetof(struct mfc_cache, cmparg), 349 .key_len = sizeof(struct mfc_cache_cmp_arg), 350 .nelem_hint = 3, 351 .locks_mul = 1, 352 .obj_cmpfn = ipmr_hash_cmp, 353 .automatic_shrinking = true, 354 }; 355 356 static struct mr_table *ipmr_new_table(struct net *net, u32 id) 357 { 358 struct mr_table *mrt; 359 360 /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ 361 if (id != RT_TABLE_DEFAULT && id >= 1000000000) 362 return ERR_PTR(-EINVAL); 363 364 mrt = ipmr_get_table(net, id); 365 if (mrt) 366 return mrt; 367 368 mrt = 
kzalloc(sizeof(*mrt), GFP_KERNEL); 369 if (!mrt) 370 return ERR_PTR(-ENOMEM); 371 write_pnet(&mrt->net, net); 372 mrt->id = id; 373 374 rhltable_init(&mrt->mfc_hash, &ipmr_rht_params); 375 INIT_LIST_HEAD(&mrt->mfc_cache_list); 376 INIT_LIST_HEAD(&mrt->mfc_unres_queue); 377 378 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, 379 (unsigned long)mrt); 380 381 mrt->mroute_reg_vif_num = -1; 382 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 383 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); 384 #endif 385 return mrt; 386 } 387 388 static void ipmr_free_table(struct mr_table *mrt) 389 { 390 del_timer_sync(&mrt->ipmr_expire_timer); 391 mroute_clean_tables(mrt, true); 392 rhltable_destroy(&mrt->mfc_hash); 393 kfree(mrt); 394 } 395 396 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ 397 398 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) 399 { 400 struct net *net = dev_net(dev); 401 402 dev_close(dev); 403 404 dev = __dev_get_by_name(net, "tunl0"); 405 if (dev) { 406 const struct net_device_ops *ops = dev->netdev_ops; 407 struct ifreq ifr; 408 struct ip_tunnel_parm p; 409 410 memset(&p, 0, sizeof(p)); 411 p.iph.daddr = v->vifc_rmt_addr.s_addr; 412 p.iph.saddr = v->vifc_lcl_addr.s_addr; 413 p.iph.version = 4; 414 p.iph.ihl = 5; 415 p.iph.protocol = IPPROTO_IPIP; 416 sprintf(p.name, "dvmrp%d", v->vifc_vifi); 417 ifr.ifr_ifru.ifru_data = (__force void __user *)&p; 418 419 if (ops->ndo_do_ioctl) { 420 mm_segment_t oldfs = get_fs(); 421 422 set_fs(KERNEL_DS); 423 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL); 424 set_fs(oldfs); 425 } 426 } 427 } 428 429 /* Initialize ipmr pimreg/tunnel in_device */ 430 static bool ipmr_init_vif_indev(const struct net_device *dev) 431 { 432 struct in_device *in_dev; 433 434 ASSERT_RTNL(); 435 436 in_dev = __in_dev_get_rtnl(dev); 437 if (!in_dev) 438 return false; 439 ipv4_devconf_setall(in_dev); 440 neigh_parms_data_state_setall(in_dev->arp_parms); 441 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; 442 443 return true; 444 } 445 446 static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) 447 { 448 struct net_device *dev; 449 450 dev = __dev_get_by_name(net, "tunl0"); 451 452 if (dev) { 453 const struct net_device_ops *ops = dev->netdev_ops; 454 int err; 455 struct ifreq ifr; 456 struct ip_tunnel_parm p; 457 458 memset(&p, 0, sizeof(p)); 459 p.iph.daddr = v->vifc_rmt_addr.s_addr; 460 p.iph.saddr = v->vifc_lcl_addr.s_addr; 461 p.iph.version = 4; 462 p.iph.ihl = 5; 463 p.iph.protocol = IPPROTO_IPIP; 464 sprintf(p.name, "dvmrp%d", v->vifc_vifi); 465 ifr.ifr_ifru.ifru_data = (__force void __user *)&p; 466 467 if (ops->ndo_do_ioctl) { 468 mm_segment_t oldfs = get_fs(); 469 470 set_fs(KERNEL_DS); 471 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); 472 set_fs(oldfs); 473 } else { 474 err = -EOPNOTSUPP; 475 } 476 dev = NULL; 477 478 if (err == 0 && 479 (dev = __dev_get_by_name(net, p.name)) != NULL) { 480 dev->flags |= IFF_MULTICAST; 481 if (!ipmr_init_vif_indev(dev)) 482 goto failure; 483 if (dev_open(dev)) 484 goto failure; 485 dev_hold(dev); 486 } 487 } 488 return dev; 489 490 failure: 491 unregister_netdevice(dev); 492 return NULL; 493 } 494 495 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) 496 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) 497 { 498 struct net *net = dev_net(dev); 499 struct mr_table *mrt; 500 struct flowi4 fl4 = { 501 .flowi4_oif = dev->ifindex, 502 .flowi4_iif = skb->skb_iif ? 
			: LOOPBACK_IFINDEX,
		.flowi4_mark = skb->mark,
	};
	int err;

	err = ipmr_fib_lookup(net, &fl4, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->needs_free_netdev	= true;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT_TABLE_DEFAULT)
		sprintf(name, "pimreg");
	else
		sprintf(name, "pimreg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);

	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (!ipmr_init_vif_indev(dev))
		goto failure;
	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}

/* called with rcu_read_lock() */
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
		     unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/* Check that:
	 * a. packet is really sent to a multicast group
	 * b. packet is not a NULL-REGISTER
	 * c.
packet is not truncated 590 */ 591 if (!ipv4_is_multicast(encap->daddr) || 592 encap->tot_len == 0 || 593 ntohs(encap->tot_len) + pimlen > skb->len) 594 return 1; 595 596 read_lock(&mrt_lock); 597 if (mrt->mroute_reg_vif_num >= 0) 598 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; 599 read_unlock(&mrt_lock); 600 601 if (!reg_dev) 602 return 1; 603 604 skb->mac_header = skb->network_header; 605 skb_pull(skb, (u8 *)encap - skb->data); 606 skb_reset_network_header(skb); 607 skb->protocol = htons(ETH_P_IP); 608 skb->ip_summed = CHECKSUM_NONE; 609 610 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); 611 612 netif_rx(skb); 613 614 return NET_RX_SUCCESS; 615 } 616 #else 617 static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) 618 { 619 return NULL; 620 } 621 #endif 622 623 static int call_ipmr_vif_entry_notifier(struct notifier_block *nb, 624 struct net *net, 625 enum fib_event_type event_type, 626 struct vif_device *vif, 627 vifi_t vif_index, u32 tb_id) 628 { 629 struct vif_entry_notifier_info info = { 630 .info = { 631 .family = RTNL_FAMILY_IPMR, 632 .net = net, 633 }, 634 .dev = vif->dev, 635 .vif_index = vif_index, 636 .vif_flags = vif->flags, 637 .tb_id = tb_id, 638 }; 639 640 return call_fib_notifier(nb, net, event_type, &info.info); 641 } 642 643 static int call_ipmr_vif_entry_notifiers(struct net *net, 644 enum fib_event_type event_type, 645 struct vif_device *vif, 646 vifi_t vif_index, u32 tb_id) 647 { 648 struct vif_entry_notifier_info info = { 649 .info = { 650 .family = RTNL_FAMILY_IPMR, 651 .net = net, 652 }, 653 .dev = vif->dev, 654 .vif_index = vif_index, 655 .vif_flags = vif->flags, 656 .tb_id = tb_id, 657 }; 658 659 ASSERT_RTNL(); 660 net->ipv4.ipmr_seq++; 661 return call_fib_notifiers(net, event_type, &info.info); 662 } 663 664 static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb, 665 struct net *net, 666 enum fib_event_type event_type, 667 struct mfc_cache *mfc, u32 tb_id) 668 { 669 struct mfc_entry_notifier_info info = { 670 .info = { 671 .family = RTNL_FAMILY_IPMR, 672 .net = net, 673 }, 674 .mfc = mfc, 675 .tb_id = tb_id 676 }; 677 678 return call_fib_notifier(nb, net, event_type, &info.info); 679 } 680 681 static int call_ipmr_mfc_entry_notifiers(struct net *net, 682 enum fib_event_type event_type, 683 struct mfc_cache *mfc, u32 tb_id) 684 { 685 struct mfc_entry_notifier_info info = { 686 .info = { 687 .family = RTNL_FAMILY_IPMR, 688 .net = net, 689 }, 690 .mfc = mfc, 691 .tb_id = tb_id 692 }; 693 694 ASSERT_RTNL(); 695 net->ipv4.ipmr_seq++; 696 return call_fib_notifiers(net, event_type, &info.info); 697 } 698 699 /** 700 * vif_delete - Delete a VIF entry 701 * @notify: Set to 1, if the caller is a notifier_call 702 */ 703 static int vif_delete(struct mr_table *mrt, int vifi, int notify, 704 struct list_head *head) 705 { 706 struct net *net = read_pnet(&mrt->net); 707 struct vif_device *v; 708 struct net_device *dev; 709 struct in_device *in_dev; 710 711 if (vifi < 0 || vifi >= mrt->maxvif) 712 return -EADDRNOTAVAIL; 713 714 v = &mrt->vif_table[vifi]; 715 716 if (VIF_EXISTS(mrt, vifi)) 717 call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi, 718 mrt->id); 719 720 write_lock_bh(&mrt_lock); 721 dev = v->dev; 722 v->dev = NULL; 723 724 if (!dev) { 725 write_unlock_bh(&mrt_lock); 726 return -EADDRNOTAVAIL; 727 } 728 729 if (vifi == mrt->mroute_reg_vif_num) 730 mrt->mroute_reg_vif_num = -1; 731 732 if (vifi + 1 == mrt->maxvif) { 733 int tmp; 734 735 for (tmp = vifi - 1; tmp >= 0; tmp--) { 736 if (VIF_EXISTS(mrt, tmp)) 737 
break; 738 } 739 mrt->maxvif = tmp+1; 740 } 741 742 write_unlock_bh(&mrt_lock); 743 744 dev_set_allmulti(dev, -1); 745 746 in_dev = __in_dev_get_rtnl(dev); 747 if (in_dev) { 748 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; 749 inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 750 NETCONFA_MC_FORWARDING, 751 dev->ifindex, &in_dev->cnf); 752 ip_rt_multicast_event(in_dev); 753 } 754 755 if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify) 756 unregister_netdevice_queue(dev, head); 757 758 dev_put(dev); 759 return 0; 760 } 761 762 static void ipmr_cache_free_rcu(struct rcu_head *head) 763 { 764 struct mfc_cache *c = container_of(head, struct mfc_cache, rcu); 765 766 kmem_cache_free(mrt_cachep, c); 767 } 768 769 void ipmr_cache_free(struct mfc_cache *c) 770 { 771 call_rcu(&c->rcu, ipmr_cache_free_rcu); 772 } 773 EXPORT_SYMBOL(ipmr_cache_free); 774 775 /* Destroy an unresolved cache entry, killing queued skbs 776 * and reporting error to netlink readers. 777 */ 778 static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) 779 { 780 struct net *net = read_pnet(&mrt->net); 781 struct sk_buff *skb; 782 struct nlmsgerr *e; 783 784 atomic_dec(&mrt->cache_resolve_queue_len); 785 786 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { 787 if (ip_hdr(skb)->version == 0) { 788 struct nlmsghdr *nlh = skb_pull(skb, 789 sizeof(struct iphdr)); 790 nlh->nlmsg_type = NLMSG_ERROR; 791 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 792 skb_trim(skb, nlh->nlmsg_len); 793 e = nlmsg_data(nlh); 794 e->error = -ETIMEDOUT; 795 memset(&e->msg, 0, sizeof(e->msg)); 796 797 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 798 } else { 799 kfree_skb(skb); 800 } 801 } 802 803 ipmr_cache_free(c); 804 } 805 806 /* Timer process for the unresolved queue. */ 807 static void ipmr_expire_process(unsigned long arg) 808 { 809 struct mr_table *mrt = (struct mr_table *)arg; 810 unsigned long now; 811 unsigned long expires; 812 struct mfc_cache *c, *next; 813 814 if (!spin_trylock(&mfc_unres_lock)) { 815 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10); 816 return; 817 } 818 819 if (list_empty(&mrt->mfc_unres_queue)) 820 goto out; 821 822 now = jiffies; 823 expires = 10*HZ; 824 825 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { 826 if (time_after(c->mfc_un.unres.expires, now)) { 827 unsigned long interval = c->mfc_un.unres.expires - now; 828 if (interval < expires) 829 expires = interval; 830 continue; 831 } 832 833 list_del(&c->list); 834 mroute_netlink_event(mrt, c, RTM_DELROUTE); 835 ipmr_destroy_unres(mrt, c); 836 } 837 838 if (!list_empty(&mrt->mfc_unres_queue)) 839 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); 840 841 out: 842 spin_unlock(&mfc_unres_lock); 843 } 844 845 /* Fill oifs list. It is called under write locked mrt_lock. 
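 *
 * The ttls[] array passed in from user space holds one TTL threshold per
 * vif: 0 or 255 means "do not forward on this vif", while 1..254 enables
 * forwarding for packets whose TTL exceeds the threshold.  Illustrative
 * example (assuming vifs 0-3 exist): ttls = {0, 1, 0, 64} results in
 * res.ttls = {255, 1, 255, 64}, minvif = 1 and maxvif = 4, so the
 * forwarding loop in ip_mr_forward() only scans vifs 1..3.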
*/ 846 static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache, 847 unsigned char *ttls) 848 { 849 int vifi; 850 851 cache->mfc_un.res.minvif = MAXVIFS; 852 cache->mfc_un.res.maxvif = 0; 853 memset(cache->mfc_un.res.ttls, 255, MAXVIFS); 854 855 for (vifi = 0; vifi < mrt->maxvif; vifi++) { 856 if (VIF_EXISTS(mrt, vifi) && 857 ttls[vifi] && ttls[vifi] < 255) { 858 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 859 if (cache->mfc_un.res.minvif > vifi) 860 cache->mfc_un.res.minvif = vifi; 861 if (cache->mfc_un.res.maxvif <= vifi) 862 cache->mfc_un.res.maxvif = vifi + 1; 863 } 864 } 865 cache->mfc_un.res.lastuse = jiffies; 866 } 867 868 static int vif_add(struct net *net, struct mr_table *mrt, 869 struct vifctl *vifc, int mrtsock) 870 { 871 int vifi = vifc->vifc_vifi; 872 struct switchdev_attr attr = { 873 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, 874 }; 875 struct vif_device *v = &mrt->vif_table[vifi]; 876 struct net_device *dev; 877 struct in_device *in_dev; 878 int err; 879 880 /* Is vif busy ? */ 881 if (VIF_EXISTS(mrt, vifi)) 882 return -EADDRINUSE; 883 884 switch (vifc->vifc_flags) { 885 case VIFF_REGISTER: 886 if (!ipmr_pimsm_enabled()) 887 return -EINVAL; 888 /* Special Purpose VIF in PIM 889 * All the packets will be sent to the daemon 890 */ 891 if (mrt->mroute_reg_vif_num >= 0) 892 return -EADDRINUSE; 893 dev = ipmr_reg_vif(net, mrt); 894 if (!dev) 895 return -ENOBUFS; 896 err = dev_set_allmulti(dev, 1); 897 if (err) { 898 unregister_netdevice(dev); 899 dev_put(dev); 900 return err; 901 } 902 break; 903 case VIFF_TUNNEL: 904 dev = ipmr_new_tunnel(net, vifc); 905 if (!dev) 906 return -ENOBUFS; 907 err = dev_set_allmulti(dev, 1); 908 if (err) { 909 ipmr_del_tunnel(dev, vifc); 910 dev_put(dev); 911 return err; 912 } 913 break; 914 case VIFF_USE_IFINDEX: 915 case 0: 916 if (vifc->vifc_flags == VIFF_USE_IFINDEX) { 917 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex); 918 if (dev && !__in_dev_get_rtnl(dev)) { 919 dev_put(dev); 920 return -EADDRNOTAVAIL; 921 } 922 } else { 923 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); 924 } 925 if (!dev) 926 return -EADDRNOTAVAIL; 927 err = dev_set_allmulti(dev, 1); 928 if (err) { 929 dev_put(dev); 930 return err; 931 } 932 break; 933 default: 934 return -EINVAL; 935 } 936 937 in_dev = __in_dev_get_rtnl(dev); 938 if (!in_dev) { 939 dev_put(dev); 940 return -EADDRNOTAVAIL; 941 } 942 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; 943 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, 944 dev->ifindex, &in_dev->cnf); 945 ip_rt_multicast_event(in_dev); 946 947 /* Fill in the VIF structures */ 948 949 attr.orig_dev = dev; 950 if (!switchdev_port_attr_get(dev, &attr)) { 951 memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len); 952 v->dev_parent_id.id_len = attr.u.ppid.id_len; 953 } else { 954 v->dev_parent_id.id_len = 0; 955 } 956 v->rate_limit = vifc->vifc_rate_limit; 957 v->local = vifc->vifc_lcl_addr.s_addr; 958 v->remote = vifc->vifc_rmt_addr.s_addr; 959 v->flags = vifc->vifc_flags; 960 if (!mrtsock) 961 v->flags |= VIFF_STATIC; 962 v->threshold = vifc->vifc_threshold; 963 v->bytes_in = 0; 964 v->bytes_out = 0; 965 v->pkt_in = 0; 966 v->pkt_out = 0; 967 v->link = dev->ifindex; 968 if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER)) 969 v->link = dev_get_iflink(dev); 970 971 /* And finish update writing critical data */ 972 write_lock_bh(&mrt_lock); 973 v->dev = dev; 974 if (v->flags & VIFF_REGISTER) 975 mrt->mroute_reg_vif_num = vifi; 976 if (vifi+1 > mrt->maxvif) 977 mrt->maxvif = vifi+1; 978 
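	/* Publish dev, mroute_reg_vif_num and maxvif under the write side of
	 * mrt_lock so that the forwarding path, which only takes the read
	 * side, sees the new vif consistently.
	 */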
write_unlock_bh(&mrt_lock); 979 call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id); 980 return 0; 981 } 982 983 /* called with rcu_read_lock() */ 984 static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, 985 __be32 origin, 986 __be32 mcastgrp) 987 { 988 struct mfc_cache_cmp_arg arg = { 989 .mfc_mcastgrp = mcastgrp, 990 .mfc_origin = origin 991 }; 992 struct rhlist_head *tmp, *list; 993 struct mfc_cache *c; 994 995 list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params); 996 rhl_for_each_entry_rcu(c, tmp, list, mnode) 997 return c; 998 999 return NULL; 1000 } 1001 1002 /* Look for a (*,*,oif) entry */ 1003 static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt, 1004 int vifi) 1005 { 1006 struct mfc_cache_cmp_arg arg = { 1007 .mfc_mcastgrp = htonl(INADDR_ANY), 1008 .mfc_origin = htonl(INADDR_ANY) 1009 }; 1010 struct rhlist_head *tmp, *list; 1011 struct mfc_cache *c; 1012 1013 list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params); 1014 rhl_for_each_entry_rcu(c, tmp, list, mnode) 1015 if (c->mfc_un.res.ttls[vifi] < 255) 1016 return c; 1017 1018 return NULL; 1019 } 1020 1021 /* Look for a (*,G) entry */ 1022 static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt, 1023 __be32 mcastgrp, int vifi) 1024 { 1025 struct mfc_cache_cmp_arg arg = { 1026 .mfc_mcastgrp = mcastgrp, 1027 .mfc_origin = htonl(INADDR_ANY) 1028 }; 1029 struct rhlist_head *tmp, *list; 1030 struct mfc_cache *c, *proxy; 1031 1032 if (mcastgrp == htonl(INADDR_ANY)) 1033 goto skip; 1034 1035 list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params); 1036 rhl_for_each_entry_rcu(c, tmp, list, mnode) { 1037 if (c->mfc_un.res.ttls[vifi] < 255) 1038 return c; 1039 1040 /* It's ok if the vifi is part of the static tree */ 1041 proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent); 1042 if (proxy && proxy->mfc_un.res.ttls[vifi] < 255) 1043 return c; 1044 } 1045 1046 skip: 1047 return ipmr_cache_find_any_parent(mrt, vifi); 1048 } 1049 1050 /* Look for a (S,G,iif) entry if parent != -1 */ 1051 static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt, 1052 __be32 origin, __be32 mcastgrp, 1053 int parent) 1054 { 1055 struct mfc_cache_cmp_arg arg = { 1056 .mfc_mcastgrp = mcastgrp, 1057 .mfc_origin = origin, 1058 }; 1059 struct rhlist_head *tmp, *list; 1060 struct mfc_cache *c; 1061 1062 list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params); 1063 rhl_for_each_entry_rcu(c, tmp, list, mnode) 1064 if (parent == -1 || parent == c->mfc_parent) 1065 return c; 1066 1067 return NULL; 1068 } 1069 1070 /* Allocate a multicast cache entry */ 1071 static struct mfc_cache *ipmr_cache_alloc(void) 1072 { 1073 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 1074 1075 if (c) { 1076 c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; 1077 c->mfc_un.res.minvif = MAXVIFS; 1078 refcount_set(&c->mfc_un.res.refcount, 1); 1079 } 1080 return c; 1081 } 1082 1083 static struct mfc_cache *ipmr_cache_alloc_unres(void) 1084 { 1085 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 1086 1087 if (c) { 1088 skb_queue_head_init(&c->mfc_un.unres.unresolved); 1089 c->mfc_un.unres.expires = jiffies + 10*HZ; 1090 } 1091 return c; 1092 } 1093 1094 /* A cache entry has gone into a resolved state from queued */ 1095 static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, 1096 struct mfc_cache *uc, struct mfc_cache *c) 1097 { 1098 struct sk_buff *skb; 1099 struct nlmsgerr *e; 1100 1101 /* Play the pending entries through our router */ 
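	/* Two kinds of skbs may sit on an unresolved entry: real multicast
	 * packets, which are now forwarded through the freshly resolved
	 * entry, and skbs standing in for a pending netlink route query
	 * (note the zero version field where an IP header would normally
	 * start), which get a filled-in route or an error reported back.
	 */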
1102 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { 1103 if (ip_hdr(skb)->version == 0) { 1104 struct nlmsghdr *nlh = skb_pull(skb, 1105 sizeof(struct iphdr)); 1106 1107 if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) { 1108 nlh->nlmsg_len = skb_tail_pointer(skb) - 1109 (u8 *)nlh; 1110 } else { 1111 nlh->nlmsg_type = NLMSG_ERROR; 1112 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 1113 skb_trim(skb, nlh->nlmsg_len); 1114 e = nlmsg_data(nlh); 1115 e->error = -EMSGSIZE; 1116 memset(&e->msg, 0, sizeof(e->msg)); 1117 } 1118 1119 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 1120 } else { 1121 ip_mr_forward(net, mrt, skb->dev, skb, c, 0); 1122 } 1123 } 1124 } 1125 1126 /* Bounce a cache query up to mrouted and netlink. 1127 * 1128 * Called under mrt_lock. 1129 */ 1130 static int ipmr_cache_report(struct mr_table *mrt, 1131 struct sk_buff *pkt, vifi_t vifi, int assert) 1132 { 1133 const int ihl = ip_hdrlen(pkt); 1134 struct sock *mroute_sk; 1135 struct igmphdr *igmp; 1136 struct igmpmsg *msg; 1137 struct sk_buff *skb; 1138 int ret; 1139 1140 if (assert == IGMPMSG_WHOLEPKT) 1141 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); 1142 else 1143 skb = alloc_skb(128, GFP_ATOMIC); 1144 1145 if (!skb) 1146 return -ENOBUFS; 1147 1148 if (assert == IGMPMSG_WHOLEPKT) { 1149 /* Ugly, but we have no choice with this interface. 1150 * Duplicate old header, fix ihl, length etc. 1151 * And all this only to mangle msg->im_msgtype and 1152 * to set msg->im_mbz to "mbz" :-) 1153 */ 1154 skb_push(skb, sizeof(struct iphdr)); 1155 skb_reset_network_header(skb); 1156 skb_reset_transport_header(skb); 1157 msg = (struct igmpmsg *)skb_network_header(skb); 1158 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); 1159 msg->im_msgtype = IGMPMSG_WHOLEPKT; 1160 msg->im_mbz = 0; 1161 msg->im_vif = mrt->mroute_reg_vif_num; 1162 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; 1163 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + 1164 sizeof(struct iphdr)); 1165 } else { 1166 /* Copy the IP header */ 1167 skb_set_network_header(skb, skb->len); 1168 skb_put(skb, ihl); 1169 skb_copy_to_linear_data(skb, pkt->data, ihl); 1170 /* Flag to the kernel this is a route add */ 1171 ip_hdr(skb)->protocol = 0; 1172 msg = (struct igmpmsg *)skb_network_header(skb); 1173 msg->im_vif = vifi; 1174 skb_dst_set(skb, dst_clone(skb_dst(pkt))); 1175 /* Add our header */ 1176 igmp = skb_put(skb, sizeof(struct igmphdr)); 1177 igmp->type = assert; 1178 msg->im_msgtype = assert; 1179 igmp->code = 0; 1180 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ 1181 skb->transport_header = skb->network_header; 1182 } 1183 1184 rcu_read_lock(); 1185 mroute_sk = rcu_dereference(mrt->mroute_sk); 1186 if (!mroute_sk) { 1187 rcu_read_unlock(); 1188 kfree_skb(skb); 1189 return -EINVAL; 1190 } 1191 1192 igmpmsg_netlink_event(mrt, skb); 1193 1194 /* Deliver to mrouted */ 1195 ret = sock_queue_rcv_skb(mroute_sk, skb); 1196 rcu_read_unlock(); 1197 if (ret < 0) { 1198 net_warn_ratelimited("mroute: pending queue full, dropping entries\n"); 1199 kfree_skb(skb); 1200 } 1201 1202 return ret; 1203 } 1204 1205 /* Queue a packet for resolution. It gets locked cache entry! 
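 *
 * In short: the first packet for an unknown (S,G) creates an entry on
 * mfc_unres_queue (at most ten such entries are kept) and triggers an
 * IGMPMSG_NOCACHE upcall to mrouted; follow-up packets are appended to the
 * entry until more than three are already queued, after which they are
 * dropped.  If mrouted installs a matching route, ipmr_cache_resolve()
 * replays the queued packets; otherwise ipmr_expire_process() reaps the
 * entry after roughly ten seconds.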
*/ 1206 static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, 1207 struct sk_buff *skb, struct net_device *dev) 1208 { 1209 const struct iphdr *iph = ip_hdr(skb); 1210 struct mfc_cache *c; 1211 bool found = false; 1212 int err; 1213 1214 spin_lock_bh(&mfc_unres_lock); 1215 list_for_each_entry(c, &mrt->mfc_unres_queue, list) { 1216 if (c->mfc_mcastgrp == iph->daddr && 1217 c->mfc_origin == iph->saddr) { 1218 found = true; 1219 break; 1220 } 1221 } 1222 1223 if (!found) { 1224 /* Create a new entry if allowable */ 1225 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || 1226 (c = ipmr_cache_alloc_unres()) == NULL) { 1227 spin_unlock_bh(&mfc_unres_lock); 1228 1229 kfree_skb(skb); 1230 return -ENOBUFS; 1231 } 1232 1233 /* Fill in the new cache entry */ 1234 c->mfc_parent = -1; 1235 c->mfc_origin = iph->saddr; 1236 c->mfc_mcastgrp = iph->daddr; 1237 1238 /* Reflect first query at mrouted. */ 1239 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); 1240 if (err < 0) { 1241 /* If the report failed throw the cache entry 1242 out - Brad Parker 1243 */ 1244 spin_unlock_bh(&mfc_unres_lock); 1245 1246 ipmr_cache_free(c); 1247 kfree_skb(skb); 1248 return err; 1249 } 1250 1251 atomic_inc(&mrt->cache_resolve_queue_len); 1252 list_add(&c->list, &mrt->mfc_unres_queue); 1253 mroute_netlink_event(mrt, c, RTM_NEWROUTE); 1254 1255 if (atomic_read(&mrt->cache_resolve_queue_len) == 1) 1256 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires); 1257 } 1258 1259 /* See if we can append the packet */ 1260 if (c->mfc_un.unres.unresolved.qlen > 3) { 1261 kfree_skb(skb); 1262 err = -ENOBUFS; 1263 } else { 1264 if (dev) { 1265 skb->dev = dev; 1266 skb->skb_iif = dev->ifindex; 1267 } 1268 skb_queue_tail(&c->mfc_un.unres.unresolved, skb); 1269 err = 0; 1270 } 1271 1272 spin_unlock_bh(&mfc_unres_lock); 1273 return err; 1274 } 1275 1276 /* MFC cache manipulation by user space mroute daemon */ 1277 1278 static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) 1279 { 1280 struct net *net = read_pnet(&mrt->net); 1281 struct mfc_cache *c; 1282 1283 /* The entries are added/deleted only under RTNL */ 1284 rcu_read_lock(); 1285 c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, 1286 mfc->mfcc_mcastgrp.s_addr, parent); 1287 rcu_read_unlock(); 1288 if (!c) 1289 return -ENOENT; 1290 rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); 1291 list_del_rcu(&c->list); 1292 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id); 1293 mroute_netlink_event(mrt, c, RTM_DELROUTE); 1294 ipmr_cache_put(c); 1295 1296 return 0; 1297 } 1298 1299 static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, 1300 struct mfcctl *mfc, int mrtsock, int parent) 1301 { 1302 struct mfc_cache *uc, *c; 1303 bool found; 1304 int ret; 1305 1306 if (mfc->mfcc_parent >= MAXVIFS) 1307 return -ENFILE; 1308 1309 /* The entries are added/deleted only under RTNL */ 1310 rcu_read_lock(); 1311 c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, 1312 mfc->mfcc_mcastgrp.s_addr, parent); 1313 rcu_read_unlock(); 1314 if (c) { 1315 write_lock_bh(&mrt_lock); 1316 c->mfc_parent = mfc->mfcc_parent; 1317 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); 1318 if (!mrtsock) 1319 c->mfc_flags |= MFC_STATIC; 1320 write_unlock_bh(&mrt_lock); 1321 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c, 1322 mrt->id); 1323 mroute_netlink_event(mrt, c, RTM_NEWROUTE); 1324 return 0; 1325 } 1326 1327 if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) && 1328 
!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) 1329 return -EINVAL; 1330 1331 c = ipmr_cache_alloc(); 1332 if (!c) 1333 return -ENOMEM; 1334 1335 c->mfc_origin = mfc->mfcc_origin.s_addr; 1336 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; 1337 c->mfc_parent = mfc->mfcc_parent; 1338 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); 1339 if (!mrtsock) 1340 c->mfc_flags |= MFC_STATIC; 1341 1342 ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode, 1343 ipmr_rht_params); 1344 if (ret) { 1345 pr_err("ipmr: rhtable insert error %d\n", ret); 1346 ipmr_cache_free(c); 1347 return ret; 1348 } 1349 list_add_tail_rcu(&c->list, &mrt->mfc_cache_list); 1350 /* Check to see if we resolved a queued list. If so we 1351 * need to send on the frames and tidy up. 1352 */ 1353 found = false; 1354 spin_lock_bh(&mfc_unres_lock); 1355 list_for_each_entry(uc, &mrt->mfc_unres_queue, list) { 1356 if (uc->mfc_origin == c->mfc_origin && 1357 uc->mfc_mcastgrp == c->mfc_mcastgrp) { 1358 list_del(&uc->list); 1359 atomic_dec(&mrt->cache_resolve_queue_len); 1360 found = true; 1361 break; 1362 } 1363 } 1364 if (list_empty(&mrt->mfc_unres_queue)) 1365 del_timer(&mrt->ipmr_expire_timer); 1366 spin_unlock_bh(&mfc_unres_lock); 1367 1368 if (found) { 1369 ipmr_cache_resolve(net, mrt, uc, c); 1370 ipmr_cache_free(uc); 1371 } 1372 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id); 1373 mroute_netlink_event(mrt, c, RTM_NEWROUTE); 1374 return 0; 1375 } 1376 1377 /* Close the multicast socket, and clear the vif tables etc */ 1378 static void mroute_clean_tables(struct mr_table *mrt, bool all) 1379 { 1380 struct net *net = read_pnet(&mrt->net); 1381 struct mfc_cache *c, *tmp; 1382 LIST_HEAD(list); 1383 int i; 1384 1385 /* Shut down all active vif entries */ 1386 for (i = 0; i < mrt->maxvif; i++) { 1387 if (!all && (mrt->vif_table[i].flags & VIFF_STATIC)) 1388 continue; 1389 vif_delete(mrt, i, 0, &list); 1390 } 1391 unregister_netdevice_many(&list); 1392 1393 /* Wipe the cache */ 1394 list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { 1395 if (!all && (c->mfc_flags & MFC_STATIC)) 1396 continue; 1397 rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); 1398 list_del_rcu(&c->list); 1399 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, 1400 mrt->id); 1401 mroute_netlink_event(mrt, c, RTM_DELROUTE); 1402 ipmr_cache_put(c); 1403 } 1404 1405 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { 1406 spin_lock_bh(&mfc_unres_lock); 1407 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { 1408 list_del(&c->list); 1409 mroute_netlink_event(mrt, c, RTM_DELROUTE); 1410 ipmr_destroy_unres(mrt, c); 1411 } 1412 spin_unlock_bh(&mfc_unres_lock); 1413 } 1414 } 1415 1416 /* called from ip_ra_control(), before an RCU grace period, 1417 * we dont need to call synchronize_rcu() here 1418 */ 1419 static void mrtsock_destruct(struct sock *sk) 1420 { 1421 struct net *net = sock_net(sk); 1422 struct mr_table *mrt; 1423 1424 ASSERT_RTNL(); 1425 ipmr_for_each_table(mrt, net) { 1426 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1427 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; 1428 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 1429 NETCONFA_MC_FORWARDING, 1430 NETCONFA_IFINDEX_ALL, 1431 net->ipv4.devconf_all); 1432 RCU_INIT_POINTER(mrt->mroute_sk, NULL); 1433 mroute_clean_tables(mrt, false); 1434 } 1435 } 1436 } 1437 1438 /* Socket options and virtual interface manipulation. The whole 1439 * virtual interface system is a complete heap, but unfortunately 1440 * that's how BSD mrouted happens to think. 
Maybe one day with a proper 1441 * MOSPF/PIM router set up we can clean this up. 1442 */ 1443 1444 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, 1445 unsigned int optlen) 1446 { 1447 struct net *net = sock_net(sk); 1448 int val, ret = 0, parent = 0; 1449 struct mr_table *mrt; 1450 struct vifctl vif; 1451 struct mfcctl mfc; 1452 u32 uval; 1453 1454 /* There's one exception to the lock - MRT_DONE which needs to unlock */ 1455 rtnl_lock(); 1456 if (sk->sk_type != SOCK_RAW || 1457 inet_sk(sk)->inet_num != IPPROTO_IGMP) { 1458 ret = -EOPNOTSUPP; 1459 goto out_unlock; 1460 } 1461 1462 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1463 if (!mrt) { 1464 ret = -ENOENT; 1465 goto out_unlock; 1466 } 1467 if (optname != MRT_INIT) { 1468 if (sk != rcu_access_pointer(mrt->mroute_sk) && 1469 !ns_capable(net->user_ns, CAP_NET_ADMIN)) { 1470 ret = -EACCES; 1471 goto out_unlock; 1472 } 1473 } 1474 1475 switch (optname) { 1476 case MRT_INIT: 1477 if (optlen != sizeof(int)) { 1478 ret = -EINVAL; 1479 break; 1480 } 1481 if (rtnl_dereference(mrt->mroute_sk)) { 1482 ret = -EADDRINUSE; 1483 break; 1484 } 1485 1486 ret = ip_ra_control(sk, 1, mrtsock_destruct); 1487 if (ret == 0) { 1488 rcu_assign_pointer(mrt->mroute_sk, sk); 1489 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; 1490 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 1491 NETCONFA_MC_FORWARDING, 1492 NETCONFA_IFINDEX_ALL, 1493 net->ipv4.devconf_all); 1494 } 1495 break; 1496 case MRT_DONE: 1497 if (sk != rcu_access_pointer(mrt->mroute_sk)) { 1498 ret = -EACCES; 1499 } else { 1500 ret = ip_ra_control(sk, 0, NULL); 1501 goto out_unlock; 1502 } 1503 break; 1504 case MRT_ADD_VIF: 1505 case MRT_DEL_VIF: 1506 if (optlen != sizeof(vif)) { 1507 ret = -EINVAL; 1508 break; 1509 } 1510 if (copy_from_user(&vif, optval, sizeof(vif))) { 1511 ret = -EFAULT; 1512 break; 1513 } 1514 if (vif.vifc_vifi >= MAXVIFS) { 1515 ret = -ENFILE; 1516 break; 1517 } 1518 if (optname == MRT_ADD_VIF) { 1519 ret = vif_add(net, mrt, &vif, 1520 sk == rtnl_dereference(mrt->mroute_sk)); 1521 } else { 1522 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); 1523 } 1524 break; 1525 /* Manipulate the forwarding caches. These live 1526 * in a sort of kernel/user symbiosis. 1527 */ 1528 case MRT_ADD_MFC: 1529 case MRT_DEL_MFC: 1530 parent = -1; 1531 case MRT_ADD_MFC_PROXY: 1532 case MRT_DEL_MFC_PROXY: 1533 if (optlen != sizeof(mfc)) { 1534 ret = -EINVAL; 1535 break; 1536 } 1537 if (copy_from_user(&mfc, optval, sizeof(mfc))) { 1538 ret = -EFAULT; 1539 break; 1540 } 1541 if (parent == 0) 1542 parent = mfc.mfcc_parent; 1543 if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) 1544 ret = ipmr_mfc_delete(mrt, &mfc, parent); 1545 else 1546 ret = ipmr_mfc_add(net, mrt, &mfc, 1547 sk == rtnl_dereference(mrt->mroute_sk), 1548 parent); 1549 break; 1550 /* Control PIM assert. 
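	 *
	 * A routing daemon toggles this from user space on its raw IGMP
	 * socket, e.g. (illustrative):
	 *
	 *	int on = 1;
	 *	setsockopt(fd, IPPROTO_IP, MRT_ASSERT, &on, sizeof(on));
	 *
	 * Note that enabling MRT_PIM below implicitly enables asserts too.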
*/ 1551 case MRT_ASSERT: 1552 if (optlen != sizeof(val)) { 1553 ret = -EINVAL; 1554 break; 1555 } 1556 if (get_user(val, (int __user *)optval)) { 1557 ret = -EFAULT; 1558 break; 1559 } 1560 mrt->mroute_do_assert = val; 1561 break; 1562 case MRT_PIM: 1563 if (!ipmr_pimsm_enabled()) { 1564 ret = -ENOPROTOOPT; 1565 break; 1566 } 1567 if (optlen != sizeof(val)) { 1568 ret = -EINVAL; 1569 break; 1570 } 1571 if (get_user(val, (int __user *)optval)) { 1572 ret = -EFAULT; 1573 break; 1574 } 1575 1576 val = !!val; 1577 if (val != mrt->mroute_do_pim) { 1578 mrt->mroute_do_pim = val; 1579 mrt->mroute_do_assert = val; 1580 } 1581 break; 1582 case MRT_TABLE: 1583 if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) { 1584 ret = -ENOPROTOOPT; 1585 break; 1586 } 1587 if (optlen != sizeof(uval)) { 1588 ret = -EINVAL; 1589 break; 1590 } 1591 if (get_user(uval, (u32 __user *)optval)) { 1592 ret = -EFAULT; 1593 break; 1594 } 1595 1596 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1597 ret = -EBUSY; 1598 } else { 1599 mrt = ipmr_new_table(net, uval); 1600 if (IS_ERR(mrt)) 1601 ret = PTR_ERR(mrt); 1602 else 1603 raw_sk(sk)->ipmr_table = uval; 1604 } 1605 break; 1606 /* Spurious command, or MRT_VERSION which you cannot set. */ 1607 default: 1608 ret = -ENOPROTOOPT; 1609 } 1610 out_unlock: 1611 rtnl_unlock(); 1612 return ret; 1613 } 1614 1615 /* Getsock opt support for the multicast routing system. */ 1616 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) 1617 { 1618 int olr; 1619 int val; 1620 struct net *net = sock_net(sk); 1621 struct mr_table *mrt; 1622 1623 if (sk->sk_type != SOCK_RAW || 1624 inet_sk(sk)->inet_num != IPPROTO_IGMP) 1625 return -EOPNOTSUPP; 1626 1627 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1628 if (!mrt) 1629 return -ENOENT; 1630 1631 switch (optname) { 1632 case MRT_VERSION: 1633 val = 0x0305; 1634 break; 1635 case MRT_PIM: 1636 if (!ipmr_pimsm_enabled()) 1637 return -ENOPROTOOPT; 1638 val = mrt->mroute_do_pim; 1639 break; 1640 case MRT_ASSERT: 1641 val = mrt->mroute_do_assert; 1642 break; 1643 default: 1644 return -ENOPROTOOPT; 1645 } 1646 1647 if (get_user(olr, optlen)) 1648 return -EFAULT; 1649 olr = min_t(unsigned int, olr, sizeof(int)); 1650 if (olr < 0) 1651 return -EINVAL; 1652 if (put_user(olr, optlen)) 1653 return -EFAULT; 1654 if (copy_to_user(optval, &val, olr)) 1655 return -EFAULT; 1656 return 0; 1657 } 1658 1659 /* The IP multicast ioctl support routines. */ 1660 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) 1661 { 1662 struct sioc_sg_req sr; 1663 struct sioc_vif_req vr; 1664 struct vif_device *vif; 1665 struct mfc_cache *c; 1666 struct net *net = sock_net(sk); 1667 struct mr_table *mrt; 1668 1669 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? 
: RT_TABLE_DEFAULT); 1670 if (!mrt) 1671 return -ENOENT; 1672 1673 switch (cmd) { 1674 case SIOCGETVIFCNT: 1675 if (copy_from_user(&vr, arg, sizeof(vr))) 1676 return -EFAULT; 1677 if (vr.vifi >= mrt->maxvif) 1678 return -EINVAL; 1679 read_lock(&mrt_lock); 1680 vif = &mrt->vif_table[vr.vifi]; 1681 if (VIF_EXISTS(mrt, vr.vifi)) { 1682 vr.icount = vif->pkt_in; 1683 vr.ocount = vif->pkt_out; 1684 vr.ibytes = vif->bytes_in; 1685 vr.obytes = vif->bytes_out; 1686 read_unlock(&mrt_lock); 1687 1688 if (copy_to_user(arg, &vr, sizeof(vr))) 1689 return -EFAULT; 1690 return 0; 1691 } 1692 read_unlock(&mrt_lock); 1693 return -EADDRNOTAVAIL; 1694 case SIOCGETSGCNT: 1695 if (copy_from_user(&sr, arg, sizeof(sr))) 1696 return -EFAULT; 1697 1698 rcu_read_lock(); 1699 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); 1700 if (c) { 1701 sr.pktcnt = c->mfc_un.res.pkt; 1702 sr.bytecnt = c->mfc_un.res.bytes; 1703 sr.wrong_if = c->mfc_un.res.wrong_if; 1704 rcu_read_unlock(); 1705 1706 if (copy_to_user(arg, &sr, sizeof(sr))) 1707 return -EFAULT; 1708 return 0; 1709 } 1710 rcu_read_unlock(); 1711 return -EADDRNOTAVAIL; 1712 default: 1713 return -ENOIOCTLCMD; 1714 } 1715 } 1716 1717 #ifdef CONFIG_COMPAT 1718 struct compat_sioc_sg_req { 1719 struct in_addr src; 1720 struct in_addr grp; 1721 compat_ulong_t pktcnt; 1722 compat_ulong_t bytecnt; 1723 compat_ulong_t wrong_if; 1724 }; 1725 1726 struct compat_sioc_vif_req { 1727 vifi_t vifi; /* Which iface */ 1728 compat_ulong_t icount; 1729 compat_ulong_t ocount; 1730 compat_ulong_t ibytes; 1731 compat_ulong_t obytes; 1732 }; 1733 1734 int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) 1735 { 1736 struct compat_sioc_sg_req sr; 1737 struct compat_sioc_vif_req vr; 1738 struct vif_device *vif; 1739 struct mfc_cache *c; 1740 struct net *net = sock_net(sk); 1741 struct mr_table *mrt; 1742 1743 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? 
: RT_TABLE_DEFAULT); 1744 if (!mrt) 1745 return -ENOENT; 1746 1747 switch (cmd) { 1748 case SIOCGETVIFCNT: 1749 if (copy_from_user(&vr, arg, sizeof(vr))) 1750 return -EFAULT; 1751 if (vr.vifi >= mrt->maxvif) 1752 return -EINVAL; 1753 read_lock(&mrt_lock); 1754 vif = &mrt->vif_table[vr.vifi]; 1755 if (VIF_EXISTS(mrt, vr.vifi)) { 1756 vr.icount = vif->pkt_in; 1757 vr.ocount = vif->pkt_out; 1758 vr.ibytes = vif->bytes_in; 1759 vr.obytes = vif->bytes_out; 1760 read_unlock(&mrt_lock); 1761 1762 if (copy_to_user(arg, &vr, sizeof(vr))) 1763 return -EFAULT; 1764 return 0; 1765 } 1766 read_unlock(&mrt_lock); 1767 return -EADDRNOTAVAIL; 1768 case SIOCGETSGCNT: 1769 if (copy_from_user(&sr, arg, sizeof(sr))) 1770 return -EFAULT; 1771 1772 rcu_read_lock(); 1773 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); 1774 if (c) { 1775 sr.pktcnt = c->mfc_un.res.pkt; 1776 sr.bytecnt = c->mfc_un.res.bytes; 1777 sr.wrong_if = c->mfc_un.res.wrong_if; 1778 rcu_read_unlock(); 1779 1780 if (copy_to_user(arg, &sr, sizeof(sr))) 1781 return -EFAULT; 1782 return 0; 1783 } 1784 rcu_read_unlock(); 1785 return -EADDRNOTAVAIL; 1786 default: 1787 return -ENOIOCTLCMD; 1788 } 1789 } 1790 #endif 1791 1792 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) 1793 { 1794 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1795 struct net *net = dev_net(dev); 1796 struct mr_table *mrt; 1797 struct vif_device *v; 1798 int ct; 1799 1800 if (event != NETDEV_UNREGISTER) 1801 return NOTIFY_DONE; 1802 1803 ipmr_for_each_table(mrt, net) { 1804 v = &mrt->vif_table[0]; 1805 for (ct = 0; ct < mrt->maxvif; ct++, v++) { 1806 if (v->dev == dev) 1807 vif_delete(mrt, ct, 1, NULL); 1808 } 1809 } 1810 return NOTIFY_DONE; 1811 } 1812 1813 static struct notifier_block ip_mr_notifier = { 1814 .notifier_call = ipmr_device_event, 1815 }; 1816 1817 /* Encapsulate a packet by attaching a valid IPIP header to it. 1818 * This avoids tunnel drivers and other mess and gives us the speed so 1819 * important for multicast video. 
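 *
 * The result is a plain IPIP datagram: a new outer header (protocol
 * IPPROTO_IPIP, saddr/daddr taken from the vif's local/remote addresses,
 * TOS and TTL copied from the inner header) is pushed in front of the
 * original packet before it is handed to the output path.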
1820 */ 1821 static void ip_encap(struct net *net, struct sk_buff *skb, 1822 __be32 saddr, __be32 daddr) 1823 { 1824 struct iphdr *iph; 1825 const struct iphdr *old_iph = ip_hdr(skb); 1826 1827 skb_push(skb, sizeof(struct iphdr)); 1828 skb->transport_header = skb->network_header; 1829 skb_reset_network_header(skb); 1830 iph = ip_hdr(skb); 1831 1832 iph->version = 4; 1833 iph->tos = old_iph->tos; 1834 iph->ttl = old_iph->ttl; 1835 iph->frag_off = 0; 1836 iph->daddr = daddr; 1837 iph->saddr = saddr; 1838 iph->protocol = IPPROTO_IPIP; 1839 iph->ihl = 5; 1840 iph->tot_len = htons(skb->len); 1841 ip_select_ident(net, skb, NULL); 1842 ip_send_check(iph); 1843 1844 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 1845 nf_reset(skb); 1846 } 1847 1848 static inline int ipmr_forward_finish(struct net *net, struct sock *sk, 1849 struct sk_buff *skb) 1850 { 1851 struct ip_options *opt = &(IPCB(skb)->opt); 1852 1853 IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); 1854 IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len); 1855 1856 if (unlikely(opt->optlen)) 1857 ip_forward_options(skb); 1858 1859 return dst_output(net, sk, skb); 1860 } 1861 1862 #ifdef CONFIG_NET_SWITCHDEV 1863 static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, 1864 int in_vifi, int out_vifi) 1865 { 1866 struct vif_device *out_vif = &mrt->vif_table[out_vifi]; 1867 struct vif_device *in_vif = &mrt->vif_table[in_vifi]; 1868 1869 if (!skb->offload_mr_fwd_mark) 1870 return false; 1871 if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len) 1872 return false; 1873 return netdev_phys_item_id_same(&out_vif->dev_parent_id, 1874 &in_vif->dev_parent_id); 1875 } 1876 #else 1877 static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, 1878 int in_vifi, int out_vifi) 1879 { 1880 return false; 1881 } 1882 #endif 1883 1884 /* Processing handlers for ipmr_forward */ 1885 1886 static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, 1887 int in_vifi, struct sk_buff *skb, 1888 struct mfc_cache *c, int vifi) 1889 { 1890 const struct iphdr *iph = ip_hdr(skb); 1891 struct vif_device *vif = &mrt->vif_table[vifi]; 1892 struct net_device *dev; 1893 struct rtable *rt; 1894 struct flowi4 fl4; 1895 int encap = 0; 1896 1897 if (!vif->dev) 1898 goto out_free; 1899 1900 if (vif->flags & VIFF_REGISTER) { 1901 vif->pkt_out++; 1902 vif->bytes_out += skb->len; 1903 vif->dev->stats.tx_bytes += skb->len; 1904 vif->dev->stats.tx_packets++; 1905 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); 1906 goto out_free; 1907 } 1908 1909 if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi)) 1910 goto out_free; 1911 1912 if (vif->flags & VIFF_TUNNEL) { 1913 rt = ip_route_output_ports(net, &fl4, NULL, 1914 vif->remote, vif->local, 1915 0, 0, 1916 IPPROTO_IPIP, 1917 RT_TOS(iph->tos), vif->link); 1918 if (IS_ERR(rt)) 1919 goto out_free; 1920 encap = sizeof(struct iphdr); 1921 } else { 1922 rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0, 1923 0, 0, 1924 IPPROTO_IPIP, 1925 RT_TOS(iph->tos), vif->link); 1926 if (IS_ERR(rt)) 1927 goto out_free; 1928 } 1929 1930 dev = rt->dst.dev; 1931 1932 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { 1933 /* Do not fragment multicasts. Alas, IPv4 does not 1934 * allow to send ICMP, so that packets will disappear 1935 * to blackhole. 
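	 *
	 * (Per RFC 1122, ICMP errors must not be generated in response to
	 * datagrams destined to a multicast address, so "fragmentation
	 * needed" cannot be signalled back to the sender.)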
1936 */ 1937 IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); 1938 ip_rt_put(rt); 1939 goto out_free; 1940 } 1941 1942 encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len; 1943 1944 if (skb_cow(skb, encap)) { 1945 ip_rt_put(rt); 1946 goto out_free; 1947 } 1948 1949 vif->pkt_out++; 1950 vif->bytes_out += skb->len; 1951 1952 skb_dst_drop(skb); 1953 skb_dst_set(skb, &rt->dst); 1954 ip_decrease_ttl(ip_hdr(skb)); 1955 1956 /* FIXME: forward and output firewalls used to be called here. 1957 * What do we do with netfilter? -- RR 1958 */ 1959 if (vif->flags & VIFF_TUNNEL) { 1960 ip_encap(net, skb, vif->local, vif->remote); 1961 /* FIXME: extra output firewall step used to be here. --RR */ 1962 vif->dev->stats.tx_packets++; 1963 vif->dev->stats.tx_bytes += skb->len; 1964 } 1965 1966 IPCB(skb)->flags |= IPSKB_FORWARDED; 1967 1968 /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally 1969 * not only before forwarding, but after forwarding on all output 1970 * interfaces. It is clear, if mrouter runs a multicasting 1971 * program, it should receive packets not depending to what interface 1972 * program is joined. 1973 * If we will not make it, the program will have to join on all 1974 * interfaces. On the other hand, multihoming host (or router, but 1975 * not mrouter) cannot join to more than one interface - it will 1976 * result in receiving multiple packets. 1977 */ 1978 NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, 1979 net, NULL, skb, skb->dev, dev, 1980 ipmr_forward_finish); 1981 return; 1982 1983 out_free: 1984 kfree_skb(skb); 1985 } 1986 1987 static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) 1988 { 1989 int ct; 1990 1991 for (ct = mrt->maxvif-1; ct >= 0; ct--) { 1992 if (mrt->vif_table[ct].dev == dev) 1993 break; 1994 } 1995 return ct; 1996 } 1997 1998 /* "local" means that we should preserve one skb (for local delivery) */ 1999 static void ip_mr_forward(struct net *net, struct mr_table *mrt, 2000 struct net_device *dev, struct sk_buff *skb, 2001 struct mfc_cache *cache, int local) 2002 { 2003 int true_vifi = ipmr_find_vif(mrt, dev); 2004 int psend = -1; 2005 int vif, ct; 2006 2007 vif = cache->mfc_parent; 2008 cache->mfc_un.res.pkt++; 2009 cache->mfc_un.res.bytes += skb->len; 2010 cache->mfc_un.res.lastuse = jiffies; 2011 2012 if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) { 2013 struct mfc_cache *cache_proxy; 2014 2015 /* For an (*,G) entry, we only check that the incomming 2016 * interface is part of the static tree. 2017 */ 2018 cache_proxy = ipmr_cache_find_any_parent(mrt, vif); 2019 if (cache_proxy && 2020 cache_proxy->mfc_un.res.ttls[true_vifi] < 255) 2021 goto forward; 2022 } 2023 2024 /* Wrong interface: drop packet and (maybe) send PIM assert. */ 2025 if (mrt->vif_table[vif].dev != dev) { 2026 if (rt_is_output_route(skb_rtable(skb))) { 2027 /* It is our own packet, looped back. 2028 * Very complicated situation... 2029 * 2030 * The best workaround until routing daemons will be 2031 * fixed is not to redistribute packet, if it was 2032 * send through wrong interface. It means, that 2033 * multicast applications WILL NOT work for 2034 * (S,G), which have default multicast route pointing 2035 * to wrong oif. In any case, it is not a good 2036 * idea to use multicasting applications on router. 2037 */ 2038 goto dont_forward; 2039 } 2040 2041 cache->mfc_un.res.wrong_if++; 2042 2043 if (true_vifi >= 0 && mrt->mroute_do_assert && 2044 /* pimsm uses asserts, when switching from RPT to SPT, 2045 * so that we cannot check that packet arrived on an oif. 
2046 * It is bad, but otherwise we would need to move pretty 2047 * large chunk of pimd to kernel. Ough... --ANK 2048 */ 2049 (mrt->mroute_do_pim || 2050 cache->mfc_un.res.ttls[true_vifi] < 255) && 2051 time_after(jiffies, 2052 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { 2053 cache->mfc_un.res.last_assert = jiffies; 2054 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); 2055 } 2056 goto dont_forward; 2057 } 2058 2059 forward: 2060 mrt->vif_table[vif].pkt_in++; 2061 mrt->vif_table[vif].bytes_in += skb->len; 2062 2063 /* Forward the frame */ 2064 if (cache->mfc_origin == htonl(INADDR_ANY) && 2065 cache->mfc_mcastgrp == htonl(INADDR_ANY)) { 2066 if (true_vifi >= 0 && 2067 true_vifi != cache->mfc_parent && 2068 ip_hdr(skb)->ttl > 2069 cache->mfc_un.res.ttls[cache->mfc_parent]) { 2070 /* It's an (*,*) entry and the packet is not coming from 2071 * the upstream: forward the packet to the upstream 2072 * only. 2073 */ 2074 psend = cache->mfc_parent; 2075 goto last_forward; 2076 } 2077 goto dont_forward; 2078 } 2079 for (ct = cache->mfc_un.res.maxvif - 1; 2080 ct >= cache->mfc_un.res.minvif; ct--) { 2081 /* For (*,G) entry, don't forward to the incoming interface */ 2082 if ((cache->mfc_origin != htonl(INADDR_ANY) || 2083 ct != true_vifi) && 2084 ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { 2085 if (psend != -1) { 2086 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2087 2088 if (skb2) 2089 ipmr_queue_xmit(net, mrt, true_vifi, 2090 skb2, cache, psend); 2091 } 2092 psend = ct; 2093 } 2094 } 2095 last_forward: 2096 if (psend != -1) { 2097 if (local) { 2098 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2099 2100 if (skb2) 2101 ipmr_queue_xmit(net, mrt, true_vifi, skb2, 2102 cache, psend); 2103 } else { 2104 ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend); 2105 return; 2106 } 2107 } 2108 2109 dont_forward: 2110 if (!local) 2111 kfree_skb(skb); 2112 } 2113 2114 static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) 2115 { 2116 struct rtable *rt = skb_rtable(skb); 2117 struct iphdr *iph = ip_hdr(skb); 2118 struct flowi4 fl4 = { 2119 .daddr = iph->daddr, 2120 .saddr = iph->saddr, 2121 .flowi4_tos = RT_TOS(iph->tos), 2122 .flowi4_oif = (rt_is_output_route(rt) ? 2123 skb->dev->ifindex : 0), 2124 .flowi4_iif = (rt_is_output_route(rt) ? 2125 LOOPBACK_IFINDEX : 2126 skb->dev->ifindex), 2127 .flowi4_mark = skb->mark, 2128 }; 2129 struct mr_table *mrt; 2130 int err; 2131 2132 err = ipmr_fib_lookup(net, &fl4, &mrt); 2133 if (err) 2134 return ERR_PTR(err); 2135 return mrt; 2136 } 2137 2138 /* Multicast packets for forwarding arrive here 2139 * Called with rcu_read_lock(); 2140 */ 2141 int ip_mr_input(struct sk_buff *skb) 2142 { 2143 struct mfc_cache *cache; 2144 struct net *net = dev_net(skb->dev); 2145 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; 2146 struct mr_table *mrt; 2147 struct net_device *dev; 2148 2149 /* skb->dev passed in is the loX master dev for vrfs. 2150 * As there are no vifs associated with loopback devices, 2151 * get the proper interface that does have a vif associated with it. 2152 */ 2153 dev = skb->dev; 2154 if (netif_is_l3_master(skb->dev)) { 2155 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); 2156 if (!dev) { 2157 kfree_skb(skb); 2158 return -ENODEV; 2159 } 2160 } 2161 2162 /* Packet is looped back after forward, it should not be 2163 * forwarded second time, but still can be delivered locally. 
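	 *
	 * ipmr_queue_xmit() marks every forwarded copy with IPSKB_FORWARDED,
	 * so seeing that flag here means the skb has already been through
	 * ip_mr_forward() on this host.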
2164 */ 2165 if (IPCB(skb)->flags & IPSKB_FORWARDED) 2166 goto dont_forward; 2167 2168 mrt = ipmr_rt_fib_lookup(net, skb); 2169 if (IS_ERR(mrt)) { 2170 kfree_skb(skb); 2171 return PTR_ERR(mrt); 2172 } 2173 if (!local) { 2174 if (IPCB(skb)->opt.router_alert) { 2175 if (ip_call_ra_chain(skb)) 2176 return 0; 2177 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) { 2178 /* IGMPv1 (and broken IGMPv2 implementations sort of 2179 * Cisco IOS <= 11.2(8)) do not put router alert 2180 * option to IGMP packets destined to routable 2181 * groups. It is very bad, because it means 2182 * that we can forward NO IGMP messages. 2183 */ 2184 struct sock *mroute_sk; 2185 2186 mroute_sk = rcu_dereference(mrt->mroute_sk); 2187 if (mroute_sk) { 2188 nf_reset(skb); 2189 raw_rcv(mroute_sk, skb); 2190 return 0; 2191 } 2192 } 2193 } 2194 2195 /* already under rcu_read_lock() */ 2196 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 2197 if (!cache) { 2198 int vif = ipmr_find_vif(mrt, dev); 2199 2200 if (vif >= 0) 2201 cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, 2202 vif); 2203 } 2204 2205 /* No usable cache entry */ 2206 if (!cache) { 2207 int vif; 2208 2209 if (local) { 2210 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2211 ip_local_deliver(skb); 2212 if (!skb2) 2213 return -ENOBUFS; 2214 skb = skb2; 2215 } 2216 2217 read_lock(&mrt_lock); 2218 vif = ipmr_find_vif(mrt, dev); 2219 if (vif >= 0) { 2220 int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev); 2221 read_unlock(&mrt_lock); 2222 2223 return err2; 2224 } 2225 read_unlock(&mrt_lock); 2226 kfree_skb(skb); 2227 return -ENODEV; 2228 } 2229 2230 read_lock(&mrt_lock); 2231 ip_mr_forward(net, mrt, dev, skb, cache, local); 2232 read_unlock(&mrt_lock); 2233 2234 if (local) 2235 return ip_local_deliver(skb); 2236 2237 return 0; 2238 2239 dont_forward: 2240 if (local) 2241 return ip_local_deliver(skb); 2242 kfree_skb(skb); 2243 return 0; 2244 } 2245 2246 #ifdef CONFIG_IP_PIMSM_V1 2247 /* Handle IGMP messages of PIMv1 */ 2248 int pim_rcv_v1(struct sk_buff *skb) 2249 { 2250 struct igmphdr *pim; 2251 struct net *net = dev_net(skb->dev); 2252 struct mr_table *mrt; 2253 2254 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 2255 goto drop; 2256 2257 pim = igmp_hdr(skb); 2258 2259 mrt = ipmr_rt_fib_lookup(net, skb); 2260 if (IS_ERR(mrt)) 2261 goto drop; 2262 if (!mrt->mroute_do_pim || 2263 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 2264 goto drop; 2265 2266 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 2267 drop: 2268 kfree_skb(skb); 2269 } 2270 return 0; 2271 } 2272 #endif 2273 2274 #ifdef CONFIG_IP_PIMSM_V2 2275 static int pim_rcv(struct sk_buff *skb) 2276 { 2277 struct pimreghdr *pim; 2278 struct net *net = dev_net(skb->dev); 2279 struct mr_table *mrt; 2280 2281 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 2282 goto drop; 2283 2284 pim = (struct pimreghdr *)skb_transport_header(skb); 2285 if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) || 2286 (pim->flags & PIM_NULL_REGISTER) || 2287 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 2288 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 2289 goto drop; 2290 2291 mrt = ipmr_rt_fib_lookup(net, skb); 2292 if (IS_ERR(mrt)) 2293 goto drop; 2294 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 2295 drop: 2296 kfree_skb(skb); 2297 } 2298 return 0; 2299 } 2300 #endif 2301 2302 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2303 struct mfc_cache *c, struct rtmsg *rtm) 2304 { 2305 struct rta_mfc_stats mfcs; 2306 struct nlattr 
*mp_attr; 2307 struct rtnexthop *nhp; 2308 unsigned long lastuse; 2309 int ct; 2310 2311 /* If cache is unresolved, don't try to parse IIF and OIF */ 2312 if (c->mfc_parent >= MAXVIFS) { 2313 rtm->rtm_flags |= RTNH_F_UNRESOLVED; 2314 return -ENOENT; 2315 } 2316 2317 if (VIF_EXISTS(mrt, c->mfc_parent) && 2318 nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0) 2319 return -EMSGSIZE; 2320 2321 if (c->mfc_flags & MFC_OFFLOAD) 2322 rtm->rtm_flags |= RTNH_F_OFFLOAD; 2323 2324 if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH))) 2325 return -EMSGSIZE; 2326 2327 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { 2328 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { 2329 if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) { 2330 nla_nest_cancel(skb, mp_attr); 2331 return -EMSGSIZE; 2332 } 2333 2334 nhp->rtnh_flags = 0; 2335 nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; 2336 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex; 2337 nhp->rtnh_len = sizeof(*nhp); 2338 } 2339 } 2340 2341 nla_nest_end(skb, mp_attr); 2342 2343 lastuse = READ_ONCE(c->mfc_un.res.lastuse); 2344 lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0; 2345 2346 mfcs.mfcs_packets = c->mfc_un.res.pkt; 2347 mfcs.mfcs_bytes = c->mfc_un.res.bytes; 2348 mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; 2349 if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) || 2350 nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse), 2351 RTA_PAD)) 2352 return -EMSGSIZE; 2353 2354 rtm->rtm_type = RTN_MULTICAST; 2355 return 1; 2356 } 2357 2358 int ipmr_get_route(struct net *net, struct sk_buff *skb, 2359 __be32 saddr, __be32 daddr, 2360 struct rtmsg *rtm, u32 portid) 2361 { 2362 struct mfc_cache *cache; 2363 struct mr_table *mrt; 2364 int err; 2365 2366 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2367 if (!mrt) 2368 return -ENOENT; 2369 2370 rcu_read_lock(); 2371 cache = ipmr_cache_find(mrt, saddr, daddr); 2372 if (!cache && skb->dev) { 2373 int vif = ipmr_find_vif(mrt, skb->dev); 2374 2375 if (vif >= 0) 2376 cache = ipmr_cache_find_any(mrt, daddr, vif); 2377 } 2378 if (!cache) { 2379 struct sk_buff *skb2; 2380 struct iphdr *iph; 2381 struct net_device *dev; 2382 int vif = -1; 2383 2384 dev = skb->dev; 2385 read_lock(&mrt_lock); 2386 if (dev) 2387 vif = ipmr_find_vif(mrt, dev); 2388 if (vif < 0) { 2389 read_unlock(&mrt_lock); 2390 rcu_read_unlock(); 2391 return -ENODEV; 2392 } 2393 skb2 = skb_clone(skb, GFP_ATOMIC); 2394 if (!skb2) { 2395 read_unlock(&mrt_lock); 2396 rcu_read_unlock(); 2397 return -ENOMEM; 2398 } 2399 2400 NETLINK_CB(skb2).portid = portid; 2401 skb_push(skb2, sizeof(struct iphdr)); 2402 skb_reset_network_header(skb2); 2403 iph = ip_hdr(skb2); 2404 iph->ihl = sizeof(struct iphdr) >> 2; 2405 iph->saddr = saddr; 2406 iph->daddr = daddr; 2407 iph->version = 0; 2408 err = ipmr_cache_unresolved(mrt, vif, skb2, dev); 2409 read_unlock(&mrt_lock); 2410 rcu_read_unlock(); 2411 return err; 2412 } 2413 2414 read_lock(&mrt_lock); 2415 err = __ipmr_fill_mroute(mrt, skb, cache, rtm); 2416 read_unlock(&mrt_lock); 2417 rcu_read_unlock(); 2418 return err; 2419 } 2420 2421 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2422 u32 portid, u32 seq, struct mfc_cache *c, int cmd, 2423 int flags) 2424 { 2425 struct nlmsghdr *nlh; 2426 struct rtmsg *rtm; 2427 int err; 2428 2429 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); 2430 if (!nlh) 2431 return -EMSGSIZE; 2432 2433 rtm = nlmsg_data(nlh); 2434 rtm->rtm_family = RTNL_FAMILY_IPMR; 2435 
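	/* An MFC entry is keyed on exact (S,G) addresses, so both prefix
	 * lengths below are full /32s.
	 */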
rtm->rtm_dst_len = 32; 2436 rtm->rtm_src_len = 32; 2437 rtm->rtm_tos = 0; 2438 rtm->rtm_table = mrt->id; 2439 if (nla_put_u32(skb, RTA_TABLE, mrt->id)) 2440 goto nla_put_failure; 2441 rtm->rtm_type = RTN_MULTICAST; 2442 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2443 if (c->mfc_flags & MFC_STATIC) 2444 rtm->rtm_protocol = RTPROT_STATIC; 2445 else 2446 rtm->rtm_protocol = RTPROT_MROUTED; 2447 rtm->rtm_flags = 0; 2448 2449 if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) || 2450 nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp)) 2451 goto nla_put_failure; 2452 err = __ipmr_fill_mroute(mrt, skb, c, rtm); 2453 /* do not break the dump if cache is unresolved */ 2454 if (err < 0 && err != -ENOENT) 2455 goto nla_put_failure; 2456 2457 nlmsg_end(skb, nlh); 2458 return 0; 2459 2460 nla_put_failure: 2461 nlmsg_cancel(skb, nlh); 2462 return -EMSGSIZE; 2463 } 2464 2465 static size_t mroute_msgsize(bool unresolved, int maxvif) 2466 { 2467 size_t len = 2468 NLMSG_ALIGN(sizeof(struct rtmsg)) 2469 + nla_total_size(4) /* RTA_TABLE */ 2470 + nla_total_size(4) /* RTA_SRC */ 2471 + nla_total_size(4) /* RTA_DST */ 2472 ; 2473 2474 if (!unresolved) 2475 len = len 2476 + nla_total_size(4) /* RTA_IIF */ 2477 + nla_total_size(0) /* RTA_MULTIPATH */ 2478 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) 2479 /* RTA_MFC_STATS */ 2480 + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) 2481 ; 2482 2483 return len; 2484 } 2485 2486 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, 2487 int cmd) 2488 { 2489 struct net *net = read_pnet(&mrt->net); 2490 struct sk_buff *skb; 2491 int err = -ENOBUFS; 2492 2493 skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif), 2494 GFP_ATOMIC); 2495 if (!skb) 2496 goto errout; 2497 2498 err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); 2499 if (err < 0) 2500 goto errout; 2501 2502 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC); 2503 return; 2504 2505 errout: 2506 kfree_skb(skb); 2507 if (err < 0) 2508 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err); 2509 } 2510 2511 static size_t igmpmsg_netlink_msgsize(size_t payloadlen) 2512 { 2513 size_t len = 2514 NLMSG_ALIGN(sizeof(struct rtgenmsg)) 2515 + nla_total_size(1) /* IPMRA_CREPORT_MSGTYPE */ 2516 + nla_total_size(4) /* IPMRA_CREPORT_VIF_ID */ 2517 + nla_total_size(4) /* IPMRA_CREPORT_SRC_ADDR */ 2518 + nla_total_size(4) /* IPMRA_CREPORT_DST_ADDR */ 2519 /* IPMRA_CREPORT_PKT */ 2520 + nla_total_size(payloadlen) 2521 ; 2522 2523 return len; 2524 } 2525 2526 static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt) 2527 { 2528 struct net *net = read_pnet(&mrt->net); 2529 struct nlmsghdr *nlh; 2530 struct rtgenmsg *rtgenm; 2531 struct igmpmsg *msg; 2532 struct sk_buff *skb; 2533 struct nlattr *nla; 2534 int payloadlen; 2535 2536 payloadlen = pkt->len - sizeof(struct igmpmsg); 2537 msg = (struct igmpmsg *)skb_network_header(pkt); 2538 2539 skb = nlmsg_new(igmpmsg_netlink_msgsize(payloadlen), GFP_ATOMIC); 2540 if (!skb) 2541 goto errout; 2542 2543 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, 2544 sizeof(struct rtgenmsg), 0); 2545 if (!nlh) 2546 goto errout; 2547 rtgenm = nlmsg_data(nlh); 2548 rtgenm->rtgen_family = RTNL_FAMILY_IPMR; 2549 if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) || 2550 nla_put_u32(skb, IPMRA_CREPORT_VIF_ID, msg->im_vif) || 2551 nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR, 2552 msg->im_src.s_addr) || 2553 nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR, 2554 msg->im_dst.s_addr)) 2555 goto nla_put_failure; 2556 2557 nla = nla_reserve(skb, 
IPMRA_CREPORT_PKT, payloadlen); 2558 if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg), 2559 nla_data(nla), payloadlen)) 2560 goto nla_put_failure; 2561 2562 nlmsg_end(skb, nlh); 2563 2564 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC); 2565 return; 2566 2567 nla_put_failure: 2568 nlmsg_cancel(skb, nlh); 2569 errout: 2570 kfree_skb(skb); 2571 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS); 2572 } 2573 2574 static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 2575 struct netlink_ext_ack *extack) 2576 { 2577 struct net *net = sock_net(in_skb->sk); 2578 struct nlattr *tb[RTA_MAX + 1]; 2579 struct sk_buff *skb = NULL; 2580 struct mfc_cache *cache; 2581 struct mr_table *mrt; 2582 struct rtmsg *rtm; 2583 __be32 src, grp; 2584 u32 tableid; 2585 int err; 2586 2587 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, 2588 rtm_ipv4_policy, extack); 2589 if (err < 0) 2590 goto errout; 2591 2592 rtm = nlmsg_data(nlh); 2593 2594 src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; 2595 grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; 2596 tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0; 2597 2598 mrt = ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT); 2599 if (!mrt) { 2600 err = -ENOENT; 2601 goto errout_free; 2602 } 2603 2604 /* entries are added/deleted only under RTNL */ 2605 rcu_read_lock(); 2606 cache = ipmr_cache_find(mrt, src, grp); 2607 rcu_read_unlock(); 2608 if (!cache) { 2609 err = -ENOENT; 2610 goto errout_free; 2611 } 2612 2613 skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL); 2614 if (!skb) { 2615 err = -ENOBUFS; 2616 goto errout_free; 2617 } 2618 2619 err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, 2620 nlh->nlmsg_seq, cache, 2621 RTM_NEWROUTE, 0); 2622 if (err < 0) 2623 goto errout_free; 2624 2625 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 2626 2627 errout: 2628 return err; 2629 2630 errout_free: 2631 kfree_skb(skb); 2632 goto errout; 2633 } 2634 2635 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2636 { 2637 struct net *net = sock_net(skb->sk); 2638 struct mr_table *mrt; 2639 struct mfc_cache *mfc; 2640 unsigned int t = 0, s_t; 2641 unsigned int e = 0, s_e; 2642 2643 s_t = cb->args[0]; 2644 s_e = cb->args[1]; 2645 2646 rcu_read_lock(); 2647 ipmr_for_each_table(mrt, net) { 2648 if (t < s_t) 2649 goto next_table; 2650 list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) { 2651 if (e < s_e) 2652 goto next_entry; 2653 if (ipmr_fill_mroute(mrt, skb, 2654 NETLINK_CB(cb->skb).portid, 2655 cb->nlh->nlmsg_seq, 2656 mfc, RTM_NEWROUTE, 2657 NLM_F_MULTI) < 0) 2658 goto done; 2659 next_entry: 2660 e++; 2661 } 2662 e = 0; 2663 s_e = 0; 2664 2665 spin_lock_bh(&mfc_unres_lock); 2666 list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { 2667 if (e < s_e) 2668 goto next_entry2; 2669 if (ipmr_fill_mroute(mrt, skb, 2670 NETLINK_CB(cb->skb).portid, 2671 cb->nlh->nlmsg_seq, 2672 mfc, RTM_NEWROUTE, 2673 NLM_F_MULTI) < 0) { 2674 spin_unlock_bh(&mfc_unres_lock); 2675 goto done; 2676 } 2677 next_entry2: 2678 e++; 2679 } 2680 spin_unlock_bh(&mfc_unres_lock); 2681 e = 0; 2682 s_e = 0; 2683 next_table: 2684 t++; 2685 } 2686 done: 2687 rcu_read_unlock(); 2688 2689 cb->args[1] = e; 2690 cb->args[0] = t; 2691 2692 return skb->len; 2693 } 2694 2695 static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = { 2696 [RTA_SRC] = { .type = NLA_U32 }, 2697 [RTA_DST] = { .type = NLA_U32 }, 2698 [RTA_IIF] = { .type = NLA_U32 }, 2699 [RTA_TABLE] = { .type = NLA_U32 }, 2700 
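	/* RTA_MULTIPATH carries the packed rtnexthop array built by
	 * __ipmr_fill_mroute(); the policy entry below only requires that at
	 * least one struct rtnexthop worth of data is present.
	 */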
[RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 2701 }; 2702 2703 static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol) 2704 { 2705 switch (rtm_protocol) { 2706 case RTPROT_STATIC: 2707 case RTPROT_MROUTED: 2708 return true; 2709 } 2710 return false; 2711 } 2712 2713 static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc) 2714 { 2715 struct rtnexthop *rtnh = nla_data(nla); 2716 int remaining = nla_len(nla), vifi = 0; 2717 2718 while (rtnh_ok(rtnh, remaining)) { 2719 mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops; 2720 if (++vifi == MAXVIFS) 2721 break; 2722 rtnh = rtnh_next(rtnh, &remaining); 2723 } 2724 2725 return remaining > 0 ? -EINVAL : vifi; 2726 } 2727 2728 /* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */ 2729 static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, 2730 struct mfcctl *mfcc, int *mrtsock, 2731 struct mr_table **mrtret, 2732 struct netlink_ext_ack *extack) 2733 { 2734 struct net_device *dev = NULL; 2735 u32 tblid = RT_TABLE_DEFAULT; 2736 struct mr_table *mrt; 2737 struct nlattr *attr; 2738 struct rtmsg *rtm; 2739 int ret, rem; 2740 2741 ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy, 2742 extack); 2743 if (ret < 0) 2744 goto out; 2745 rtm = nlmsg_data(nlh); 2746 2747 ret = -EINVAL; 2748 if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 || 2749 rtm->rtm_type != RTN_MULTICAST || 2750 rtm->rtm_scope != RT_SCOPE_UNIVERSE || 2751 !ipmr_rtm_validate_proto(rtm->rtm_protocol)) 2752 goto out; 2753 2754 memset(mfcc, 0, sizeof(*mfcc)); 2755 mfcc->mfcc_parent = -1; 2756 ret = 0; 2757 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) { 2758 switch (nla_type(attr)) { 2759 case RTA_SRC: 2760 mfcc->mfcc_origin.s_addr = nla_get_be32(attr); 2761 break; 2762 case RTA_DST: 2763 mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr); 2764 break; 2765 case RTA_IIF: 2766 dev = __dev_get_by_index(net, nla_get_u32(attr)); 2767 if (!dev) { 2768 ret = -ENODEV; 2769 goto out; 2770 } 2771 break; 2772 case RTA_MULTIPATH: 2773 if (ipmr_nla_get_ttls(attr, mfcc) < 0) { 2774 ret = -EINVAL; 2775 goto out; 2776 } 2777 break; 2778 case RTA_PREFSRC: 2779 ret = 1; 2780 break; 2781 case RTA_TABLE: 2782 tblid = nla_get_u32(attr); 2783 break; 2784 } 2785 } 2786 mrt = ipmr_get_table(net, tblid); 2787 if (!mrt) { 2788 ret = -ENOENT; 2789 goto out; 2790 } 2791 *mrtret = mrt; 2792 *mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0; 2793 if (dev) 2794 mfcc->mfcc_parent = ipmr_find_vif(mrt, dev); 2795 2796 out: 2797 return ret; 2798 } 2799 2800 /* takes care of both newroute and delroute */ 2801 static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh, 2802 struct netlink_ext_ack *extack) 2803 { 2804 struct net *net = sock_net(skb->sk); 2805 int ret, mrtsock, parent; 2806 struct mr_table *tbl; 2807 struct mfcctl mfcc; 2808 2809 mrtsock = 0; 2810 tbl = NULL; 2811 ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack); 2812 if (ret < 0) 2813 return ret; 2814 2815 parent = ret ? 
mfcc.mfcc_parent : -1; 2816 if (nlh->nlmsg_type == RTM_NEWROUTE) 2817 return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent); 2818 else 2819 return ipmr_mfc_delete(tbl, &mfcc, parent); 2820 } 2821 2822 static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb) 2823 { 2824 u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len); 2825 2826 if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) || 2827 nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) || 2828 nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM, 2829 mrt->mroute_reg_vif_num) || 2830 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT, 2831 mrt->mroute_do_assert) || 2832 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim)) 2833 return false; 2834 2835 return true; 2836 } 2837 2838 static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb) 2839 { 2840 struct nlattr *vif_nest; 2841 struct vif_device *vif; 2842 2843 /* if the VIF doesn't exist just continue */ 2844 if (!VIF_EXISTS(mrt, vifid)) 2845 return true; 2846 2847 vif = &mrt->vif_table[vifid]; 2848 vif_nest = nla_nest_start(skb, IPMRA_VIF); 2849 if (!vif_nest) 2850 return false; 2851 if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) || 2852 nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) || 2853 nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) || 2854 nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in, 2855 IPMRA_VIFA_PAD) || 2856 nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, vif->bytes_out, 2857 IPMRA_VIFA_PAD) || 2858 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, vif->pkt_in, 2859 IPMRA_VIFA_PAD) || 2860 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, vif->pkt_out, 2861 IPMRA_VIFA_PAD) || 2862 nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) || 2863 nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) { 2864 nla_nest_cancel(skb, vif_nest); 2865 return false; 2866 } 2867 nla_nest_end(skb, vif_nest); 2868 2869 return true; 2870 } 2871 2872 static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb) 2873 { 2874 struct net *net = sock_net(skb->sk); 2875 struct nlmsghdr *nlh = NULL; 2876 unsigned int t = 0, s_t; 2877 unsigned int e = 0, s_e; 2878 struct mr_table *mrt; 2879 2880 s_t = cb->args[0]; 2881 s_e = cb->args[1]; 2882 2883 ipmr_for_each_table(mrt, net) { 2884 struct nlattr *vifs, *af; 2885 struct ifinfomsg *hdr; 2886 u32 i; 2887 2888 if (t < s_t) 2889 goto skip_table; 2890 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, 2891 cb->nlh->nlmsg_seq, RTM_NEWLINK, 2892 sizeof(*hdr), NLM_F_MULTI); 2893 if (!nlh) 2894 break; 2895 2896 hdr = nlmsg_data(nlh); 2897 memset(hdr, 0, sizeof(*hdr)); 2898 hdr->ifi_family = RTNL_FAMILY_IPMR; 2899 2900 af = nla_nest_start(skb, IFLA_AF_SPEC); 2901 if (!af) { 2902 nlmsg_cancel(skb, nlh); 2903 goto out; 2904 } 2905 2906 if (!ipmr_fill_table(mrt, skb)) { 2907 nlmsg_cancel(skb, nlh); 2908 goto out; 2909 } 2910 2911 vifs = nla_nest_start(skb, IPMRA_TABLE_VIFS); 2912 if (!vifs) { 2913 nla_nest_end(skb, af); 2914 nlmsg_end(skb, nlh); 2915 goto out; 2916 } 2917 for (i = 0; i < mrt->maxvif; i++) { 2918 if (e < s_e) 2919 goto skip_entry; 2920 if (!ipmr_fill_vif(mrt, i, skb)) { 2921 nla_nest_end(skb, vifs); 2922 nla_nest_end(skb, af); 2923 nlmsg_end(skb, nlh); 2924 goto out; 2925 } 2926 skip_entry: 2927 e++; 2928 } 2929 s_e = 0; 2930 e = 0; 2931 nla_nest_end(skb, vifs); 2932 nla_nest_end(skb, af); 2933 nlmsg_end(skb, nlh); 2934 skip_table: 2935 t++; 2936 } 2937 2938 out: 2939 cb->args[1] = e; 2940 cb->args[0] = t; 2941 2942 return skb->len; 2943 } 2944 2945 #ifdef CONFIG_PROC_FS 
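/* Purely illustrative sketch of the /proc/net/ip_mr_vif layout produced by
 * ipmr_vif_seq_show() below; the device name and counter values here are
 * made-up examples, not output captured from a running system:
 *
 *   Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 * 0 eth0               1234       5      5678      10 00000 C0A80001 00000000
 */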
2946 /* The /proc interfaces to multicast routing : 2947 * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif 2948 */ 2949 struct ipmr_vif_iter { 2950 struct seq_net_private p; 2951 struct mr_table *mrt; 2952 int ct; 2953 }; 2954 2955 static struct vif_device *ipmr_vif_seq_idx(struct net *net, 2956 struct ipmr_vif_iter *iter, 2957 loff_t pos) 2958 { 2959 struct mr_table *mrt = iter->mrt; 2960 2961 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { 2962 if (!VIF_EXISTS(mrt, iter->ct)) 2963 continue; 2964 if (pos-- == 0) 2965 return &mrt->vif_table[iter->ct]; 2966 } 2967 return NULL; 2968 } 2969 2970 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) 2971 __acquires(mrt_lock) 2972 { 2973 struct ipmr_vif_iter *iter = seq->private; 2974 struct net *net = seq_file_net(seq); 2975 struct mr_table *mrt; 2976 2977 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2978 if (!mrt) 2979 return ERR_PTR(-ENOENT); 2980 2981 iter->mrt = mrt; 2982 2983 read_lock(&mrt_lock); 2984 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1) 2985 : SEQ_START_TOKEN; 2986 } 2987 2988 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2989 { 2990 struct ipmr_vif_iter *iter = seq->private; 2991 struct net *net = seq_file_net(seq); 2992 struct mr_table *mrt = iter->mrt; 2993 2994 ++*pos; 2995 if (v == SEQ_START_TOKEN) 2996 return ipmr_vif_seq_idx(net, iter, 0); 2997 2998 while (++iter->ct < mrt->maxvif) { 2999 if (!VIF_EXISTS(mrt, iter->ct)) 3000 continue; 3001 return &mrt->vif_table[iter->ct]; 3002 } 3003 return NULL; 3004 } 3005 3006 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) 3007 __releases(mrt_lock) 3008 { 3009 read_unlock(&mrt_lock); 3010 } 3011 3012 static int ipmr_vif_seq_show(struct seq_file *seq, void *v) 3013 { 3014 struct ipmr_vif_iter *iter = seq->private; 3015 struct mr_table *mrt = iter->mrt; 3016 3017 if (v == SEQ_START_TOKEN) { 3018 seq_puts(seq, 3019 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); 3020 } else { 3021 const struct vif_device *vif = v; 3022 const char *name = vif->dev ? 
vif->dev->name : "none"; 3023 3024 seq_printf(seq, 3025 "%2zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", 3026 vif - mrt->vif_table, 3027 name, vif->bytes_in, vif->pkt_in, 3028 vif->bytes_out, vif->pkt_out, 3029 vif->flags, vif->local, vif->remote); 3030 } 3031 return 0; 3032 } 3033 3034 static const struct seq_operations ipmr_vif_seq_ops = { 3035 .start = ipmr_vif_seq_start, 3036 .next = ipmr_vif_seq_next, 3037 .stop = ipmr_vif_seq_stop, 3038 .show = ipmr_vif_seq_show, 3039 }; 3040 3041 static int ipmr_vif_open(struct inode *inode, struct file *file) 3042 { 3043 return seq_open_net(inode, file, &ipmr_vif_seq_ops, 3044 sizeof(struct ipmr_vif_iter)); 3045 } 3046 3047 static const struct file_operations ipmr_vif_fops = { 3048 .owner = THIS_MODULE, 3049 .open = ipmr_vif_open, 3050 .read = seq_read, 3051 .llseek = seq_lseek, 3052 .release = seq_release_net, 3053 }; 3054 3055 struct ipmr_mfc_iter { 3056 struct seq_net_private p; 3057 struct mr_table *mrt; 3058 struct list_head *cache; 3059 }; 3060 3061 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, 3062 struct ipmr_mfc_iter *it, loff_t pos) 3063 { 3064 struct mr_table *mrt = it->mrt; 3065 struct mfc_cache *mfc; 3066 3067 rcu_read_lock(); 3068 it->cache = &mrt->mfc_cache_list; 3069 list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) 3070 if (pos-- == 0) 3071 return mfc; 3072 rcu_read_unlock(); 3073 3074 spin_lock_bh(&mfc_unres_lock); 3075 it->cache = &mrt->mfc_unres_queue; 3076 list_for_each_entry(mfc, it->cache, list) 3077 if (pos-- == 0) 3078 return mfc; 3079 spin_unlock_bh(&mfc_unres_lock); 3080 3081 it->cache = NULL; 3082 return NULL; 3083 } 3084 3085 3086 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 3087 { 3088 struct ipmr_mfc_iter *it = seq->private; 3089 struct net *net = seq_file_net(seq); 3090 struct mr_table *mrt; 3091 3092 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 3093 if (!mrt) 3094 return ERR_PTR(-ENOENT); 3095 3096 it->mrt = mrt; 3097 it->cache = NULL; 3098 return *pos ? 
ipmr_mfc_seq_idx(net, seq->private, *pos - 1) 3099 : SEQ_START_TOKEN; 3100 } 3101 3102 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3103 { 3104 struct ipmr_mfc_iter *it = seq->private; 3105 struct net *net = seq_file_net(seq); 3106 struct mr_table *mrt = it->mrt; 3107 struct mfc_cache *mfc = v; 3108 3109 ++*pos; 3110 3111 if (v == SEQ_START_TOKEN) 3112 return ipmr_mfc_seq_idx(net, seq->private, 0); 3113 3114 if (mfc->list.next != it->cache) 3115 return list_entry(mfc->list.next, struct mfc_cache, list); 3116 3117 if (it->cache == &mrt->mfc_unres_queue) 3118 goto end_of_list; 3119 3120 /* exhausted cache_array, show unresolved */ 3121 rcu_read_unlock(); 3122 it->cache = &mrt->mfc_unres_queue; 3123 3124 spin_lock_bh(&mfc_unres_lock); 3125 if (!list_empty(it->cache)) 3126 return list_first_entry(it->cache, struct mfc_cache, list); 3127 3128 end_of_list: 3129 spin_unlock_bh(&mfc_unres_lock); 3130 it->cache = NULL; 3131 3132 return NULL; 3133 } 3134 3135 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) 3136 { 3137 struct ipmr_mfc_iter *it = seq->private; 3138 struct mr_table *mrt = it->mrt; 3139 3140 if (it->cache == &mrt->mfc_unres_queue) 3141 spin_unlock_bh(&mfc_unres_lock); 3142 else if (it->cache == &mrt->mfc_cache_list) 3143 rcu_read_unlock(); 3144 } 3145 3146 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 3147 { 3148 int n; 3149 3150 if (v == SEQ_START_TOKEN) { 3151 seq_puts(seq, 3152 "Group Origin Iif Pkts Bytes Wrong Oifs\n"); 3153 } else { 3154 const struct mfc_cache *mfc = v; 3155 const struct ipmr_mfc_iter *it = seq->private; 3156 const struct mr_table *mrt = it->mrt; 3157 3158 seq_printf(seq, "%08X %08X %-3hd", 3159 (__force u32) mfc->mfc_mcastgrp, 3160 (__force u32) mfc->mfc_origin, 3161 mfc->mfc_parent); 3162 3163 if (it->cache != &mrt->mfc_unres_queue) { 3164 seq_printf(seq, " %8lu %8lu %8lu", 3165 mfc->mfc_un.res.pkt, 3166 mfc->mfc_un.res.bytes, 3167 mfc->mfc_un.res.wrong_if); 3168 for (n = mfc->mfc_un.res.minvif; 3169 n < mfc->mfc_un.res.maxvif; n++) { 3170 if (VIF_EXISTS(mrt, n) && 3171 mfc->mfc_un.res.ttls[n] < 255) 3172 seq_printf(seq, 3173 " %2d:%-3d", 3174 n, mfc->mfc_un.res.ttls[n]); 3175 } 3176 } else { 3177 /* unresolved mfc_caches don't contain 3178 * pkt, bytes and wrong_if values 3179 */ 3180 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); 3181 } 3182 seq_putc(seq, '\n'); 3183 } 3184 return 0; 3185 } 3186 3187 static const struct seq_operations ipmr_mfc_seq_ops = { 3188 .start = ipmr_mfc_seq_start, 3189 .next = ipmr_mfc_seq_next, 3190 .stop = ipmr_mfc_seq_stop, 3191 .show = ipmr_mfc_seq_show, 3192 }; 3193 3194 static int ipmr_mfc_open(struct inode *inode, struct file *file) 3195 { 3196 return seq_open_net(inode, file, &ipmr_mfc_seq_ops, 3197 sizeof(struct ipmr_mfc_iter)); 3198 } 3199 3200 static const struct file_operations ipmr_mfc_fops = { 3201 .owner = THIS_MODULE, 3202 .open = ipmr_mfc_open, 3203 .read = seq_read, 3204 .llseek = seq_lseek, 3205 .release = seq_release_net, 3206 }; 3207 #endif 3208 3209 #ifdef CONFIG_IP_PIMSM_V2 3210 static const struct net_protocol pim_protocol = { 3211 .handler = pim_rcv, 3212 .netns_ok = 1, 3213 }; 3214 #endif 3215 3216 static unsigned int ipmr_seq_read(struct net *net) 3217 { 3218 ASSERT_RTNL(); 3219 3220 return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net); 3221 } 3222 3223 static int ipmr_dump(struct net *net, struct notifier_block *nb) 3224 { 3225 struct mr_table *mrt; 3226 int err; 3227 3228 err = ipmr_rules_dump(net, nb); 3229 if (err) 3230 return err; 3231 3232 
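	/* Replay existing state - VIF entries first, then resolved MFC
	 * entries - to the given notifier block, so a late registrant sees
	 * the current contents of every mr_table.
	 */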
ipmr_for_each_table(mrt, net) { 3233 struct vif_device *v = &mrt->vif_table[0]; 3234 struct mfc_cache *mfc; 3235 int vifi; 3236 3237 /* Notifiy on table VIF entries */ 3238 read_lock(&mrt_lock); 3239 for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) { 3240 if (!v->dev) 3241 continue; 3242 3243 call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD, 3244 v, vifi, mrt->id); 3245 } 3246 read_unlock(&mrt_lock); 3247 3248 /* Notify on table MFC entries */ 3249 list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) 3250 call_ipmr_mfc_entry_notifier(nb, net, 3251 FIB_EVENT_ENTRY_ADD, mfc, 3252 mrt->id); 3253 } 3254 3255 return 0; 3256 } 3257 3258 static const struct fib_notifier_ops ipmr_notifier_ops_template = { 3259 .family = RTNL_FAMILY_IPMR, 3260 .fib_seq_read = ipmr_seq_read, 3261 .fib_dump = ipmr_dump, 3262 .owner = THIS_MODULE, 3263 }; 3264 3265 static int __net_init ipmr_notifier_init(struct net *net) 3266 { 3267 struct fib_notifier_ops *ops; 3268 3269 net->ipv4.ipmr_seq = 0; 3270 3271 ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net); 3272 if (IS_ERR(ops)) 3273 return PTR_ERR(ops); 3274 net->ipv4.ipmr_notifier_ops = ops; 3275 3276 return 0; 3277 } 3278 3279 static void __net_exit ipmr_notifier_exit(struct net *net) 3280 { 3281 fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops); 3282 net->ipv4.ipmr_notifier_ops = NULL; 3283 } 3284 3285 /* Setup for IP multicast routing */ 3286 static int __net_init ipmr_net_init(struct net *net) 3287 { 3288 int err; 3289 3290 err = ipmr_notifier_init(net); 3291 if (err) 3292 goto ipmr_notifier_fail; 3293 3294 err = ipmr_rules_init(net); 3295 if (err < 0) 3296 goto ipmr_rules_fail; 3297 3298 #ifdef CONFIG_PROC_FS 3299 err = -ENOMEM; 3300 if (!proc_create("ip_mr_vif", 0, net->proc_net, &ipmr_vif_fops)) 3301 goto proc_vif_fail; 3302 if (!proc_create("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_fops)) 3303 goto proc_cache_fail; 3304 #endif 3305 return 0; 3306 3307 #ifdef CONFIG_PROC_FS 3308 proc_cache_fail: 3309 remove_proc_entry("ip_mr_vif", net->proc_net); 3310 proc_vif_fail: 3311 ipmr_rules_exit(net); 3312 #endif 3313 ipmr_rules_fail: 3314 ipmr_notifier_exit(net); 3315 ipmr_notifier_fail: 3316 return err; 3317 } 3318 3319 static void __net_exit ipmr_net_exit(struct net *net) 3320 { 3321 #ifdef CONFIG_PROC_FS 3322 remove_proc_entry("ip_mr_cache", net->proc_net); 3323 remove_proc_entry("ip_mr_vif", net->proc_net); 3324 #endif 3325 ipmr_notifier_exit(net); 3326 ipmr_rules_exit(net); 3327 } 3328 3329 static struct pernet_operations ipmr_net_ops = { 3330 .init = ipmr_net_init, 3331 .exit = ipmr_net_exit, 3332 }; 3333 3334 int __init ip_mr_init(void) 3335 { 3336 int err; 3337 3338 mrt_cachep = kmem_cache_create("ip_mrt_cache", 3339 sizeof(struct mfc_cache), 3340 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, 3341 NULL); 3342 3343 err = register_pernet_subsys(&ipmr_net_ops); 3344 if (err) 3345 goto reg_pernet_fail; 3346 3347 err = register_netdevice_notifier(&ip_mr_notifier); 3348 if (err) 3349 goto reg_notif_fail; 3350 #ifdef CONFIG_IP_PIMSM_V2 3351 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) { 3352 pr_err("%s: can't add PIM protocol\n", __func__); 3353 err = -EAGAIN; 3354 goto add_proto_fail; 3355 } 3356 #endif 3357 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, 3358 ipmr_rtm_getroute, ipmr_rtm_dumproute, 0); 3359 rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE, 3360 ipmr_rtm_route, NULL, 0); 3361 rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE, 3362 ipmr_rtm_route, NULL, 0); 3363 3364 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK, 3365 NULL, 
ipmr_rtm_dumplink, 0); 3366 return 0; 3367 3368 #ifdef CONFIG_IP_PIMSM_V2 3369 add_proto_fail: 3370 unregister_netdevice_notifier(&ip_mr_notifier); 3371 #endif 3372 reg_notif_fail: 3373 unregister_pernet_subsys(&ipmr_net_ops); 3374 reg_pernet_fail: 3375 kmem_cache_destroy(mrt_cachep); 3376 return err; 3377 } 3378