/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *				:	overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/compat.h>
#include <linux/export.h>
#include <net/ip_tunnels.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
#include <linux/netconf.h>
#include <net/nexthop.h>
#include <net/switchdev.h>

struct ipmr_rule {
	struct fib_rule		common;
};

struct ipmr_result {
	struct mr_table		*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
 * Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
 * entries is changed only in process context and protected
 * with weak lock mrt_lock. Queue of unresolved entries is protected
 * with strong spinlock mfc_unres_lock.
 *
 * In this case data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static void ipmr_free_table(struct mr_table *mrt);

static void ip_mr_forward(struct net *net, struct mr_table *mrt,
			  struct net_device *dev, struct sk_buff *skb,
			  struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      struct mfc_cache *c, struct rtmsg *rtm);
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
				 int cmd);
static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(unsigned long arg);

#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
#define ipmr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ipmr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
			   struct mr_table **mrt)
{
	int err;
	struct ipmr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	/* update flow if oif or iif point to device enslaved to l3mdev */
	l3mdev_update_flow(net, flowi4_to_flowi(flp4));

	err = fib_rules_lookup(net->ipv4.mr_rules_ops,
			       flowi4_to_flowi(flp4), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
			    int flags, struct fib_lookup_arg *arg)
{
	struct ipmr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	arg->table = fib_rule_get_table(rule, arg);

	mrt = ipmr_get_table(rule->fr_net, arg->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}

static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}

static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			  struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}

static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = {
	.family		= RTNL_FAMILY_IPMR,
	.rule_size	= sizeof(struct ipmr_rule),
	.addr_size	= sizeof(u32),
	.action		= ipmr_rule_action,
	.match		= ipmr_rule_match,
	.configure	= ipmr_rule_configure,
	.compare	= ipmr_rule_compare,
	.fill		= ipmr_rule_fill,
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.policy		= ipmr_rule_policy,
	.owner		= THIS_MODULE,
};
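
/* Usage note (illustrative sketch, not part of this file): with
 * CONFIG_IP_MROUTE_MULTIPLE_TABLES, the fib rules registered above
 * (RTNL_FAMILY_IPMR) decide which mr_table a flow is looked up in, and a
 * multicast routing daemon selects the table it wants to own *before*
 * MRT_INIT. A minimal userspace sequence might look like the following;
 * the table id and the lack of error handling are assumptions made only
 * for the example:
 *
 *	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	uint32_t table = 100;	// hypothetical, non-default table id
 *	int one = 1;
 *
 *	setsockopt(fd, IPPROTO_IP, MRT_TABLE, &table, sizeof(table));
 *	setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *	// MRT_ADD_VIF / MRT_ADD_MFC follow; see ip_mroute_setsockopt()
 *
 * Rules steering traffic into that table are managed over rtnetlink
 * (for example via iproute2's "ip mrule", where available).
 */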
223 224 static int __net_init ipmr_rules_init(struct net *net) 225 { 226 struct fib_rules_ops *ops; 227 struct mr_table *mrt; 228 int err; 229 230 ops = fib_rules_register(&ipmr_rules_ops_template, net); 231 if (IS_ERR(ops)) 232 return PTR_ERR(ops); 233 234 INIT_LIST_HEAD(&net->ipv4.mr_tables); 235 236 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 237 if (IS_ERR(mrt)) { 238 err = PTR_ERR(mrt); 239 goto err1; 240 } 241 242 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0); 243 if (err < 0) 244 goto err2; 245 246 net->ipv4.mr_rules_ops = ops; 247 return 0; 248 249 err2: 250 ipmr_free_table(mrt); 251 err1: 252 fib_rules_unregister(ops); 253 return err; 254 } 255 256 static void __net_exit ipmr_rules_exit(struct net *net) 257 { 258 struct mr_table *mrt, *next; 259 260 rtnl_lock(); 261 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { 262 list_del(&mrt->list); 263 ipmr_free_table(mrt); 264 } 265 fib_rules_unregister(net->ipv4.mr_rules_ops); 266 rtnl_unlock(); 267 } 268 269 static int ipmr_rules_dump(struct net *net, struct notifier_block *nb) 270 { 271 return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR); 272 } 273 274 static unsigned int ipmr_rules_seq_read(struct net *net) 275 { 276 return fib_rules_seq_read(net, RTNL_FAMILY_IPMR); 277 } 278 279 bool ipmr_rule_default(const struct fib_rule *rule) 280 { 281 return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT; 282 } 283 EXPORT_SYMBOL(ipmr_rule_default); 284 #else 285 #define ipmr_for_each_table(mrt, net) \ 286 for (mrt = net->ipv4.mrt; mrt; mrt = NULL) 287 288 static struct mr_table *ipmr_get_table(struct net *net, u32 id) 289 { 290 return net->ipv4.mrt; 291 } 292 293 static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, 294 struct mr_table **mrt) 295 { 296 *mrt = net->ipv4.mrt; 297 return 0; 298 } 299 300 static int __net_init ipmr_rules_init(struct net *net) 301 { 302 struct mr_table *mrt; 303 304 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 305 if (IS_ERR(mrt)) 306 return PTR_ERR(mrt); 307 net->ipv4.mrt = mrt; 308 return 0; 309 } 310 311 static void __net_exit ipmr_rules_exit(struct net *net) 312 { 313 rtnl_lock(); 314 ipmr_free_table(net->ipv4.mrt); 315 net->ipv4.mrt = NULL; 316 rtnl_unlock(); 317 } 318 319 static int ipmr_rules_dump(struct net *net, struct notifier_block *nb) 320 { 321 return 0; 322 } 323 324 static unsigned int ipmr_rules_seq_read(struct net *net) 325 { 326 return 0; 327 } 328 329 bool ipmr_rule_default(const struct fib_rule *rule) 330 { 331 return true; 332 } 333 EXPORT_SYMBOL(ipmr_rule_default); 334 #endif 335 336 static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, 337 const void *ptr) 338 { 339 const struct mfc_cache_cmp_arg *cmparg = arg->key; 340 struct mfc_cache *c = (struct mfc_cache *)ptr; 341 342 return cmparg->mfc_mcastgrp != c->mfc_mcastgrp || 343 cmparg->mfc_origin != c->mfc_origin; 344 } 345 346 static const struct rhashtable_params ipmr_rht_params = { 347 .head_offset = offsetof(struct mfc_cache, mnode), 348 .key_offset = offsetof(struct mfc_cache, cmparg), 349 .key_len = sizeof(struct mfc_cache_cmp_arg), 350 .nelem_hint = 3, 351 .locks_mul = 1, 352 .obj_cmpfn = ipmr_hash_cmp, 353 .automatic_shrinking = true, 354 }; 355 356 static struct mr_table *ipmr_new_table(struct net *net, u32 id) 357 { 358 struct mr_table *mrt; 359 360 /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ 361 if (id != RT_TABLE_DEFAULT && id >= 1000000000) 362 return ERR_PTR(-EINVAL); 363 364 mrt = ipmr_get_table(net, id); 365 if (mrt) 366 return mrt; 367 368 mrt = 
kzalloc(sizeof(*mrt), GFP_KERNEL); 369 if (!mrt) 370 return ERR_PTR(-ENOMEM); 371 write_pnet(&mrt->net, net); 372 mrt->id = id; 373 374 rhltable_init(&mrt->mfc_hash, &ipmr_rht_params); 375 INIT_LIST_HEAD(&mrt->mfc_cache_list); 376 INIT_LIST_HEAD(&mrt->mfc_unres_queue); 377 378 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, 379 (unsigned long)mrt); 380 381 mrt->mroute_reg_vif_num = -1; 382 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 383 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); 384 #endif 385 return mrt; 386 } 387 388 static void ipmr_free_table(struct mr_table *mrt) 389 { 390 del_timer_sync(&mrt->ipmr_expire_timer); 391 mroute_clean_tables(mrt, true); 392 rhltable_destroy(&mrt->mfc_hash); 393 kfree(mrt); 394 } 395 396 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ 397 398 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) 399 { 400 struct net *net = dev_net(dev); 401 402 dev_close(dev); 403 404 dev = __dev_get_by_name(net, "tunl0"); 405 if (dev) { 406 const struct net_device_ops *ops = dev->netdev_ops; 407 struct ifreq ifr; 408 struct ip_tunnel_parm p; 409 410 memset(&p, 0, sizeof(p)); 411 p.iph.daddr = v->vifc_rmt_addr.s_addr; 412 p.iph.saddr = v->vifc_lcl_addr.s_addr; 413 p.iph.version = 4; 414 p.iph.ihl = 5; 415 p.iph.protocol = IPPROTO_IPIP; 416 sprintf(p.name, "dvmrp%d", v->vifc_vifi); 417 ifr.ifr_ifru.ifru_data = (__force void __user *)&p; 418 419 if (ops->ndo_do_ioctl) { 420 mm_segment_t oldfs = get_fs(); 421 422 set_fs(KERNEL_DS); 423 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL); 424 set_fs(oldfs); 425 } 426 } 427 } 428 429 /* Initialize ipmr pimreg/tunnel in_device */ 430 static bool ipmr_init_vif_indev(const struct net_device *dev) 431 { 432 struct in_device *in_dev; 433 434 ASSERT_RTNL(); 435 436 in_dev = __in_dev_get_rtnl(dev); 437 if (!in_dev) 438 return false; 439 ipv4_devconf_setall(in_dev); 440 neigh_parms_data_state_setall(in_dev->arp_parms); 441 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; 442 443 return true; 444 } 445 446 static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) 447 { 448 struct net_device *dev; 449 450 dev = __dev_get_by_name(net, "tunl0"); 451 452 if (dev) { 453 const struct net_device_ops *ops = dev->netdev_ops; 454 int err; 455 struct ifreq ifr; 456 struct ip_tunnel_parm p; 457 458 memset(&p, 0, sizeof(p)); 459 p.iph.daddr = v->vifc_rmt_addr.s_addr; 460 p.iph.saddr = v->vifc_lcl_addr.s_addr; 461 p.iph.version = 4; 462 p.iph.ihl = 5; 463 p.iph.protocol = IPPROTO_IPIP; 464 sprintf(p.name, "dvmrp%d", v->vifc_vifi); 465 ifr.ifr_ifru.ifru_data = (__force void __user *)&p; 466 467 if (ops->ndo_do_ioctl) { 468 mm_segment_t oldfs = get_fs(); 469 470 set_fs(KERNEL_DS); 471 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); 472 set_fs(oldfs); 473 } else { 474 err = -EOPNOTSUPP; 475 } 476 dev = NULL; 477 478 if (err == 0 && 479 (dev = __dev_get_by_name(net, p.name)) != NULL) { 480 dev->flags |= IFF_MULTICAST; 481 if (!ipmr_init_vif_indev(dev)) 482 goto failure; 483 if (dev_open(dev)) 484 goto failure; 485 dev_hold(dev); 486 } 487 } 488 return dev; 489 490 failure: 491 unregister_netdevice(dev); 492 return NULL; 493 } 494 495 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) 496 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) 497 { 498 struct net *net = dev_net(dev); 499 struct mr_table *mrt; 500 struct flowi4 fl4 = { 501 .flowi4_oif = dev->ifindex, 502 .flowi4_iif = skb->skb_iif ? 
					   : LOOPBACK_IFINDEX,
		.flowi4_mark	= skb->mark,
	};
	int err;

	err = ipmr_fib_lookup(net, &fl4, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->needs_free_netdev	= true;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT_TABLE_DEFAULT)
		sprintf(name, "pimreg");
	else
		sprintf(name, "pimreg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);

	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (!ipmr_init_vif_indev(dev))
		goto failure;
	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}

/* called with rcu_read_lock() */
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
		     unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/* Check that:
	 * a. packet is really sent to a multicast group
	 * b. packet is not a NULL-REGISTER
	 * c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (mrt->mroute_reg_vif_num >= 0)
		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
	read_unlock(&mrt_lock);

	if (!reg_dev)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	return NET_RX_SUCCESS;
}
#else
static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	return NULL;
}
#endif

static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
					struct net *net,
					enum fib_event_type event_type,
					struct vif_device *vif,
					vifi_t vif_index, u32 tb_id)
{
	struct vif_entry_notifier_info info = {
		.info = {
			.family = RTNL_FAMILY_IPMR,
			.net = net,
		},
		.dev = vif->dev,
		.vif_index = vif_index,
		.vif_flags = vif->flags,
		.tb_id = tb_id,
	};

	return call_fib_notifier(nb, net, event_type, &info.info);
}

static int call_ipmr_vif_entry_notifiers(struct net *net,
					 enum fib_event_type event_type,
					 struct vif_device *vif,
					 vifi_t vif_index, u32 tb_id)
{
	struct vif_entry_notifier_info info = {
		.info = {
			.family = RTNL_FAMILY_IPMR,
			.net = net,
		},
		.dev = vif->dev,
		.vif_index = vif_index,
		.vif_flags = vif->flags,
		.tb_id = tb_id,
	};

	ASSERT_RTNL();
	net->ipv4.ipmr_seq++;
	return call_fib_notifiers(net, event_type, &info.info);
}

static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
					struct net *net,
					enum fib_event_type event_type,
					struct mfc_cache *mfc, u32 tb_id)
{
	struct mfc_entry_notifier_info info = {
		.info = {
			.family = RTNL_FAMILY_IPMR,
			.net = net,
		},
		.mfc = mfc,
		.tb_id = tb_id
	};

	return call_fib_notifier(nb, net, event_type, &info.info);
}

static int call_ipmr_mfc_entry_notifiers(struct net *net,
					 enum fib_event_type event_type,
					 struct mfc_cache *mfc, u32 tb_id)
{
	struct mfc_entry_notifier_info info = {
		.info = {
			.family = RTNL_FAMILY_IPMR,
			.net = net,
		},
		.mfc = mfc,
		.tb_id = tb_id
	};

	ASSERT_RTNL();
	net->ipv4.ipmr_seq++;
	return call_fib_notifiers(net, event_type, &info.info);
}

/**
 *	vif_delete - Delete a VIF entry
 *	@notify: Set to 1, if the caller is a notifier_call
 */
static int vif_delete(struct mr_table *mrt, int vifi, int notify,
		      struct list_head *head)
{
	struct net *net = read_pnet(&mrt->net);
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	if (VIF_EXISTS(mrt, vifi))
		call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi,
					      mrt->id);

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;

	if (vifi + 1 == mrt->maxvif) {
		int tmp;

		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
break; 738 } 739 mrt->maxvif = tmp+1; 740 } 741 742 write_unlock_bh(&mrt_lock); 743 744 dev_set_allmulti(dev, -1); 745 746 in_dev = __in_dev_get_rtnl(dev); 747 if (in_dev) { 748 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; 749 inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 750 NETCONFA_MC_FORWARDING, 751 dev->ifindex, &in_dev->cnf); 752 ip_rt_multicast_event(in_dev); 753 } 754 755 if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify) 756 unregister_netdevice_queue(dev, head); 757 758 dev_put(dev); 759 return 0; 760 } 761 762 static void ipmr_cache_free_rcu(struct rcu_head *head) 763 { 764 struct mfc_cache *c = container_of(head, struct mfc_cache, rcu); 765 766 kmem_cache_free(mrt_cachep, c); 767 } 768 769 void ipmr_cache_free(struct mfc_cache *c) 770 { 771 call_rcu(&c->rcu, ipmr_cache_free_rcu); 772 } 773 EXPORT_SYMBOL(ipmr_cache_free); 774 775 /* Destroy an unresolved cache entry, killing queued skbs 776 * and reporting error to netlink readers. 777 */ 778 static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) 779 { 780 struct net *net = read_pnet(&mrt->net); 781 struct sk_buff *skb; 782 struct nlmsgerr *e; 783 784 atomic_dec(&mrt->cache_resolve_queue_len); 785 786 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { 787 if (ip_hdr(skb)->version == 0) { 788 struct nlmsghdr *nlh = skb_pull(skb, 789 sizeof(struct iphdr)); 790 nlh->nlmsg_type = NLMSG_ERROR; 791 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 792 skb_trim(skb, nlh->nlmsg_len); 793 e = nlmsg_data(nlh); 794 e->error = -ETIMEDOUT; 795 memset(&e->msg, 0, sizeof(e->msg)); 796 797 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 798 } else { 799 kfree_skb(skb); 800 } 801 } 802 803 ipmr_cache_free(c); 804 } 805 806 /* Timer process for the unresolved queue. */ 807 static void ipmr_expire_process(unsigned long arg) 808 { 809 struct mr_table *mrt = (struct mr_table *)arg; 810 unsigned long now; 811 unsigned long expires; 812 struct mfc_cache *c, *next; 813 814 if (!spin_trylock(&mfc_unres_lock)) { 815 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10); 816 return; 817 } 818 819 if (list_empty(&mrt->mfc_unres_queue)) 820 goto out; 821 822 now = jiffies; 823 expires = 10*HZ; 824 825 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { 826 if (time_after(c->mfc_un.unres.expires, now)) { 827 unsigned long interval = c->mfc_un.unres.expires - now; 828 if (interval < expires) 829 expires = interval; 830 continue; 831 } 832 833 list_del(&c->list); 834 mroute_netlink_event(mrt, c, RTM_DELROUTE); 835 ipmr_destroy_unres(mrt, c); 836 } 837 838 if (!list_empty(&mrt->mfc_unres_queue)) 839 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); 840 841 out: 842 spin_unlock(&mfc_unres_lock); 843 } 844 845 /* Fill oifs list. It is called under write locked mrt_lock. 
*/ 846 static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache, 847 unsigned char *ttls) 848 { 849 int vifi; 850 851 cache->mfc_un.res.minvif = MAXVIFS; 852 cache->mfc_un.res.maxvif = 0; 853 memset(cache->mfc_un.res.ttls, 255, MAXVIFS); 854 855 for (vifi = 0; vifi < mrt->maxvif; vifi++) { 856 if (VIF_EXISTS(mrt, vifi) && 857 ttls[vifi] && ttls[vifi] < 255) { 858 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 859 if (cache->mfc_un.res.minvif > vifi) 860 cache->mfc_un.res.minvif = vifi; 861 if (cache->mfc_un.res.maxvif <= vifi) 862 cache->mfc_un.res.maxvif = vifi + 1; 863 } 864 } 865 cache->mfc_un.res.lastuse = jiffies; 866 } 867 868 static int vif_add(struct net *net, struct mr_table *mrt, 869 struct vifctl *vifc, int mrtsock) 870 { 871 int vifi = vifc->vifc_vifi; 872 struct switchdev_attr attr = { 873 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, 874 }; 875 struct vif_device *v = &mrt->vif_table[vifi]; 876 struct net_device *dev; 877 struct in_device *in_dev; 878 int err; 879 880 /* Is vif busy ? */ 881 if (VIF_EXISTS(mrt, vifi)) 882 return -EADDRINUSE; 883 884 switch (vifc->vifc_flags) { 885 case VIFF_REGISTER: 886 if (!ipmr_pimsm_enabled()) 887 return -EINVAL; 888 /* Special Purpose VIF in PIM 889 * All the packets will be sent to the daemon 890 */ 891 if (mrt->mroute_reg_vif_num >= 0) 892 return -EADDRINUSE; 893 dev = ipmr_reg_vif(net, mrt); 894 if (!dev) 895 return -ENOBUFS; 896 err = dev_set_allmulti(dev, 1); 897 if (err) { 898 unregister_netdevice(dev); 899 dev_put(dev); 900 return err; 901 } 902 break; 903 case VIFF_TUNNEL: 904 dev = ipmr_new_tunnel(net, vifc); 905 if (!dev) 906 return -ENOBUFS; 907 err = dev_set_allmulti(dev, 1); 908 if (err) { 909 ipmr_del_tunnel(dev, vifc); 910 dev_put(dev); 911 return err; 912 } 913 break; 914 case VIFF_USE_IFINDEX: 915 case 0: 916 if (vifc->vifc_flags == VIFF_USE_IFINDEX) { 917 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex); 918 if (dev && !__in_dev_get_rtnl(dev)) { 919 dev_put(dev); 920 return -EADDRNOTAVAIL; 921 } 922 } else { 923 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); 924 } 925 if (!dev) 926 return -EADDRNOTAVAIL; 927 err = dev_set_allmulti(dev, 1); 928 if (err) { 929 dev_put(dev); 930 return err; 931 } 932 break; 933 default: 934 return -EINVAL; 935 } 936 937 in_dev = __in_dev_get_rtnl(dev); 938 if (!in_dev) { 939 dev_put(dev); 940 return -EADDRNOTAVAIL; 941 } 942 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; 943 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, 944 dev->ifindex, &in_dev->cnf); 945 ip_rt_multicast_event(in_dev); 946 947 /* Fill in the VIF structures */ 948 949 attr.orig_dev = dev; 950 if (!switchdev_port_attr_get(dev, &attr)) { 951 memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len); 952 v->dev_parent_id.id_len = attr.u.ppid.id_len; 953 } else { 954 v->dev_parent_id.id_len = 0; 955 } 956 v->rate_limit = vifc->vifc_rate_limit; 957 v->local = vifc->vifc_lcl_addr.s_addr; 958 v->remote = vifc->vifc_rmt_addr.s_addr; 959 v->flags = vifc->vifc_flags; 960 if (!mrtsock) 961 v->flags |= VIFF_STATIC; 962 v->threshold = vifc->vifc_threshold; 963 v->bytes_in = 0; 964 v->bytes_out = 0; 965 v->pkt_in = 0; 966 v->pkt_out = 0; 967 v->link = dev->ifindex; 968 if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER)) 969 v->link = dev_get_iflink(dev); 970 971 /* And finish update writing critical data */ 972 write_lock_bh(&mrt_lock); 973 v->dev = dev; 974 if (v->flags & VIFF_REGISTER) 975 mrt->mroute_reg_vif_num = vifi; 976 if (vifi+1 > mrt->maxvif) 977 mrt->maxvif = vifi+1; 978 
write_unlock_bh(&mrt_lock); 979 call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id); 980 return 0; 981 } 982 983 /* called with rcu_read_lock() */ 984 static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, 985 __be32 origin, 986 __be32 mcastgrp) 987 { 988 struct mfc_cache_cmp_arg arg = { 989 .mfc_mcastgrp = mcastgrp, 990 .mfc_origin = origin 991 }; 992 struct rhlist_head *tmp, *list; 993 struct mfc_cache *c; 994 995 list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params); 996 rhl_for_each_entry_rcu(c, tmp, list, mnode) 997 return c; 998 999 return NULL; 1000 } 1001 1002 /* Look for a (*,*,oif) entry */ 1003 static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt, 1004 int vifi) 1005 { 1006 struct mfc_cache_cmp_arg arg = { 1007 .mfc_mcastgrp = htonl(INADDR_ANY), 1008 .mfc_origin = htonl(INADDR_ANY) 1009 }; 1010 struct rhlist_head *tmp, *list; 1011 struct mfc_cache *c; 1012 1013 list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params); 1014 rhl_for_each_entry_rcu(c, tmp, list, mnode) 1015 if (c->mfc_un.res.ttls[vifi] < 255) 1016 return c; 1017 1018 return NULL; 1019 } 1020 1021 /* Look for a (*,G) entry */ 1022 static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt, 1023 __be32 mcastgrp, int vifi) 1024 { 1025 struct mfc_cache_cmp_arg arg = { 1026 .mfc_mcastgrp = mcastgrp, 1027 .mfc_origin = htonl(INADDR_ANY) 1028 }; 1029 struct rhlist_head *tmp, *list; 1030 struct mfc_cache *c, *proxy; 1031 1032 if (mcastgrp == htonl(INADDR_ANY)) 1033 goto skip; 1034 1035 list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params); 1036 rhl_for_each_entry_rcu(c, tmp, list, mnode) { 1037 if (c->mfc_un.res.ttls[vifi] < 255) 1038 return c; 1039 1040 /* It's ok if the vifi is part of the static tree */ 1041 proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent); 1042 if (proxy && proxy->mfc_un.res.ttls[vifi] < 255) 1043 return c; 1044 } 1045 1046 skip: 1047 return ipmr_cache_find_any_parent(mrt, vifi); 1048 } 1049 1050 /* Look for a (S,G,iif) entry if parent != -1 */ 1051 static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt, 1052 __be32 origin, __be32 mcastgrp, 1053 int parent) 1054 { 1055 struct mfc_cache_cmp_arg arg = { 1056 .mfc_mcastgrp = mcastgrp, 1057 .mfc_origin = origin, 1058 }; 1059 struct rhlist_head *tmp, *list; 1060 struct mfc_cache *c; 1061 1062 list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params); 1063 rhl_for_each_entry_rcu(c, tmp, list, mnode) 1064 if (parent == -1 || parent == c->mfc_parent) 1065 return c; 1066 1067 return NULL; 1068 } 1069 1070 /* Allocate a multicast cache entry */ 1071 static struct mfc_cache *ipmr_cache_alloc(void) 1072 { 1073 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 1074 1075 if (c) { 1076 c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; 1077 c->mfc_un.res.minvif = MAXVIFS; 1078 refcount_set(&c->mfc_un.res.refcount, 1); 1079 } 1080 return c; 1081 } 1082 1083 static struct mfc_cache *ipmr_cache_alloc_unres(void) 1084 { 1085 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 1086 1087 if (c) { 1088 skb_queue_head_init(&c->mfc_un.unres.unresolved); 1089 c->mfc_un.unres.expires = jiffies + 10*HZ; 1090 } 1091 return c; 1092 } 1093 1094 /* A cache entry has gone into a resolved state from queued */ 1095 static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, 1096 struct mfc_cache *uc, struct mfc_cache *c) 1097 { 1098 struct sk_buff *skb; 1099 struct nlmsgerr *e; 1100 1101 /* Play the pending entries through our router */ 
1102 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { 1103 if (ip_hdr(skb)->version == 0) { 1104 struct nlmsghdr *nlh = skb_pull(skb, 1105 sizeof(struct iphdr)); 1106 1107 if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) { 1108 nlh->nlmsg_len = skb_tail_pointer(skb) - 1109 (u8 *)nlh; 1110 } else { 1111 nlh->nlmsg_type = NLMSG_ERROR; 1112 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); 1113 skb_trim(skb, nlh->nlmsg_len); 1114 e = nlmsg_data(nlh); 1115 e->error = -EMSGSIZE; 1116 memset(&e->msg, 0, sizeof(e->msg)); 1117 } 1118 1119 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 1120 } else { 1121 ip_mr_forward(net, mrt, skb->dev, skb, c, 0); 1122 } 1123 } 1124 } 1125 1126 /* Bounce a cache query up to mrouted and netlink. 1127 * 1128 * Called under mrt_lock. 1129 */ 1130 static int ipmr_cache_report(struct mr_table *mrt, 1131 struct sk_buff *pkt, vifi_t vifi, int assert) 1132 { 1133 const int ihl = ip_hdrlen(pkt); 1134 struct sock *mroute_sk; 1135 struct igmphdr *igmp; 1136 struct igmpmsg *msg; 1137 struct sk_buff *skb; 1138 int ret; 1139 1140 if (assert == IGMPMSG_WHOLEPKT) 1141 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); 1142 else 1143 skb = alloc_skb(128, GFP_ATOMIC); 1144 1145 if (!skb) 1146 return -ENOBUFS; 1147 1148 if (assert == IGMPMSG_WHOLEPKT) { 1149 /* Ugly, but we have no choice with this interface. 1150 * Duplicate old header, fix ihl, length etc. 1151 * And all this only to mangle msg->im_msgtype and 1152 * to set msg->im_mbz to "mbz" :-) 1153 */ 1154 skb_push(skb, sizeof(struct iphdr)); 1155 skb_reset_network_header(skb); 1156 skb_reset_transport_header(skb); 1157 msg = (struct igmpmsg *)skb_network_header(skb); 1158 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); 1159 msg->im_msgtype = IGMPMSG_WHOLEPKT; 1160 msg->im_mbz = 0; 1161 msg->im_vif = mrt->mroute_reg_vif_num; 1162 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; 1163 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + 1164 sizeof(struct iphdr)); 1165 } else { 1166 /* Copy the IP header */ 1167 skb_set_network_header(skb, skb->len); 1168 skb_put(skb, ihl); 1169 skb_copy_to_linear_data(skb, pkt->data, ihl); 1170 /* Flag to the kernel this is a route add */ 1171 ip_hdr(skb)->protocol = 0; 1172 msg = (struct igmpmsg *)skb_network_header(skb); 1173 msg->im_vif = vifi; 1174 skb_dst_set(skb, dst_clone(skb_dst(pkt))); 1175 /* Add our header */ 1176 igmp = skb_put(skb, sizeof(struct igmphdr)); 1177 igmp->type = assert; 1178 msg->im_msgtype = assert; 1179 igmp->code = 0; 1180 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ 1181 skb->transport_header = skb->network_header; 1182 } 1183 1184 rcu_read_lock(); 1185 mroute_sk = rcu_dereference(mrt->mroute_sk); 1186 if (!mroute_sk) { 1187 rcu_read_unlock(); 1188 kfree_skb(skb); 1189 return -EINVAL; 1190 } 1191 1192 igmpmsg_netlink_event(mrt, skb); 1193 1194 /* Deliver to mrouted */ 1195 ret = sock_queue_rcv_skb(mroute_sk, skb); 1196 rcu_read_unlock(); 1197 if (ret < 0) { 1198 net_warn_ratelimited("mroute: pending queue full, dropping entries\n"); 1199 kfree_skb(skb); 1200 } 1201 1202 return ret; 1203 } 1204 1205 /* Queue a packet for resolution. It gets locked cache entry! 
*/ 1206 static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, 1207 struct sk_buff *skb, struct net_device *dev) 1208 { 1209 const struct iphdr *iph = ip_hdr(skb); 1210 struct mfc_cache *c; 1211 bool found = false; 1212 int err; 1213 1214 spin_lock_bh(&mfc_unres_lock); 1215 list_for_each_entry(c, &mrt->mfc_unres_queue, list) { 1216 if (c->mfc_mcastgrp == iph->daddr && 1217 c->mfc_origin == iph->saddr) { 1218 found = true; 1219 break; 1220 } 1221 } 1222 1223 if (!found) { 1224 /* Create a new entry if allowable */ 1225 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || 1226 (c = ipmr_cache_alloc_unres()) == NULL) { 1227 spin_unlock_bh(&mfc_unres_lock); 1228 1229 kfree_skb(skb); 1230 return -ENOBUFS; 1231 } 1232 1233 /* Fill in the new cache entry */ 1234 c->mfc_parent = -1; 1235 c->mfc_origin = iph->saddr; 1236 c->mfc_mcastgrp = iph->daddr; 1237 1238 /* Reflect first query at mrouted. */ 1239 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); 1240 if (err < 0) { 1241 /* If the report failed throw the cache entry 1242 out - Brad Parker 1243 */ 1244 spin_unlock_bh(&mfc_unres_lock); 1245 1246 ipmr_cache_free(c); 1247 kfree_skb(skb); 1248 return err; 1249 } 1250 1251 atomic_inc(&mrt->cache_resolve_queue_len); 1252 list_add(&c->list, &mrt->mfc_unres_queue); 1253 mroute_netlink_event(mrt, c, RTM_NEWROUTE); 1254 1255 if (atomic_read(&mrt->cache_resolve_queue_len) == 1) 1256 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires); 1257 } 1258 1259 /* See if we can append the packet */ 1260 if (c->mfc_un.unres.unresolved.qlen > 3) { 1261 kfree_skb(skb); 1262 err = -ENOBUFS; 1263 } else { 1264 if (dev) { 1265 skb->dev = dev; 1266 skb->skb_iif = dev->ifindex; 1267 } 1268 skb_queue_tail(&c->mfc_un.unres.unresolved, skb); 1269 err = 0; 1270 } 1271 1272 spin_unlock_bh(&mfc_unres_lock); 1273 return err; 1274 } 1275 1276 /* MFC cache manipulation by user space mroute daemon */ 1277 1278 static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) 1279 { 1280 struct net *net = read_pnet(&mrt->net); 1281 struct mfc_cache *c; 1282 1283 /* The entries are added/deleted only under RTNL */ 1284 rcu_read_lock(); 1285 c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, 1286 mfc->mfcc_mcastgrp.s_addr, parent); 1287 rcu_read_unlock(); 1288 if (!c) 1289 return -ENOENT; 1290 rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); 1291 list_del_rcu(&c->list); 1292 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id); 1293 mroute_netlink_event(mrt, c, RTM_DELROUTE); 1294 ipmr_cache_put(c); 1295 1296 return 0; 1297 } 1298 1299 static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, 1300 struct mfcctl *mfc, int mrtsock, int parent) 1301 { 1302 struct mfc_cache *uc, *c; 1303 bool found; 1304 int ret; 1305 1306 if (mfc->mfcc_parent >= MAXVIFS) 1307 return -ENFILE; 1308 1309 /* The entries are added/deleted only under RTNL */ 1310 rcu_read_lock(); 1311 c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, 1312 mfc->mfcc_mcastgrp.s_addr, parent); 1313 rcu_read_unlock(); 1314 if (c) { 1315 write_lock_bh(&mrt_lock); 1316 c->mfc_parent = mfc->mfcc_parent; 1317 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); 1318 if (!mrtsock) 1319 c->mfc_flags |= MFC_STATIC; 1320 write_unlock_bh(&mrt_lock); 1321 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c, 1322 mrt->id); 1323 mroute_netlink_event(mrt, c, RTM_NEWROUTE); 1324 return 0; 1325 } 1326 1327 if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) && 1328 
!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) 1329 return -EINVAL; 1330 1331 c = ipmr_cache_alloc(); 1332 if (!c) 1333 return -ENOMEM; 1334 1335 c->mfc_origin = mfc->mfcc_origin.s_addr; 1336 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; 1337 c->mfc_parent = mfc->mfcc_parent; 1338 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); 1339 if (!mrtsock) 1340 c->mfc_flags |= MFC_STATIC; 1341 1342 ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode, 1343 ipmr_rht_params); 1344 if (ret) { 1345 pr_err("ipmr: rhtable insert error %d\n", ret); 1346 ipmr_cache_free(c); 1347 return ret; 1348 } 1349 list_add_tail_rcu(&c->list, &mrt->mfc_cache_list); 1350 /* Check to see if we resolved a queued list. If so we 1351 * need to send on the frames and tidy up. 1352 */ 1353 found = false; 1354 spin_lock_bh(&mfc_unres_lock); 1355 list_for_each_entry(uc, &mrt->mfc_unres_queue, list) { 1356 if (uc->mfc_origin == c->mfc_origin && 1357 uc->mfc_mcastgrp == c->mfc_mcastgrp) { 1358 list_del(&uc->list); 1359 atomic_dec(&mrt->cache_resolve_queue_len); 1360 found = true; 1361 break; 1362 } 1363 } 1364 if (list_empty(&mrt->mfc_unres_queue)) 1365 del_timer(&mrt->ipmr_expire_timer); 1366 spin_unlock_bh(&mfc_unres_lock); 1367 1368 if (found) { 1369 ipmr_cache_resolve(net, mrt, uc, c); 1370 ipmr_cache_free(uc); 1371 } 1372 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id); 1373 mroute_netlink_event(mrt, c, RTM_NEWROUTE); 1374 return 0; 1375 } 1376 1377 /* Close the multicast socket, and clear the vif tables etc */ 1378 static void mroute_clean_tables(struct mr_table *mrt, bool all) 1379 { 1380 struct net *net = read_pnet(&mrt->net); 1381 struct mfc_cache *c, *tmp; 1382 LIST_HEAD(list); 1383 int i; 1384 1385 /* Shut down all active vif entries */ 1386 for (i = 0; i < mrt->maxvif; i++) { 1387 if (!all && (mrt->vif_table[i].flags & VIFF_STATIC)) 1388 continue; 1389 vif_delete(mrt, i, 0, &list); 1390 } 1391 unregister_netdevice_many(&list); 1392 1393 /* Wipe the cache */ 1394 list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { 1395 if (!all && (c->mfc_flags & MFC_STATIC)) 1396 continue; 1397 rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); 1398 list_del_rcu(&c->list); 1399 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, 1400 mrt->id); 1401 mroute_netlink_event(mrt, c, RTM_DELROUTE); 1402 ipmr_cache_put(c); 1403 } 1404 1405 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { 1406 spin_lock_bh(&mfc_unres_lock); 1407 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { 1408 list_del(&c->list); 1409 mroute_netlink_event(mrt, c, RTM_DELROUTE); 1410 ipmr_destroy_unres(mrt, c); 1411 } 1412 spin_unlock_bh(&mfc_unres_lock); 1413 } 1414 } 1415 1416 /* called from ip_ra_control(), before an RCU grace period, 1417 * we dont need to call synchronize_rcu() here 1418 */ 1419 static void mrtsock_destruct(struct sock *sk) 1420 { 1421 struct net *net = sock_net(sk); 1422 struct mr_table *mrt; 1423 1424 ASSERT_RTNL(); 1425 ipmr_for_each_table(mrt, net) { 1426 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1427 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; 1428 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 1429 NETCONFA_MC_FORWARDING, 1430 NETCONFA_IFINDEX_ALL, 1431 net->ipv4.devconf_all); 1432 RCU_INIT_POINTER(mrt->mroute_sk, NULL); 1433 mroute_clean_tables(mrt, false); 1434 } 1435 } 1436 } 1437 1438 /* Socket options and virtual interface manipulation. The whole 1439 * virtual interface system is a complete heap, but unfortunately 1440 * that's how BSD mrouted happens to think. 
Maybe one day with a proper 1441 * MOSPF/PIM router set up we can clean this up. 1442 */ 1443 1444 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, 1445 unsigned int optlen) 1446 { 1447 struct net *net = sock_net(sk); 1448 int val, ret = 0, parent = 0; 1449 struct mr_table *mrt; 1450 struct vifctl vif; 1451 struct mfcctl mfc; 1452 u32 uval; 1453 1454 /* There's one exception to the lock - MRT_DONE which needs to unlock */ 1455 rtnl_lock(); 1456 if (sk->sk_type != SOCK_RAW || 1457 inet_sk(sk)->inet_num != IPPROTO_IGMP) { 1458 ret = -EOPNOTSUPP; 1459 goto out_unlock; 1460 } 1461 1462 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1463 if (!mrt) { 1464 ret = -ENOENT; 1465 goto out_unlock; 1466 } 1467 if (optname != MRT_INIT) { 1468 if (sk != rcu_access_pointer(mrt->mroute_sk) && 1469 !ns_capable(net->user_ns, CAP_NET_ADMIN)) { 1470 ret = -EACCES; 1471 goto out_unlock; 1472 } 1473 } 1474 1475 switch (optname) { 1476 case MRT_INIT: 1477 if (optlen != sizeof(int)) { 1478 ret = -EINVAL; 1479 break; 1480 } 1481 if (rtnl_dereference(mrt->mroute_sk)) { 1482 ret = -EADDRINUSE; 1483 break; 1484 } 1485 1486 ret = ip_ra_control(sk, 1, mrtsock_destruct); 1487 if (ret == 0) { 1488 rcu_assign_pointer(mrt->mroute_sk, sk); 1489 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; 1490 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 1491 NETCONFA_MC_FORWARDING, 1492 NETCONFA_IFINDEX_ALL, 1493 net->ipv4.devconf_all); 1494 } 1495 break; 1496 case MRT_DONE: 1497 if (sk != rcu_access_pointer(mrt->mroute_sk)) { 1498 ret = -EACCES; 1499 } else { 1500 ret = ip_ra_control(sk, 0, NULL); 1501 goto out_unlock; 1502 } 1503 break; 1504 case MRT_ADD_VIF: 1505 case MRT_DEL_VIF: 1506 if (optlen != sizeof(vif)) { 1507 ret = -EINVAL; 1508 break; 1509 } 1510 if (copy_from_user(&vif, optval, sizeof(vif))) { 1511 ret = -EFAULT; 1512 break; 1513 } 1514 if (vif.vifc_vifi >= MAXVIFS) { 1515 ret = -ENFILE; 1516 break; 1517 } 1518 if (optname == MRT_ADD_VIF) { 1519 ret = vif_add(net, mrt, &vif, 1520 sk == rtnl_dereference(mrt->mroute_sk)); 1521 } else { 1522 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); 1523 } 1524 break; 1525 /* Manipulate the forwarding caches. These live 1526 * in a sort of kernel/user symbiosis. 1527 */ 1528 case MRT_ADD_MFC: 1529 case MRT_DEL_MFC: 1530 parent = -1; 1531 /* fall through */ 1532 case MRT_ADD_MFC_PROXY: 1533 case MRT_DEL_MFC_PROXY: 1534 if (optlen != sizeof(mfc)) { 1535 ret = -EINVAL; 1536 break; 1537 } 1538 if (copy_from_user(&mfc, optval, sizeof(mfc))) { 1539 ret = -EFAULT; 1540 break; 1541 } 1542 if (parent == 0) 1543 parent = mfc.mfcc_parent; 1544 if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) 1545 ret = ipmr_mfc_delete(mrt, &mfc, parent); 1546 else 1547 ret = ipmr_mfc_add(net, mrt, &mfc, 1548 sk == rtnl_dereference(mrt->mroute_sk), 1549 parent); 1550 break; 1551 /* Control PIM assert. 
*/ 1552 case MRT_ASSERT: 1553 if (optlen != sizeof(val)) { 1554 ret = -EINVAL; 1555 break; 1556 } 1557 if (get_user(val, (int __user *)optval)) { 1558 ret = -EFAULT; 1559 break; 1560 } 1561 mrt->mroute_do_assert = val; 1562 break; 1563 case MRT_PIM: 1564 if (!ipmr_pimsm_enabled()) { 1565 ret = -ENOPROTOOPT; 1566 break; 1567 } 1568 if (optlen != sizeof(val)) { 1569 ret = -EINVAL; 1570 break; 1571 } 1572 if (get_user(val, (int __user *)optval)) { 1573 ret = -EFAULT; 1574 break; 1575 } 1576 1577 val = !!val; 1578 if (val != mrt->mroute_do_pim) { 1579 mrt->mroute_do_pim = val; 1580 mrt->mroute_do_assert = val; 1581 } 1582 break; 1583 case MRT_TABLE: 1584 if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) { 1585 ret = -ENOPROTOOPT; 1586 break; 1587 } 1588 if (optlen != sizeof(uval)) { 1589 ret = -EINVAL; 1590 break; 1591 } 1592 if (get_user(uval, (u32 __user *)optval)) { 1593 ret = -EFAULT; 1594 break; 1595 } 1596 1597 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1598 ret = -EBUSY; 1599 } else { 1600 mrt = ipmr_new_table(net, uval); 1601 if (IS_ERR(mrt)) 1602 ret = PTR_ERR(mrt); 1603 else 1604 raw_sk(sk)->ipmr_table = uval; 1605 } 1606 break; 1607 /* Spurious command, or MRT_VERSION which you cannot set. */ 1608 default: 1609 ret = -ENOPROTOOPT; 1610 } 1611 out_unlock: 1612 rtnl_unlock(); 1613 return ret; 1614 } 1615 1616 /* Getsock opt support for the multicast routing system. */ 1617 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) 1618 { 1619 int olr; 1620 int val; 1621 struct net *net = sock_net(sk); 1622 struct mr_table *mrt; 1623 1624 if (sk->sk_type != SOCK_RAW || 1625 inet_sk(sk)->inet_num != IPPROTO_IGMP) 1626 return -EOPNOTSUPP; 1627 1628 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1629 if (!mrt) 1630 return -ENOENT; 1631 1632 switch (optname) { 1633 case MRT_VERSION: 1634 val = 0x0305; 1635 break; 1636 case MRT_PIM: 1637 if (!ipmr_pimsm_enabled()) 1638 return -ENOPROTOOPT; 1639 val = mrt->mroute_do_pim; 1640 break; 1641 case MRT_ASSERT: 1642 val = mrt->mroute_do_assert; 1643 break; 1644 default: 1645 return -ENOPROTOOPT; 1646 } 1647 1648 if (get_user(olr, optlen)) 1649 return -EFAULT; 1650 olr = min_t(unsigned int, olr, sizeof(int)); 1651 if (olr < 0) 1652 return -EINVAL; 1653 if (put_user(olr, optlen)) 1654 return -EFAULT; 1655 if (copy_to_user(optval, &val, olr)) 1656 return -EFAULT; 1657 return 0; 1658 } 1659 1660 /* The IP multicast ioctl support routines. */ 1661 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) 1662 { 1663 struct sioc_sg_req sr; 1664 struct sioc_vif_req vr; 1665 struct vif_device *vif; 1666 struct mfc_cache *c; 1667 struct net *net = sock_net(sk); 1668 struct mr_table *mrt; 1669 1670 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? 
: RT_TABLE_DEFAULT); 1671 if (!mrt) 1672 return -ENOENT; 1673 1674 switch (cmd) { 1675 case SIOCGETVIFCNT: 1676 if (copy_from_user(&vr, arg, sizeof(vr))) 1677 return -EFAULT; 1678 if (vr.vifi >= mrt->maxvif) 1679 return -EINVAL; 1680 read_lock(&mrt_lock); 1681 vif = &mrt->vif_table[vr.vifi]; 1682 if (VIF_EXISTS(mrt, vr.vifi)) { 1683 vr.icount = vif->pkt_in; 1684 vr.ocount = vif->pkt_out; 1685 vr.ibytes = vif->bytes_in; 1686 vr.obytes = vif->bytes_out; 1687 read_unlock(&mrt_lock); 1688 1689 if (copy_to_user(arg, &vr, sizeof(vr))) 1690 return -EFAULT; 1691 return 0; 1692 } 1693 read_unlock(&mrt_lock); 1694 return -EADDRNOTAVAIL; 1695 case SIOCGETSGCNT: 1696 if (copy_from_user(&sr, arg, sizeof(sr))) 1697 return -EFAULT; 1698 1699 rcu_read_lock(); 1700 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); 1701 if (c) { 1702 sr.pktcnt = c->mfc_un.res.pkt; 1703 sr.bytecnt = c->mfc_un.res.bytes; 1704 sr.wrong_if = c->mfc_un.res.wrong_if; 1705 rcu_read_unlock(); 1706 1707 if (copy_to_user(arg, &sr, sizeof(sr))) 1708 return -EFAULT; 1709 return 0; 1710 } 1711 rcu_read_unlock(); 1712 return -EADDRNOTAVAIL; 1713 default: 1714 return -ENOIOCTLCMD; 1715 } 1716 } 1717 1718 #ifdef CONFIG_COMPAT 1719 struct compat_sioc_sg_req { 1720 struct in_addr src; 1721 struct in_addr grp; 1722 compat_ulong_t pktcnt; 1723 compat_ulong_t bytecnt; 1724 compat_ulong_t wrong_if; 1725 }; 1726 1727 struct compat_sioc_vif_req { 1728 vifi_t vifi; /* Which iface */ 1729 compat_ulong_t icount; 1730 compat_ulong_t ocount; 1731 compat_ulong_t ibytes; 1732 compat_ulong_t obytes; 1733 }; 1734 1735 int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) 1736 { 1737 struct compat_sioc_sg_req sr; 1738 struct compat_sioc_vif_req vr; 1739 struct vif_device *vif; 1740 struct mfc_cache *c; 1741 struct net *net = sock_net(sk); 1742 struct mr_table *mrt; 1743 1744 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? 
: RT_TABLE_DEFAULT); 1745 if (!mrt) 1746 return -ENOENT; 1747 1748 switch (cmd) { 1749 case SIOCGETVIFCNT: 1750 if (copy_from_user(&vr, arg, sizeof(vr))) 1751 return -EFAULT; 1752 if (vr.vifi >= mrt->maxvif) 1753 return -EINVAL; 1754 read_lock(&mrt_lock); 1755 vif = &mrt->vif_table[vr.vifi]; 1756 if (VIF_EXISTS(mrt, vr.vifi)) { 1757 vr.icount = vif->pkt_in; 1758 vr.ocount = vif->pkt_out; 1759 vr.ibytes = vif->bytes_in; 1760 vr.obytes = vif->bytes_out; 1761 read_unlock(&mrt_lock); 1762 1763 if (copy_to_user(arg, &vr, sizeof(vr))) 1764 return -EFAULT; 1765 return 0; 1766 } 1767 read_unlock(&mrt_lock); 1768 return -EADDRNOTAVAIL; 1769 case SIOCGETSGCNT: 1770 if (copy_from_user(&sr, arg, sizeof(sr))) 1771 return -EFAULT; 1772 1773 rcu_read_lock(); 1774 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); 1775 if (c) { 1776 sr.pktcnt = c->mfc_un.res.pkt; 1777 sr.bytecnt = c->mfc_un.res.bytes; 1778 sr.wrong_if = c->mfc_un.res.wrong_if; 1779 rcu_read_unlock(); 1780 1781 if (copy_to_user(arg, &sr, sizeof(sr))) 1782 return -EFAULT; 1783 return 0; 1784 } 1785 rcu_read_unlock(); 1786 return -EADDRNOTAVAIL; 1787 default: 1788 return -ENOIOCTLCMD; 1789 } 1790 } 1791 #endif 1792 1793 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) 1794 { 1795 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1796 struct net *net = dev_net(dev); 1797 struct mr_table *mrt; 1798 struct vif_device *v; 1799 int ct; 1800 1801 if (event != NETDEV_UNREGISTER) 1802 return NOTIFY_DONE; 1803 1804 ipmr_for_each_table(mrt, net) { 1805 v = &mrt->vif_table[0]; 1806 for (ct = 0; ct < mrt->maxvif; ct++, v++) { 1807 if (v->dev == dev) 1808 vif_delete(mrt, ct, 1, NULL); 1809 } 1810 } 1811 return NOTIFY_DONE; 1812 } 1813 1814 static struct notifier_block ip_mr_notifier = { 1815 .notifier_call = ipmr_device_event, 1816 }; 1817 1818 /* Encapsulate a packet by attaching a valid IPIP header to it. 1819 * This avoids tunnel drivers and other mess and gives us the speed so 1820 * important for multicast video. 
1821 */ 1822 static void ip_encap(struct net *net, struct sk_buff *skb, 1823 __be32 saddr, __be32 daddr) 1824 { 1825 struct iphdr *iph; 1826 const struct iphdr *old_iph = ip_hdr(skb); 1827 1828 skb_push(skb, sizeof(struct iphdr)); 1829 skb->transport_header = skb->network_header; 1830 skb_reset_network_header(skb); 1831 iph = ip_hdr(skb); 1832 1833 iph->version = 4; 1834 iph->tos = old_iph->tos; 1835 iph->ttl = old_iph->ttl; 1836 iph->frag_off = 0; 1837 iph->daddr = daddr; 1838 iph->saddr = saddr; 1839 iph->protocol = IPPROTO_IPIP; 1840 iph->ihl = 5; 1841 iph->tot_len = htons(skb->len); 1842 ip_select_ident(net, skb, NULL); 1843 ip_send_check(iph); 1844 1845 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 1846 nf_reset(skb); 1847 } 1848 1849 static inline int ipmr_forward_finish(struct net *net, struct sock *sk, 1850 struct sk_buff *skb) 1851 { 1852 struct ip_options *opt = &(IPCB(skb)->opt); 1853 1854 IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); 1855 IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len); 1856 1857 if (unlikely(opt->optlen)) 1858 ip_forward_options(skb); 1859 1860 return dst_output(net, sk, skb); 1861 } 1862 1863 #ifdef CONFIG_NET_SWITCHDEV 1864 static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, 1865 int in_vifi, int out_vifi) 1866 { 1867 struct vif_device *out_vif = &mrt->vif_table[out_vifi]; 1868 struct vif_device *in_vif = &mrt->vif_table[in_vifi]; 1869 1870 if (!skb->offload_mr_fwd_mark) 1871 return false; 1872 if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len) 1873 return false; 1874 return netdev_phys_item_id_same(&out_vif->dev_parent_id, 1875 &in_vif->dev_parent_id); 1876 } 1877 #else 1878 static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, 1879 int in_vifi, int out_vifi) 1880 { 1881 return false; 1882 } 1883 #endif 1884 1885 /* Processing handlers for ipmr_forward */ 1886 1887 static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, 1888 int in_vifi, struct sk_buff *skb, 1889 struct mfc_cache *c, int vifi) 1890 { 1891 const struct iphdr *iph = ip_hdr(skb); 1892 struct vif_device *vif = &mrt->vif_table[vifi]; 1893 struct net_device *dev; 1894 struct rtable *rt; 1895 struct flowi4 fl4; 1896 int encap = 0; 1897 1898 if (!vif->dev) 1899 goto out_free; 1900 1901 if (vif->flags & VIFF_REGISTER) { 1902 vif->pkt_out++; 1903 vif->bytes_out += skb->len; 1904 vif->dev->stats.tx_bytes += skb->len; 1905 vif->dev->stats.tx_packets++; 1906 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); 1907 goto out_free; 1908 } 1909 1910 if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi)) 1911 goto out_free; 1912 1913 if (vif->flags & VIFF_TUNNEL) { 1914 rt = ip_route_output_ports(net, &fl4, NULL, 1915 vif->remote, vif->local, 1916 0, 0, 1917 IPPROTO_IPIP, 1918 RT_TOS(iph->tos), vif->link); 1919 if (IS_ERR(rt)) 1920 goto out_free; 1921 encap = sizeof(struct iphdr); 1922 } else { 1923 rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0, 1924 0, 0, 1925 IPPROTO_IPIP, 1926 RT_TOS(iph->tos), vif->link); 1927 if (IS_ERR(rt)) 1928 goto out_free; 1929 } 1930 1931 dev = rt->dst.dev; 1932 1933 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { 1934 /* Do not fragment multicasts. Alas, IPv4 does not 1935 * allow to send ICMP, so that packets will disappear 1936 * to blackhole. 
		 */
		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR
	 */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(net, skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/* RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but also after forwarding on all output
	 * interfaces. Clearly, if the mrouter runs a multicasting program, it
	 * should receive packets regardless of which interface the program
	 * joined on. Otherwise the program would have to join on all
	 * interfaces. On the other hand, a multihomed host (or router, but
	 * not an mrouter) cannot join on more than one interface - it would
	 * result in receiving multiple packets.
	 */
	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
		net, NULL, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
}

static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
			  struct net_device *dev, struct sk_buff *skb,
			  struct mfc_cache *cache, int local)
{
	int true_vifi = ipmr_find_vif(mrt, dev);
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;
	cache->mfc_un.res.lastuse = jiffies;

	if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
		struct mfc_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		cache_proxy = ipmr_cache_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
			goto forward;
	}

	/* Wrong interface: drop packet and (maybe) send PIM assert. */
	if (mrt->vif_table[vif].dev != dev) {
		if (rt_is_output_route(skb_rtable(skb))) {
			/* It is our own packet, looped back.
			 * Very complicated situation...
			 *
			 * The best workaround until routing daemons are
			 * fixed is not to redistribute a packet if it was
			 * sent through the wrong interface. It means that
			 * multicast applications WILL NOT work for
			 * (S,G) entries whose default multicast route points
			 * to the wrong oif. In any case, it is not a good
			 * idea to run multicasting applications on a router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		     * so that we cannot check that packet arrived on an oif.
2047 * It is bad, but otherwise we would need to move pretty 2048 * large chunk of pimd to kernel. Ough... --ANK 2049 */ 2050 (mrt->mroute_do_pim || 2051 cache->mfc_un.res.ttls[true_vifi] < 255) && 2052 time_after(jiffies, 2053 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { 2054 cache->mfc_un.res.last_assert = jiffies; 2055 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); 2056 } 2057 goto dont_forward; 2058 } 2059 2060 forward: 2061 mrt->vif_table[vif].pkt_in++; 2062 mrt->vif_table[vif].bytes_in += skb->len; 2063 2064 /* Forward the frame */ 2065 if (cache->mfc_origin == htonl(INADDR_ANY) && 2066 cache->mfc_mcastgrp == htonl(INADDR_ANY)) { 2067 if (true_vifi >= 0 && 2068 true_vifi != cache->mfc_parent && 2069 ip_hdr(skb)->ttl > 2070 cache->mfc_un.res.ttls[cache->mfc_parent]) { 2071 /* It's an (*,*) entry and the packet is not coming from 2072 * the upstream: forward the packet to the upstream 2073 * only. 2074 */ 2075 psend = cache->mfc_parent; 2076 goto last_forward; 2077 } 2078 goto dont_forward; 2079 } 2080 for (ct = cache->mfc_un.res.maxvif - 1; 2081 ct >= cache->mfc_un.res.minvif; ct--) { 2082 /* For (*,G) entry, don't forward to the incoming interface */ 2083 if ((cache->mfc_origin != htonl(INADDR_ANY) || 2084 ct != true_vifi) && 2085 ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { 2086 if (psend != -1) { 2087 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2088 2089 if (skb2) 2090 ipmr_queue_xmit(net, mrt, true_vifi, 2091 skb2, cache, psend); 2092 } 2093 psend = ct; 2094 } 2095 } 2096 last_forward: 2097 if (psend != -1) { 2098 if (local) { 2099 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2100 2101 if (skb2) 2102 ipmr_queue_xmit(net, mrt, true_vifi, skb2, 2103 cache, psend); 2104 } else { 2105 ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend); 2106 return; 2107 } 2108 } 2109 2110 dont_forward: 2111 if (!local) 2112 kfree_skb(skb); 2113 } 2114 2115 static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) 2116 { 2117 struct rtable *rt = skb_rtable(skb); 2118 struct iphdr *iph = ip_hdr(skb); 2119 struct flowi4 fl4 = { 2120 .daddr = iph->daddr, 2121 .saddr = iph->saddr, 2122 .flowi4_tos = RT_TOS(iph->tos), 2123 .flowi4_oif = (rt_is_output_route(rt) ? 2124 skb->dev->ifindex : 0), 2125 .flowi4_iif = (rt_is_output_route(rt) ? 2126 LOOPBACK_IFINDEX : 2127 skb->dev->ifindex), 2128 .flowi4_mark = skb->mark, 2129 }; 2130 struct mr_table *mrt; 2131 int err; 2132 2133 err = ipmr_fib_lookup(net, &fl4, &mrt); 2134 if (err) 2135 return ERR_PTR(err); 2136 return mrt; 2137 } 2138 2139 /* Multicast packets for forwarding arrive here 2140 * Called with rcu_read_lock(); 2141 */ 2142 int ip_mr_input(struct sk_buff *skb) 2143 { 2144 struct mfc_cache *cache; 2145 struct net *net = dev_net(skb->dev); 2146 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; 2147 struct mr_table *mrt; 2148 struct net_device *dev; 2149 2150 /* skb->dev passed in is the loX master dev for vrfs. 2151 * As there are no vifs associated with loopback devices, 2152 * get the proper interface that does have a vif associated with it. 2153 */ 2154 dev = skb->dev; 2155 if (netif_is_l3_master(skb->dev)) { 2156 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); 2157 if (!dev) { 2158 kfree_skb(skb); 2159 return -ENODEV; 2160 } 2161 } 2162 2163 /* Packet is looped back after forward, it should not be 2164 * forwarded second time, but still can be delivered locally. 
2165 */ 2166 if (IPCB(skb)->flags & IPSKB_FORWARDED) 2167 goto dont_forward; 2168 2169 mrt = ipmr_rt_fib_lookup(net, skb); 2170 if (IS_ERR(mrt)) { 2171 kfree_skb(skb); 2172 return PTR_ERR(mrt); 2173 } 2174 if (!local) { 2175 if (IPCB(skb)->opt.router_alert) { 2176 if (ip_call_ra_chain(skb)) 2177 return 0; 2178 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) { 2179 /* IGMPv1 (and broken IGMPv2 implementations such as 2180 * Cisco IOS <= 11.2(8)) do not put the router alert 2181 * option in IGMP packets destined to routable 2182 * groups. This is very bad, because it means 2183 * that we can forward NO IGMP messages. 2184 */ 2185 struct sock *mroute_sk; 2186 2187 mroute_sk = rcu_dereference(mrt->mroute_sk); 2188 if (mroute_sk) { 2189 nf_reset(skb); 2190 raw_rcv(mroute_sk, skb); 2191 return 0; 2192 } 2193 } 2194 } 2195 2196 /* already under rcu_read_lock() */ 2197 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 2198 if (!cache) { 2199 int vif = ipmr_find_vif(mrt, dev); 2200 2201 if (vif >= 0) 2202 cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, 2203 vif); 2204 } 2205 2206 /* No usable cache entry */ 2207 if (!cache) { 2208 int vif; 2209 2210 if (local) { 2211 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2212 ip_local_deliver(skb); 2213 if (!skb2) 2214 return -ENOBUFS; 2215 skb = skb2; 2216 } 2217 2218 read_lock(&mrt_lock); 2219 vif = ipmr_find_vif(mrt, dev); 2220 if (vif >= 0) { 2221 int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev); 2222 read_unlock(&mrt_lock); 2223 2224 return err2; 2225 } 2226 read_unlock(&mrt_lock); 2227 kfree_skb(skb); 2228 return -ENODEV; 2229 } 2230 2231 read_lock(&mrt_lock); 2232 ip_mr_forward(net, mrt, dev, skb, cache, local); 2233 read_unlock(&mrt_lock); 2234 2235 if (local) 2236 return ip_local_deliver(skb); 2237 2238 return 0; 2239 2240 dont_forward: 2241 if (local) 2242 return ip_local_deliver(skb); 2243 kfree_skb(skb); 2244 return 0; 2245 } 2246 2247 #ifdef CONFIG_IP_PIMSM_V1 2248 /* Handle IGMP messages of PIMv1 */ 2249 int pim_rcv_v1(struct sk_buff *skb) 2250 { 2251 struct igmphdr *pim; 2252 struct net *net = dev_net(skb->dev); 2253 struct mr_table *mrt; 2254 2255 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 2256 goto drop; 2257 2258 pim = igmp_hdr(skb); 2259 2260 mrt = ipmr_rt_fib_lookup(net, skb); 2261 if (IS_ERR(mrt)) 2262 goto drop; 2263 if (!mrt->mroute_do_pim || 2264 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 2265 goto drop; 2266 2267 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 2268 drop: 2269 kfree_skb(skb); 2270 } 2271 return 0; 2272 } 2273 #endif 2274 2275 #ifdef CONFIG_IP_PIMSM_V2 2276 /* Handle PIMv2 Register messages */ static int pim_rcv(struct sk_buff *skb) 2277 { 2278 struct pimreghdr *pim; 2279 struct net *net = dev_net(skb->dev); 2280 struct mr_table *mrt; 2281 2282 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 2283 goto drop; 2284 2285 pim = (struct pimreghdr *)skb_transport_header(skb); 2286 if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) || 2287 (pim->flags & PIM_NULL_REGISTER) || 2288 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 2289 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 2290 goto drop; 2291 2292 mrt = ipmr_rt_fib_lookup(net, skb); 2293 if (IS_ERR(mrt)) 2294 goto drop; 2295 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 2296 drop: 2297 kfree_skb(skb); 2298 } 2299 return 0; 2300 } 2301 #endif 2302 2303 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2304 struct mfc_cache *c, struct rtmsg *rtm) 2305 { 2306 struct rta_mfc_stats mfcs; 2307 struct nlattr
*mp_attr; 2308 struct rtnexthop *nhp; 2309 unsigned long lastuse; 2310 int ct; 2311 2312 /* If cache is unresolved, don't try to parse IIF and OIF */ 2313 if (c->mfc_parent >= MAXVIFS) { 2314 rtm->rtm_flags |= RTNH_F_UNRESOLVED; 2315 return -ENOENT; 2316 } 2317 2318 if (VIF_EXISTS(mrt, c->mfc_parent) && 2319 nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0) 2320 return -EMSGSIZE; 2321 2322 if (c->mfc_flags & MFC_OFFLOAD) 2323 rtm->rtm_flags |= RTNH_F_OFFLOAD; 2324 2325 if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH))) 2326 return -EMSGSIZE; 2327 2328 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { 2329 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { 2330 if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) { 2331 nla_nest_cancel(skb, mp_attr); 2332 return -EMSGSIZE; 2333 } 2334 2335 nhp->rtnh_flags = 0; 2336 nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; 2337 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex; 2338 nhp->rtnh_len = sizeof(*nhp); 2339 } 2340 } 2341 2342 nla_nest_end(skb, mp_attr); 2343 2344 lastuse = READ_ONCE(c->mfc_un.res.lastuse); 2345 lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0; 2346 2347 mfcs.mfcs_packets = c->mfc_un.res.pkt; 2348 mfcs.mfcs_bytes = c->mfc_un.res.bytes; 2349 mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; 2350 if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) || 2351 nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse), 2352 RTA_PAD)) 2353 return -EMSGSIZE; 2354 2355 rtm->rtm_type = RTN_MULTICAST; 2356 return 1; 2357 } 2358 2359 int ipmr_get_route(struct net *net, struct sk_buff *skb, 2360 __be32 saddr, __be32 daddr, 2361 struct rtmsg *rtm, u32 portid) 2362 { 2363 struct mfc_cache *cache; 2364 struct mr_table *mrt; 2365 int err; 2366 2367 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2368 if (!mrt) 2369 return -ENOENT; 2370 2371 rcu_read_lock(); 2372 cache = ipmr_cache_find(mrt, saddr, daddr); 2373 if (!cache && skb->dev) { 2374 int vif = ipmr_find_vif(mrt, skb->dev); 2375 2376 if (vif >= 0) 2377 cache = ipmr_cache_find_any(mrt, daddr, vif); 2378 } 2379 if (!cache) { 2380 struct sk_buff *skb2; 2381 struct iphdr *iph; 2382 struct net_device *dev; 2383 int vif = -1; 2384 2385 dev = skb->dev; 2386 read_lock(&mrt_lock); 2387 if (dev) 2388 vif = ipmr_find_vif(mrt, dev); 2389 if (vif < 0) { 2390 read_unlock(&mrt_lock); 2391 rcu_read_unlock(); 2392 return -ENODEV; 2393 } 2394 skb2 = skb_clone(skb, GFP_ATOMIC); 2395 if (!skb2) { 2396 read_unlock(&mrt_lock); 2397 rcu_read_unlock(); 2398 return -ENOMEM; 2399 } 2400 2401 NETLINK_CB(skb2).portid = portid; 2402 skb_push(skb2, sizeof(struct iphdr)); 2403 skb_reset_network_header(skb2); 2404 iph = ip_hdr(skb2); 2405 iph->ihl = sizeof(struct iphdr) >> 2; 2406 iph->saddr = saddr; 2407 iph->daddr = daddr; 2408 iph->version = 0; 2409 err = ipmr_cache_unresolved(mrt, vif, skb2, dev); 2410 read_unlock(&mrt_lock); 2411 rcu_read_unlock(); 2412 return err; 2413 } 2414 2415 read_lock(&mrt_lock); 2416 err = __ipmr_fill_mroute(mrt, skb, cache, rtm); 2417 read_unlock(&mrt_lock); 2418 rcu_read_unlock(); 2419 return err; 2420 } 2421 2422 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2423 u32 portid, u32 seq, struct mfc_cache *c, int cmd, 2424 int flags) 2425 { 2426 struct nlmsghdr *nlh; 2427 struct rtmsg *rtm; 2428 int err; 2429 2430 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); 2431 if (!nlh) 2432 return -EMSGSIZE; 2433 2434 rtm = nlmsg_data(nlh); 2435 rtm->rtm_family = RTNL_FAMILY_IPMR; 2436 
rtm->rtm_dst_len = 32; 2437 rtm->rtm_src_len = 32; 2438 rtm->rtm_tos = 0; 2439 rtm->rtm_table = mrt->id; 2440 if (nla_put_u32(skb, RTA_TABLE, mrt->id)) 2441 goto nla_put_failure; 2442 rtm->rtm_type = RTN_MULTICAST; 2443 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2444 if (c->mfc_flags & MFC_STATIC) 2445 rtm->rtm_protocol = RTPROT_STATIC; 2446 else 2447 rtm->rtm_protocol = RTPROT_MROUTED; 2448 rtm->rtm_flags = 0; 2449 2450 if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) || 2451 nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp)) 2452 goto nla_put_failure; 2453 err = __ipmr_fill_mroute(mrt, skb, c, rtm); 2454 /* do not break the dump if cache is unresolved */ 2455 if (err < 0 && err != -ENOENT) 2456 goto nla_put_failure; 2457 2458 nlmsg_end(skb, nlh); 2459 return 0; 2460 2461 nla_put_failure: 2462 nlmsg_cancel(skb, nlh); 2463 return -EMSGSIZE; 2464 } 2465 2466 static size_t mroute_msgsize(bool unresolved, int maxvif) 2467 { 2468 size_t len = 2469 NLMSG_ALIGN(sizeof(struct rtmsg)) 2470 + nla_total_size(4) /* RTA_TABLE */ 2471 + nla_total_size(4) /* RTA_SRC */ 2472 + nla_total_size(4) /* RTA_DST */ 2473 ; 2474 2475 if (!unresolved) 2476 len = len 2477 + nla_total_size(4) /* RTA_IIF */ 2478 + nla_total_size(0) /* RTA_MULTIPATH */ 2479 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) 2480 /* RTA_MFC_STATS */ 2481 + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) 2482 ; 2483 2484 return len; 2485 } 2486 2487 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, 2488 int cmd) 2489 { 2490 struct net *net = read_pnet(&mrt->net); 2491 struct sk_buff *skb; 2492 int err = -ENOBUFS; 2493 2494 skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif), 2495 GFP_ATOMIC); 2496 if (!skb) 2497 goto errout; 2498 2499 err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); 2500 if (err < 0) 2501 goto errout; 2502 2503 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC); 2504 return; 2505 2506 errout: 2507 kfree_skb(skb); 2508 if (err < 0) 2509 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err); 2510 } 2511 2512 static size_t igmpmsg_netlink_msgsize(size_t payloadlen) 2513 { 2514 size_t len = 2515 NLMSG_ALIGN(sizeof(struct rtgenmsg)) 2516 + nla_total_size(1) /* IPMRA_CREPORT_MSGTYPE */ 2517 + nla_total_size(4) /* IPMRA_CREPORT_VIF_ID */ 2518 + nla_total_size(4) /* IPMRA_CREPORT_SRC_ADDR */ 2519 + nla_total_size(4) /* IPMRA_CREPORT_DST_ADDR */ 2520 /* IPMRA_CREPORT_PKT */ 2521 + nla_total_size(payloadlen) 2522 ; 2523 2524 return len; 2525 } 2526 2527 static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt) 2528 { 2529 struct net *net = read_pnet(&mrt->net); 2530 struct nlmsghdr *nlh; 2531 struct rtgenmsg *rtgenm; 2532 struct igmpmsg *msg; 2533 struct sk_buff *skb; 2534 struct nlattr *nla; 2535 int payloadlen; 2536 2537 payloadlen = pkt->len - sizeof(struct igmpmsg); 2538 msg = (struct igmpmsg *)skb_network_header(pkt); 2539 2540 skb = nlmsg_new(igmpmsg_netlink_msgsize(payloadlen), GFP_ATOMIC); 2541 if (!skb) 2542 goto errout; 2543 2544 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, 2545 sizeof(struct rtgenmsg), 0); 2546 if (!nlh) 2547 goto errout; 2548 rtgenm = nlmsg_data(nlh); 2549 rtgenm->rtgen_family = RTNL_FAMILY_IPMR; 2550 if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) || 2551 nla_put_u32(skb, IPMRA_CREPORT_VIF_ID, msg->im_vif) || 2552 nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR, 2553 msg->im_src.s_addr) || 2554 nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR, 2555 msg->im_dst.s_addr)) 2556 goto nla_put_failure; 2557 2558 nla = nla_reserve(skb, 
IPMRA_CREPORT_PKT, payloadlen); 2559 if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg), 2560 nla_data(nla), payloadlen)) 2561 goto nla_put_failure; 2562 2563 nlmsg_end(skb, nlh); 2564 2565 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC); 2566 return; 2567 2568 nla_put_failure: 2569 nlmsg_cancel(skb, nlh); 2570 errout: 2571 kfree_skb(skb); 2572 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS); 2573 } 2574 2575 static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 2576 struct netlink_ext_ack *extack) 2577 { 2578 struct net *net = sock_net(in_skb->sk); 2579 struct nlattr *tb[RTA_MAX + 1]; 2580 struct sk_buff *skb = NULL; 2581 struct mfc_cache *cache; 2582 struct mr_table *mrt; 2583 struct rtmsg *rtm; 2584 __be32 src, grp; 2585 u32 tableid; 2586 int err; 2587 2588 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, 2589 rtm_ipv4_policy, extack); 2590 if (err < 0) 2591 goto errout; 2592 2593 rtm = nlmsg_data(nlh); 2594 2595 src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; 2596 grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; 2597 tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0; 2598 2599 mrt = ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT); 2600 if (!mrt) { 2601 err = -ENOENT; 2602 goto errout_free; 2603 } 2604 2605 /* entries are added/deleted only under RTNL */ 2606 rcu_read_lock(); 2607 cache = ipmr_cache_find(mrt, src, grp); 2608 rcu_read_unlock(); 2609 if (!cache) { 2610 err = -ENOENT; 2611 goto errout_free; 2612 } 2613 2614 skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL); 2615 if (!skb) { 2616 err = -ENOBUFS; 2617 goto errout_free; 2618 } 2619 2620 err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, 2621 nlh->nlmsg_seq, cache, 2622 RTM_NEWROUTE, 0); 2623 if (err < 0) 2624 goto errout_free; 2625 2626 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 2627 2628 errout: 2629 return err; 2630 2631 errout_free: 2632 kfree_skb(skb); 2633 goto errout; 2634 } 2635 2636 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2637 { 2638 struct net *net = sock_net(skb->sk); 2639 struct mr_table *mrt; 2640 struct mfc_cache *mfc; 2641 unsigned int t = 0, s_t; 2642 unsigned int e = 0, s_e; 2643 2644 s_t = cb->args[0]; 2645 s_e = cb->args[1]; 2646 2647 rcu_read_lock(); 2648 ipmr_for_each_table(mrt, net) { 2649 if (t < s_t) 2650 goto next_table; 2651 list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) { 2652 if (e < s_e) 2653 goto next_entry; 2654 if (ipmr_fill_mroute(mrt, skb, 2655 NETLINK_CB(cb->skb).portid, 2656 cb->nlh->nlmsg_seq, 2657 mfc, RTM_NEWROUTE, 2658 NLM_F_MULTI) < 0) 2659 goto done; 2660 next_entry: 2661 e++; 2662 } 2663 e = 0; 2664 s_e = 0; 2665 2666 spin_lock_bh(&mfc_unres_lock); 2667 list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { 2668 if (e < s_e) 2669 goto next_entry2; 2670 if (ipmr_fill_mroute(mrt, skb, 2671 NETLINK_CB(cb->skb).portid, 2672 cb->nlh->nlmsg_seq, 2673 mfc, RTM_NEWROUTE, 2674 NLM_F_MULTI) < 0) { 2675 spin_unlock_bh(&mfc_unres_lock); 2676 goto done; 2677 } 2678 next_entry2: 2679 e++; 2680 } 2681 spin_unlock_bh(&mfc_unres_lock); 2682 e = 0; 2683 s_e = 0; 2684 next_table: 2685 t++; 2686 } 2687 done: 2688 rcu_read_unlock(); 2689 2690 cb->args[1] = e; 2691 cb->args[0] = t; 2692 2693 return skb->len; 2694 } 2695 2696 static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = { 2697 [RTA_SRC] = { .type = NLA_U32 }, 2698 [RTA_DST] = { .type = NLA_U32 }, 2699 [RTA_IIF] = { .type = NLA_U32 }, 2700 [RTA_TABLE] = { .type = NLA_U32 }, 2701 
[RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 2702 }; 2703 2704 static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol) 2705 { 2706 switch (rtm_protocol) { 2707 case RTPROT_STATIC: 2708 case RTPROT_MROUTED: 2709 return true; 2710 } 2711 return false; 2712 } 2713 2714 static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc) 2715 { 2716 struct rtnexthop *rtnh = nla_data(nla); 2717 int remaining = nla_len(nla), vifi = 0; 2718 2719 while (rtnh_ok(rtnh, remaining)) { 2720 mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops; 2721 if (++vifi == MAXVIFS) 2722 break; 2723 rtnh = rtnh_next(rtnh, &remaining); 2724 } 2725 2726 return remaining > 0 ? -EINVAL : vifi; 2727 } 2728 2729 /* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */ 2730 static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, 2731 struct mfcctl *mfcc, int *mrtsock, 2732 struct mr_table **mrtret, 2733 struct netlink_ext_ack *extack) 2734 { 2735 struct net_device *dev = NULL; 2736 u32 tblid = RT_TABLE_DEFAULT; 2737 struct mr_table *mrt; 2738 struct nlattr *attr; 2739 struct rtmsg *rtm; 2740 int ret, rem; 2741 2742 ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy, 2743 extack); 2744 if (ret < 0) 2745 goto out; 2746 rtm = nlmsg_data(nlh); 2747 2748 ret = -EINVAL; 2749 if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 || 2750 rtm->rtm_type != RTN_MULTICAST || 2751 rtm->rtm_scope != RT_SCOPE_UNIVERSE || 2752 !ipmr_rtm_validate_proto(rtm->rtm_protocol)) 2753 goto out; 2754 2755 memset(mfcc, 0, sizeof(*mfcc)); 2756 mfcc->mfcc_parent = -1; 2757 ret = 0; 2758 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) { 2759 switch (nla_type(attr)) { 2760 case RTA_SRC: 2761 mfcc->mfcc_origin.s_addr = nla_get_be32(attr); 2762 break; 2763 case RTA_DST: 2764 mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr); 2765 break; 2766 case RTA_IIF: 2767 dev = __dev_get_by_index(net, nla_get_u32(attr)); 2768 if (!dev) { 2769 ret = -ENODEV; 2770 goto out; 2771 } 2772 break; 2773 case RTA_MULTIPATH: 2774 if (ipmr_nla_get_ttls(attr, mfcc) < 0) { 2775 ret = -EINVAL; 2776 goto out; 2777 } 2778 break; 2779 case RTA_PREFSRC: 2780 ret = 1; 2781 break; 2782 case RTA_TABLE: 2783 tblid = nla_get_u32(attr); 2784 break; 2785 } 2786 } 2787 mrt = ipmr_get_table(net, tblid); 2788 if (!mrt) { 2789 ret = -ENOENT; 2790 goto out; 2791 } 2792 *mrtret = mrt; 2793 *mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0; 2794 if (dev) 2795 mfcc->mfcc_parent = ipmr_find_vif(mrt, dev); 2796 2797 out: 2798 return ret; 2799 } 2800 2801 /* takes care of both newroute and delroute */ 2802 static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh, 2803 struct netlink_ext_ack *extack) 2804 { 2805 struct net *net = sock_net(skb->sk); 2806 int ret, mrtsock, parent; 2807 struct mr_table *tbl; 2808 struct mfcctl mfcc; 2809 2810 mrtsock = 0; 2811 tbl = NULL; 2812 ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack); 2813 if (ret < 0) 2814 return ret; 2815 2816 parent = ret ? 
mfcc.mfcc_parent : -1; 2817 if (nlh->nlmsg_type == RTM_NEWROUTE) 2818 return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent); 2819 else 2820 return ipmr_mfc_delete(tbl, &mfcc, parent); 2821 } 2822 2823 static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb) 2824 { 2825 u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len); 2826 2827 if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) || 2828 nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) || 2829 nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM, 2830 mrt->mroute_reg_vif_num) || 2831 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT, 2832 mrt->mroute_do_assert) || 2833 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim)) 2834 return false; 2835 2836 return true; 2837 } 2838 2839 static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb) 2840 { 2841 struct nlattr *vif_nest; 2842 struct vif_device *vif; 2843 2844 /* if the VIF doesn't exist just continue */ 2845 if (!VIF_EXISTS(mrt, vifid)) 2846 return true; 2847 2848 vif = &mrt->vif_table[vifid]; 2849 vif_nest = nla_nest_start(skb, IPMRA_VIF); 2850 if (!vif_nest) 2851 return false; 2852 if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) || 2853 nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) || 2854 nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) || 2855 nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in, 2856 IPMRA_VIFA_PAD) || 2857 nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, vif->bytes_out, 2858 IPMRA_VIFA_PAD) || 2859 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, vif->pkt_in, 2860 IPMRA_VIFA_PAD) || 2861 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, vif->pkt_out, 2862 IPMRA_VIFA_PAD) || 2863 nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) || 2864 nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) { 2865 nla_nest_cancel(skb, vif_nest); 2866 return false; 2867 } 2868 nla_nest_end(skb, vif_nest); 2869 2870 return true; 2871 } 2872 2873 static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb) 2874 { 2875 struct net *net = sock_net(skb->sk); 2876 struct nlmsghdr *nlh = NULL; 2877 unsigned int t = 0, s_t; 2878 unsigned int e = 0, s_e; 2879 struct mr_table *mrt; 2880 2881 s_t = cb->args[0]; 2882 s_e = cb->args[1]; 2883 2884 ipmr_for_each_table(mrt, net) { 2885 struct nlattr *vifs, *af; 2886 struct ifinfomsg *hdr; 2887 u32 i; 2888 2889 if (t < s_t) 2890 goto skip_table; 2891 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, 2892 cb->nlh->nlmsg_seq, RTM_NEWLINK, 2893 sizeof(*hdr), NLM_F_MULTI); 2894 if (!nlh) 2895 break; 2896 2897 hdr = nlmsg_data(nlh); 2898 memset(hdr, 0, sizeof(*hdr)); 2899 hdr->ifi_family = RTNL_FAMILY_IPMR; 2900 2901 af = nla_nest_start(skb, IFLA_AF_SPEC); 2902 if (!af) { 2903 nlmsg_cancel(skb, nlh); 2904 goto out; 2905 } 2906 2907 if (!ipmr_fill_table(mrt, skb)) { 2908 nlmsg_cancel(skb, nlh); 2909 goto out; 2910 } 2911 2912 vifs = nla_nest_start(skb, IPMRA_TABLE_VIFS); 2913 if (!vifs) { 2914 nla_nest_end(skb, af); 2915 nlmsg_end(skb, nlh); 2916 goto out; 2917 } 2918 for (i = 0; i < mrt->maxvif; i++) { 2919 if (e < s_e) 2920 goto skip_entry; 2921 if (!ipmr_fill_vif(mrt, i, skb)) { 2922 nla_nest_end(skb, vifs); 2923 nla_nest_end(skb, af); 2924 nlmsg_end(skb, nlh); 2925 goto out; 2926 } 2927 skip_entry: 2928 e++; 2929 } 2930 s_e = 0; 2931 e = 0; 2932 nla_nest_end(skb, vifs); 2933 nla_nest_end(skb, af); 2934 nlmsg_end(skb, nlh); 2935 skip_table: 2936 t++; 2937 } 2938 2939 out: 2940 cb->args[1] = e; 2941 cb->args[0] = t; 2942 2943 return skb->len; 2944 } 2945 2946 #ifdef CONFIG_PROC_FS 
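/* Illustrative only: the seq_file handlers in this section produce tables of
 * roughly the following shape.  Interface names, addresses and counters are
 * made up, and column spacing is approximate - the exact layout comes from
 * the header strings and seq_printf() formats below.
 *
 *	# cat /proc/net/ip_mr_vif
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *	 0 eth0           12000     100     24000     200 00000 C0A80101 00000000
 *
 *	# cat /proc/net/ip_mr_cache
 *	Group    Origin   Iif     Pkts    Bytes    Wrong Oifs
 *	E1010101 C0A80107  0       100    12000        0  1:1
 */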
2947 /* The /proc interfaces to multicast routing : 2948 * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif 2949 */ 2950 struct ipmr_vif_iter { 2951 struct seq_net_private p; 2952 struct mr_table *mrt; 2953 int ct; 2954 }; 2955 2956 static struct vif_device *ipmr_vif_seq_idx(struct net *net, 2957 struct ipmr_vif_iter *iter, 2958 loff_t pos) 2959 { 2960 struct mr_table *mrt = iter->mrt; 2961 2962 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { 2963 if (!VIF_EXISTS(mrt, iter->ct)) 2964 continue; 2965 if (pos-- == 0) 2966 return &mrt->vif_table[iter->ct]; 2967 } 2968 return NULL; 2969 } 2970 2971 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) 2972 __acquires(mrt_lock) 2973 { 2974 struct ipmr_vif_iter *iter = seq->private; 2975 struct net *net = seq_file_net(seq); 2976 struct mr_table *mrt; 2977 2978 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2979 if (!mrt) 2980 return ERR_PTR(-ENOENT); 2981 2982 iter->mrt = mrt; 2983 2984 read_lock(&mrt_lock); 2985 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1) 2986 : SEQ_START_TOKEN; 2987 } 2988 2989 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2990 { 2991 struct ipmr_vif_iter *iter = seq->private; 2992 struct net *net = seq_file_net(seq); 2993 struct mr_table *mrt = iter->mrt; 2994 2995 ++*pos; 2996 if (v == SEQ_START_TOKEN) 2997 return ipmr_vif_seq_idx(net, iter, 0); 2998 2999 while (++iter->ct < mrt->maxvif) { 3000 if (!VIF_EXISTS(mrt, iter->ct)) 3001 continue; 3002 return &mrt->vif_table[iter->ct]; 3003 } 3004 return NULL; 3005 } 3006 3007 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) 3008 __releases(mrt_lock) 3009 { 3010 read_unlock(&mrt_lock); 3011 } 3012 3013 static int ipmr_vif_seq_show(struct seq_file *seq, void *v) 3014 { 3015 struct ipmr_vif_iter *iter = seq->private; 3016 struct mr_table *mrt = iter->mrt; 3017 3018 if (v == SEQ_START_TOKEN) { 3019 seq_puts(seq, 3020 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); 3021 } else { 3022 const struct vif_device *vif = v; 3023 const char *name = vif->dev ? 
vif->dev->name : "none"; 3024 3025 seq_printf(seq, 3026 "%2zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", 3027 vif - mrt->vif_table, 3028 name, vif->bytes_in, vif->pkt_in, 3029 vif->bytes_out, vif->pkt_out, 3030 vif->flags, vif->local, vif->remote); 3031 } 3032 return 0; 3033 } 3034 3035 static const struct seq_operations ipmr_vif_seq_ops = { 3036 .start = ipmr_vif_seq_start, 3037 .next = ipmr_vif_seq_next, 3038 .stop = ipmr_vif_seq_stop, 3039 .show = ipmr_vif_seq_show, 3040 }; 3041 3042 static int ipmr_vif_open(struct inode *inode, struct file *file) 3043 { 3044 return seq_open_net(inode, file, &ipmr_vif_seq_ops, 3045 sizeof(struct ipmr_vif_iter)); 3046 } 3047 3048 static const struct file_operations ipmr_vif_fops = { 3049 .owner = THIS_MODULE, 3050 .open = ipmr_vif_open, 3051 .read = seq_read, 3052 .llseek = seq_lseek, 3053 .release = seq_release_net, 3054 }; 3055 3056 struct ipmr_mfc_iter { 3057 struct seq_net_private p; 3058 struct mr_table *mrt; 3059 struct list_head *cache; 3060 }; 3061 3062 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, 3063 struct ipmr_mfc_iter *it, loff_t pos) 3064 { 3065 struct mr_table *mrt = it->mrt; 3066 struct mfc_cache *mfc; 3067 3068 rcu_read_lock(); 3069 it->cache = &mrt->mfc_cache_list; 3070 list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) 3071 if (pos-- == 0) 3072 return mfc; 3073 rcu_read_unlock(); 3074 3075 spin_lock_bh(&mfc_unres_lock); 3076 it->cache = &mrt->mfc_unres_queue; 3077 list_for_each_entry(mfc, it->cache, list) 3078 if (pos-- == 0) 3079 return mfc; 3080 spin_unlock_bh(&mfc_unres_lock); 3081 3082 it->cache = NULL; 3083 return NULL; 3084 } 3085 3086 3087 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 3088 { 3089 struct ipmr_mfc_iter *it = seq->private; 3090 struct net *net = seq_file_net(seq); 3091 struct mr_table *mrt; 3092 3093 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 3094 if (!mrt) 3095 return ERR_PTR(-ENOENT); 3096 3097 it->mrt = mrt; 3098 it->cache = NULL; 3099 return *pos ? 
ipmr_mfc_seq_idx(net, seq->private, *pos - 1) 3100 : SEQ_START_TOKEN; 3101 } 3102 3103 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3104 { 3105 struct ipmr_mfc_iter *it = seq->private; 3106 struct net *net = seq_file_net(seq); 3107 struct mr_table *mrt = it->mrt; 3108 struct mfc_cache *mfc = v; 3109 3110 ++*pos; 3111 3112 if (v == SEQ_START_TOKEN) 3113 return ipmr_mfc_seq_idx(net, seq->private, 0); 3114 3115 if (mfc->list.next != it->cache) 3116 return list_entry(mfc->list.next, struct mfc_cache, list); 3117 3118 if (it->cache == &mrt->mfc_unres_queue) 3119 goto end_of_list; 3120 3121 /* exhausted cache_array, show unresolved */ 3122 rcu_read_unlock(); 3123 it->cache = &mrt->mfc_unres_queue; 3124 3125 spin_lock_bh(&mfc_unres_lock); 3126 if (!list_empty(it->cache)) 3127 return list_first_entry(it->cache, struct mfc_cache, list); 3128 3129 end_of_list: 3130 spin_unlock_bh(&mfc_unres_lock); 3131 it->cache = NULL; 3132 3133 return NULL; 3134 } 3135 3136 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) 3137 { 3138 struct ipmr_mfc_iter *it = seq->private; 3139 struct mr_table *mrt = it->mrt; 3140 3141 if (it->cache == &mrt->mfc_unres_queue) 3142 spin_unlock_bh(&mfc_unres_lock); 3143 else if (it->cache == &mrt->mfc_cache_list) 3144 rcu_read_unlock(); 3145 } 3146 3147 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 3148 { 3149 int n; 3150 3151 if (v == SEQ_START_TOKEN) { 3152 seq_puts(seq, 3153 "Group Origin Iif Pkts Bytes Wrong Oifs\n"); 3154 } else { 3155 const struct mfc_cache *mfc = v; 3156 const struct ipmr_mfc_iter *it = seq->private; 3157 const struct mr_table *mrt = it->mrt; 3158 3159 seq_printf(seq, "%08X %08X %-3hd", 3160 (__force u32) mfc->mfc_mcastgrp, 3161 (__force u32) mfc->mfc_origin, 3162 mfc->mfc_parent); 3163 3164 if (it->cache != &mrt->mfc_unres_queue) { 3165 seq_printf(seq, " %8lu %8lu %8lu", 3166 mfc->mfc_un.res.pkt, 3167 mfc->mfc_un.res.bytes, 3168 mfc->mfc_un.res.wrong_if); 3169 for (n = mfc->mfc_un.res.minvif; 3170 n < mfc->mfc_un.res.maxvif; n++) { 3171 if (VIF_EXISTS(mrt, n) && 3172 mfc->mfc_un.res.ttls[n] < 255) 3173 seq_printf(seq, 3174 " %2d:%-3d", 3175 n, mfc->mfc_un.res.ttls[n]); 3176 } 3177 } else { 3178 /* unresolved mfc_caches don't contain 3179 * pkt, bytes and wrong_if values 3180 */ 3181 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); 3182 } 3183 seq_putc(seq, '\n'); 3184 } 3185 return 0; 3186 } 3187 3188 static const struct seq_operations ipmr_mfc_seq_ops = { 3189 .start = ipmr_mfc_seq_start, 3190 .next = ipmr_mfc_seq_next, 3191 .stop = ipmr_mfc_seq_stop, 3192 .show = ipmr_mfc_seq_show, 3193 }; 3194 3195 static int ipmr_mfc_open(struct inode *inode, struct file *file) 3196 { 3197 return seq_open_net(inode, file, &ipmr_mfc_seq_ops, 3198 sizeof(struct ipmr_mfc_iter)); 3199 } 3200 3201 static const struct file_operations ipmr_mfc_fops = { 3202 .owner = THIS_MODULE, 3203 .open = ipmr_mfc_open, 3204 .read = seq_read, 3205 .llseek = seq_lseek, 3206 .release = seq_release_net, 3207 }; 3208 #endif 3209 3210 #ifdef CONFIG_IP_PIMSM_V2 3211 static const struct net_protocol pim_protocol = { 3212 .handler = pim_rcv, 3213 .netns_ok = 1, 3214 }; 3215 #endif 3216 3217 static unsigned int ipmr_seq_read(struct net *net) 3218 { 3219 ASSERT_RTNL(); 3220 3221 return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net); 3222 } 3223 3224 static int ipmr_dump(struct net *net, struct notifier_block *nb) 3225 { 3226 struct mr_table *mrt; 3227 int err; 3228 3229 err = ipmr_rules_dump(net, nb); 3230 if (err) 3231 return err; 3232 3233 
ipmr_for_each_table(mrt, net) { 3234 struct vif_device *v = &mrt->vif_table[0]; 3235 struct mfc_cache *mfc; 3236 int vifi; 3237 3238 /* Notify on table VIF entries */ 3239 read_lock(&mrt_lock); 3240 for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) { 3241 if (!v->dev) 3242 continue; 3243 3244 call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD, 3245 v, vifi, mrt->id); 3246 } 3247 read_unlock(&mrt_lock); 3248 3249 /* Notify on table MFC entries */ 3250 list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) 3251 call_ipmr_mfc_entry_notifier(nb, net, 3252 FIB_EVENT_ENTRY_ADD, mfc, 3253 mrt->id); 3254 } 3255 3256 return 0; 3257 } 3258 3259 static const struct fib_notifier_ops ipmr_notifier_ops_template = { 3260 .family = RTNL_FAMILY_IPMR, 3261 .fib_seq_read = ipmr_seq_read, 3262 .fib_dump = ipmr_dump, 3263 .owner = THIS_MODULE, 3264 }; 3265 3266 static int __net_init ipmr_notifier_init(struct net *net) 3267 { 3268 struct fib_notifier_ops *ops; 3269 3270 net->ipv4.ipmr_seq = 0; 3271 3272 ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net); 3273 if (IS_ERR(ops)) 3274 return PTR_ERR(ops); 3275 net->ipv4.ipmr_notifier_ops = ops; 3276 3277 return 0; 3278 } 3279 3280 static void __net_exit ipmr_notifier_exit(struct net *net) 3281 { 3282 fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops); 3283 net->ipv4.ipmr_notifier_ops = NULL; 3284 } 3285 3286 /* Setup for IP multicast routing */ 3287 static int __net_init ipmr_net_init(struct net *net) 3288 { 3289 int err; 3290 3291 err = ipmr_notifier_init(net); 3292 if (err) 3293 goto ipmr_notifier_fail; 3294 3295 err = ipmr_rules_init(net); 3296 if (err < 0) 3297 goto ipmr_rules_fail; 3298 3299 #ifdef CONFIG_PROC_FS 3300 err = -ENOMEM; 3301 if (!proc_create("ip_mr_vif", 0, net->proc_net, &ipmr_vif_fops)) 3302 goto proc_vif_fail; 3303 if (!proc_create("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_fops)) 3304 goto proc_cache_fail; 3305 #endif 3306 return 0; 3307 3308 #ifdef CONFIG_PROC_FS 3309 proc_cache_fail: 3310 remove_proc_entry("ip_mr_vif", net->proc_net); 3311 proc_vif_fail: 3312 ipmr_rules_exit(net); 3313 #endif 3314 ipmr_rules_fail: 3315 ipmr_notifier_exit(net); 3316 ipmr_notifier_fail: 3317 return err; 3318 } 3319 3320 static void __net_exit ipmr_net_exit(struct net *net) 3321 { 3322 #ifdef CONFIG_PROC_FS 3323 remove_proc_entry("ip_mr_cache", net->proc_net); 3324 remove_proc_entry("ip_mr_vif", net->proc_net); 3325 #endif 3326 ipmr_notifier_exit(net); 3327 ipmr_rules_exit(net); 3328 } 3329 3330 static struct pernet_operations ipmr_net_ops = { 3331 .init = ipmr_net_init, 3332 .exit = ipmr_net_exit, 3333 }; 3334 3335 int __init ip_mr_init(void) 3336 { 3337 int err; 3338 3339 mrt_cachep = kmem_cache_create("ip_mrt_cache", 3340 sizeof(struct mfc_cache), 3341 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, 3342 NULL); 3343 3344 err = register_pernet_subsys(&ipmr_net_ops); 3345 if (err) 3346 goto reg_pernet_fail; 3347 3348 err = register_netdevice_notifier(&ip_mr_notifier); 3349 if (err) 3350 goto reg_notif_fail; 3351 #ifdef CONFIG_IP_PIMSM_V2 3352 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) { 3353 pr_err("%s: can't add PIM protocol\n", __func__); 3354 err = -EAGAIN; 3355 goto add_proto_fail; 3356 } 3357 #endif 3358 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, 3359 ipmr_rtm_getroute, ipmr_rtm_dumproute, 0); 3360 rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE, 3361 ipmr_rtm_route, NULL, 0); 3362 rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE, 3363 ipmr_rtm_route, NULL, 0); 3364 3365 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK, 3366 NULL,
ipmr_rtm_dumplink, 0); 3367 return 0; 3368 3369 #ifdef CONFIG_IP_PIMSM_V2 3370 add_proto_fail: 3371 unregister_netdevice_notifier(&ip_mr_notifier); 3372 #endif 3373 reg_notif_fail: 3374 unregister_pernet_subsys(&ipmr_net_ops); 3375 reg_pernet_fail: 3376 kmem_cache_destroy(mrt_cachep); 3377 return err; 3378 } 3379
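/* Illustrative usage sketch, not part of the kernel build: roughly how a
 * userspace multicast routing daemon (e.g. mrouted or pimd) drives this code
 * through the MRT_* setsockopt interface declared in <linux/mroute.h>.  The
 * vif numbers, interface address and (S,G) pair below are made up.
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int on = 1;
 *
 *	// become the multicast routing socket (mrt->mroute_sk above)
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &on, sizeof(on));
 *
 *	// add virtual interface #0
 *	struct vifctl vc = { 0 };
 *	vc.vifc_vifi = 0;
 *	vc.vifc_threshold = 1;				// minimum TTL to forward
 *	vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *
 *	// install an (S,G) forwarding entry: input vif 0, output vif 1
 *	struct mfcctl mc = { 0 };
 *	mc.mfcc_origin.s_addr = inet_addr("198.51.100.7");	// S
 *	mc.mfcc_mcastgrp.s_addr = inet_addr("239.1.2.3");	// G
 *	mc.mfcc_parent = 0;
 *	mc.mfcc_ttls[1] = 1;
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 *
 *	// read IGMPMSG_NOCACHE / IGMPMSG_WRONGVIF upcalls from s, and finally:
 *	setsockopt(s, IPPROTO_IP, MRT_DONE, NULL, 0);
 */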