/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *	(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/compat.h>
#include <linux/export.h>
#include <net/ip_tunnels.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
#include <linux/netconf.h>
#include <net/nexthop.h>
#include <net/switchdev.h>

struct ipmr_rule {
	struct fib_rule		common;
};

struct ipmr_result {
	struct mr_table		*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
 * Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
 * entries is changed only in process context and protected
 * with weak lock mrt_lock. Queue of unresolved entries is protected
 * with strong spinlock mfc_unres_lock.
 *
 * In this case data path is free of exclusive locks at all.
 */

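/* Slab cache from which both resolved and unresolved mfc_cache entries are
 * allocated (see ipmr_cache_alloc() and ipmr_cache_alloc_unres() below).
 */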
static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static void ipmr_free_table(struct mr_table *mrt);

static void ip_mr_forward(struct net *net, struct mr_table *mrt,
			  struct net_device *dev, struct sk_buff *skb,
			  struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      struct mfc_cache *c, struct rtmsg *rtm);
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
				 int cmd);
static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(struct timer_list *t);

#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
#define ipmr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ipmr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
			   struct mr_table **mrt)
{
	int err;
	struct ipmr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	/* update flow if oif or iif point to device enslaved to l3mdev */
	l3mdev_update_flow(net, flowi4_to_flowi(flp4));

	err = fib_rules_lookup(net->ipv4.mr_rules_ops,
			       flowi4_to_flowi(flp4), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
			    int flags, struct fib_lookup_arg *arg)
{
	struct ipmr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	arg->table = fib_rule_get_table(rule, arg);

	mrt = ipmr_get_table(rule->fr_net, arg->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}

static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}

static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			  struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}

static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = {
	.family		= RTNL_FAMILY_IPMR,
	.rule_size	= sizeof(struct ipmr_rule),
	.addr_size	= sizeof(u32),
	.action		= ipmr_rule_action,
	.match		= ipmr_rule_match,
	.configure	= ipmr_rule_configure,
	.compare	= ipmr_rule_compare,
	.fill		= ipmr_rule_fill,
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.policy		= ipmr_rule_policy,
	.owner		= THIS_MODULE,
};

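/* Per-netns setup for the multiple-tables case: register the IPMR fib-rules
 * ops, create the default table and install a catch-all rule pointing at it.
 */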
static int __net_init ipmr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ipmr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv4.mr_tables);

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (IS_ERR(mrt)) {
		err = PTR_ERR(mrt);
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
	if (err < 0)
		goto err2;

	net->ipv4.mr_rules_ops = ops;
	return 0;

err2:
	ipmr_free_table(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
		list_del(&mrt->list);
		ipmr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv4.mr_rules_ops);
	rtnl_unlock();
}

static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
{
	return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR);
}

static unsigned int ipmr_rules_seq_read(struct net *net)
{
	return fib_rules_seq_read(net, RTNL_FAMILY_IPMR);
}

bool ipmr_rule_default(const struct fib_rule *rule)
{
	return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT;
}
EXPORT_SYMBOL(ipmr_rule_default);
#else
#define ipmr_for_each_table(mrt, net) \
	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	return net->ipv4.mrt;
}

static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
			   struct mr_table **mrt)
{
	*mrt = net->ipv4.mrt;
	return 0;
}

static int __net_init ipmr_rules_init(struct net *net)
{
	struct mr_table *mrt;

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (IS_ERR(mrt))
		return PTR_ERR(mrt);
	net->ipv4.mrt = mrt;
	return 0;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	rtnl_lock();
	ipmr_free_table(net->ipv4.mrt);
	net->ipv4.mrt = NULL;
	rtnl_unlock();
}

static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
{
	return 0;
}

static unsigned int ipmr_rules_seq_read(struct net *net)
{
	return 0;
}

bool ipmr_rule_default(const struct fib_rule *rule)
{
	return true;
}
EXPORT_SYMBOL(ipmr_rule_default);
#endif

static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
				const void *ptr)
{
	const struct mfc_cache_cmp_arg *cmparg = arg->key;
	struct mfc_cache *c = (struct mfc_cache *)ptr;

	return cmparg->mfc_mcastgrp != c->mfc_mcastgrp ||
	       cmparg->mfc_origin != c->mfc_origin;
}

static const struct rhashtable_params ipmr_rht_params = {
	.head_offset = offsetof(struct mfc_cache, mnode),
	.key_offset = offsetof(struct mfc_cache, cmparg),
	.key_len = sizeof(struct mfc_cache_cmp_arg),
	.nelem_hint = 3,
	.locks_mul = 1,
	.obj_cmpfn = ipmr_hash_cmp,
	.automatic_shrinking = true,
};

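/* Return the table with the given id, creating it if it does not exist yet.
 * The id is also used to build the "pimreg%u" device name, hence the size
 * check below.
 */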
static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	/* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */
	if (id != RT_TABLE_DEFAULT && id >= 1000000000)
		return ERR_PTR(-EINVAL);

	mrt = ipmr_get_table(net, id);
	if (mrt)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (!mrt)
		return ERR_PTR(-ENOMEM);
	write_pnet(&mrt->net, net);
	mrt->id = id;

	rhltable_init(&mrt->mfc_hash, &ipmr_rht_params);
	INIT_LIST_HEAD(&mrt->mfc_cache_list);
	INIT_LIST_HEAD(&mrt->mfc_unres_queue);

	timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);

	mrt->mroute_reg_vif_num = -1;
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
	return mrt;
}

static void ipmr_free_table(struct mr_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, true);
	rhltable_destroy(&mrt->mfc_hash);
	kfree(mrt);
}

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

/* Initialize ipmr pimreg/tunnel in_device */
static bool ipmr_init_vif_indev(const struct net_device *dev)
{
	struct in_device *in_dev;

	ASSERT_RTNL();

	in_dev = __in_dev_get_rtnl(dev);
	if (!in_dev)
		return false;
	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

	return true;
}

static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else {
			err = -EOPNOTSUPP;
		}
		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;
			if (!ipmr_init_vif_indev(dev))
				goto failure;
			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
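/* The pimreg device never transmits anything on the wire itself: every skb
 * queued on it is reported to the daemon as an IGMPMSG_WHOLEPKT upcall and
 * then freed, so that userspace can do the PIM REGISTER encapsulation.
 */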
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi4 fl4 = {
		.flowi4_oif	= dev->ifindex,
		.flowi4_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi4_mark	= skb->mark,
	};
	int err;

	err = ipmr_fib_lookup(net, &fl4, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->needs_free_netdev	= true;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT_TABLE_DEFAULT)
		sprintf(name, "pimreg");
	else
		sprintf(name, "pimreg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);

	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (!ipmr_init_vif_indev(dev))
		goto failure;
	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}

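/* Common PIMv1/PIMv2 REGISTER handling: validate the encapsulated IP packet
 * and, if a register vif is configured, re-inject it on the pimreg device.
 */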
/* called with rcu_read_lock() */
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
		     unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/* Check that:
	 * a. packet is really sent to a multicast group
	 * b. packet is not a NULL-REGISTER
	 * c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (mrt->mroute_reg_vif_num >= 0)
		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
	read_unlock(&mrt_lock);

	if (!reg_dev)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	return NET_RX_SUCCESS;
}
#else
static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	return NULL;
}
#endif

static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
					struct net *net,
					enum fib_event_type event_type,
					struct vif_device *vif,
					vifi_t vif_index, u32 tb_id)
{
	struct vif_entry_notifier_info info = {
		.info = {
			.family = RTNL_FAMILY_IPMR,
			.net = net,
		},
		.dev = vif->dev,
		.vif_index = vif_index,
		.vif_flags = vif->flags,
		.tb_id = tb_id,
	};

	return call_fib_notifier(nb, net, event_type, &info.info);
}

static int call_ipmr_vif_entry_notifiers(struct net *net,
					 enum fib_event_type event_type,
					 struct vif_device *vif,
					 vifi_t vif_index, u32 tb_id)
{
	struct vif_entry_notifier_info info = {
		.info = {
			.family = RTNL_FAMILY_IPMR,
			.net = net,
		},
		.dev = vif->dev,
		.vif_index = vif_index,
		.vif_flags = vif->flags,
		.tb_id = tb_id,
	};

	ASSERT_RTNL();
	net->ipv4.ipmr_seq++;
	return call_fib_notifiers(net, event_type, &info.info);
}

static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
					struct net *net,
					enum fib_event_type event_type,
					struct mfc_cache *mfc, u32 tb_id)
{
	struct mfc_entry_notifier_info info = {
		.info = {
			.family = RTNL_FAMILY_IPMR,
			.net = net,
		},
		.mfc = mfc,
		.tb_id = tb_id
	};

	return call_fib_notifier(nb, net, event_type, &info.info);
}

static int call_ipmr_mfc_entry_notifiers(struct net *net,
					 enum fib_event_type event_type,
					 struct mfc_cache *mfc, u32 tb_id)
{
	struct mfc_entry_notifier_info info = {
		.info = {
			.family = RTNL_FAMILY_IPMR,
			.net = net,
		},
		.mfc = mfc,
		.tb_id = tb_id
	};

	ASSERT_RTNL();
	net->ipv4.ipmr_seq++;
	return call_fib_notifiers(net, event_type, &info.info);
}

/**
 *	vif_delete - Delete a VIF entry
 *	@notify: Set to 1, if the caller is a notifier_call
 */
static int vif_delete(struct mr_table *mrt, int vifi, int notify,
		      struct list_head *head)
{
	struct net *net = read_pnet(&mrt->net);
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	if (VIF_EXISTS(mrt, vifi))
		call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi,
					      mrt->id);

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;

	if (vifi + 1 == mrt->maxvif) {
		int tmp;

		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					    NETCONFA_MC_FORWARDING,
					    dev->ifindex, &in_dev->cnf);
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static void ipmr_cache_free_rcu(struct rcu_head *head)
{
	struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);

	kmem_cache_free(mrt_cachep, c);
}

void ipmr_cache_free(struct mfc_cache *c)
{
	call_rcu(&c->rcu, ipmr_cache_free_rcu);
}
EXPORT_SYMBOL(ipmr_cache_free);

/* Destroy an unresolved cache entry, killing queued skbs
 * and reporting error to netlink readers.
 */
static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = nlmsg_data(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else {
			kfree_skb(skb);
		}
	}

	ipmr_cache_free(c);
}

/* Timer process for the unresolved queue. */
static void ipmr_expire_process(struct timer_list *t)
{
	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, *next;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (list_empty(&mrt->mfc_unres_queue))
		goto out;

	now = jiffies;
	expires = 10*HZ;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mroute_netlink_event(mrt, c, RTM_DELROUTE);
		ipmr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

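/* A TTL of 0 or 255 in the user-supplied ttls[] array means the vif is not
 * part of the outgoing interface set; minvif/maxvif bound the range that
 * ip_mr_forward() has to scan.
 */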
/* Fill oifs list. It is called under write locked mrt_lock. */
static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
				   unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
	cache->mfc_un.res.lastuse = jiffies;
}

static int vif_add(struct net *net, struct mr_table *mrt,
		   struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
	};
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
	case VIFF_REGISTER:
		if (!ipmr_pimsm_enabled())
			return -EINVAL;
		/* Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;
	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && !__in_dev_get_rtnl(dev)) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else {
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
		}
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in_dev = __in_dev_get_rtnl(dev);
	if (!in_dev) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING,
				    dev->ifindex, &in_dev->cnf);
	ip_rt_multicast_event(in_dev);

	/* Fill in the VIF structures */

	attr.orig_dev = dev;
	if (!switchdev_port_attr_get(dev, &attr)) {
		memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len);
		v->dev_parent_id.id_len = attr.u.ppid.id_len;
	} else {
		v->dev_parent_id.id_len = 0;
	}
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
		v->link = dev_get_iflink(dev);

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
	if (v->flags & VIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
	if (vifi+1 > mrt->maxvif)
		mrt->maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id);
	return 0;
}

/* called with rcu_read_lock() */
static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
					 __be32 origin,
					 __be32 mcastgrp)
{
	struct mfc_cache_cmp_arg arg = {
		.mfc_mcastgrp = mcastgrp,
		.mfc_origin = origin
	};
	struct rhlist_head *tmp, *list;
	struct mfc_cache *c;

	list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
	rhl_for_each_entry_rcu(c, tmp, list, mnode)
		return c;

	return NULL;
}

/* Look for a (*,*,oif) entry */
static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt,
						    int vifi)
{
	struct mfc_cache_cmp_arg arg = {
		.mfc_mcastgrp = htonl(INADDR_ANY),
		.mfc_origin = htonl(INADDR_ANY)
	};
	struct rhlist_head *tmp, *list;
	struct mfc_cache *c;

	list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
	rhl_for_each_entry_rcu(c, tmp, list, mnode)
		if (c->mfc_un.res.ttls[vifi] < 255)
			return c;

	return NULL;
}

/* Look for a (*,G) entry */
static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
					     __be32 mcastgrp, int vifi)
{
	struct mfc_cache_cmp_arg arg = {
		.mfc_mcastgrp = mcastgrp,
		.mfc_origin = htonl(INADDR_ANY)
	};
	struct rhlist_head *tmp, *list;
	struct mfc_cache *c, *proxy;

	if (mcastgrp == htonl(INADDR_ANY))
		goto skip;

	list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
	rhl_for_each_entry_rcu(c, tmp, list, mnode) {
		if (c->mfc_un.res.ttls[vifi] < 255)
			return c;

		/* It's ok if the vifi is part of the static tree */
		proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent);
		if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
			return c;
	}

skip:
	return ipmr_cache_find_any_parent(mrt, vifi);
}

/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
						__be32 origin, __be32 mcastgrp,
						int parent)
{
	struct mfc_cache_cmp_arg arg = {
		.mfc_mcastgrp = mcastgrp,
		.mfc_origin = origin,
	};
	struct rhlist_head *tmp, *list;
	struct mfc_cache *c;

	list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
	rhl_for_each_entry_rcu(c, tmp, list, mnode)
		if (parent == -1 || parent == c->mfc_parent)
			return c;

	return NULL;
}

/* Allocate a multicast cache entry */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);

	if (c) {
		c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
		c->mfc_un.res.minvif = MAXVIFS;
		refcount_set(&c->mfc_un.res.refcount, 1);
	}
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);

	if (c) {
		skb_queue_head_init(&c->mfc_un.unres.unresolved);
		c->mfc_un.unres.expires = jiffies + 10*HZ;
	}
	return c;
}

/* A cache entry has gone into a resolved state from queued */
static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
			       struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/* Play the pending entries through our router */
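	/* Netlink RTM_GETROUTE requests that were parked on the unresolved
	 * entry are completed (or failed with -EMSGSIZE) here, while queued
	 * data packets are forwarded through the now-resolved route.
	 */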
	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct iphdr));

			if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) -
						 (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = nlmsg_data(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else {
			ip_mr_forward(net, mrt, skb->dev, skb, c, 0);
		}
	}
}

/* Bounce a cache query up to mrouted and netlink.
 *
 * Called under mrt_lock.
 */
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	const int ihl = ip_hdrlen(pkt);
	struct sock *mroute_sk;
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	struct sk_buff *skb;
	int ret;

	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		 * Duplicate old header, fix ihl, length etc.
		 * And all this only to mangle msg->im_msgtype and
		 * to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = mrt->mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else {
		/* Copy the IP header */
		skb_set_network_header(skb, skb->len);
		skb_put(skb, ihl);
		skb_copy_to_linear_data(skb, pkt->data, ihl);
		/* Flag to the kernel this is a route add */
		ip_hdr(skb)->protocol = 0;
		msg = (struct igmpmsg *)skb_network_header(skb);
		msg->im_vif = vifi;
		skb_dst_set(skb, dst_clone(skb_dst(pkt)));
		/* Add our header */
		igmp = skb_put(skb, sizeof(struct igmphdr));
		igmp->type = assert;
		msg->im_msgtype = assert;
		igmp->code = 0;
		ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
		skb->transport_header = skb->network_header;
	}

	rcu_read_lock();
	mroute_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute_sk) {
		rcu_read_unlock();
		kfree_skb(skb);
		return -EINVAL;
	}

	igmpmsg_netlink_event(mrt, skb);

	/* Deliver to mrouted */
	ret = sock_queue_rcv_skb(mroute_sk, skb);
	rcu_read_unlock();
	if (ret < 0) {
		net_warn_ratelimited("mroute: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}

/* Queue a packet for resolution. It gets locked cache entry! */
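/* A table keeps at most ten unresolved entries, and each entry buffers only
 * a small number of packets while the daemon is asked to resolve it; excess
 * packets are dropped with -ENOBUFS.
 */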
static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
				 struct sk_buff *skb, struct net_device *dev)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct mfc_cache *c;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/* Create a new entry if allowable */
		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->mfc_parent = -1;
		c->mfc_origin = iph->saddr;
		c->mfc_mcastgrp = iph->daddr;

		/* Reflect first query at mrouted. */
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			 * out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc_unres_queue);
		mroute_netlink_event(mrt, c, RTM_NEWROUTE);

		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
			mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/* See if we can append the packet */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		if (dev) {
			skb->dev = dev;
			skb->skb_iif = dev->ifindex;
		}
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/* MFC cache manipulation by user space mroute daemon */

static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
{
	struct net *net = read_pnet(&mrt->net);
	struct mfc_cache *c;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
				   mfc->mfcc_mcastgrp.s_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
	list_del_rcu(&c->list);
	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
	mroute_netlink_event(mrt, c, RTM_DELROUTE);
	ipmr_cache_put(c);

	return 0;
}

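/* Add or update an (S,G) / (*,G) entry on behalf of the daemon. An existing
 * entry is updated in place; otherwise a new cache entry is inserted and any
 * packets waiting on a matching unresolved entry are replayed through it.
 */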
static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
			struct mfcctl *mfc, int mrtsock, int parent)
{
	struct mfc_cache *uc, *c;
	bool found;
	int ret;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
				   mfc->mfcc_mcastgrp.s_addr, parent);
	rcu_read_unlock();
	if (c) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
					      mrt->id);
		mroute_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) &&
	    !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode,
				  ipmr_rht_params);
	if (ret) {
		pr_err("ipmr: rhtable insert error %d\n", ret);
		ipmr_cache_free(c);
		return ret;
	}
	list_add_tail_rcu(&c->list, &mrt->mfc_cache_list);
	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ipmr_cache_resolve(net, mrt, uc, c);
		ipmr_cache_free(uc);
	}
	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id);
	mroute_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}

/* Close the multicast socket, and clear the vif tables etc */
static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
	struct net *net = read_pnet(&mrt->net);
	struct mfc_cache *c, *tmp;
	LIST_HEAD(list);
	int i;

	/* Shut down all active vif entries */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
			continue;
		vif_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/* Wipe the cache */
	list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
		if (!all && (c->mfc_flags & MFC_STATIC))
			continue;
		rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
		list_del_rcu(&c->list);
		call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
					      mrt->id);
		mroute_netlink_event(mrt, c, RTM_DELROUTE);
		ipmr_cache_put(c);
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			mroute_netlink_event(mrt, c, RTM_DELROUTE);
			ipmr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

/* called from ip_ra_control(), before an RCU grace period,
 * we don't need to call synchronize_rcu() here
 */
static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	ASSERT_RTNL();
	ipmr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_MC_FORWARDING,
						    NETCONFA_IFINDEX_ALL,
						    net->ipv4.devconf_all);
			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
			mroute_clean_tables(mrt, false);
		}
	}
}

/* Socket options and virtual interface manipulation. The whole
 * virtual interface system is a complete heap, but unfortunately
 * that's how BSD mrouted happens to think. Maybe one day with a proper
 * MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
			 unsigned int optlen)
{
	struct net *net = sock_net(sk);
	int val, ret = 0, parent = 0;
	struct mr_table *mrt;
	struct vifctl vif;
	struct mfcctl mfc;
	u32 uval;

	/* There's one exception to the lock - MRT_DONE which needs to unlock */
	rtnl_lock();
	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_IGMP) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt) {
		ret = -ENOENT;
		goto out_unlock;
	}
	if (optname != MRT_INIT) {
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN)) {
			ret = -EACCES;
			goto out_unlock;
		}
	}

	switch (optname) {
	case MRT_INIT:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		if (rtnl_dereference(mrt->mroute_sk)) {
			ret = -EADDRINUSE;
			break;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			rcu_assign_pointer(mrt->mroute_sk, sk);
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_MC_FORWARDING,
						    NETCONFA_IFINDEX_ALL,
						    net->ipv4.devconf_all);
		}
		break;
	case MRT_DONE:
		if (sk != rcu_access_pointer(mrt->mroute_sk)) {
			ret = -EACCES;
		} else {
			ret = ip_ra_control(sk, 0, NULL);
			goto out_unlock;
		}
		break;
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_user(&vif, optval, sizeof(vif))) {
			ret = -EFAULT;
			break;
		}
		if (vif.vifc_vifi >= MAXVIFS) {
			ret = -ENFILE;
			break;
		}
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, mrt, &vif,
				      sk == rtnl_dereference(mrt->mroute_sk));
		} else {
			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
		}
		break;
	/* Manipulate the forwarding caches. These live
	 * in a sort of kernel/user symbiosis.
	 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		parent = -1;
		/* fall through */
	case MRT_ADD_MFC_PROXY:
	case MRT_DEL_MFC_PROXY:
		if (optlen != sizeof(mfc)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_user(&mfc, optval, sizeof(mfc))) {
			ret = -EFAULT;
			break;
		}
		if (parent == 0)
			parent = mfc.mfcc_parent;
		if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY)
			ret = ipmr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ipmr_mfc_add(net, mrt, &mfc,
					   sk == rtnl_dereference(mrt->mroute_sk),
					   parent);
		break;
	/* Control PIM assert. */
	case MRT_ASSERT:
		if (optlen != sizeof(val)) {
			ret = -EINVAL;
			break;
		}
		if (get_user(val, (int __user *)optval)) {
			ret = -EFAULT;
			break;
		}
		mrt->mroute_do_assert = val;
		break;
	case MRT_PIM:
		if (!ipmr_pimsm_enabled()) {
			ret = -ENOPROTOOPT;
			break;
		}
		if (optlen != sizeof(val)) {
			ret = -EINVAL;
			break;
		}
		if (get_user(val, (int __user *)optval)) {
			ret = -EFAULT;
			break;
		}

		val = !!val;
		if (val != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = val;
			mrt->mroute_do_assert = val;
		}
		break;
	case MRT_TABLE:
		if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) {
			ret = -ENOPROTOOPT;
			break;
		}
		if (optlen != sizeof(uval)) {
			ret = -EINVAL;
			break;
		}
		if (get_user(uval, (u32 __user *)optval)) {
			ret = -EFAULT;
			break;
		}

		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			ret = -EBUSY;
		} else {
			mrt = ipmr_new_table(net, uval);
			if (IS_ERR(mrt))
				ret = PTR_ERR(mrt);
			else
				raw_sk(sk)->ipmr_table = uval;
		}
		break;
	/* Spurious command, or MRT_VERSION which you cannot set. */
	default:
		ret = -ENOPROTOOPT;
	}
out_unlock:
	rtnl_unlock();
	return ret;
}

/* Getsock opt support for the multicast routing system. */
int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_IGMP)
		return -EOPNOTSUPP;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT_VERSION:
		val = 0x0305;
		break;
	case MRT_PIM:
		if (!ipmr_pimsm_enabled())
			return -ENOPROTOOPT;
		val = mrt->mroute_do_pim;
		break;
	case MRT_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;
	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;
	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/* The IP multicast ioctl support routines. */
int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.vifi];
		if (VIF_EXISTS(mrt, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

#ifdef CONFIG_COMPAT
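/* 32-bit compat variants of the SIOCGETSGCNT/SIOCGETVIFCNT request
 * structures: the counters are unsigned long in the native ABI and therefore
 * need compat_ulong_t here.
 */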
struct compat_sioc_sg_req {
	struct in_addr src;
	struct in_addr grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

struct compat_sioc_vif_req {
	vifi_t	vifi;		/* Which iface */
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req sr;
	struct compat_sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.vifi];
		if (VIF_EXISTS(mrt, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif

static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ipmr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				vif_delete(mrt, ct, 1, NULL);
		}
	}
	return NOTIFY_DONE;
}

static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/* Encapsulate a packet by attaching a valid IPIP header to it.
 * This avoids tunnel drivers and other mess and gives us the speed so
 * important for multicast video.
 */
static void ip_encap(struct net *net, struct sk_buff *skb,
		     __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	const struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	= 4;
	iph->tos	= old_iph->tos;
	iph->ttl	= old_iph->ttl;
	iph->frag_off	= 0;
	iph->daddr	= daddr;
	iph->saddr	= saddr;
	iph->protocol	= IPPROTO_IPIP;
	iph->ihl	= 5;
	iph->tot_len	= htons(skb->len);
	ip_select_ident(net, skb, NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
				      struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(net, sk, skb);
}

#ifdef CONFIG_NET_SWITCHDEV
static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
				   int in_vifi, int out_vifi)
{
	struct vif_device *out_vif = &mrt->vif_table[out_vifi];
	struct vif_device *in_vif = &mrt->vif_table[in_vifi];

	if (!skb->offload_mr_fwd_mark)
		return false;
	if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len)
		return false;
	return netdev_phys_item_id_same(&out_vif->dev_parent_id,
					&in_vif->dev_parent_id);
}
#else
static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
				   int in_vifi, int out_vifi)
{
	return false;
}
#endif

/* Processing handlers for ipmr_forward */

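/* Transmit one copy of the packet on vif @vifi: account for it, look up a
 * route (an IPIP route for tunnel vifs), honour the MTU, decrement the TTL
 * and finally hand the skb to the NF_INET_FORWARD hook.
 */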
static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
			    int in_vifi, struct sk_buff *skb,
			    struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	struct flowi4 fl4;
	int    encap = 0;

	if (!vif->dev)
		goto out_free;

	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}

	if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi))
		goto out_free;

	if (vif->flags & VIFF_TUNNEL) {
		rt = ip_route_output_ports(net, &fl4, NULL,
					   vif->remote, vif->local,
					   0, 0,
					   IPPROTO_IPIP,
					   RT_TOS(iph->tos), vif->link);
		if (IS_ERR(rt))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0,
					   0, 0,
					   IPPROTO_IPIP,
					   RT_TOS(iph->tos), vif->link);
		if (IS_ERR(rt))
			goto out_free;
	}

	dev = rt->dst.dev;

	if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		 * allow to send ICMP, so that packets will disappear
		 * to blackhole.
		 */
		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR
	 */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(net, skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
		net, NULL, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
}

static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif-1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */
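/* Forwarding decision for one cache entry: packets arriving on an unexpected
 * interface are dropped (optionally triggering an IGMPMSG_WRONGVIF assert
 * upcall), otherwise one clone is sent on every outgoing vif whose TTL
 * threshold the packet exceeds.
 */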
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
			  struct net_device *dev, struct sk_buff *skb,
			  struct mfc_cache *cache, int local)
{
	int true_vifi = ipmr_find_vif(mrt, dev);
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;
	cache->mfc_un.res.lastuse = jiffies;

	if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
		struct mfc_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		cache_proxy = ipmr_cache_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
			goto forward;
	}

	/* Wrong interface: drop packet and (maybe) send PIM assert. */
	if (mrt->vif_table[vif].dev != dev) {
		if (rt_is_output_route(skb_rtable(skb))) {
			/* It is our own packet, looped back.
			 * Very complicated situation...
			 *
			 * The best workaround until routing daemons will be
			 * fixed is not to redistribute packet, if it was
			 * send through wrong interface. It means, that
			 * multicast applications WILL NOT work for
			 * (S,G), which have default multicast route pointing
			 * to wrong oif. In any case, it is not a good
			 * idea to use multicasting applications on router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		     * so that we cannot check that packet arrived on an oif.
		     * It is bad, but otherwise we would need to move pretty
		     * large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

forward:
	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/* Forward the frame */
	if (cache->mfc_origin == htonl(INADDR_ANY) &&
	    cache->mfc_mcastgrp == htonl(INADDR_ANY)) {
		if (true_vifi >= 0 &&
		    true_vifi != cache->mfc_parent &&
		    ip_hdr(skb)->ttl >
				cache->mfc_un.res.ttls[cache->mfc_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = cache->mfc_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	for (ct = cache->mfc_un.res.maxvif - 1;
	     ct >= cache->mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((cache->mfc_origin != htonl(INADDR_ANY) ||
		     ct != true_vifi) &&
		    ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

				if (skb2)
					ipmr_queue_xmit(net, mrt, true_vifi,
							skb2, cache, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

			if (skb2)
				ipmr_queue_xmit(net, mrt, true_vifi, skb2,
						cache, psend);
		} else {
			ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend);
			return;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
}

static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct iphdr *iph = ip_hdr(skb);
	struct flowi4 fl4 = {
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowi4_tos = RT_TOS(iph->tos),
		.flowi4_oif = (rt_is_output_route(rt) ?
			       skb->dev->ifindex : 0),
		.flowi4_iif = (rt_is_output_route(rt) ?
			       LOOPBACK_IFINDEX :
			       skb->dev->ifindex),
		.flowi4_mark = skb->mark,
	};
	struct mr_table *mrt;
	int err;

	err = ipmr_fib_lookup(net, &fl4, &mrt);
	if (err)
		return ERR_PTR(err);
	return mrt;
}

/* Multicast packets for forwarding arrive here
 * Called with rcu_read_lock();
 */
int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
	struct mr_table *mrt;
	struct net_device *dev;

	/* skb->dev passed in is the loX master dev for vrfs.
	 * As there are no vifs associated with loopback devices,
	 * get the proper interface that does have a vif associated with it.
	 */
	dev = skb->dev;
	if (netif_is_l3_master(skb->dev)) {
		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
		if (!dev) {
			kfree_skb(skb);
			return -ENODEV;
		}
	}

	/* Packet is looped back after forward, it should not be
	 * forwarded second time, but still can be delivered locally.
2164 */ 2165 if (IPCB(skb)->flags & IPSKB_FORWARDED) 2166 goto dont_forward; 2167 2168 mrt = ipmr_rt_fib_lookup(net, skb); 2169 if (IS_ERR(mrt)) { 2170 kfree_skb(skb); 2171 return PTR_ERR(mrt); 2172 } 2173 if (!local) { 2174 if (IPCB(skb)->opt.router_alert) { 2175 if (ip_call_ra_chain(skb)) 2176 return 0; 2177 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) { 2178 /* IGMPv1 (and broken IGMPv2 implementations sort of 2179 * Cisco IOS <= 11.2(8)) do not put router alert 2180 * option to IGMP packets destined to routable 2181 * groups. It is very bad, because it means 2182 * that we can forward NO IGMP messages. 2183 */ 2184 struct sock *mroute_sk; 2185 2186 mroute_sk = rcu_dereference(mrt->mroute_sk); 2187 if (mroute_sk) { 2188 nf_reset(skb); 2189 raw_rcv(mroute_sk, skb); 2190 return 0; 2191 } 2192 } 2193 } 2194 2195 /* already under rcu_read_lock() */ 2196 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 2197 if (!cache) { 2198 int vif = ipmr_find_vif(mrt, dev); 2199 2200 if (vif >= 0) 2201 cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, 2202 vif); 2203 } 2204 2205 /* No usable cache entry */ 2206 if (!cache) { 2207 int vif; 2208 2209 if (local) { 2210 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2211 ip_local_deliver(skb); 2212 if (!skb2) 2213 return -ENOBUFS; 2214 skb = skb2; 2215 } 2216 2217 read_lock(&mrt_lock); 2218 vif = ipmr_find_vif(mrt, dev); 2219 if (vif >= 0) { 2220 int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev); 2221 read_unlock(&mrt_lock); 2222 2223 return err2; 2224 } 2225 read_unlock(&mrt_lock); 2226 kfree_skb(skb); 2227 return -ENODEV; 2228 } 2229 2230 read_lock(&mrt_lock); 2231 ip_mr_forward(net, mrt, dev, skb, cache, local); 2232 read_unlock(&mrt_lock); 2233 2234 if (local) 2235 return ip_local_deliver(skb); 2236 2237 return 0; 2238 2239 dont_forward: 2240 if (local) 2241 return ip_local_deliver(skb); 2242 kfree_skb(skb); 2243 return 0; 2244 } 2245 2246 #ifdef CONFIG_IP_PIMSM_V1 2247 /* Handle IGMP messages of PIMv1 */ 2248 int pim_rcv_v1(struct sk_buff *skb) 2249 { 2250 struct igmphdr *pim; 2251 struct net *net = dev_net(skb->dev); 2252 struct mr_table *mrt; 2253 2254 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 2255 goto drop; 2256 2257 pim = igmp_hdr(skb); 2258 2259 mrt = ipmr_rt_fib_lookup(net, skb); 2260 if (IS_ERR(mrt)) 2261 goto drop; 2262 if (!mrt->mroute_do_pim || 2263 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 2264 goto drop; 2265 2266 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 2267 drop: 2268 kfree_skb(skb); 2269 } 2270 return 0; 2271 } 2272 #endif 2273 2274 #ifdef CONFIG_IP_PIMSM_V2 2275 static int pim_rcv(struct sk_buff *skb) 2276 { 2277 struct pimreghdr *pim; 2278 struct net *net = dev_net(skb->dev); 2279 struct mr_table *mrt; 2280 2281 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 2282 goto drop; 2283 2284 pim = (struct pimreghdr *)skb_transport_header(skb); 2285 if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) || 2286 (pim->flags & PIM_NULL_REGISTER) || 2287 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 2288 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 2289 goto drop; 2290 2291 mrt = ipmr_rt_fib_lookup(net, skb); 2292 if (IS_ERR(mrt)) 2293 goto drop; 2294 if (__pim_rcv(mrt, skb, sizeof(*pim))) { 2295 drop: 2296 kfree_skb(skb); 2297 } 2298 return 0; 2299 } 2300 #endif 2301 2302 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2303 struct mfc_cache *c, struct rtmsg *rtm) 2304 { 2305 struct rta_mfc_stats mfcs; 2306 struct nlattr 
*mp_attr; 2307 struct rtnexthop *nhp; 2308 unsigned long lastuse; 2309 int ct; 2310 2311 /* If cache is unresolved, don't try to parse IIF and OIF */ 2312 if (c->mfc_parent >= MAXVIFS) { 2313 rtm->rtm_flags |= RTNH_F_UNRESOLVED; 2314 return -ENOENT; 2315 } 2316 2317 if (VIF_EXISTS(mrt, c->mfc_parent) && 2318 nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0) 2319 return -EMSGSIZE; 2320 2321 if (c->mfc_flags & MFC_OFFLOAD) 2322 rtm->rtm_flags |= RTNH_F_OFFLOAD; 2323 2324 if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH))) 2325 return -EMSGSIZE; 2326 2327 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { 2328 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { 2329 if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) { 2330 nla_nest_cancel(skb, mp_attr); 2331 return -EMSGSIZE; 2332 } 2333 2334 nhp->rtnh_flags = 0; 2335 nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; 2336 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex; 2337 nhp->rtnh_len = sizeof(*nhp); 2338 } 2339 } 2340 2341 nla_nest_end(skb, mp_attr); 2342 2343 lastuse = READ_ONCE(c->mfc_un.res.lastuse); 2344 lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0; 2345 2346 mfcs.mfcs_packets = c->mfc_un.res.pkt; 2347 mfcs.mfcs_bytes = c->mfc_un.res.bytes; 2348 mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; 2349 if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) || 2350 nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse), 2351 RTA_PAD)) 2352 return -EMSGSIZE; 2353 2354 rtm->rtm_type = RTN_MULTICAST; 2355 return 1; 2356 } 2357 2358 int ipmr_get_route(struct net *net, struct sk_buff *skb, 2359 __be32 saddr, __be32 daddr, 2360 struct rtmsg *rtm, u32 portid) 2361 { 2362 struct mfc_cache *cache; 2363 struct mr_table *mrt; 2364 int err; 2365 2366 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2367 if (!mrt) 2368 return -ENOENT; 2369 2370 rcu_read_lock(); 2371 cache = ipmr_cache_find(mrt, saddr, daddr); 2372 if (!cache && skb->dev) { 2373 int vif = ipmr_find_vif(mrt, skb->dev); 2374 2375 if (vif >= 0) 2376 cache = ipmr_cache_find_any(mrt, daddr, vif); 2377 } 2378 if (!cache) { 2379 struct sk_buff *skb2; 2380 struct iphdr *iph; 2381 struct net_device *dev; 2382 int vif = -1; 2383 2384 dev = skb->dev; 2385 read_lock(&mrt_lock); 2386 if (dev) 2387 vif = ipmr_find_vif(mrt, dev); 2388 if (vif < 0) { 2389 read_unlock(&mrt_lock); 2390 rcu_read_unlock(); 2391 return -ENODEV; 2392 } 2393 skb2 = skb_clone(skb, GFP_ATOMIC); 2394 if (!skb2) { 2395 read_unlock(&mrt_lock); 2396 rcu_read_unlock(); 2397 return -ENOMEM; 2398 } 2399 2400 NETLINK_CB(skb2).portid = portid; 2401 skb_push(skb2, sizeof(struct iphdr)); 2402 skb_reset_network_header(skb2); 2403 iph = ip_hdr(skb2); 2404 iph->ihl = sizeof(struct iphdr) >> 2; 2405 iph->saddr = saddr; 2406 iph->daddr = daddr; 2407 iph->version = 0; 2408 err = ipmr_cache_unresolved(mrt, vif, skb2, dev); 2409 read_unlock(&mrt_lock); 2410 rcu_read_unlock(); 2411 return err; 2412 } 2413 2414 read_lock(&mrt_lock); 2415 err = __ipmr_fill_mroute(mrt, skb, cache, rtm); 2416 read_unlock(&mrt_lock); 2417 rcu_read_unlock(); 2418 return err; 2419 } 2420 2421 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2422 u32 portid, u32 seq, struct mfc_cache *c, int cmd, 2423 int flags) 2424 { 2425 struct nlmsghdr *nlh; 2426 struct rtmsg *rtm; 2427 int err; 2428 2429 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); 2430 if (!nlh) 2431 return -EMSGSIZE; 2432 2433 rtm = nlmsg_data(nlh); 2434 rtm->rtm_family = RTNL_FAMILY_IPMR; 2435 
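	/* Each MFC entry is an exact (S,G) match, so the dumped route
	 * carries full /32 source and destination prefix lengths.
	 */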
rtm->rtm_dst_len = 32; 2436 rtm->rtm_src_len = 32; 2437 rtm->rtm_tos = 0; 2438 rtm->rtm_table = mrt->id; 2439 if (nla_put_u32(skb, RTA_TABLE, mrt->id)) 2440 goto nla_put_failure; 2441 rtm->rtm_type = RTN_MULTICAST; 2442 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2443 if (c->mfc_flags & MFC_STATIC) 2444 rtm->rtm_protocol = RTPROT_STATIC; 2445 else 2446 rtm->rtm_protocol = RTPROT_MROUTED; 2447 rtm->rtm_flags = 0; 2448 2449 if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) || 2450 nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp)) 2451 goto nla_put_failure; 2452 err = __ipmr_fill_mroute(mrt, skb, c, rtm); 2453 /* do not break the dump if cache is unresolved */ 2454 if (err < 0 && err != -ENOENT) 2455 goto nla_put_failure; 2456 2457 nlmsg_end(skb, nlh); 2458 return 0; 2459 2460 nla_put_failure: 2461 nlmsg_cancel(skb, nlh); 2462 return -EMSGSIZE; 2463 } 2464 2465 static size_t mroute_msgsize(bool unresolved, int maxvif) 2466 { 2467 size_t len = 2468 NLMSG_ALIGN(sizeof(struct rtmsg)) 2469 + nla_total_size(4) /* RTA_TABLE */ 2470 + nla_total_size(4) /* RTA_SRC */ 2471 + nla_total_size(4) /* RTA_DST */ 2472 ; 2473 2474 if (!unresolved) 2475 len = len 2476 + nla_total_size(4) /* RTA_IIF */ 2477 + nla_total_size(0) /* RTA_MULTIPATH */ 2478 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) 2479 /* RTA_MFC_STATS */ 2480 + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) 2481 ; 2482 2483 return len; 2484 } 2485 2486 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, 2487 int cmd) 2488 { 2489 struct net *net = read_pnet(&mrt->net); 2490 struct sk_buff *skb; 2491 int err = -ENOBUFS; 2492 2493 skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif), 2494 GFP_ATOMIC); 2495 if (!skb) 2496 goto errout; 2497 2498 err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); 2499 if (err < 0) 2500 goto errout; 2501 2502 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC); 2503 return; 2504 2505 errout: 2506 kfree_skb(skb); 2507 if (err < 0) 2508 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err); 2509 } 2510 2511 static size_t igmpmsg_netlink_msgsize(size_t payloadlen) 2512 { 2513 size_t len = 2514 NLMSG_ALIGN(sizeof(struct rtgenmsg)) 2515 + nla_total_size(1) /* IPMRA_CREPORT_MSGTYPE */ 2516 + nla_total_size(4) /* IPMRA_CREPORT_VIF_ID */ 2517 + nla_total_size(4) /* IPMRA_CREPORT_SRC_ADDR */ 2518 + nla_total_size(4) /* IPMRA_CREPORT_DST_ADDR */ 2519 /* IPMRA_CREPORT_PKT */ 2520 + nla_total_size(payloadlen) 2521 ; 2522 2523 return len; 2524 } 2525 2526 static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt) 2527 { 2528 struct net *net = read_pnet(&mrt->net); 2529 struct nlmsghdr *nlh; 2530 struct rtgenmsg *rtgenm; 2531 struct igmpmsg *msg; 2532 struct sk_buff *skb; 2533 struct nlattr *nla; 2534 int payloadlen; 2535 2536 payloadlen = pkt->len - sizeof(struct igmpmsg); 2537 msg = (struct igmpmsg *)skb_network_header(pkt); 2538 2539 skb = nlmsg_new(igmpmsg_netlink_msgsize(payloadlen), GFP_ATOMIC); 2540 if (!skb) 2541 goto errout; 2542 2543 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, 2544 sizeof(struct rtgenmsg), 0); 2545 if (!nlh) 2546 goto errout; 2547 rtgenm = nlmsg_data(nlh); 2548 rtgenm->rtgen_family = RTNL_FAMILY_IPMR; 2549 if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) || 2550 nla_put_u32(skb, IPMRA_CREPORT_VIF_ID, msg->im_vif) || 2551 nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR, 2552 msg->im_src.s_addr) || 2553 nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR, 2554 msg->im_dst.s_addr)) 2555 goto nla_put_failure; 2556 2557 nla = nla_reserve(skb, 
IPMRA_CREPORT_PKT, payloadlen); 2558 if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg), 2559 nla_data(nla), payloadlen)) 2560 goto nla_put_failure; 2561 2562 nlmsg_end(skb, nlh); 2563 2564 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC); 2565 return; 2566 2567 nla_put_failure: 2568 nlmsg_cancel(skb, nlh); 2569 errout: 2570 kfree_skb(skb); 2571 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS); 2572 } 2573 2574 static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 2575 struct netlink_ext_ack *extack) 2576 { 2577 struct net *net = sock_net(in_skb->sk); 2578 struct nlattr *tb[RTA_MAX + 1]; 2579 struct sk_buff *skb = NULL; 2580 struct mfc_cache *cache; 2581 struct mr_table *mrt; 2582 struct rtmsg *rtm; 2583 __be32 src, grp; 2584 u32 tableid; 2585 int err; 2586 2587 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, 2588 rtm_ipv4_policy, extack); 2589 if (err < 0) 2590 goto errout; 2591 2592 rtm = nlmsg_data(nlh); 2593 2594 src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; 2595 grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; 2596 tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0; 2597 2598 mrt = ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT); 2599 if (!mrt) { 2600 err = -ENOENT; 2601 goto errout_free; 2602 } 2603 2604 /* entries are added/deleted only under RTNL */ 2605 rcu_read_lock(); 2606 cache = ipmr_cache_find(mrt, src, grp); 2607 rcu_read_unlock(); 2608 if (!cache) { 2609 err = -ENOENT; 2610 goto errout_free; 2611 } 2612 2613 skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL); 2614 if (!skb) { 2615 err = -ENOBUFS; 2616 goto errout_free; 2617 } 2618 2619 err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, 2620 nlh->nlmsg_seq, cache, 2621 RTM_NEWROUTE, 0); 2622 if (err < 0) 2623 goto errout_free; 2624 2625 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 2626 2627 errout: 2628 return err; 2629 2630 errout_free: 2631 kfree_skb(skb); 2632 goto errout; 2633 } 2634 2635 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2636 { 2637 struct net *net = sock_net(skb->sk); 2638 struct mr_table *mrt; 2639 struct mfc_cache *mfc; 2640 unsigned int t = 0, s_t; 2641 unsigned int e = 0, s_e; 2642 2643 s_t = cb->args[0]; 2644 s_e = cb->args[1]; 2645 2646 rcu_read_lock(); 2647 ipmr_for_each_table(mrt, net) { 2648 if (t < s_t) 2649 goto next_table; 2650 list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) { 2651 if (e < s_e) 2652 goto next_entry; 2653 if (ipmr_fill_mroute(mrt, skb, 2654 NETLINK_CB(cb->skb).portid, 2655 cb->nlh->nlmsg_seq, 2656 mfc, RTM_NEWROUTE, 2657 NLM_F_MULTI) < 0) 2658 goto done; 2659 next_entry: 2660 e++; 2661 } 2662 e = 0; 2663 s_e = 0; 2664 2665 spin_lock_bh(&mfc_unres_lock); 2666 list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { 2667 if (e < s_e) 2668 goto next_entry2; 2669 if (ipmr_fill_mroute(mrt, skb, 2670 NETLINK_CB(cb->skb).portid, 2671 cb->nlh->nlmsg_seq, 2672 mfc, RTM_NEWROUTE, 2673 NLM_F_MULTI) < 0) { 2674 spin_unlock_bh(&mfc_unres_lock); 2675 goto done; 2676 } 2677 next_entry2: 2678 e++; 2679 } 2680 spin_unlock_bh(&mfc_unres_lock); 2681 e = 0; 2682 s_e = 0; 2683 next_table: 2684 t++; 2685 } 2686 done: 2687 rcu_read_unlock(); 2688 2689 cb->args[1] = e; 2690 cb->args[0] = t; 2691 2692 return skb->len; 2693 } 2694 2695 static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = { 2696 [RTA_SRC] = { .type = NLA_U32 }, 2697 [RTA_DST] = { .type = NLA_U32 }, 2698 [RTA_IIF] = { .type = NLA_U32 }, 2699 [RTA_TABLE] = { .type = NLA_U32 }, 2700 
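	/* No .type means NLA_UNSPEC, so .len acts as a minimum payload
	 * size: RTA_MULTIPATH must carry at least one struct rtnexthop.
	 */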
[RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 2701 }; 2702 2703 static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol) 2704 { 2705 switch (rtm_protocol) { 2706 case RTPROT_STATIC: 2707 case RTPROT_MROUTED: 2708 return true; 2709 } 2710 return false; 2711 } 2712 2713 static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc) 2714 { 2715 struct rtnexthop *rtnh = nla_data(nla); 2716 int remaining = nla_len(nla), vifi = 0; 2717 2718 while (rtnh_ok(rtnh, remaining)) { 2719 mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops; 2720 if (++vifi == MAXVIFS) 2721 break; 2722 rtnh = rtnh_next(rtnh, &remaining); 2723 } 2724 2725 return remaining > 0 ? -EINVAL : vifi; 2726 } 2727 2728 /* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */ 2729 static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, 2730 struct mfcctl *mfcc, int *mrtsock, 2731 struct mr_table **mrtret, 2732 struct netlink_ext_ack *extack) 2733 { 2734 struct net_device *dev = NULL; 2735 u32 tblid = RT_TABLE_DEFAULT; 2736 struct mr_table *mrt; 2737 struct nlattr *attr; 2738 struct rtmsg *rtm; 2739 int ret, rem; 2740 2741 ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy, 2742 extack); 2743 if (ret < 0) 2744 goto out; 2745 rtm = nlmsg_data(nlh); 2746 2747 ret = -EINVAL; 2748 if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 || 2749 rtm->rtm_type != RTN_MULTICAST || 2750 rtm->rtm_scope != RT_SCOPE_UNIVERSE || 2751 !ipmr_rtm_validate_proto(rtm->rtm_protocol)) 2752 goto out; 2753 2754 memset(mfcc, 0, sizeof(*mfcc)); 2755 mfcc->mfcc_parent = -1; 2756 ret = 0; 2757 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) { 2758 switch (nla_type(attr)) { 2759 case RTA_SRC: 2760 mfcc->mfcc_origin.s_addr = nla_get_be32(attr); 2761 break; 2762 case RTA_DST: 2763 mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr); 2764 break; 2765 case RTA_IIF: 2766 dev = __dev_get_by_index(net, nla_get_u32(attr)); 2767 if (!dev) { 2768 ret = -ENODEV; 2769 goto out; 2770 } 2771 break; 2772 case RTA_MULTIPATH: 2773 if (ipmr_nla_get_ttls(attr, mfcc) < 0) { 2774 ret = -EINVAL; 2775 goto out; 2776 } 2777 break; 2778 case RTA_PREFSRC: 2779 ret = 1; 2780 break; 2781 case RTA_TABLE: 2782 tblid = nla_get_u32(attr); 2783 break; 2784 } 2785 } 2786 mrt = ipmr_get_table(net, tblid); 2787 if (!mrt) { 2788 ret = -ENOENT; 2789 goto out; 2790 } 2791 *mrtret = mrt; 2792 *mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0; 2793 if (dev) 2794 mfcc->mfcc_parent = ipmr_find_vif(mrt, dev); 2795 2796 out: 2797 return ret; 2798 } 2799 2800 /* takes care of both newroute and delroute */ 2801 static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh, 2802 struct netlink_ext_ack *extack) 2803 { 2804 struct net *net = sock_net(skb->sk); 2805 int ret, mrtsock, parent; 2806 struct mr_table *tbl; 2807 struct mfcctl mfcc; 2808 2809 mrtsock = 0; 2810 tbl = NULL; 2811 ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack); 2812 if (ret < 0) 2813 return ret; 2814 2815 parent = ret ? 
mfcc.mfcc_parent : -1; 2816 if (nlh->nlmsg_type == RTM_NEWROUTE) 2817 return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent); 2818 else 2819 return ipmr_mfc_delete(tbl, &mfcc, parent); 2820 } 2821 2822 static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb) 2823 { 2824 u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len); 2825 2826 if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) || 2827 nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) || 2828 nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM, 2829 mrt->mroute_reg_vif_num) || 2830 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT, 2831 mrt->mroute_do_assert) || 2832 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim)) 2833 return false; 2834 2835 return true; 2836 } 2837 2838 static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb) 2839 { 2840 struct nlattr *vif_nest; 2841 struct vif_device *vif; 2842 2843 /* if the VIF doesn't exist just continue */ 2844 if (!VIF_EXISTS(mrt, vifid)) 2845 return true; 2846 2847 vif = &mrt->vif_table[vifid]; 2848 vif_nest = nla_nest_start(skb, IPMRA_VIF); 2849 if (!vif_nest) 2850 return false; 2851 if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) || 2852 nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) || 2853 nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) || 2854 nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in, 2855 IPMRA_VIFA_PAD) || 2856 nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, vif->bytes_out, 2857 IPMRA_VIFA_PAD) || 2858 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, vif->pkt_in, 2859 IPMRA_VIFA_PAD) || 2860 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, vif->pkt_out, 2861 IPMRA_VIFA_PAD) || 2862 nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) || 2863 nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) { 2864 nla_nest_cancel(skb, vif_nest); 2865 return false; 2866 } 2867 nla_nest_end(skb, vif_nest); 2868 2869 return true; 2870 } 2871 2872 static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb) 2873 { 2874 struct net *net = sock_net(skb->sk); 2875 struct nlmsghdr *nlh = NULL; 2876 unsigned int t = 0, s_t; 2877 unsigned int e = 0, s_e; 2878 struct mr_table *mrt; 2879 2880 s_t = cb->args[0]; 2881 s_e = cb->args[1]; 2882 2883 ipmr_for_each_table(mrt, net) { 2884 struct nlattr *vifs, *af; 2885 struct ifinfomsg *hdr; 2886 u32 i; 2887 2888 if (t < s_t) 2889 goto skip_table; 2890 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, 2891 cb->nlh->nlmsg_seq, RTM_NEWLINK, 2892 sizeof(*hdr), NLM_F_MULTI); 2893 if (!nlh) 2894 break; 2895 2896 hdr = nlmsg_data(nlh); 2897 memset(hdr, 0, sizeof(*hdr)); 2898 hdr->ifi_family = RTNL_FAMILY_IPMR; 2899 2900 af = nla_nest_start(skb, IFLA_AF_SPEC); 2901 if (!af) { 2902 nlmsg_cancel(skb, nlh); 2903 goto out; 2904 } 2905 2906 if (!ipmr_fill_table(mrt, skb)) { 2907 nlmsg_cancel(skb, nlh); 2908 goto out; 2909 } 2910 2911 vifs = nla_nest_start(skb, IPMRA_TABLE_VIFS); 2912 if (!vifs) { 2913 nla_nest_end(skb, af); 2914 nlmsg_end(skb, nlh); 2915 goto out; 2916 } 2917 for (i = 0; i < mrt->maxvif; i++) { 2918 if (e < s_e) 2919 goto skip_entry; 2920 if (!ipmr_fill_vif(mrt, i, skb)) { 2921 nla_nest_end(skb, vifs); 2922 nla_nest_end(skb, af); 2923 nlmsg_end(skb, nlh); 2924 goto out; 2925 } 2926 skip_entry: 2927 e++; 2928 } 2929 s_e = 0; 2930 e = 0; 2931 nla_nest_end(skb, vifs); 2932 nla_nest_end(skb, af); 2933 nlmsg_end(skb, nlh); 2934 skip_table: 2935 t++; 2936 } 2937 2938 out: 2939 cb->args[1] = e; 2940 cb->args[0] = t; 2941 2942 return skb->len; 2943 } 2944 2945 #ifdef CONFIG_PROC_FS 
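/* Locking note: the vif seq file below walks vif_table under mrt_lock,
 * while the MFC seq file walks the resolved list under RCU and the
 * unresolved queue under mfc_unres_lock.
 */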
2946 /* The /proc interfaces to multicast routing : 2947 * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif 2948 */ 2949 struct ipmr_vif_iter { 2950 struct seq_net_private p; 2951 struct mr_table *mrt; 2952 int ct; 2953 }; 2954 2955 static struct vif_device *ipmr_vif_seq_idx(struct net *net, 2956 struct ipmr_vif_iter *iter, 2957 loff_t pos) 2958 { 2959 struct mr_table *mrt = iter->mrt; 2960 2961 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { 2962 if (!VIF_EXISTS(mrt, iter->ct)) 2963 continue; 2964 if (pos-- == 0) 2965 return &mrt->vif_table[iter->ct]; 2966 } 2967 return NULL; 2968 } 2969 2970 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) 2971 __acquires(mrt_lock) 2972 { 2973 struct ipmr_vif_iter *iter = seq->private; 2974 struct net *net = seq_file_net(seq); 2975 struct mr_table *mrt; 2976 2977 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 2978 if (!mrt) 2979 return ERR_PTR(-ENOENT); 2980 2981 iter->mrt = mrt; 2982 2983 read_lock(&mrt_lock); 2984 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1) 2985 : SEQ_START_TOKEN; 2986 } 2987 2988 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2989 { 2990 struct ipmr_vif_iter *iter = seq->private; 2991 struct net *net = seq_file_net(seq); 2992 struct mr_table *mrt = iter->mrt; 2993 2994 ++*pos; 2995 if (v == SEQ_START_TOKEN) 2996 return ipmr_vif_seq_idx(net, iter, 0); 2997 2998 while (++iter->ct < mrt->maxvif) { 2999 if (!VIF_EXISTS(mrt, iter->ct)) 3000 continue; 3001 return &mrt->vif_table[iter->ct]; 3002 } 3003 return NULL; 3004 } 3005 3006 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) 3007 __releases(mrt_lock) 3008 { 3009 read_unlock(&mrt_lock); 3010 } 3011 3012 static int ipmr_vif_seq_show(struct seq_file *seq, void *v) 3013 { 3014 struct ipmr_vif_iter *iter = seq->private; 3015 struct mr_table *mrt = iter->mrt; 3016 3017 if (v == SEQ_START_TOKEN) { 3018 seq_puts(seq, 3019 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); 3020 } else { 3021 const struct vif_device *vif = v; 3022 const char *name = vif->dev ? 
vif->dev->name : "none"; 3023 3024 seq_printf(seq, 3025 "%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", 3026 vif - mrt->vif_table, 3027 name, vif->bytes_in, vif->pkt_in, 3028 vif->bytes_out, vif->pkt_out, 3029 vif->flags, vif->local, vif->remote); 3030 } 3031 return 0; 3032 } 3033 3034 static const struct seq_operations ipmr_vif_seq_ops = { 3035 .start = ipmr_vif_seq_start, 3036 .next = ipmr_vif_seq_next, 3037 .stop = ipmr_vif_seq_stop, 3038 .show = ipmr_vif_seq_show, 3039 }; 3040 3041 static int ipmr_vif_open(struct inode *inode, struct file *file) 3042 { 3043 return seq_open_net(inode, file, &ipmr_vif_seq_ops, 3044 sizeof(struct ipmr_vif_iter)); 3045 } 3046 3047 static const struct file_operations ipmr_vif_fops = { 3048 .open = ipmr_vif_open, 3049 .read = seq_read, 3050 .llseek = seq_lseek, 3051 .release = seq_release_net, 3052 }; 3053 3054 struct ipmr_mfc_iter { 3055 struct seq_net_private p; 3056 struct mr_table *mrt; 3057 struct list_head *cache; 3058 }; 3059 3060 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, 3061 struct ipmr_mfc_iter *it, loff_t pos) 3062 { 3063 struct mr_table *mrt = it->mrt; 3064 struct mfc_cache *mfc; 3065 3066 rcu_read_lock(); 3067 it->cache = &mrt->mfc_cache_list; 3068 list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) 3069 if (pos-- == 0) 3070 return mfc; 3071 rcu_read_unlock(); 3072 3073 spin_lock_bh(&mfc_unres_lock); 3074 it->cache = &mrt->mfc_unres_queue; 3075 list_for_each_entry(mfc, it->cache, list) 3076 if (pos-- == 0) 3077 return mfc; 3078 spin_unlock_bh(&mfc_unres_lock); 3079 3080 it->cache = NULL; 3081 return NULL; 3082 } 3083 3084 3085 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 3086 { 3087 struct ipmr_mfc_iter *it = seq->private; 3088 struct net *net = seq_file_net(seq); 3089 struct mr_table *mrt; 3090 3091 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); 3092 if (!mrt) 3093 return ERR_PTR(-ENOENT); 3094 3095 it->mrt = mrt; 3096 it->cache = NULL; 3097 return *pos ? 
ipmr_mfc_seq_idx(net, seq->private, *pos - 1) 3098 : SEQ_START_TOKEN; 3099 } 3100 3101 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3102 { 3103 struct ipmr_mfc_iter *it = seq->private; 3104 struct net *net = seq_file_net(seq); 3105 struct mr_table *mrt = it->mrt; 3106 struct mfc_cache *mfc = v; 3107 3108 ++*pos; 3109 3110 if (v == SEQ_START_TOKEN) 3111 return ipmr_mfc_seq_idx(net, seq->private, 0); 3112 3113 if (mfc->list.next != it->cache) 3114 return list_entry(mfc->list.next, struct mfc_cache, list); 3115 3116 if (it->cache == &mrt->mfc_unres_queue) 3117 goto end_of_list; 3118 3119 /* exhausted cache_array, show unresolved */ 3120 rcu_read_unlock(); 3121 it->cache = &mrt->mfc_unres_queue; 3122 3123 spin_lock_bh(&mfc_unres_lock); 3124 if (!list_empty(it->cache)) 3125 return list_first_entry(it->cache, struct mfc_cache, list); 3126 3127 end_of_list: 3128 spin_unlock_bh(&mfc_unres_lock); 3129 it->cache = NULL; 3130 3131 return NULL; 3132 } 3133 3134 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) 3135 { 3136 struct ipmr_mfc_iter *it = seq->private; 3137 struct mr_table *mrt = it->mrt; 3138 3139 if (it->cache == &mrt->mfc_unres_queue) 3140 spin_unlock_bh(&mfc_unres_lock); 3141 else if (it->cache == &mrt->mfc_cache_list) 3142 rcu_read_unlock(); 3143 } 3144 3145 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 3146 { 3147 int n; 3148 3149 if (v == SEQ_START_TOKEN) { 3150 seq_puts(seq, 3151 "Group Origin Iif Pkts Bytes Wrong Oifs\n"); 3152 } else { 3153 const struct mfc_cache *mfc = v; 3154 const struct ipmr_mfc_iter *it = seq->private; 3155 const struct mr_table *mrt = it->mrt; 3156 3157 seq_printf(seq, "%08X %08X %-3hd", 3158 (__force u32) mfc->mfc_mcastgrp, 3159 (__force u32) mfc->mfc_origin, 3160 mfc->mfc_parent); 3161 3162 if (it->cache != &mrt->mfc_unres_queue) { 3163 seq_printf(seq, " %8lu %8lu %8lu", 3164 mfc->mfc_un.res.pkt, 3165 mfc->mfc_un.res.bytes, 3166 mfc->mfc_un.res.wrong_if); 3167 for (n = mfc->mfc_un.res.minvif; 3168 n < mfc->mfc_un.res.maxvif; n++) { 3169 if (VIF_EXISTS(mrt, n) && 3170 mfc->mfc_un.res.ttls[n] < 255) 3171 seq_printf(seq, 3172 " %2d:%-3d", 3173 n, mfc->mfc_un.res.ttls[n]); 3174 } 3175 } else { 3176 /* unresolved mfc_caches don't contain 3177 * pkt, bytes and wrong_if values 3178 */ 3179 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); 3180 } 3181 seq_putc(seq, '\n'); 3182 } 3183 return 0; 3184 } 3185 3186 static const struct seq_operations ipmr_mfc_seq_ops = { 3187 .start = ipmr_mfc_seq_start, 3188 .next = ipmr_mfc_seq_next, 3189 .stop = ipmr_mfc_seq_stop, 3190 .show = ipmr_mfc_seq_show, 3191 }; 3192 3193 static int ipmr_mfc_open(struct inode *inode, struct file *file) 3194 { 3195 return seq_open_net(inode, file, &ipmr_mfc_seq_ops, 3196 sizeof(struct ipmr_mfc_iter)); 3197 } 3198 3199 static const struct file_operations ipmr_mfc_fops = { 3200 .open = ipmr_mfc_open, 3201 .read = seq_read, 3202 .llseek = seq_lseek, 3203 .release = seq_release_net, 3204 }; 3205 #endif 3206 3207 #ifdef CONFIG_IP_PIMSM_V2 3208 static const struct net_protocol pim_protocol = { 3209 .handler = pim_rcv, 3210 .netns_ok = 1, 3211 }; 3212 #endif 3213 3214 static unsigned int ipmr_seq_read(struct net *net) 3215 { 3216 ASSERT_RTNL(); 3217 3218 return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net); 3219 } 3220 3221 static int ipmr_dump(struct net *net, struct notifier_block *nb) 3222 { 3223 struct mr_table *mrt; 3224 int err; 3225 3226 err = ipmr_rules_dump(net, nb); 3227 if (err) 3228 return err; 3229 3230 ipmr_for_each_table(mrt, net) 
{
		struct vif_device *v = &mrt->vif_table[0];
		struct mfc_cache *mfc;
		int vifi;

		/* Notify on table VIF entries */
		read_lock(&mrt_lock);
		for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
			if (!v->dev)
				continue;

			call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD,
						     v, vifi, mrt->id);
		}
		read_unlock(&mrt_lock);

		/* Notify on table MFC entries */
		list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
			call_ipmr_mfc_entry_notifier(nb, net,
						     FIB_EVENT_ENTRY_ADD, mfc,
						     mrt->id);
	}

	return 0;
}

static const struct fib_notifier_ops ipmr_notifier_ops_template = {
	.family = RTNL_FAMILY_IPMR,
	.fib_seq_read = ipmr_seq_read,
	.fib_dump = ipmr_dump,
	.owner = THIS_MODULE,
};

static int __net_init ipmr_notifier_init(struct net *net)
{
	struct fib_notifier_ops *ops;

	net->ipv4.ipmr_seq = 0;

	ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);
	net->ipv4.ipmr_notifier_ops = ops;

	return 0;
}

static void __net_exit ipmr_notifier_exit(struct net *net)
{
	fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops);
	net->ipv4.ipmr_notifier_ops = NULL;
}

/* Setup for IP multicast routing */
static int __net_init ipmr_net_init(struct net *net)
{
	int err;

	err = ipmr_notifier_init(net);
	if (err)
		goto ipmr_notifier_fail;

	err = ipmr_rules_init(net);
	if (err < 0)
		goto ipmr_rules_fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create("ip_mr_vif", 0, net->proc_net, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_create("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip_mr_vif", net->proc_net);
proc_vif_fail:
	ipmr_rules_exit(net);
#endif
ipmr_rules_fail:
	ipmr_notifier_exit(net);
ipmr_notifier_fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip_mr_cache", net->proc_net);
	remove_proc_entry("ip_mr_vif", net->proc_net);
#endif
	ipmr_notifier_exit(net);
	ipmr_rules_exit(net);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
				       NULL);

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
		      ipmr_rtm_getroute, ipmr_rtm_dumproute, 0);
	rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE,
		      ipmr_rtm_route, NULL, 0);
	rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE,
		      ipmr_rtm_route, NULL, 0);

	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK,
		      NULL, ipmr_rtm_dumplink, 0);
	return 0;
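
/* Error unwind: undo each registration in the reverse order it was made. */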
#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}